thead-kernel/drivers/nna/vha/single/vha_dev.c

/*
 *****************************************************************************
 * Copyright (c) Imagination Technologies Ltd.
 *
 * The contents of this file are subject to the MIT license as set out below.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 * Alternatively, the contents of this file may be used under the terms of the
 * GNU General Public License Version 2 ("GPL")in which case the provisions of
 * GPL are applicable instead of those above.
 *
 * If you wish to allow use of your version of this file only under the terms
 * of GPL, and not to allow others to use your version of this file under the
 * terms of the MIT license, indicate your decision by deleting the provisions
 * above and replace them with the notice and other provisions required by GPL
 * as set out in the file called "GPLHEADER" included in this distribution. If
 * you do not delete the provisions above, a recipient may use your version of
 * this file under the terms of either the MIT license or GPL.
 *
 * This License is also included in this distribution in the file called
 * "MIT_COPYING".
 *
 *****************************************************************************/

#include <linux/delay.h>
#include <linux/irq.h>
#include <linux/moduleparam.h>
#include <linux/pm_runtime.h>
#include <linux/slab.h>

#include <uapi/vha.h>
#include "vha_common.h"
#include "vha_plat.h"
#include "vha_regs.h"

#if defined(CFG_SYS_VAGUS)
#include <hwdefs/nn_sys_cr_vagus.h>
#endif

#include <vha_trace_point.h>

/*
 * Expands to two comma-separated initializers: the VHA_CR_OS() form of
 * VHA_EVENT_STATUS_VHA_<b>_EN followed by the event name <b> as a string.
 * Used below to fill the bit-mask and description fields of
 * struct vha_biterr table entries.
 */
#define ERR_EVENT_DESC(b) VHA_CR_OS(VHA_EVENT_STATUS_VHA_##b##_EN), __stringify(b)

/*
 * Quiesce event generation before powering the core down.
 *
 * Writes, in this order: the default event set to VHA_EVENT_CLEAR, zero to
 * VHA_EVENT_ENABLE, zero to CNN_CONTROL and zero to VHA_EVENT_ENABLE again.
 * Every write is issued through IOWRITE64_PDUMP and most are preceded by a
 * pdump comment line.
 */
static void vha_dev_disable_events(struct vha_dev *vha)
{
	img_pdump_printf("-- Clear CNN events\n");
	IOWRITE64_PDUMP(VHA_EVNTS_DEFAULT, VHA_CR_OS(VHA_EVENT_CLEAR));
	img_pdump_printf("-- Disable CNN events\n");
	IOWRITE64_PDUMP(0, VHA_CR_OS(VHA_EVENT_ENABLE));
	/* Clear the START bit !
	 * Note: It is stated that writing 0 to this bit has no effect,
	 * however in error cases, some hw blocks may start
	 * to process previous requests after turning on the clocks
	 * which was previously disabled */
	IOWRITE64_PDUMP(0, VHA_CR_OS(CNN_CONTROL));

	/* Disable core events */
	/* NOTE(review): this targets the same VHA_EVENT_ENABLE register as the
	 * "Disable CNN events" write above - confirm that is intended. */
	img_pdump_printf("-- Disable CORE events\n");
	IOWRITE64_PDUMP(0, VHA_CR_OS(VHA_EVENT_ENABLE));
}

/*
 * Program the clock control registers with their default AUTO settings.
 *
 * SYS clocks are written first, then MAIN clocks; when CFG_SYS_VAGUS is
 * defined the NN_SYS clock control register is set to its AUTO mode as well.
 */
__maybe_unused
static void vha_dev_enable_clocks(struct vha_dev *vha)
{
	uint64_t __maybe_unused sys_clks = 0;
	uint64_t __maybe_unused main_clks = 0;

	/* Always AUTO gating  when needed */
	sys_clks = VHA_SYS_CLOCKS_DEFAULT(AUTO);
	main_clks = VHA_MAIN_CLOCKS_DEFAULT(AUTO);
	/* Enable sys clocks ! */
	img_pdump_printf("-- Enable SYS clocks\n");
	IOWRITE64_PDUMP(sys_clks, VHA_CR_SYS_CLK_CTRL0);
	/* Enable main clocks ! */
	img_pdump_printf("-- Enable MAIN clocks\n");
	IOWRITE64_PDUMP(main_clks, VHA_CR_CLK_CTRL0);
#if defined(CFG_SYS_VAGUS)
	/* Vagus system wrapper has its own clock control register */
	img_pdump_printf("-- Enable NN_SYS clocks\n");
	IOWRITE64_PDUMP_REGIO(NN_SYS_CR_CLK_CTRL_MODE_AUTO,
			NN_SYS_CR_BASE, NN_SYS_CR_CLK_CTRL, "REG_NNSYS");
#endif
}

/*
 * Final bring-up step: enable and clear all default events and reset the
 * status/debug/performance registers.
 *
 * Unless CONFIG_VHA_DUMMY is defined, this is a no-op while vha->is_ready
 * is false.
 */
static void vha_dev_ready(struct vha_dev *vha)
{
#ifndef CONFIG_VHA_DUMMY
	/* Nothing to do until the core has been flagged ready */
	if (!vha->is_ready)
		return;
#endif
	dev_dbg(vha->dev, "%s\n", __func__);

	/* NOTE(review): vha_dev_wait() is defined elsewhere; presumably it
	 * waits for the hw to settle before events are enabled - confirm. */
	vha_dev_wait(vha);

	/* Finally enable ALL events */
	img_pdump_printf("-- Enable ALL events\n");
	IOWRITE64_PDUMP(VHA_EVNTS_DEFAULT, VHA_CR_OS(VHA_EVENT_ENABLE));
	img_pdump_printf("-- Clear ALL events\n");
	IOWRITE64_PDUMP(VHA_EVNTS_DEFAULT, VHA_CR_OS(VHA_EVENT_CLEAR));
#ifdef HW_AX2
	/* CNN_STATUS is only zeroed on HW_AX2 builds */
	img_pdump_printf("-- Clear CNN status\n");
	IOWRITE64_PDUMP(0, VHA_CR_OS(CNN_STATUS));
#endif
	img_pdump_printf("-- Clear MMU fault status\n");
	IOWRITE64_PDUMP(0, VHA_CR_OS(MMU_FAULT_STATUS1));
	img_pdump_printf("-- Clear SLC debug status\n");
	IOWRITE64_PDUMP(0, VHA_CR_SLC_STATUS_DEBUG);
	img_pdump_printf("-- Reset PERF counters\n");
	IOWRITE64_PDUMP(0, VHA_CR_PERF_RESET_FULL);
}

/*
 * Reset the core: write the default reset bits and poll until the reset
 * control register reads back as zero under its full mask.
 *
 * With CFG_SYS_VAGUS the NN_SYS reset register is set and polled too.
 *
 * Returns 0 unconditionally; the result of the polls is not propagated.
 */
__maybe_unused
static int vha_dev_reset(struct vha_dev *vha)
{
	img_pdump_printf("-- Set RESET bits\n");
#if defined(CFG_SYS_VAGUS)
	IOWRITE64_PDUMP_REGIO(NN_SYS_CR_RESET_CTRL_NN_SYS_EN,
			NN_SYS_CR_BASE, NN_SYS_CR_RESET_CTRL, "REG_NNSYS");
#endif
	/* Perform reset procedure */
	IOWRITE64_PDUMP(VHA_RESET_DEFAULT, VHA_CR_RESET_CTRL);

	/* poll for reset deassertion
	 * count=16, delay=256cycles
	 */
	img_pdump_printf("-- Wait for RESET deassertion\n");
#if defined(CFG_SYS_VAGUS)
	IOPOLL64_PDUMP_REGIO(0, 16, 256, NN_SYS_CR_RESET_CTRL_MASKFULL,
			NN_SYS_CR_BASE, NN_SYS_CR_RESET_CTRL, "REG_NNSYS");
#endif
	IOPOLL64_PDUMP(0, 16, 256, VHA_CR_RESET_CTRL_MASKFULL,
					VHA_CR_RESET_CTRL);
	return 0;
}

/*
 * Wait for the core to go idle and then switch its clocks off.
 *
 * Polls the clock status register for zero and the SLC idle register for
 * 0xFFFF, then writes zero to the MAIN and SYS clock control registers
 * (and to the NN_SYS clock control register with CFG_SYS_VAGUS).
 *
 * Returns 0 unconditionally; the result of the polls is not propagated.
 */
__maybe_unused
static int vha_dev_disable_clocks(struct vha_dev *vha)
{
	/* If auto gating was turned on, wait for clocks idle state */
	/* NOTE(review): poll arguments presumably are (expected value, count,
	 * delay, mask, register), by analogy with vha_dev_reset() - confirm. */
	img_pdump_printf("-- Wait for clocks IDLE state\n");
	IOPOLL64_PDUMP(0, 1000, 1000,
			VHA_CR_CLK_STATUS0_MASKFULL,
			VHA_CR_CLK_STATUS0);
#if defined(CFG_SYS_VAGUS)
	IOPOLL64_PDUMP_REGIO(0, 100, 1000, NN_SYS_CR_CLK_STATUS_MASKFULL,
			NN_SYS_CR_BASE, NN_SYS_CR_CLK_STATUS, "REG_NNSYS");
#endif
	/* Wait for MMU,CCM,RDI,XBAR  IDLE state */
	img_pdump_printf("-- Wait for memory bus interface IDLE state\n");
	IOPOLL64_PDUMP(0xFFFF, 100, 1000, VHA_CR_SLC_IDLE_MASKFULL,
			VHA_CR_SLC_IDLE);

	/* Finally disable clocks */
	img_pdump_printf("-- Disable MAIN clocks\n");
	IOWRITE64_PDUMP(0, VHA_CR_CLK_CTRL0); /* main */
	img_pdump_printf("-- Disable SYS clocks\n");
	IOWRITE64_PDUMP(0, VHA_CR_SYS_CLK_CTRL0); /* sys */
#if defined(CFG_SYS_VAGUS)
	img_pdump_printf("-- NN_SYS clocks\n");
	IOWRITE64_PDUMP_REGIO(0, NN_SYS_CR_BASE,
			NN_SYS_CR_CLK_CTRL, "REG_NNSYS"); /* nn_sys */
#endif
	return 0;
}

/*
 * Start (power on) the device.
 *
 * Any scheduled APM work is cancelled first. If the device is already in
 * VHA_STATE_ON nothing else is done. Otherwise, when _OSID_ is 0, a runtime
 * PM reference is taken and the core is prepared, reset, has its clocks
 * enabled and is set up. Finally events are enabled via vha_dev_ready(),
 * the state is switched to VHA_STATE_ON and the power-on time is recorded.
 *
 * Returns 0 on success (including "already on"), otherwise the error code
 * reported by vha_dev_prepare() or vha_dev_reset(). On failure the runtime
 * PM reference taken here is released again, because the state stays OFF
 * and vha_dev_stop() would therefore never drop it.
 */
int vha_dev_start(struct vha_dev *vha)
{
	int ret = 0;

	/* Cancel APM request if new inference comes */
	cancel_delayed_work(&vha->apm_dworks[0].dwork);

	if (vha->state == VHA_STATE_ON)
		return 0; /* not an error */

	dev_dbg(vha->dev, "%s\n", __func__);

/* Assuming OS0 is the privileged one */
#if _OSID_ == 0 /* For HW_AX2 this is always true */
	pm_runtime_get_sync(vha->dev);
	/////////////// POWER ON //////////////////////////
	img_pdump_printf("-- POWER_ON_BEGIN\n");

	/* Prepare device ...  */
	ret = vha_dev_prepare(vha);
	if (ret) {
		dev_err(vha->dev, "%s: Error preparing device!\n", __func__);
		/* Balance the pm_runtime_get_sync() above */
		pm_runtime_put_sync(vha->dev);
		return ret;
	}
	/* Reset device */
	ret = vha_dev_reset(vha);
	if (ret) {
		dev_err(vha->dev, "%s: Error reseting device!\n", __func__);
		/* Balance the pm_runtime_get_sync() above */
		pm_runtime_put_sync(vha->dev);
		return ret;
	}
	/* Enable device clocks */
	vha_dev_enable_clocks(vha);
	img_pdump_printf("-- POWER_ON_END\n");
	/* Call device specific setup */
	vha_dev_setup(vha);
	/////////////////////////////////////////////////////
#endif

	vha_dev_ready(vha);

	vha->state = VHA_STATE_ON;
	/* Remember the time hw is powered on */
	GETNSTIMEOFDAY(&vha->stats.hw_start);
	return ret;
}

/*
 * Stop (power off) the device.
 *
 * @vha:   device to stop
 * @reset: when true the core is reset (with clocks forced to AUTO first)
 *         before its clocks are disabled, and the runtime PM reference is
 *         dropped with autosuspend instead of a plain synchronous put.
 *
 * Does nothing if the device is already in VHA_STATE_OFF. Otherwise events
 * are disabled, is_ready is cleared, the power-off sequence runs (only when
 * _OSID_ is 0), the uptime/utilization statistics are updated (skipped
 * while do_calibration is set) and the IRQ bookkeeping is zeroed under
 * irq_lock.
 *
 * Returns the result of vha_dev_disable_clocks() when _OSID_ is 0,
 * otherwise 0.
 */
int vha_dev_stop(struct vha_dev *vha, bool reset)
{
	int ret = 0;

	if (vha->state == VHA_STATE_OFF)
		return 0;  /* not an error */

	/* Cancel APM request if we are about to power off the core */
	cancel_delayed_work(&vha->apm_dworks[0].dwork);

	dev_dbg(vha->dev, "%s\n", __func__);
	/* Disable events at first */
	vha_dev_disable_events(vha);

	vha->is_ready = false;
/* Assuming OS0 is the privileged one */
#if _OSID_ == 0 /* For HW_AX2 */
	/////////////// POWER_OFF //////////////////////////
	img_pdump_printf("-- POWER_OFF_BEGIN\n");
	/* Reset core in case of error or pending inference */
	if (reset) {
		/* ensure that clocks are set to AUTO before reset */
		vha_dev_enable_clocks(vha);
		ret = vha_dev_reset(vha);
	}
	if(ret)
		dev_warn(vha->dev,
			"%s: Problem with resetting device!\n",
			__func__);

	/* Disable device clocks */
	/* Note: a reset failure is only logged above; ret is overwritten here */
	ret = vha_dev_disable_clocks(vha);
	if(ret)
		dev_warn(vha->dev,
					"%s: Problem with disabling clocks!\n",
					__func__);

	img_pdump_printf("-- POWER_OFF_END\n");
	/////////////////////////////////////////////////////
	/* Drop the runtime PM reference taken in vha_dev_start() */
	if (reset) {
		pm_runtime_mark_last_busy(vha->dev);
		pm_runtime_put_sync_autosuspend(vha->dev);
	} else {
		pm_runtime_put_sync(vha->dev);
	}
#endif

	vha->state = VHA_STATE_OFF;
	/* Update the up time of the core */
	if (!vha->do_calibration) {
		uint64_t tmp = 0;
		struct TIMESPEC now;
		GETNSTIMEOFDAY(&now);
		if (get_timespan_us(&vha->stats.hw_start, &now, &tmp)) {
			/* microseconds -> milliseconds */
			do_div(tmp, 1000UL);
			vha->stats.uptime_ms += tmp;
			/* Utilization divides by uptime_ms, so it must be non-zero */
			if (vha->stats.uptime_ms)
				vha_update_utilization(vha);
			else
				dev_dbg(vha->dev,
					"%s Too short execution time to calculate utilization!\n",
					__func__);
		} else
			WARN_ON(1);
	}

	vha->active_mmu_ctx = VHA_INVALID_ID;

	/* Forget any IRQ state left over from the session that just ended */
	spin_lock_irq(&vha->irq_lock);
	vha->irq_status = 0;
	vha->irq_count = 0;
	vha->stream_count = 0;
	spin_unlock_irq(&vha->irq_lock);

	return ret;
}

/*
 * Recompute vha->stats.cnn_utilization as
 * stats.cnn_total_proc_us / stats.uptime_ms.
 *
 * If no uptime has been accumulated yet the division would be by zero, so
 * the previous utilization value is left untouched in that case.
 */
void vha_update_utilization(struct vha_dev *vha)
{
	uint64_t tmp;

	/* Guard the do_div() below against a zero divisor */
	if (!vha->stats.uptime_ms)
		return;

	tmp = vha->stats.cnn_total_proc_us;
	do_div(tmp, vha->stats.uptime_ms);
	vha->stats.cnn_utilization = tmp;
}

#ifdef VHA_EVENT_INJECT
/*
 * Inject EVENT_STATUS bits, requested by respective debugfs nodes, to
 * the status register.
 *
 * Nothing is injected unless __EVENT_INJECT() is true and the CNN0_COMPLETE
 * bit is set in *event_status; in that case vha->injection.vha_cr_event is
 * OR-ed into *event_status.
 */
static inline void __inject_event_regs(struct vha_dev *vha, uint64_t *event_status)
{
	if (!__EVENT_INJECT())
		return;

	/* Build the mask in 64 bits to match the width of the status word */
	if (*event_status & (1ULL << VHA_CR_VHA_EVENT_STATUS_TYPE_VHA_CNN0_COMPLETE_SHIFT))
		*event_status |= vha->injection.vha_cr_event;
}
#endif

/*
 * Top half
 *
 * Reads the enabled event status, acknowledges the interrupt and stores the
 * status and kick count in vha->irq_status / vha->irq_count for the
 * threaded handler.
 *
 * Returns IRQ_NONE when there is no driver data, the hardware looks dead,
 * or none of the default events is set; IRQ_WAKE_THREAD otherwise.
 */
irqreturn_t vha_handle_irq(struct device *dev)
{
	struct vha_dev *vha = vha_dev_get_drvdata(dev);
	int ret = IRQ_HANDLED;
	uint64_t event_status;

	if (!vha)
		return IRQ_NONE;

	/* Only consider events that are currently enabled */
	event_status = IOREAD64(vha->reg_base, VHA_CR_OS(VHA_EVENT_STATUS));
	event_status &= IOREAD64(vha->reg_base, VHA_CR_OS(VHA_EVENT_ENABLE));
	/* On fpga platform it is possible to get
	 * a spurious interrupt when the hw died
	 * Do not proceed, just throw a warning */
	if (event_status == VHA_DEAD_HW || event_status == ~0) {
		WARN_ONCE(1, "Hardware is dead!");
		return IRQ_NONE;
	}

#ifdef VHA_EVENT_INJECT
	__inject_event_regs(vha, &event_status);
#endif

#ifdef VHA_SCF
	/* Verify the parity bit carried in the event status word */
	if (vha->hw_props.supported.parity &&
			!vha->parity_disable) {
		bool par_bit = img_mem_calc_parity(event_status &
				~VHA_CR_BITMASK(VHA_EVENT_STATUS_TYPE, PARITY));
		if (par_bit !=
				VHA_CR_GETBITS(VHA_EVENT_STATUS_TYPE, PARITY,
						event_status)) {
			dev_err(dev, "Event status register parity error!\n");
			/* Use the real event to indicate the error */
			event_status |=  VHA_CR_OS(VHA_EVENT_STATUS_VHA_PARITY_ERROR_EN);
		}
		/* Clear the PARITY bit - it's not a valid event */
		VHA_CR_CLEARBITS(event_status, VHA_EVENT_STATUS_TYPE, PARITY);
	}
#endif

	if (event_status & VHA_EVNTS_DEFAULT) {
		uint64_t cnn_status;
		uint8_t count;

		/* clear the interrupt:
		 * best not to write pdump in interrupts */
		IOWRITE64(vha->reg_base, VHA_CR_OS(VHA_EVENT_CLEAR),
				event_status & VHA_EVNTS_DEFAULT);

		/* Read the stream count as single IRQ may be raised for multiple kicks */
		cnn_status = IOREAD64(vha->reg_base, VHA_CR_OS(CNN_STATUS));

#ifdef VHA_SCF
		/* Same parity verification for the CNN status word */
		if (vha->hw_props.supported.parity &&
				!vha->parity_disable) {
			bool par_bit = img_mem_calc_parity(cnn_status &
					~VHA_CR_BITMASK_OS(CNN_STATUS, PARITY));
			if (par_bit != VHA_CR_GETBITS_OS(CNN_STATUS, PARITY, cnn_status)) {
				dev_err(dev, "CNN status register parity error!\n");
				/* Use the real event to indicate the error */
				event_status |=  VHA_CR_OS(VHA_EVENT_STATUS_VHA_PARITY_ERROR_EN);
			}
		}
#endif
		if (vha->is_ready) {
			/* Post check for AXI bus errors */
			uint64_t ace_status = IOREAD64(vha->reg_base, VHA_CR_ACE_STATUS);
			if (ace_status) {
				dev_err(vha->dev, "AXI bus protocol error: %#llx\n",
							ace_status);
				/* Use AXI error event to indicate that */
				event_status |=  VHA_CR_OS(VHA_EVENT_STATUS_VHA_AXI_ERROR_EN);
			}
		}

		/* Read the stream count as single IRQ may be raised for multiple kicks */
		count = VHA_CR_GETBITS_OS(CNN_STATUS, STREAM_COUNT, cnn_status);

		/* irq_lock protects the state shared with the threaded handler */
		spin_lock(&vha->irq_lock);
		/* store the status to be processed later */
		if (vha->do_calibration ||
				vha_is_busy(vha)) {
			vha->irq_status |= event_status;

			/* Number of kicks finished since the last recorded stream count */
			if (vha->low_latency == VHA_LL_SELF_KICK)
				/* Two separate IRQs may be raised for multiple kicks */
				vha->irq_count += count - vha->stream_count;
			else
				/* Only single IRQ may be raised otherwise ... */
				vha->irq_count = count - vha->stream_count;

			vha->stream_count = count;
			/* Record hw processing end timestamps */
			vha->stats.hw_proc_end_prev = vha->stats.hw_proc_end;
			GETNSTIMEOFDAY(&vha->stats.hw_proc_end);
		} else {
			/* Command may have been aborted before this handler is executed */
			vha->irq_status = 0;
			vha->irq_count = 0;
			vha->stream_count = 0;
		}
		spin_unlock(&vha->irq_lock);

		ret = IRQ_WAKE_THREAD;
	} else
		return IRQ_NONE;

	dev_dbg(dev, "IRQ 0x%08llx\n", event_status);

	return ret;
}

/*
 * Roll back the command referenced by *slot, if any.
 *
 * The command is flagged as rolled back and no longer in hw/queued, its
 * issued subsegments are accounted as aborted and returned to the priority
 * queue counter, its completed subsegments are removed from the completed
 * statistic, and the slot is emptied.
 *
 * Returns true if the slot held a command.
 */
static bool vha_rollback_cnn_slot(struct vha_dev *vha, struct vha_cmd **slot)
{
	struct vha_cmd *cmd = *slot;

	if (!cmd)
		return false;

	cmd->in_hw = false;
	cmd->queued = false;
	cmd->rolled_back = true;

	vha->stats.cnn_kicks_aborted += cmd->subseg_current;
	vha->stats.cnn_kicks_completed -= cmd->subsegs_completed;
	vha->pri_q_counters[cmd->user_cmd.priority] += cmd->subseg_current;

	cmd->subseg_current = 0;
	cmd->subsegs_completed = 0;
	*slot = NULL;

	return true;
}

/*
 * Roll back the pending and the queued (low latency) CNN commands.
 *
 * Commands that were not processed are still on the pending list of their
 * session, so it is enough to mark the hw slots as empty.
 *
 * Returns true if a pending command was rolled back.
 */
static bool vha_rollback_cnn_cmds(struct vha_dev *vha)
{
	bool processing;

	/* Only the pending slot decides the return value */
	processing = vha_rollback_cnn_slot(vha, &vha->pendcmd[VHA_CNN_CMD].cmd);
	/* low_latency ...*/
	vha_rollback_cnn_slot(vha, &vha->queuedcmd[VHA_CNN_CMD].cmd);

	dev_dbg(vha->dev, "%s: (%d)\n", __func__, processing);

	return processing;
}

/*
 * Public entry point for rolling back commands held by the hardware.
 * Returns true if a pending CNN command was rolled back.
 */
bool vha_rollback_cmds(struct vha_dev *vha)
{
	bool rolled_back = vha_rollback_cnn_cmds(vha);

	return rolled_back;
}

static bool vha_is_processing(struct vha_dev *vha)
{
	return vha->pendcmd[VHA_CNN_CMD].cmd != NULL;
}

/*
 * Suspend helper: force the hardware off and roll back whatever it held.
 *
 * The core is reset during the stop only if a command was being processed.
 * Returns the result of vha_dev_stop().
 */
int vha_dev_suspend_work(struct vha_dev *vha)
{
	/* Sample the state first; stopping happens before the rollback */
	const bool was_processing = vha_is_processing(vha);
	const int stop_ret = vha_dev_stop(vha, was_processing);

	/* Commands are rolled back only once the hw is stopped */
	vha_rollback_cmds(vha);

	return stop_ret;
}

/*
 * handles the command already processed by the hw.
 *
 * @vha:    device whose pending CNN command has finished
 * @status: 0 on success, otherwise the error to report for the command
 *
 * Updates processing time statistics, accounts the finished subsegment and
 * either frees the pending slot for the next subsegment or completes and
 * notifies the command. On error the hw slots are rolled back.
 *
 * Returns false if there was no pending command or status was non-zero,
 * true otherwise.
 */
static bool vha_handle_cmd(struct vha_dev *vha, int status)
{
	struct vha_cmd *cmd = NULL;

	cmd = vha->pendcmd[VHA_CNN_CMD].cmd;
	if (unlikely(!cmd)) {
		dev_dbg(vha->dev, "No command. Probably it has been aborted\n");
		return false;
	}

	{
		uint64_t proc_time = 0;
		struct TIMESPEC *from = &cmd->hw_proc_start;
		struct TIMESPEC *to = &vha->stats.hw_proc_end;

		/* Measure from the previous hw end timestamp when it is not
		 * earlier than this command's start timestamp */
		if (TIMESPEC_COMPARE(&vha->stats.hw_proc_end_prev, &cmd->hw_proc_start) >= 0)
			from = &vha->stats.hw_proc_end_prev;

		if (get_timespan_us(from, to, &proc_time)) {
			vha->stats.last_proc_us = proc_time;
		} else {
			vha->stats.last_proc_us = 0;
		}
		/* Update cnn stats */
		vha_cnn_update_stats(vha);

		/* Update cmd stats. */
		cmd->proc_us += vha->stats.cnn_last_proc_us;
		cmd->hw_cycles += vha->stats.cnn_last_cycles;
	}

	/* Mark this subsegment as completed. */
	if (status == 0)
		vha->pendcmd[VHA_CNN_CMD].cmd->subsegs_completed++;
	/* If this isn't the last subsegment, just return to process the next one. */
	if ((cmd->subseg_current < VHA_CMD_SUBSEG_NUM(cmd)) && (status == 0)) {
		vha->pendcmd[VHA_CNN_CMD].cmd->in_hw = false;
		vha->pendcmd[VHA_CNN_CMD].cmd = NULL;
		return true;
	}

	/* Last subsegment or failure: finish the whole command */
	vha_cnn_cmd_completed(cmd, status);

	if (status) {
		/* Rollback any queued command ... */
		vha_rollback_cnn_cmds(vha);
		/* Adjust for just rolled back pending cmd. */
		vha->pri_q_counters[cmd->user_cmd.priority] -= VHA_CMD_SUBSEG_NUM(cmd);
		/* Notify immediately current command */
		vha_cmd_notify(cmd);

		return false;
	}

	/* Promote the queued command (if any) to the pending slot */
	if (vha->queuedcmd[VHA_CNN_CMD].cmd)
		vha->pendcmd[VHA_CNN_CMD].cmd = vha->queuedcmd[VHA_CNN_CMD].cmd;
	else
		vha->pendcmd[VHA_CNN_CMD].cmd = NULL;

	vha->queuedcmd[VHA_CNN_CMD].cmd = NULL;
	dev_dbg(vha->dev,
			"%s: %p -> new pending %p\n",
			__func__, cmd, vha->pendcmd[VHA_CNN_CMD].cmd);

	vha_cmd_notify(cmd);

	return true;
}

static void vha_do_queued_cmd(struct vha_dev *vha)
{
	struct vha_cmd *cmd, *pend;

	cmd = vha->queuedcmd[VHA_CNN_CMD].cmd;

	dev_dbg(vha->dev,
			"%s: queued %p pending %p\n",
			__func__, cmd, vha->pendcmd[VHA_CNN_CMD].cmd);

	if (!cmd || (cmd &&
				((vha->low_latency == VHA_LL_DISABLED ||
				vha->low_latency == VHA_LL_SELF_KICK) ||
						!cmd->queued))) {
		dev_dbg(vha->dev, "%s: skipping!\n", __func__);
		return;
	}

	/* store actual pending command as it will be modified */
	pend = vha->pendcmd[VHA_CNN_CMD].cmd;

	/* at this point we should be able to process the cmd */
	vha_do_cnn_cmd(cmd);

	/* restore pending */
	vha->pendcmd[VHA_CNN_CMD].cmd = pend;
}

/*
 * Report a hardware failure to the error observer and the kernel log.
 *
 * @vha:       device reporting the failure
 * @status:    event status word to decode
 * @bits:      table mapping status bits to a description and an errno
 * @bits_size: number of entries in @bits
 *
 * Returns the errno of the last table entry whose bit is set in @status,
 * or 0 if none matches.
 */
static int vha_report_failure(struct vha_dev *vha, uint64_t status,
		const struct vha_biterr bits[], int bits_size)
{
	struct vha_cmd *pend = vha->pendcmd[VHA_CNN_CMD].cmd;
	int sesid = -1;
	int cmdid = -1;
	int error = 0;
	int idx;

	/* Identify the offending command when one is pending */
	if (pend) {
		cmdid = pend->user_cmd.cmd_id;
		sesid = pend->session->id;
	}

	if (vha_observers.error)
		vha_observers.error(vha->id, sesid, cmdid, status);

	/* Print every matching event in human readable form */
	for (idx = 0; idx < bits_size; idx++) {
		if (!(status & bits[idx].b))
			continue;

		dev_err(vha->dev,
			" event status: %s\n",
			bits[idx].s);
		/* Register bit -> POSIX errno; with several errors the
		 * last matching entry wins */
		error = bits[idx].e;
	}

	return error;
}

/*
 * Handle CNN events reported in the vha event register.
 *
 * Returns 0 when calibration consumed the event or the command was handled
 * successfully, the errno of a reported CNN error, or -ENOENT when
 * vha_handle_cmd() returned false.
 */
static int vha_handle_cnn_event(struct vha_dev *vha, uint64_t event_status)
{
	int rc = 0;

	if (vha_check_calibration(vha))
		return 0;

	if (event_status & VHA_CNN_ERR_EVNTS) {
		static const struct vha_biterr cnn_err_bits[] = {
			{-ETIMEDOUT, ERR_EVENT_DESC(CNN0_MEM_WDT)},
#ifdef HW_AX2
			{-ETIMEDOUT, ERR_EVENT_DESC(CNN0_WDT)},
#endif
			{-EIO,       ERR_EVENT_DESC(CNN0_ERROR)}
		};

		rc = vha_report_failure(vha, event_status,
				cnn_err_bits, ARRAY_SIZE(cnn_err_bits));

		vha_cnn_dump_status(vha);
	}

	/* Without an error, poke the hw for a command that is
	 * already queued in it */
	if (rc == 0)
		vha_do_queued_cmd(vha);

	/* Handle the actual command */
	if (!vha_handle_cmd(vha, rc))
		rc = -ENOENT;

	return rc;
}

#ifdef CONFIG_VHA_DUMMY_SIMULATE_HW_PROCESSING_TIME
/*
 * Delayed work used to simulate hw execution time.
 *
 * When a CNN command is pending, the hw end timestamps are recorded, a
 * completion event is handled for it and the command queues are checked
 * again. All of this runs under vha->lock.
 */
void vha_dummy_worker(struct work_struct *work)
{
	struct vha_dev *vha = container_of(work, struct vha_dev, dummy_dwork.work);

	mutex_lock(&vha->lock);

	if (!vha->pendcmd[VHA_CNN_CMD].cmd)
		goto unlock;

	/* Record hw processing end timestamps */
	vha->stats.hw_proc_end_prev = vha->stats.hw_proc_end;
	GETNSTIMEOFDAY(&vha->stats.hw_proc_end);

	/* Complete the current pending command */
	vha_handle_cnn_event(vha, VHA_CNN_CMPLT_EVNT);
	vha->stats.cnn_kicks_completed++;

	/* Schedule following commands */
	vha_chk_cmd_queues(vha, true);

unlock:
	mutex_unlock(&vha->lock);
}
#endif

/*
 * Bottom half
 *
 * Consumes the status and kick count stored by the top half and, under
 * vha->lock, processes core level and CNN level events once per finished
 * kick. On error the device is stopped with reset and the command queues
 * are re-checked.
 *
 * Returns IRQ_NONE when there is no driver data, IRQ_HANDLED otherwise.
 */
irqreturn_t vha_handle_thread_irq(struct device *dev)
{
	struct vha_dev *vha = vha_dev_get_drvdata(dev);
	irqreturn_t ret = IRQ_HANDLED;
	uint64_t status;
	uint8_t count, c = 0;
	int err = 0;

	if (!vha)
		return IRQ_NONE;

	mutex_lock(&vha->lock);

#ifdef CONFIG_FAULT_INJECTION
	/* Remember the pid of the first task seen running this handler */
	if (!vha->irq_bh_pid)
		vha->irq_bh_pid = task_pid_nr(current);

	if (vha->fault_inject & VHA_FI_IRQ_WORKER)
		current->make_it_fail = true;
	else
		current->make_it_fail = false;
#endif

	/* Take over (and reset) the state accumulated by the top half */
	spin_lock_irq(&vha->irq_lock);
	status = vha->irq_status;
	vha->irq_status = 0;
	count = vha->irq_count;
	vha->irq_count = 0;
	if (!count) {
		uint64_t proc_time = 0;

		/* No finished kick: derive last_proc_us from the global hw timestamps */
		if (get_timespan_us(&vha->stats.hw_proc_start, &vha->stats.hw_proc_end,
					&proc_time)) {
			vha->stats.last_proc_us = proc_time;
		} else {
			vha->stats.last_proc_us = 0;
		}
	}
	spin_unlock_irq(&vha->irq_lock);
	/* Command may have been aborted before this handler is executed */
	if (!status)
		goto exit;

	/* There can be two inferences already finished for self kick mode,
	 * otherwise, only single inference at the time */
	if ((vha->low_latency == VHA_LL_SELF_KICK && count > 2) ||
			(vha->low_latency != VHA_LL_SELF_KICK && count > 1))
		WARN_ON(1);

	dev_dbg(dev, "%s: status:%llx count:%d\n",
			__func__, status, count);

	/* Runs at least once, then once per finished kick until an error occurs */
	do {
		if (status & VHA_CORE_EVNTS) {
			static const struct vha_biterr err_bits[] = {
				{-EIO,       ERR_EVENT_DESC(AXI_ERROR)},
				{-EFAULT,    ERR_EVENT_DESC(MMU_PAGE_FAULT)},
#ifdef HW_AX3
#ifdef VHA_SCF
				{-EIO,       ERR_EVENT_DESC(MMU_PARITY_ERROR)},
				{-EIO,       ERR_EVENT_DESC(PARITY_ERROR)},
				{-EIO,       ERR_EVENT_DESC(LOCKSTEP_ERROR)},
#endif
				{-ETIMEDOUT, ERR_EVENT_DESC(HL_WDT)},
				{-EIO,       ERR_EVENT_DESC(ERROR)}
#endif
			};

#ifdef HW_AX3
			/* A HL_WDT event on a ready core may belong to calibration */
			if (status & VHA_EVENT_TYPE(HL_WDT)
					&& vha->is_ready)
				if (vha_check_calibration(vha))
					break;

			/* READY as the only core event on a not-yet-ready core:
			 * finish bring-up, then start calibration or scheduling */
			if ((status & VHA_CORE_EVNTS)==
					VHA_EVENT_TYPE(READY)
					&& !vha->is_ready) {
				vha->is_ready = true;
				vha_dev_ready(vha);
				if (vha->do_calibration) {
					vha_cnn_start_calib(vha);
					break;
				} else
					vha_chk_cmd_queues(vha, true);
			}
#endif

			err = vha_report_failure(vha, status,
					err_bits, ARRAY_SIZE(err_bits));
			if (err) {
				dev_err(vha->dev, "NNA hw failure: %llx\n", status);
				dev_err(vha->dev, "   CLK_STATUS0:%llx ",
					IOREAD64(vha->reg_base, VHA_CR_CLK_STATUS0));
				dev_err(vha->dev, " VHA_EVENT_STATUS:%llx ", status);
			}

			if (status & VHA_EVENT_TYPE(MMU_PAGE_FAULT))
				/* dump mmu status */
				vha_mmu_status(vha);
		}

		/* If no core level error process cnn events */
		if (!err && status & VHA_CNN_EVNTS)
			err = vha_handle_cnn_event(vha, status);
#ifdef HW_AX3
		else if (status == VHA_EVENT_TYPE(ERROR)) {
			/* Resubmit command next time if no CNN error detected
			 * and only ERROR bit is set.
			 * That means other OS caused the error */
			vha_rollback_cnn_cmds(vha);
		}
#endif
		else if (err && vha->is_ready) { /* Core level error */
			if (vha_handle_cmd(vha, err) == false)
				err = -ENOENT;
		}

		c++;
	} while (c < count && !err);

	if (err) {
		/* Count at least one failure even when no kick was reported */
		vha->stats.total_failures += count ? count : 1;
		vha_dev_stop(vha, true);
		/* Check queues ... */
		vha_chk_cmd_queues(vha, true);
	} else {
		/* Run in BH context! */
		vha_chk_cmd_queues(vha, false);
	}
	vha->stats.cnn_kicks_completed += count;

exit:
#ifdef CONFIG_FAULT_INJECTION
	if (vha->fault_inject & VHA_FI_IRQ_WORKER)
		current->make_it_fail = false;
#endif
	trace_vha_irq(vha->id, status, count, vha->stats.last_proc_us);
	mutex_unlock(&vha->lock);

	return ret;
}

bool vha_rm_session_cmds(struct vha_session *session)
{
	struct vha_dev *vha = session->vha;
	bool pend_removed = false;
	bool queued_removed = false;
	bool reschedule = false;
	struct vha_cmd *cur_cmd, *tmp_cmd;
	uint8_t pri;

	/* Check if pend/queued commands will be removed. */
	if (vha->pendcmd[VHA_CNN_CMD].cmd &&
			vha->pendcmd[VHA_CNN_CMD].cmd->session == session) {
		dev_warn(vha->dev,
				"Removing a session while cnn cmd is still pending\n");
		pend_removed = true;
#ifdef CONFIG_VHA_DUMMY_SIMULATE_HW_PROCESSING_TIME
		cancel_delayed_work(&vha->dummy_dwork);
#endif
	}
	if (vha->queuedcmd[VHA_CNN_CMD].cmd &&
			vha->queuedcmd[VHA_CNN_CMD].cmd->session == session) {
		dev_warn(vha->dev,
				"Removing a session while cnn cmd is still queued\n");
		queued_removed = true;
	}

	/* Update session scheduling. */
	if (vha->queuedcmd[VHA_CNN_CMD].cmd &&
			(pend_removed && !queued_removed)) {
		uint8_t pri = vha->queuedcmd[VHA_CNN_CMD].cmd->user_cmd.priority;
		if (vha->queuedcmd[VHA_CNN_CMD].cmd->session !=
					list_entry(&vha->sched_sessions[pri], struct vha_session,
								sched_list[pri]))
			while(list_first_entry(&vha->sched_sessions[pri], struct vha_session,
						sched_list[pri]) != vha->queuedcmd[VHA_CNN_CMD].cmd->session)
				list_rotate_left(&vha->sched_sessions[pri]);
	}

	/* Remove pend/queued commands if needed. */
	if (pend_removed || queued_removed) {
		vha_rollback_cnn_cmds(vha);
		/* Need to reschedule too. */
		reschedule = true;
	}

	/* Remove session related commands. */
	for (pri = 0; pri < VHA_MAX_PRIORITIES; pri++) {
		list_for_each_entry_safe(cur_cmd, tmp_cmd, &session->cmds[pri], list[pri]) {
			/* rsp didn't make it to rsps list, free it now */
			kfree(cur_cmd->rsp);

			list_del(&cur_cmd->list[cur_cmd->user_cmd.priority]);
			vha->pri_q_counters[cur_cmd->user_cmd.priority] -=
								(VHA_CMD_SUBSEG_NUM(cur_cmd) - cur_cmd->subseg_current);
			if (vha_observers.canceled)
				vha_observers.canceled(vha->id, session->id, cur_cmd->user_cmd.cmd_id,
										cur_cmd->user_cmd.priority);
			kfree(cur_cmd);
		}
	}

	return reschedule;
}

/*
 * Roll back the pending/queued hw commands of @session whose command id
 * matches the template, i.e. (cmd_id & @cmd_id_mask) == @cmd_id.
 *
 * Matching commands are accounted as cancelled rather than aborted: their
 * issued subsegments are added to stats.cnn_kicks_cancelled and the amount
 * that vha_rollback_cnn_cmds() adds to stats.cnn_kicks_aborted for them is
 * subtracted again afterwards.
 *
 * Returns true when a hw slot was rolled back, i.e. rescheduling is needed.
 */
bool vha_rm_session_cmds_masked(struct vha_session *session, uint32_t cmd_id,
		uint32_t cmd_id_mask)
{
	struct vha_dev *vha = session->vha;
	bool reschedule = false;
	bool pend_removed = false;
	uint32_t pend_aborted_kicks_adj_val = 0;
	bool queued_removed = false;
	uint32_t queued_aborted_kicks_adj_val = 0;

	/* Check if pend/queued commands will be removed. */
	if (vha->pendcmd[VHA_CNN_CMD].cmd &&
			(vha->pendcmd[VHA_CNN_CMD].cmd->session == session) &&
			(vha->pendcmd[VHA_CNN_CMD].cmd->user_cmd.cmd_id & cmd_id_mask)
																	== cmd_id) {
		pend_removed = true;
		vha->stats.cnn_kicks_cancelled += vha->pendcmd[VHA_CNN_CMD].cmd->subseg_current;
		pend_aborted_kicks_adj_val = vha->pendcmd[VHA_CNN_CMD].cmd->subseg_current;
#ifdef CONFIG_VHA_DUMMY_SIMULATE_HW_PROCESSING_TIME
		cancel_delayed_work(&vha->dummy_dwork);
#endif
	}
	if (vha->queuedcmd[VHA_CNN_CMD].cmd &&
			(vha->queuedcmd[VHA_CNN_CMD].cmd->session == session) &&
			(vha->queuedcmd[VHA_CNN_CMD].cmd->user_cmd.cmd_id & cmd_id_mask)
																	== cmd_id) {
		queued_removed = true;
		vha->stats.cnn_kicks_cancelled += vha->queuedcmd[VHA_CNN_CMD].cmd->subseg_current;
		/* The adjustment must come from the queued command itself; the
		 * pending slot may be empty or hold a different command. */
		queued_aborted_kicks_adj_val = vha->queuedcmd[VHA_CNN_CMD].cmd->subseg_current;
	}

	/* Update session scheduling: when only the pending command is removed,
	 * rotate the scheduling list until the session of the surviving queued
	 * command is first. */
	if (vha->queuedcmd[VHA_CNN_CMD].cmd &&
			(pend_removed && !queued_removed)) {
		uint8_t pri = vha->queuedcmd[VHA_CNN_CMD].cmd->user_cmd.priority;
		if (vha->queuedcmd[VHA_CNN_CMD].cmd->session !=
					list_entry(&vha->sched_sessions[pri], struct vha_session,
								sched_list[pri]))
			while(list_first_entry(&vha->sched_sessions[pri], struct vha_session,
						sched_list[pri]) != vha->queuedcmd[VHA_CNN_CMD].cmd->session)
				list_rotate_left(&vha->sched_sessions[pri]);
	}

	/* Remove pend/queued commands if needed. */
	if (pend_removed || queued_removed) {
		vha_rollback_cnn_cmds(vha);
		/* Correct aborted stats: the rollback counted these kicks as
		 * aborted, but they were already counted as cancelled above. */
		if (queued_removed)
			vha->stats.cnn_kicks_aborted -= queued_aborted_kicks_adj_val;
		if (pend_removed)
			vha->stats.cnn_kicks_aborted -= pend_aborted_kicks_adj_val;
		reschedule = true;
	}

	return reschedule;
}

/*
 * Remove all commands and responses of @session whose command id matches
 * the template, i.e. (cmd_id & @cmd_id_mask) == @cmd_id.
 *
 * @session:     session whose commands are removed
 * @cmd_id:      command id template
 * @cmd_id_mask: mask applied to a command id before comparing to @cmd_id
 * @respond:     when true and something was removed, a "cancel" response
 *               carrying @cmd_id is queued and waiters are woken up
 *
 * Runs under vha->lock except for the final rescheduling step.
 *
 * Returns -ENOMEM if session->oom is set, otherwise the result of
 * vha_dev_stop() when the hardware had to be stopped, otherwise 0.
 */
int vha_rm_cmds(struct vha_session *session, uint32_t cmd_id,
		uint32_t cmd_id_mask, bool respond)
{
	struct vha_dev *vha = session->vha;
	struct vha_cmd *cur_cmd, *tmp_cmd;
	struct vha_rsp *cur_rsp, *tmp_rsp;
	bool reschedule = false;
	bool respond_aux = false;
	int ret = 0;
	uint8_t pri;

	mutex_lock(&vha->lock);

	/* Remove pend/queued session commands that match the cmd_id. */
	reschedule = vha_rm_session_cmds_masked(session, cmd_id, cmd_id_mask);

	/* Remove session related commands matching command id template. */
	for (pri = 0; pri < VHA_MAX_PRIORITIES; pri++) {
		list_for_each_entry_safe(cur_cmd, tmp_cmd, &session->cmds[pri], list[pri]) {
			if ((cur_cmd->user_cmd.cmd_id & cmd_id_mask) == cmd_id) {

#ifdef KERNEL_DMA_FENCE_SUPPORT
				/* Drop the fences of the buffers referenced by the command */
				switch (cur_cmd->user_cmd.cmd_type)
				{
				case VHA_CMD_CNN_SUBMIT:
				{
					struct vha_user_cnn_submit_cmd *cnn_cmd =
							(struct vha_user_cnn_submit_cmd *)&cur_cmd->user_cmd;
					int j;
					/* Note: the last entry of bufs[] is not visited */
					for (j = 0; j < (cnn_cmd->msg.num_bufs - 1); j++) {
						struct vha_buffer *buf = vha_find_bufid(session, cnn_cmd->bufs[j]);
						if (buf == NULL) {
							dev_warn(vha->dev, "%s: could not find buf %x\n", __func__,
											cnn_cmd->bufs[j]);
						} else {
							vha_rm_buf_fence(session, buf);
						}
					}
					break;
				}
				default:
					dev_warn(vha->dev, "%s: invalid cmd type %x\n", __func__,
								cur_cmd->user_cmd.cmd_type);
					break;
				}
#endif

				/* rsp didn't make it to rsps list; free it now. */
				kfree(cur_cmd->rsp);

				list_del(&cur_cmd->list[cur_cmd->user_cmd.priority]);
				/* Give back the subsegments that were not issued yet */
				vha->pri_q_counters[cur_cmd->user_cmd.priority] -=
								(VHA_CMD_SUBSEG_NUM(cur_cmd) - cur_cmd->subseg_current);
				if (vha_observers.canceled)
					vha_observers.canceled(vha->id, session->id, cur_cmd->user_cmd.cmd_id,
											cur_cmd->user_cmd.priority);
				kfree(cur_cmd);

				/* There were commands matching command id template in the list,
				 * so respond to wake user space. */
				respond_aux = true;
			}
		}
	}

	/* Remove responses for session related commands
	 * matching command id template. */
	list_for_each_entry_safe(cur_rsp, tmp_rsp, &session->rsps, list) {
		if ((cur_rsp->user_rsp.cmd_id & cmd_id_mask) == cmd_id) {
			list_del(&cur_rsp->list);
			kfree(cur_rsp);
			respond_aux = true;
		}
	}

	/* Reset hardware if required. */
	if (reschedule)
		ret = vha_dev_stop(vha, reschedule);

	/* Generate "cancel" response if any commands matching command id template
	 * were removed. */
	if (respond_aux && respond) {
		/* Calculate space for the response. */
		size_t sz = sizeof(struct vha_rsp)
			+ sizeof(struct vha_user_cnn_submit_rsp)
			- sizeof(struct vha_user_rsp);
		/* Allocate space for standard response. */
		struct vha_rsp *rsp = kzalloc(sz, GFP_KERNEL);
		if (rsp == NULL) {
			dev_crit(session->vha->dev,
					"Failed to allocate memory to notify cancel for cmds 0x%08x\n", cmd_id);
			session->oom = true;
		} else {
			rsp->size = sizeof(struct vha_user_cnn_submit_rsp);
			rsp->user_rsp.cmd_id = cmd_id;
			list_add_tail(&rsp->list, &session->rsps);
		}
		/* Waiters are woken even when the allocation failed */
		wake_up(&session->wq);
	}

	mutex_unlock(&vha->lock);

	/* Just return in case of oom. */
	if (session->oom)
		return -ENOMEM;

	/* Reschedule once all commands matching command id template are removed. */
	if (reschedule)
		vha_chk_cmd_queues(vha, true);

	return ret;
}

/*
 * Tells whether the device is busy.
 *
 * A core that is not ready counts as busy (unless CONFIG_VHA_DUMMY is
 * defined). Otherwise the device is busy when a command is pending or,
 * with low latency enabled, when a command is pending or queued.
 */
bool vha_is_busy(struct vha_dev *vha)
{
	const bool has_pending = vha->pendcmd[VHA_CNN_CMD].cmd != NULL;
	const bool has_queued = vha->queuedcmd[VHA_CNN_CMD].cmd != NULL;

#ifndef CONFIG_VHA_DUMMY
	if (!vha->is_ready)
		return true;
#endif

	/* The queued slot only matters in low latency modes */
	if (vha->low_latency == VHA_LL_DISABLED)
		return has_pending;

	return has_pending || has_queued;
}

/*
 * returns true if the cmd queue is full
 *
 * With low latency disabled the queue is full as soon as a command is
 * pending. With low latency enabled it is full when both the pending and
 * the queued slot are occupied, except for the VHA_LL_SELF_KICK cases
 * handled below, where only the pending slot is considered.
 */
bool vha_is_queue_full(struct vha_dev *vha, struct vha_cmd *cmd)
{
	if (vha->low_latency != VHA_LL_DISABLED) {
		/* Note: the condition below is completed differently depending
		 * on HW_AX3; without it every SELF_KICK request takes this branch. */
		if (vha->low_latency == VHA_LL_SELF_KICK
#ifdef HW_AX3
			/* if current command we are trying to queue belongs to a different session than pending one */
			&& (vha->pendcmd[VHA_CNN_CMD].cmd != NULL && cmd != NULL &&
					vha->pendcmd[VHA_CNN_CMD].cmd->session != cmd->session)
			/* if session of the command we are trying to queue, shares the hw mmu ctx with the session of pending cmd */
			&& (cmd->session->mmu_ctxs[VHA_MMU_REQ_MODEL_CTXID].hw_id ==
					vha->pendcmd[VHA_CNN_CMD].cmd->session->mmu_ctxs[VHA_MMU_REQ_MODEL_CTXID].hw_id)
			/* Sanity if hw mmu ctx is really shared at this point */
			&& (vha->mmu_ctxs[cmd->session->mmu_ctxs[VHA_MMU_REQ_MODEL_CTXID].hw_id] > 1)
			) {
#else
			) {
			dev_warn(vha->dev, "%s: LL=2 not supported!\n", __func__);
#endif
			/* skip low latency mode */
			return vha->pendcmd[VHA_CNN_CMD].cmd != NULL;
		}

		/* Regular low latency: full only when both slots are taken */
		return vha->pendcmd[VHA_CNN_CMD].cmd != NULL &&
					vha->queuedcmd[VHA_CNN_CMD].cmd != NULL;
	}
	return vha->pendcmd[VHA_CNN_CMD].cmd != NULL;
}

/*
 * Check whether a command still has to wait for its input buffers.
 *
 * Returns true as soon as one of the inspected input buffers is found in the
 * VHA_BUF_UNFILLED state. Otherwise the command is marked as having its
 * inputs ready (so the scan is skipped on later calls) and false is returned.
 * Buffer ids which vha_find_bufid() cannot resolve are ignored.
 */
bool vha_is_waiting_for_inputs(struct vha_session *session,
	struct vha_cmd *cmd)
{
	if (!cmd->inbufs_ready) {
		const struct vha_user_cnn_submit_cmd *submit =
			(struct vha_user_cnn_submit_cmd *)&cmd->user_cmd;
		int idx = 0;

		/* NOTE(review): only the first (num_inbufs - 1) entries of bufs[]
		 * are inspected - confirm that excluding one input is intended. */
		while (idx < cmd->user_cmd.num_inbufs - 1) {
			struct vha_buffer *inbuf =
				vha_find_bufid(session, submit->bufs[idx]);

			idx++;
			if (inbuf == NULL || inbuf->status != VHA_BUF_UNFILLED)
				continue;

			dev_dbg(session->vha->dev,
				"%s: cmd %u waiting for input "
				"buf %d to be ready\n",
				__func__,
				cmd->user_cmd.cmd_id,
				inbuf->id);
			return true;
		}
	}

	cmd->inbufs_ready = true;
	return false;
}

/*
 * Tell whether another CNN command could be handed to the device.
 *
 * Unless built with CONFIG_VHA_DUMMY, nothing can be scheduled while the
 * is_ready flag is clear. Otherwise scheduling is possible when the pending
 * slot is free; with low latency enabled a free queued slot is sufficient
 * as well.
 */
static bool vha_can_schedule(struct vha_dev *vha)
{
	bool pend_free;

#ifndef CONFIG_VHA_DUMMY
	if (!vha->is_ready)
		return false;
#endif

	pend_free = vha->pendcmd[VHA_CNN_CMD].cmd == NULL;

	/* Without low latency only the pending slot can take a command. */
	if (vha->low_latency == VHA_LL_DISABLED)
		return pend_free;

	return pend_free || vha->queuedcmd[VHA_CNN_CMD].cmd == NULL;
}

/*
 * Rotate the per-priority scheduling list so that the next scheduling round
 * starts at @session, or at the entry following it when @set_next is true.
 *
 * @session may also be the pseudo entry derived from the list head itself;
 * in that case the list is not rotated towards it.
 * NOTE(review): the rotation loop only terminates if @session is linked
 * into vha->sched_sessions[priority] - confirm callers guarantee this.
 */
static void vha_scheduler_set_starting_session(struct vha_dev *vha,
	uint8_t priority, struct vha_session *session, bool set_next)
{
	struct list_head *head = &vha->sched_sessions[priority];
	struct vha_session *head_entry =
		list_entry(head, struct vha_session, sched_list[priority]);

	if (session != head_entry) {
		/* Bring the requested session to the front of the list. */
		while (list_first_entry(head, struct vha_session,
					sched_list[priority]) != session)
			list_rotate_left(head);
	}

	/* One more step moves the starting point past the current session. */
	if (set_next)
		list_rotate_left(head);
}

/*
 * Pick the priority level to schedule from.
 *
 * Walks the priority levels from VHA_MAX_PRIORITIES - 1 down to 0 and
 * returns the first one whose pri_q_counters entry is greater than zero.
 * Returns VHA_INVALID_PRI when no level has a non-zero counter.
 */
static uint8_t vha_scheduler_get_priority(struct vha_dev *vha)
{
	uint8_t level = VHA_MAX_PRIORITIES - 1;

	/* The signed cast lets the walk stop once level wraps below zero. */
	while ((int8_t)level >= 0) {
		if (vha->pri_q_counters[level] > 0)
			return level;
		level--;
	}

	/* No priority level has anything to schedule. */
	return VHA_INVALID_PRI;
}

/*
 * Main scheduling pass: try to hand commands to the device and, once nothing
 * is left running, arrange for the device to be stopped.
 *
 * Each round asks vha_scheduler_get_priority() for the priority level to
 * serve, walks the sessions on that level's scheduling list and their
 * commands, and attempts to submit the first command that is neither queued
 * nor in hardware. Rounds repeat while a command was scheduled in the last
 * round and vha_can_schedule() still reports room.
 *
 * The function returns immediately when the command queue is already full
 * or when starting the device fails.
 */
void vha_scheduler_loop(struct vha_dev *vha)
{
	struct vha_cmd *cmd, *tmp;
	struct vha_session *session = NULL;
	enum do_cmd_status cmd_status = CMD_OK;
	bool scheduled = false;
	uint8_t current_pri = VHA_DEFAULT_PRI;

	/* No particular command is known yet, hence the NULL argument. */
	if (vha_is_queue_full(vha, NULL)) {
		/* Postpone worker task if command queue is full. */
		dev_dbg(vha->dev, "%s Queue full. Postpone worker task!\n", __func__);
		return;
	}

	do {
		scheduled = false;
		current_pri = vha_scheduler_get_priority(vha);
		/* Nothing to schedule on any priority level. */
		if (current_pri == VHA_INVALID_PRI)
			break;
		list_for_each_entry(session, &vha->sched_sessions[current_pri], sched_list[current_pri]) {
			list_for_each_entry_safe(cmd, tmp, &session->cmds[current_pri], list[current_pri]) {

				/* For hw commands... */
				if (CMD_EXEC_ON_HW(cmd)) {
					if (!VHA_IS_DUMMY(vha)) {
						/* Start device. Abort the whole scheduling pass
						 * if the device cannot be started. */
						if(vha_dev_start(vha))
							return;
					}
				}

				/* Skip this workload as it's already scheduled. */
				if (cmd->queued || cmd->in_hw)
					continue;

				dev_dbg(vha->dev, "%s cur_prio=<%d>\n", __func__,current_pri);
				/* Attempt to schedule command for execution. */
				cmd_status = vha_do_cmd(cmd);

				/* Update scheduling loop based on command scheduling status.
				 * Any status other than CMD_OK / CMD_HW_BUSY moves on to the
				 * next command. */
				if ((cmd_status == CMD_OK) || (cmd_status == CMD_HW_BUSY)) {
					bool set_next = false;
					if (cmd_status == CMD_OK) {
						scheduled = true;
						/* Advance the starting point to the next session
						 * only when the current subsegment index equals
						 * VHA_CMD_SUBSEG_NUM(cmd) (presumably: all
						 * subsegments of this command were issued). */
						if (cmd->subseg_current == VHA_CMD_SUBSEG_NUM(cmd))
							set_next = true;
					}
					vha_scheduler_set_starting_session(vha, current_pri, session, set_next);
					/* Leave both the command and the session loop. */
					goto exit_session_loop;
				}
			}
		}
exit_session_loop:;
	/* Keep iterating as long as the last round scheduled a workload
	 * and another one can still be scheduled. */
	} while (vha_can_schedule(vha) && scheduled);

	if (!VHA_IS_DUMMY(vha)) {
		/* Schedule APM if needed */
		if (!vha_is_busy(vha) &&
				!vha->no_clock_disable) {
			/* No delay configured: stop the device right away, first
			 * without and then, on failure, with reset. */
			if (!vha->pm_delay) {
				if (vha_dev_stop(vha, false)) {
					dev_warn(vha->dev, "%s: Failed to soft stop device. trying with reset",
						__func__);
					if (vha_dev_stop(vha, true))
						dev_err(vha->dev, "%s: Failed to stop device with reset!", __func__);
				}
			}
			else {
				/* Otherwise hand the stop over to the APM work item,
				 * using pm_delay as its delay. */
				vha->apm_dworks[0].delay_ms = vha->pm_delay;
				vha_sched_apm(vha, &vha->apm_dworks[0]);
			}
		}
	}
}

/*
 * Stop the device on behalf of the APM work item.
 *
 * Nothing happens while calibration is requested or while a CNN command is
 * pending or queued. Otherwise a stop without reset is attempted first and,
 * if that fails, a stop with reset. @apm_work is not used here.
 */
void vha_dev_apm_stop(struct vha_dev *vha, struct vha_apm_work *apm_work)
{
	if (vha->do_calibration)
		return;

	/* Leave the device running while there is still work for it. */
	if (vha->pendcmd[VHA_CNN_CMD].cmd != NULL ||
			vha->queuedcmd[VHA_CNN_CMD].cmd != NULL)
		return;

	/* A zero return means the soft stop succeeded. */
	if (!vha_dev_stop(vha, false))
		return;

	dev_warn(vha->dev, "%s: Failed to soft stop device. trying with reset",
		__func__);
	if (vha_dev_stop(vha, true))
		dev_err(vha->dev, "%s: Failed to stop device with reset!", __func__);
}

/*
 * Read the hardware properties of the device and store them in vha->hw_props.
 *
 * @vha:            device to query; vha->hw_props is cleared and refilled.
 * @onchipmem_size: user requested on-chip memory size in bytes, 0 if the
 *                  user did not request any.
 *
 * With CONFIG_VHA_DUMMY the product and core ids are synthesized instead of
 * being read from registers.
 *
 * Returns 0 on success, or -ENODEV when no CNN core is reported or the
 * branch id found in the core id does not match HW_SERIES.
 */
int vha_dev_get_props(struct vha_dev *vha, uint32_t onchipmem_size)
{
	struct vha_hw_props *props = &vha->hw_props;
	uint64_t ip_config;
	uint32_t ocm_size_kb = 0;

	memset(props, 0, sizeof(*props));

#ifdef CONFIG_VHA_DUMMY
	/* Note: dummy dev always reads zeroes from registers */
	props->product_id  = 0x8070605040302010ULL;
	/* Widen to 64 bits before shifting: the core id is a 64-bit value and
	 * "long" may be only 32 bits wide, which would make the shift undefined
	 * or lose the branch id. */
	props->core_id  = (uint64_t)HW_SERIES << (int)VHA_CR_CORE_ID_BRANCH_ID_SHIFT;
	props->core_id += 0x010203040505ULL;   // provide a dummy core id
	props->dummy_dev = true;
	props->num_cnn_core_devs = 1;
#else
	props->product_id  = IOREAD64(vha->reg_base, VHA_CR_PRODUCT_ID);
	props->core_id  = IOREAD64(vha->reg_base, VHA_CR_CORE_ID);
#endif
	props->skip_bvnc_check = false;
	/*
	 * New mmu version 3 and onwards operates on 40bit physical & virtual addresses
	 */
	props->mmu_width = 40;

	/* HW from 1.1 onwards */
	ip_config = IOREAD64(vha->reg_base, VHA_CR_CORE_IP_CONFIG);
#ifdef HW_AX3
	props->mmu_ver = VHA_CR_GETBITS(CORE_IP_CONFIG, MMU_VERSION, ip_config);
#endif
	/* Mirage uses MMU version 3 hardware; also the fallback whenever no
	 * version was obtained above. */
	if (!props->mmu_ver)
		props->mmu_ver = 3;
	if (VHA_CR_GETBITS(CORE_IP_CONFIG, CNN_SUPPORTED, ip_config))
		props->num_cnn_core_devs = 1;
	if (VHA_CR_GETBITS(CORE_IP_CONFIG, RTM_SUPPORTED, ip_config))
		props->supported.rtm = 1;
#ifdef HW_AX3
	if (VHA_CR_GETBITS(CORE_IP_CONFIG, PARITY_REGISTERS, ip_config))
		props->supported.parity = 1;

#if defined(CONFIG_VHA_DUMMY) && defined(VHA_SCF)
	/* Force parity for pdump generation */
	props->supported.parity = 1;
#endif
#endif

	/* Refuse to continue without a CNN core or with a core whose branch id
	 * differs from the series this module was built for. */
	if ((props->num_cnn_core_devs == 0)
		|| VHA_CR_GETBITS(CORE_ID, BRANCH_ID, props->core_id) != HW_SERIES) {
		dev_err(vha->dev, "%s: Wrong core configuration detected. "
			"Expected BVNC %d.x.x.x, got %llu.x.x.x. "
			"Maybe kernel module was built with wrong params.\n",
			__func__, HW_SERIES,
			VHA_CR_GETBITS(CORE_ID, BRANCH_ID, props->core_id));
		return -ENODEV;
	}

	props->soc_axi  = IOREAD64(vha->reg_base, VHA_CR_SOC_AXI);

	dev_info(vha->dev, "%s: Product id: %#llx\n",
			__func__, props->product_id);
	dev_info(vha->dev, "%s: Core id: %#llx\n",
			__func__, props->core_id);
	dev_info(vha->dev, "%s: MMU version:%d (%dbit)\n",
			__func__, props->mmu_ver, props->mmu_width);
	dev_dbg(vha->dev, "%s: supported: %#x\n",
			__func__, props->features);
	dev_dbg(vha->dev, "%s: soc_axi: %#llx\n",
			__func__, props->soc_axi);
	{
		uint64_t tmp = IOREAD64(vha->reg_base,
				VHA_CR_CORE_IP_INTEGRATOR_ID);
		dev_dbg(vha->dev, "%s: ip integrator id: %#llx\n",
				__func__, tmp);
		tmp = IOREAD64(vha->reg_base, VHA_CR_CORE_IP_CHANGELIST);
		dev_dbg(vha->dev, "%s: ip change list: %llu\n", __func__, tmp);
	}

#if defined(CFG_SYS_VAGUS)
	/* The register field is expressed in 4 kB units. */
	ocm_size_kb = IOREAD64(vha->reg_base, NN_SYS_CR(CORE_IP_CONFIG)) &
				~NN_SYS_CR_CORE_IP_CONFIG_NN_SYS_OCM_RAM_SIZE_4KB_CLRMSK;
	ocm_size_kb *= 4;
#endif

	if (ocm_size_kb) {
		props->locm_size_bytes = ocm_size_kb * 1024;
		/* User may wanted to limit OCM ... */
		if (onchipmem_size) {
			if (onchipmem_size < props->locm_size_bytes) {
				dev_warn(vha->dev, "%s:Limiting onchip memory to %u bytes (available:%u)\n",
						__func__, onchipmem_size, props->locm_size_bytes);
				props->locm_size_bytes = onchipmem_size;
			} else if (onchipmem_size > props->locm_size_bytes) {
				/* Only reported; the size read from hardware is kept. */
				dev_err(vha->dev, "%s: User defined onchip memory size exceeded (%u > %u))\n",
						__func__, onchipmem_size, props->locm_size_bytes);
			}
		}
	} else {
		/* No size obtained from hardware: take the user value as is. */
		props->locm_size_bytes = onchipmem_size;
	}

	dev_info(vha->dev, "%s: Total onchip memory: %u [kB]\n",
			__func__, props->locm_size_bytes / 1024);

	dev_info(vha->dev, "%s: Devices: DUMMY:%u CNN:%u\n", __func__,
			props->dummy_dev ? props->num_cnn_core_devs : 0,
			props->dummy_dev ? 0 : props->num_cnn_core_devs);

	return 0;
}

/*
 * Program the on-chip memory (OCM) physical address range.
 *
 * Only does anything when built with CFG_SYS_VAGUS; otherwise the function
 * body is empty. The range starts at vha->ocm_paddr and spans
 * vha->hw_props.locm_size_bytes bytes (the upper bound is inclusive).
 */
void vha_dev_ocm_configure(struct vha_dev *vha)
{
#if defined(CFG_SYS_VAGUS)
	dev_dbg(vha->dev, "%s: OCM address range: %#lx - %#lx\n",
			__func__, vha->ocm_paddr,
			vha->ocm_paddr + vha->hw_props.locm_size_bytes - 1);
	/* Lower and (inclusive) upper bound of the range. */
	IOWRITE64(vha->reg_base, NN_SYS_CR(NOC_LOWER_ADDR1), vha->ocm_paddr);
	IOWRITE64(vha->reg_base, NN_SYS_CR(NOC_UPPER_ADDR1),
			vha->ocm_paddr + vha->hw_props.locm_size_bytes - 1);
	/* Emit the equivalent setup into the pdump script; there the bounds are
	 * given as offsets 0 and size - 1 into the OCM block. */
	img_pdump_printf("-- Setup NN_SYS OCM phys address range\n"
		"WRW "_PMEM_":$0 :OCM:BLOCK_CACHE:0x0\n"
		"WRW64 :REG_NNSYS:%#x "_PMEM_":$0\n"
		"WRW "_PMEM_":$0 :OCM:BLOCK_CACHE:%#x\n"
		"WRW64 :REG_NNSYS:%#x "_PMEM_":$0\n",
		NN_SYS_CR_NOC_LOWER_ADDR1, vha->hw_props.locm_size_bytes-1,
		NN_SYS_CR_NOC_UPPER_ADDR1);
#endif
}

/*
 * Prepare CRC and DEBUG data buffers.
 *
 * If the session has a CRC buffer, its address and the CRC mode are written
 * to the CRC registers (plus the CRC mask on HW_AX3). If the session has a
 * DEBUG buffer and pdump of debug data is enabled, the buffer address and
 * size are programmed; the DEBUG control register is written only when the
 * PERF or BAND debug mode is on. @cmd and @regs are not used here.
 */
void vha_dbg_prepare_hwbufs(struct vha_session *session, struct vha_cmd *cmd,
		struct vha_crc_config_regs *regs)
{
	struct vha_dev *vha = session->vha;
	struct vha_buffer *crc_buf = session->cnn_dbg.cnn_crc_buf[0];
	struct vha_buffer *dbg_buf;
	uint64_t reg_val;

	(void)cmd;

	if (crc_buf) {
		/* enable CRC: address + mode */
		reg_val = VHA_CR_SETBITS_OS(CNN_CRC_CONTROL, CNN_CRC_ENABLE,
				session->cnn_dbg.cnn_crc_mode);
		img_pdump_printf("-- CRC_CONTROL=%u buf 'CRC' size=%zx\n",
				session->cnn_dbg.cnn_crc_mode, crc_buf->size);
		IOWRITE_PDUMP_BUFADDR(session, crc_buf, 0, VHA_CR_OS(CNN_CRC_ADDRESS));

		IOWRITE64_PDUMP(reg_val, VHA_CR_OS(CNN_CRC_CONTROL));

#ifdef HW_AX3
		img_pdump_printf("-- CRC_MASK=%#x\n", session->cnn_dbg.cnn_crc_mask);
		IOWRITE64_PDUMP(session->cnn_dbg.cnn_crc_mask, VHA_CR_OS(CNN_CRC_MASK_CTRL));
#endif
	}

	/* The DEBUG buffer is set up only when it exists and pdump is enabled. */
	dbg_buf = session->cnn_dbg.cnn_dbg_buf[0];
	if (!dbg_buf || !session->cnn_dbg.cnn_dbg_pdump_enable)
		return;

	/* enable DEBUG: address, perf mode, band mode */
	img_pdump_printf("-- DEBUG_CONTROL=%u,%u buf 'DBG' size=%zx\n",
			GET_CNN_DBG_MODE(PERF, session), GET_CNN_DBG_MODE(BAND, session),
			dbg_buf->size);
	IOWRITE_PDUMP_BUFADDR(session, dbg_buf, 0,
						VHA_CR_OS(CNN_DEBUG_ADDRESS));
	reg_val = VHA_CR_ALIGN_SETBITS_OS(CNN_DEBUG_SIZE,
							CNN_DEBUG_SIZE,
							dbg_buf->size);
	IOWRITE64_PDUMP(reg_val, VHA_CR_OS(CNN_DEBUG_SIZE));

	/* Set the CONTROL register only if requested */
	if (!CNN_DBG_MODE_ON(PERF, session) && !CNN_DBG_MODE_ON(BAND, session))
		return;

	reg_val = VHA_CR_SETBITS_OS(CNN_DEBUG_CONTROL, CNN_PERF_ENABLE,
								GET_CNN_DBG_MODE(PERF, session));
	reg_val |= VHA_CR_SETBITS_OS(CNN_DEBUG_CONTROL, CNN_BAND_ENABLE,
								GET_CNN_DBG_MODE(BAND, session));
	IOWRITE64_PDUMP(reg_val, VHA_CR_OS(CNN_DEBUG_CONTROL));
}

/*
 * Flush CRC and DEBUG data buffers into the pdump.
 *
 * Does nothing unless @checkpoint equals the session's cnn_dbg_flush
 * setting. The CRC buffer, if present, is saved inside an
 * IF/ELSE/FI CHECK_CRCS pdump section; the DEBUG buffer, if present and
 * pdump of debug data is enabled, is saved in full. @mask is not used here.
 */
void vha_dbg_flush_hwbufs(struct vha_session *session, char checkpoint, uint8_t mask)
{
	struct vha_dev* vha = session->vha;
	struct vha_buffer *crc_buf;
	struct vha_buffer *dbg_buf;
	uint64_t dbg_status;

	(void)mask;
	if (session->cnn_dbg.cnn_dbg_flush != checkpoint)
		return;

	crc_buf = session->cnn_dbg.cnn_crc_buf[0];
	if (crc_buf) {
		/*
		 * TOBEDONE: calculate CRC buffer size based
		 * on num passes, num layers, etc
		 */
		img_pdump_printf("-- Save signatures\n");
		img_pdump_printf("IF CHECK_CRCS\n");
		img_pdump_printf("COM Checking CRCs ...\n");
		vha_pdump_sab_buf(session, PDUMP_CRC,
					crc_buf, 0, crc_buf->size);
		img_pdump_printf("ELSE CHECK_CRCS\n");
		img_pdump_printf("COM Not checking CRCs!\n");
		img_pdump_printf("FI CHECK_CRCS\n");
	}

	dbg_buf = session->cnn_dbg.cnn_dbg_buf[0];
	if (!dbg_buf || !session->cnn_dbg.cnn_dbg_pdump_enable)
		return;

	/* Read the DEBUG status and extract the offset field. The extracted
	 * value is currently not used: the whole buffer is saved below. */
	dbg_status = IOREAD64(vha->reg_base, VHA_CR_OS(CNN_DEBUG_STATUS));
	dbg_status = VHA_CR_GETBITS_OS(CNN_DEBUG_STATUS,
						CNN_DEBUG_OFFSET,
						dbg_status);

	/*
	 * SAB the DBG buffer, even though "it is not deterministic"
	 */
	img_pdump_printf("-- Save DEBUG info\n");

	vha_pdump_sab_buf(session, PDUMP_DBG, dbg_buf, 0, dbg_buf->size);
}

/*
 * Stop capturing CRC and DEBUG data.
 *
 * Flushes the debug buffers (checkpoint 0) and then clears the CRC control
 * register if the session has a CRC buffer. If the session has a DEBUG
 * buffer, the DEBUG status is read and, when the PERF or BAND debug mode is
 * on, the DEBUG control register is cleared and the status value is noted
 * in the pdump. @mask is not used here.
 */
void vha_dbg_stop_hwbufs(struct vha_session *session, uint8_t mask)
{
	struct vha_dev *vha = session->vha;
	uint64_t dbg_status;

	(void)mask;

	/* Flush hw debug buffers */
	vha_dbg_flush_hwbufs(session, 0, 0);

	if (session->cnn_dbg.cnn_crc_buf[0]) {
		IOWRITE64_PDUMP(0, VHA_CR_OS(CNN_CRC_CONTROL));
	}

	if (!session->cnn_dbg.cnn_dbg_buf[0])
		return;

	/* read the size of the DEBUG buffer */
	dbg_status = IOREAD64(vha->reg_base, VHA_CR_OS(CNN_DEBUG_STATUS));

	if (!CNN_DBG_MODE_ON(PERF, session) && !CNN_DBG_MODE_ON(BAND, session))
		return;

	IOWRITE64_PDUMP(0, VHA_CR_OS(CNN_DEBUG_CONTROL));
	/* just give a hint in the pdump:
	 * dummy device returns 0 */
	img_pdump_printf(
			"-- POL64 :REG:%#x 0 0 0 1 1 -- DEBUG_STATUS=%llx\n",
			 VHA_CR_OS(CNN_DEBUG_STATUS),
		dbg_status);
}

/*
 * Read one value through the RTM registers.
 *
 * @vha:  device to access.
 * @addr: value written to the RTM control register together with the
 *        enable bit; selects the signal to read.
 *
 * Forces the system and main clocks on, programs the RTM control register,
 * sleeps for 1 ms and returns the content of the RTM data register.
 */
uint64_t vha_dbg_rtm_read(struct vha_dev *vha, uint64_t addr)
{
	uint64_t rtm_ctrl = addr | VHA_CR_RTM_CTRL_RTM_ENABLE_EN;
	uint64_t rtm_data;

	/* Turn on all clocks forcefully */
	IOWRITE64(vha->reg_base, VHA_CR_SYS_CLK_CTRL0, VHA_SYS_CLOCKS_DEFAULT(ON));
	IOWRITE64(vha->reg_base, VHA_CR_CLK_CTRL0, VHA_MAIN_CLOCKS_DEFAULT(ON));

	/* Set up address of the signal */
	IOWRITE64(vha->reg_base, VHA_CR_RTM_CTRL, rtm_ctrl);

	/* Presumably the required wait depends on N_OF_RTM_STAGES, but
	 * N_OF_RTM_STAGES is not accessible by SW, so waiting 1 ms for now. */
	msleep(1);

	/* Read the data */
	rtm_data = IOREAD64(vha->reg_base, VHA_CR_RTM_DATA);

	return rtm_data;
}

int vha_currcmd_exetime_req(struct vha_dev *vha, uint64_t *proc_us)
{
	uint64_t proc_time = 0;
	struct vha_cmd *cmd = NULL;
	struct TIMESPEC to;

	cmd = vha->pendcmd[VHA_CNN_CMD].cmd;
	if(!cmd || !cmd->in_hw) {
		goto err_out;
	}

	{
		struct TIMESPEC from = vha->stats.hw_proc_start;
		GETNSTIMEOFDAY(&to);

		if (cmd->subsegs_completed == cmd->subseg_current) {
			*proc_us = 0;
		} else if (get_timespan_us(&from, &to, &proc_time)) {
			*proc_us = proc_time;
		} else {
			goto err_out;
		}
	}

	return 0;

err_out:
	*proc_us = 0;
	return -1;
}

/*
 * List of predefined registers to be shown in debugfs.
 *
 * Each entry consists of a fixed-width display name followed by the two
 * values produced by REG_DESC()/REG_DESC_OS(): the register identifier and
 * its MASKFULL constant. The final entry has a NULL name (presumably the
 * end-of-table marker for whoever walks this array).
 */
const struct vha_reg vha_regs[] = {
/* Expands to "register, full mask" for a register named VHA_CR_<reg>. */
#define REG_DESC(reg) VHA_CR_##reg, VHA_CR_##reg##_MASKFULL
/* Same, but with the names resolved through the VHA_CR_OS() macro. */
#define REG_DESC_OS(reg) VHA_CR_OS(reg), VHA_CR_OS(reg##_MASKFULL)
	{"main_clocks_control  ", REG_DESC(CLK_CTRL0)},
	{"main_clocks_status   ", REG_DESC(CLK_STATUS0)},
	{"sys_clocks_control   ", REG_DESC(SYS_CLK_CTRL0)},
	{"sys_clocks_status    ", REG_DESC(SYS_CLK_STATUS0)},
	{"product_id           ", REG_DESC(PRODUCT_ID)},
	{"core_id              ", REG_DESC(CORE_ID)},
	{"soc_axi              ", REG_DESC(SOC_AXI)},
	{"integrator_id        ", REG_DESC(CORE_IP_INTEGRATOR_ID)},
	{"ip_changelist        ", REG_DESC(CORE_IP_CHANGELIST)},
	{"core_ip_config       ", REG_DESC(CORE_IP_CONFIG)},
	{"reset                ", REG_DESC(RESET_CTRL)},
	{"event_enable         ", REG_DESC_OS(VHA_EVENT_ENABLE)},
	{"event_status         ", REG_DESC_OS(VHA_EVENT_STATUS)},
	{"cnn_control          ", REG_DESC_OS(CNN_CONTROL)},
	{"cnn_status           ", REG_DESC_OS(CNN_STATUS)},
#ifdef HW_AX2
	/* These watchdog registers are listed for HW_AX2 builds only. */
	{"cnn_wdt_cmpmatch     ", REG_DESC(CNN_WDT_COMPAREMATCH)},
	{"cnn_wdt_control      ", REG_DESC(CNN_WDT_CTRL)},
	{"cnn_wdt_timer        ", REG_DESC(CNN_WDT_TIMER)},
#endif
	{"cnn_mem_wdt_cmpmatch ", REG_DESC(CNN_MEM_WDT_COMPAREMATCH)},
	{"cnn_mem_wdt_control  ", REG_DESC(CNN_MEM_WDT_CTRL)},
	{"cnn_mem_wdt_timer    ", REG_DESC(CNN_MEM_WDT_TIMER)},
	{"mmu_control          ", REG_DESC_OS(MMU_CTRL)},
	{"mmu_context          ", REG_DESC_OS(MMU_CBASE_MAPPING_CONTEXT)},
	{"mmu_mapping          ", REG_DESC_OS(MMU_CBASE_MAPPING)},
	{"mmu_status           ", REG_DESC(MMU_STATUS)},
	{"mmu_fault_status1    ", REG_DESC_OS(MMU_FAULT_STATUS1)},
	{"mmu_fault_status2    ", REG_DESC_OS(MMU_FAULT_STATUS2)},
	{"slc_control          ", REG_DESC(SLC_CTRL)},
	/* The slc_bypass_control entry is compiled out. */
#if 0
	{"slc_bypass_control   ", REG_DESC(SLC_BYPASS_CTRL)},
#endif
	{"slc_status1          ", REG_DESC(SLC_STATUS1)},
	{"slc_status2          ", REG_DESC(SLC_STATUS2)},
	{"slc_status3          ", REG_DESC(SLC_STATUS3)},
	{"slc_idle             ", REG_DESC(SLC_IDLE)},
	{"bif_outstanding_read ", REG_DESC(BIF_OUTSTANDING_READ)},
#undef REG_DESC
#undef REG_DESC_OS
	/* Last entry: NULL name. */
	{NULL                   , 0},
};