Files
thead-kernel/drivers/nna/vha/vha_common.c
Han Gao e0da9d9718 nna: sync SDK 1.5.4
Signed-off-by: Han Gao <gaohan@iscas.ac.cn>
2024-05-30 01:07:11 +08:00

2738 lines
76 KiB
C

/*
*****************************************************************************
* Copyright (c) Imagination Technologies Ltd.
*
* The contents of this file are subject to the MIT license as set out below.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
* Alternatively, the contents of this file may be used under the terms of the
* GNU General Public License Version 2 ("GPL")in which case the provisions of
* GPL are applicable instead of those above.
*
* If you wish to allow use of your version of this file only under the terms
* of GPL, and not to allow others to use your version of this file under the
* terms of the MIT license, indicate your decision by deleting the provisions
* above and replace them with the notice and other provisions required by GPL
* as set out in the file called "GPLHEADER" included in this distribution. If
* you do not delete the provisions above, a recipient may use your version of
* this file under the terms of either the MIT license or GPL.
*
* This License is also included in this distribution in the file called
* "MIT_COPYING".
*
*****************************************************************************/
#include <linux/slab.h>
#include <linux/device.h>
#include <linux/gfp.h>
#include <linux/sched.h>
#include <linux/moduleparam.h>
#include <linux/jiffies.h>
#include <linux/list.h>
#include <linux/delay.h>
#include <linux/pm_runtime.h>
#include <linux/debugfs.h>
#include <linux/crc32.h>
#include <uapi/vha.h>
#include <uapi/version.h>
#include "vha_common.h"
#include "vha_plat.h"
#include <vha_regs.h>
#ifdef KERNEL_DMA_FENCE_SUPPORT
#include <linux/dma-fence.h>
#include <linux/dma-fence-array.h>
#include <linux/sync_file.h>
#include <linux/file.h>
#include <linux/kernel.h>
#endif
#if !defined(HW_AX2) && !defined(HW_AX3)
#error No HW architecture series defined. Either HW_AX2 or HW_AX3 must be defined
#elseif defined(HW_AX2) && defined(HW_AX3)
#error Invalid HW architecture series define. Only one of HW_AX2 or HW_AX3 must be defined.
#endif
#undef linux
#define CREATE_TRACE_POINTS
#include <vha_trace_point.h>
#define MIN_ONCHIP_MAP 1
#define MAX_ONCHIP_MAP 128
static uint8_t mmu_mode = VHA_MMU_40BIT;
module_param(mmu_mode, byte, 0444);
MODULE_PARM_DESC(mmu_mode,
"MMU mode: 0=no-MMU, 1=direct (1:1) mappings or 40=40bit (default)");
static uint32_t mmu_ctx_default;
module_param(mmu_ctx_default, uint, 0444);
MODULE_PARM_DESC(mmu_ctx_default, "MMU default context id(0:31) to be used");
static uint32_t mmu_page_size; /* 0-4kB */
module_param(mmu_page_size, uint, 0444);
MODULE_PARM_DESC(mmu_page_size,
"MMU page size: 0-4kB, 1-16kB, 2-64kB, 3-256kB, 4-1MB; 5-2MB");
static bool no_clock_disable = false;
module_param(no_clock_disable, bool, 0444);
MODULE_PARM_DESC(no_clock_disable,
"if Y, the device is not disabled when inactive, otherwise APM is used");
static int pm_delay = 100;
module_param(pm_delay, int, S_IRUSR | S_IRGRP);
MODULE_PARM_DESC(pm_delay, "Delay, in ms, before powering off the core that's idle");
static int freq_khz = -1;
module_param(freq_khz, int, 0444);
MODULE_PARM_DESC(freq_khz,
"core frequency in kHz, -1=start self measurement during driver load, 0=use platform defined value, otherwise (>0) declared value is used");
static uint32_t hw_bypass;
module_param(hw_bypass, uint, 0444);
MODULE_PARM_DESC(hw_bypass,
"Number of cnn kicks(segments) to be bypassed within the session, 0=none");
static uint32_t slc_bypass;
module_param(slc_bypass, uint, 0444);
MODULE_PARM_DESC(slc_bypass, "SLC bypass mode");
#if defined(HW_AX2) || defined(CONFIG_VHA_DUMMY_SIMULATE_HW_PROCESSING_TIME)
static uint32_t low_latency = VHA_LL_SW_KICK;
#elif defined(HW_AX3) && defined(VHA_USE_LO_PRI_SUB_SEGMENTS)
static uint32_t low_latency = VHA_LL_DISABLED;
#else
static uint32_t low_latency = VHA_LL_SELF_KICK;
#endif
module_param(low_latency, uint, 0444);
MODULE_PARM_DESC(low_latency, "Low latency mode: 0-disabled, 1-sw kick, 2-self kick");
static bool zero_buffers;
module_param(zero_buffers, bool, 0444);
MODULE_PARM_DESC(zero_buffers, "fill every allocated buffer with zeros");
static bool dump_buff_digest = 0;
module_param(dump_buff_digest, bool, 0444);
MODULE_PARM_DESC(dump_buff_digest, "Calculate & dump digest for in/out buffers. This is crc32");
static unsigned long onchipmem_phys_start= VHA_OCM_ADDR_START;
module_param(onchipmem_phys_start, ulong, 0444);
MODULE_PARM_DESC(onchipmem_phys_start,
"Physical address of start of on-chip ram. '0xFs' means that ocm is disabled");
static uint32_t onchipmem_size;
module_param(onchipmem_size, uint, 0444);
MODULE_PARM_DESC(onchipmem_size,
"Size of on-chip memory in bytes");
/* bringup test: force MMU fault with MMU base register */
static bool test_mmu_base_pf;
module_param(test_mmu_base_pf, bool, 0444);
MODULE_PARM_DESC(test_mmu_base_pf,
"Bringup test: force MMU page fault on first access");
/* bringup test: do not map into the device after the Nth buffer */
static int32_t test_mmu_no_map_count = -1;
module_param(test_mmu_no_map_count, int, 0444);
MODULE_PARM_DESC(test_mmu_no_map_count,
"Bringup test: force MMU page faults if count >= 0");
#ifdef VHA_SCF
static bool parity_disable = false;
module_param(parity_disable, bool, 0444);
MODULE_PARM_DESC(parity_disable,
"if Y, the core parity feature will be disabled, if it is supported");
static bool confirm_config_reg = false;
module_param(confirm_config_reg, bool, 0444);
MODULE_PARM_DESC(confirm_config_reg,
"Enables confirmation of register writes");
#endif
static bool test_without_bvnc_check;
module_param(test_without_bvnc_check, bool, 0444);
MODULE_PARM_DESC(test_without_bvnc_check,
"When set BVNC check is ignored, allowing to kick the hw");
/* Fault inject parameter is only applicable when
* kernel fault injection feature is enabled
* in the kernel options -> CONFIG_FAULT_INJECTION=y
* See Documentation/fault-injection/
*/
static uint8_t fault_inject;
module_param(fault_inject, byte, 0444);
MODULE_PARM_DESC(fault_inject,
"Enable fault injection using bitwise value: 1-open,2-read,4-write,8-ioctl,16-mmap,32-cmd worker,64-irq worker,128-user space");
/* Interval in milliseconds for testing/simulating system suspend/resume functionality */
static uint8_t suspend_interval_msec;
module_param(suspend_interval_msec, byte, 0444);
MODULE_PARM_DESC(suspend_interval_msec,
"Test suspend/resume interval, 0=disabled, otherwise defines interval in milliseconds");
#ifdef VHA_SCF
static bool cnn_combined_crc_enable = true;
#else
static bool cnn_combined_crc_enable = false;
#endif
module_param(cnn_combined_crc_enable, bool, 0444);
MODULE_PARM_DESC(cnn_combined_crc_enable,
"Enables the combined CRC feature");
#ifdef VHA_SCF
static u32 swd_period = 10;
module_param(swd_period, uint, 0444);
MODULE_PARM_DESC(swd_period,
"The timer expiration period in miliseconds, 0=disable");
static unsigned long swd_timeout_default = 0;
module_param(swd_timeout_default, ulong, 0444);
MODULE_PARM_DESC(swd_timeout_default,
"The default expected execution time in us, 0=use MBS values only");
static u32 swd_timeout_m0 = 100;
module_param(swd_timeout_m0, uint, 0444);
MODULE_PARM_DESC(swd_timeout_m0,
"The m0 value in the expected execution time equation: T = (T0 * m0)/100 + m1");
static u32 swd_timeout_m1 = 10000;
module_param(swd_timeout_m1, uint, 0444);
MODULE_PARM_DESC(swd_timeout_m1,
"The m1 value in the expected execution time equation: T = (T0 * m0)/100 + m1");
#endif
/* Event observers, to be notified when significant events occur */
struct vha_observers vha_observers;
/* Driver context */
static struct {
/* Available driver memory heaps. List of <struct vha_heap> */
struct list_head heaps;
/* Memory Management context for driver */
struct mem_ctx *mem_ctx;
/* List of associated <struct vha_dev> */
struct list_head devices;
unsigned int num_devs;
int initialised;
} drv;
/* Session id counter. */
static uint32_t vha_session_id_cnt = 0;
/* Reset counter. */
static uint32_t vha_reset_cnt = 0;
static void cmd_worker(struct work_struct *work);
static const size_t mmu_page_size_kb_lut[] =
{ 4096, 16384, 65536, 262144, 1048576, 2097152};
#ifdef CONFIG_FUNCTION_ERROR_INJECTION
noinline int __IOPOLL64_RET(int ret) {
return ret;
}
#include <asm-generic/error-injection.h>
/* this is the placeholder function to support error code injection from
* all IOPOLL_PDUMP* macros
*/
ALLOW_ERROR_INJECTION(__IOPOLL64_RET, ERRNO);
#ifdef VHA_EVENT_INJECT
/*
* called in __handle_event_injection()
* if normal circumstances, return 0 and do not inject EVENT
* otherwise, return -errno
*/
noinline int __EVENT_INJECT(void) {
return 0;
}
ALLOW_ERROR_INJECTION(__EVENT_INJECT, ERRNO);
#endif /* VHA_EVENT_INJECT */
#endif
/* Calculate current timespan for the given timestamp */
bool get_timespan_us(struct TIMESPEC *from, struct TIMESPEC *to, uint64_t *result)
{
long long total = 0;
if (!TIMESPEC_VALID(from) || !TIMESPEC_VALID(to))
return false;
if (TIMESPEC_COMPARE(from, to) >= 0)
return false;
total = NSEC_PER_SEC * to->tv_sec +
to->tv_nsec;
total -= NSEC_PER_SEC * from->tv_sec +
from->tv_nsec;
do_div(total, 1000UL);
*result = total;
return true;
}
/* Used for simulating system level suspend/resume functionality */
static void suspend_test_worker(struct work_struct *work)
{
struct vha_dev *vha = container_of(work, struct vha_dev, suspend_dwork.work);
int ret;
/* Make resume/suspend cycle */
ret = vha_suspend_dev(vha->dev);
WARN_ON(ret != 0);
vha_resume_dev(vha->dev);
mutex_lock(&vha->lock);
/* Retrigger suspend worker */
schedule_delayed_work(&vha->suspend_dwork,
msecs_to_jiffies(vha->suspend_interval_msec));
mutex_unlock(&vha->lock);
}
/*
* Initialize common platform (driver) memory heaps.
* device (cluster) heaps are initialized in vha_init()
*/
int vha_init_plat_heaps(const struct heap_config heap_configs[], int heaps)
{
int i;
int ret = 0;
/* Initialise memory management component */
for (i = 0; i < heaps; i++) {
struct vha_heap *heap;
pr_debug("%s: adding platform heap of type %d\n",
__func__, heap_configs[i].type);
heap = kzalloc(sizeof(struct vha_heap), GFP_KERNEL);
if (!heap) {
ret = -ENOMEM;
goto drv_heap_add_failed;
}
heap->global = true;
ret = img_mem_add_heap(&heap_configs[i], &heap->id);
if (ret < 0) {
pr_err("%s: failed to init platform heap (type %d)!\n",
__func__, heap_configs[i].type);
kfree(heap);
goto drv_heap_add_failed;
}
list_add(&heap->list, &drv.heaps);
}
return ret;
drv_heap_add_failed:
while (!list_empty(&drv.heaps)) {
struct vha_heap *heap;
heap = list_first_entry(&drv.heaps, struct vha_heap, list);
list_del(&heap->list);
img_mem_del_heap(heap->id);
kfree(heap);
}
return ret;
}
int vha_early_init(void)
{
int ret;
INIT_LIST_HEAD(&drv.heaps);
INIT_LIST_HEAD(&drv.devices);
/* Create memory management context for HW buffers */
ret = img_mem_create_proc_ctx(&drv.mem_ctx);
if (ret) {
pr_err("%s: failed to create mem context (err:%d)!\n",
__func__, ret);
drv.mem_ctx = NULL;
}
return ret;
}
/*
* Lazy intialization of main driver context (when first core is probed)
*/
static int vha_init(struct vha_dev *vha,
const struct heap_config heap_configs[], int heaps)
{
struct device *dev = vha->dev;
int ret, i;
#ifdef CONFIG_HW_MULTICORE
ret = vha_dev_scheduler_init(vha);
if (ret != 0) {
dev_err(dev, "%s: failed initializing scheduler!\n", __func__);
return ret;
}
if (!vha_dev_dbg_params_init(vha)) {
dev_err(dev, "%s: invalid debug params detected!\n", __func__);
return -EINVAL;
}
#endif
/* Initialise local device (cluster) heaps */
for (i = 0; i < heaps; i++) {
struct vha_heap *heap;
dev_dbg(dev, "%s: adding device heap of type %d\n",
__func__, heap_configs[i].type);
heap = kzalloc(sizeof(struct vha_heap), GFP_KERNEL);
if (!heap) {
ret = -ENOMEM;
goto heap_add_failed;
}
ret = img_mem_add_heap(&heap_configs[i], &heap->id);
if (ret < 0) {
dev_err(dev, "%s: failed to init device heap (type %d)!\n",
__func__, heap_configs[i].type);
kfree(heap);
goto heap_add_failed;
}
list_add(&heap->list, &vha->heaps);
}
/* now copy platform (global) heap id's to device vha_heap list, the global heap id's are
* not owned by vha_dev anyway (heap->global=true)
* This is done for vha_ioctl_query_heaps() to be able to report both platform
* and device heaps easily. */
{
struct list_head* pos;
list_for_each_prev(pos, &drv.heaps) {
struct vha_heap* heap = list_entry(pos, struct vha_heap, list);
struct vha_heap* heap_copy = kmemdup(heap, sizeof(*heap), GFP_KERNEL);
if(!heap_copy) {
ret = -ENOMEM;
goto heap_add_failed;
}
INIT_LIST_HEAD(&heap_copy->list);
list_add(&heap_copy->list, &vha->heaps);
}
}
/* initialize local ocm cluster heaps */
if (vha->hw_props.locm_size_bytes && onchipmem_phys_start == ~0)
dev_warn(dev, "%s: Onchip memory physical address not set!\n",
__func__);
/* OCM heap type is automatically appended */
if (vha->hw_props.locm_size_bytes && onchipmem_phys_start != ~0) {
struct heap_config heap_cfg;
struct vha_heap *heap;
memset(&heap_cfg, 0, sizeof(heap_cfg));
heap_cfg.type = IMG_MEM_HEAP_TYPE_OCM;
heap_cfg.options.ocm.phys = onchipmem_phys_start;
heap_cfg.options.ocm.size = vha->hw_props.locm_size_bytes;
heap_cfg.options.ocm.hattr = IMG_MEM_HEAP_ATTR_LOCAL;
dev_dbg(dev, "%s: adding heap of type %d\n",
__func__, heap_cfg.type);
heap = kzalloc(sizeof(struct vha_heap), GFP_KERNEL);
if (!heap) {
ret = -ENOMEM;
goto heap_add_failed;
}
ret = img_mem_add_heap(&heap_cfg, &heap->id);
if (ret < 0) {
dev_err(dev, "%s: failed to init heap (type %d)!\n",
__func__, heap_cfg.type);
kfree(heap);
goto heap_add_failed;
}
list_add(&heap->list, &vha->heaps);
}
#ifdef CONFIG_HW_MULTICORE
if (vha->hw_props.socm_size_bytes && onchipmem_phys_start != ~0) {
struct heap_config heap_cfg;
struct vha_heap *heap;
memset(&heap_cfg, 0, sizeof(heap_cfg));
heap_cfg.type = IMG_MEM_HEAP_TYPE_OCM;
heap_cfg.options.ocm.phys = onchipmem_phys_start +
vha->hw_props.locm_size_bytes + IMG_MEM_VA_GUARD_GAP;
heap_cfg.options.ocm.size = vha->hw_props.socm_size_bytes;
heap_cfg.options.ocm.hattr = IMG_MEM_HEAP_ATTR_SHARED;
dev_dbg(dev, "%s: adding heap of type %d\n",
__func__, heap_cfg.type);
heap = kzalloc(sizeof(struct vha_heap), GFP_KERNEL);
if (!heap) {
ret = -ENOMEM;
goto heap_add_failed;
}
ret = img_mem_add_heap(&heap_cfg, &heap->id);
if (ret < 0) {
dev_err(dev, "%s: failed to init heap (type %d)!\n",
__func__, heap_cfg.type);
kfree(heap);
goto heap_add_failed;
}
list_add(&heap->list, &vha->heaps);
}
#endif
{
/* now get the last entry and make it responsible for internal allocations
* use last entry because list_add() inserts at the head
* When choosing the internal alloc heap, the device local heaps take precedence over
* global platform heaps */
struct vha_heap* heap = list_last_entry(&vha->heaps, struct vha_heap, list);
if(!heap) {
dev_err(dev, "%s: failed to locate heap for internal alloc\n",
__func__);
ret = -EINVAL;
/* Loop registered heaps just for sanity */
goto heap_add_failed;
}
vha->int_heap_id = heap->id;
dev_dbg(dev, "%s: using heap %d for internal alloc\n",
__func__, vha->int_heap_id);
}
/* Do not proceed if internal heap not defined */
drv.initialised = 1;
dev_dbg(dev, "%s: vha drv init done\n", __func__);
return 0;
heap_add_failed:
while (!list_empty(&vha->heaps)) {
struct vha_heap *heap;
heap = list_first_entry(&vha->heaps, struct vha_heap, list);
list_del(&heap->list);
if(!heap->global)
img_mem_del_heap(heap->id);
kfree(heap);
}
return ret;
}
int vha_deinit(void)
{
/* Destroy memory management context */
if (drv.mem_ctx) {
size_t mem_usage;
uint32_t MB, bytes, kB;
img_mem_get_usage(drv.mem_ctx, &mem_usage, NULL);
MB = mem_usage / (1024 * 1024);
bytes = mem_usage - (MB * (1024 * 1024));
kB = (bytes * 1000) / (1024 * 1024);
pr_debug("%s: Total kernel memory used: %u.%u MB\n",
__func__, MB, kB);
img_mem_destroy_proc_ctx(drv.mem_ctx);
drv.mem_ctx = NULL;
}
/* Deinitialize memory management component */
while (!list_empty(&drv.heaps)) {
struct vha_heap *heap;
heap = list_first_entry(&drv.heaps, struct vha_heap, list);
BUG_ON(!heap->global);
list_del(&heap->list);
img_mem_del_heap(heap->id);
kfree(heap);
}
drv.initialised = 0;
return 0;
}
/*
* Returns: true if hardware has required capabilities, false otherwise.
* Implementation is a simple check of expected BVNC against hw CORE_ID
*/
bool vha_dev_check_hw_capab(struct vha_dev* vha, uint64_t expected_hw_capab)
{
uint64_t __maybe_unused hw = vha->hw_props.core_id
& VHA_CR_CORE_ID_BVNC_CLRMSK;
uint64_t __maybe_unused mbs = expected_hw_capab
& VHA_CR_CORE_ID_BVNC_CLRMSK;
if (!test_without_bvnc_check) {
img_pdump_printf(
"IF SKIP_COREID_CHECK\n"
"COM Skip COREID Check\n"
"ELSE SKIP_COREID_CHECK\n"
"COM CHECKING CORE_ID: expecting BVNC:%llu.%llu.%llu.%llu\n",
core_id_quad(expected_hw_capab));
IOPOLL64_PDUMP(expected_hw_capab, 1, 1,
VHA_CR_CORE_ID_BVNC_CLRMSK,
VHA_CR_CORE_ID);
img_pdump_printf(
"FI SKIP_COREID_CHECK\n");
}
if ((expected_hw_capab >> 48) != HW_SERIES) {
dev_err(vha->dev,
"%s: network was compiled for incorrect hardware series: expected %llu / found %u\n",
__func__,
(expected_hw_capab >> 48), HW_SERIES);
return false;
}
#ifndef CONFIG_VHA_DUMMY
if (hw != mbs) {
dev_warn(vha->dev,
"%s: network was compiled for an incorrect hardware variant (BVNC): "
"found %llu.%llu.%llu.%llu, expected %llu.%llu.%llu.%llu\n",
__func__,
core_id_quad(vha->hw_props.core_id),
core_id_quad(expected_hw_capab));
/* Conditionally allow the hw to be kicked */
if (test_without_bvnc_check)
dev_warn(vha->dev, "%s: trying to kick the hw ... ", __func__);
else {
dev_err(vha->dev, "%s: can't kick the hardware!", __func__);
return false;
}
}
#endif
return true;
}
/* notify the user space if a response msg is ready */
void vha_cmd_notify(struct vha_cmd *cmd)
{
struct vha_session *session = cmd->session;
struct vha_rsp *rsp = cmd->rsp;
dev_dbg(session->vha->dev, "%s: 0x%08x/%u\n",
__func__, cmd->user_cmd.cmd_id, session->id);
if (rsp) {
cmd->rsp = NULL;
list_add_tail(&rsp->list, &session->rsps);
}
wake_up(&session->wq);
/* we are done with this cmd, let's free it */
list_del(&cmd->list[cmd->user_cmd.priority]);
kfree(cmd);
}
static void vha_measure_core_freq(struct vha_dev *vha)
{
if (vha->stats.last_proc_us) {
uint64_t proc = vha->stats.last_proc_us;
do_div(proc, 1000UL);
if (proc) {
uint64_t cycles = vha->calibration_cycles;
do_div(cycles, proc);
vha->freq_khz = cycles;
dev_info(vha->dev,
"%s: Measured core clock frequency[kHz]: %u\n",
__func__, vha->freq_khz);
return;
}
}
dev_info(vha->dev,
"%s: Can't measure core clock frequency!\n",
__func__);
}
bool vha_check_calibration(struct vha_dev *vha)
{
if (vha->stats.last_proc_us) {
/* Core may have been kicked to
* measure frequency */
if (vha->do_calibration) {
vha_dev_stop(vha, true);
vha_measure_core_freq(vha);
vha->do_calibration = false;
/* Something may have been scheduled in
* the middle so poke the worker */
vha_chk_cmd_queues(vha, false);
return true;
}
}
return false;
}
/*
* A session represents a single device and a set of buffers
* to be used for inferences.
* If required, buffers will be allocated for hardware CRC and DEBUG.
*/
int vha_add_session(struct vha_session *session)
{
struct vha_dev *vha = session->vha;
int ret;
struct mmu_config mmu_config;
int ctx_id;
uint8_t pri;
img_pdump_printf("-- OPEN_BEGIN\n");
img_pdump_printf("-- VHA driver session started\n");
ret = mutex_lock_interruptible(&vha->lock);
if (ret)
return ret;
#ifdef CONFIG_VHA_DUMMY
if (list_empty(&vha->sessions) && !vha->do_calibration)
vha_dev_start(vha);
#endif
idr_init(&session->onchip_maps);
memset(&mmu_config, 0, sizeof(mmu_config));
/* Create a memory context for this session */
if (vha->mmu_mode == VHA_MMU_DISABLED) {
/* if MMU is disabled,
* bypass the mmu hw layer,
* but still need do the buffer
* allocation through img_mem api
*/
mmu_config.bypass_hw = true;
#ifdef CONFIG_HW_MULTICORE
mmu_config.bypass_offset = IMG_MEM_VA_HEAP1_BASE;
#endif
}
#ifdef VHA_SCF
/* Do not calculate parity when core does not support it,
* or we forced the core to disable it */
if (vha->hw_props.supported.parity &&
!vha->parity_disable) {
mmu_config.use_pte_parity = true;
dev_dbg(vha->dev,
"%s: Enabling MMU parity protection!\n",
__func__);
}
#endif
mmu_config.addr_width = vha->hw_props.mmu_width;
mmu_config.alloc_attr = IMG_MEM_ATTR_MMU | /* Indicate MMU allocation */
IMG_MEM_ATTR_WRITECOMBINE;
mmu_config.page_size = mmu_page_size_kb_lut[vha->mmu_page_size];
img_pdump_printf("-- MMU context: using %zukB MMU pages, %lukB CPU pages\n",
mmu_page_size_kb_lut[vha->mmu_page_size]/1024, PAGE_SIZE/1024);
/* Update current MMU page size, so that the correct
* granularity is used when generating virtual addresses */
vha->hw_props.mmu_pagesize = mmu_config.page_size;
/* Update clock frequency stored in props */
vha->hw_props.clock_freq = vha->freq_khz;
for (ctx_id = 0; ctx_id < ARRAY_SIZE(session->mmu_ctxs); ctx_id++) {
ret = img_mmu_ctx_create(vha->dev, &mmu_config,
session->mem_ctx, vha->int_heap_id,
vha_mmu_callback, session,
&session->mmu_ctxs[ctx_id].ctx);
if (ret < 0) {
dev_err(vha->dev, "%s: failed to create sw mmu context%d!\n",
__func__, ctx_id);
goto out_unlock;
}
if (vha->mmu_mode != VHA_MMU_DISABLED) {
/* Store mmu context id */
session->mmu_ctxs[ctx_id].id = ret;
ret = img_mmu_get_pc(session->mmu_ctxs[ctx_id].ctx,
&session->mmu_ctxs[ctx_id].pc_baddr,
&session->mmu_ctxs[ctx_id].pc_bufid);
if (ret) {
dev_err(vha->dev, "%s: failed to get PC for context%d!\n",
__func__, ctx_id);
ret = -EFAULT;
goto out_free_mmu_ctx;
}
}
}
#ifndef CONFIG_HW_MULTICORE
if (vha->hw_props.locm_size_bytes && onchipmem_phys_start != ~0) {
/* OCM data is considered as IO (or shared)*/
ret = img_mmu_init_cache(session->mmu_ctxs[VHA_MMU_REQ_IO_CTXID].ctx,
onchipmem_phys_start, vha->hw_props.locm_size_bytes
#if defined(CFG_SYS_VAGUS)
+ sizeof(uint32_t)
#endif
);
if (ret < 0) {
dev_err(vha->dev, "%s: failed to create init cache!\n",
__func__);
goto out_free_mmu_ctx;
}
vha_dev_ocm_configure(vha);
}
#endif
/* enable CRC and DEBUG registers */
ret = vha_dbg_create_hwbufs(session);
if (ret)
goto out_free_mmu_ctx;
img_pdump_printf("-- OPEN_END\n");
/* Used for simulating system level suspend/resume functionality */
if (list_empty(&vha->sessions) && vha->suspend_interval_msec) {
INIT_DELAYED_WORK(&vha->suspend_dwork, suspend_test_worker);
/* Start suspend worker */
schedule_delayed_work(&vha->suspend_dwork,
msecs_to_jiffies(vha->suspend_interval_msec));
}
/* Assign session id. */
session->id = vha_session_id_cnt++;
list_add_tail(&session->list, &vha->sessions);
for (pri = 0; pri < VHA_MAX_PRIORITIES; pri++) {
struct vha_session *aux_head = list_prev_entry(session, list);
list_add(&session->sched_list[pri], &aux_head->sched_list[pri]);
}
/* All mmu contextes are successfully created,
it is safe to incremet the counters and assign id. */
if (vha->mmu_mode != VHA_MMU_DISABLED)
for (ctx_id = 0; ctx_id < ARRAY_SIZE(session->mmu_ctxs); ctx_id++) {
uint8_t hw_ctxid = 0;
/* Assign mmu hardware context */
hw_ctxid = VHA_MMU_GET_CTXID(session);
hw_ctxid += (VHA_MMU_AUX_HW_CTX_SHIFT*ctx_id);
vha->mmu_ctxs[hw_ctxid]++;
session->mmu_ctxs[ctx_id].hw_id = hw_ctxid;
}
dev_dbg(vha->dev,
"%s: %p ctxid:%d\n", __func__, session,
session->mmu_ctxs[VHA_MMU_REQ_MODEL_CTXID].id);
trace_vha_session_in(session->id, 0);
mutex_unlock(&vha->lock);
return ret;
out_free_mmu_ctx:
for (ctx_id = 0; ctx_id < ARRAY_SIZE(session->mmu_ctxs); ctx_id++)
if (session->mmu_ctxs[ctx_id].ctx)
img_mmu_ctx_destroy(session->mmu_ctxs[ctx_id].ctx);
out_unlock:
mutex_unlock(&vha->lock);
return ret;
}
static void vha_clean_onchip_maps(struct vha_session *session, struct vha_buffer *buf)
{
struct vha_onchip_map *onchip_map = NULL, *tmp = NULL;
WARN_ON(!buf);
WARN_ON(!session);
list_for_each_entry_safe(onchip_map, tmp, &buf->onchip_maps, list) {
idr_remove(&session->onchip_maps, onchip_map->mapid);
list_del(&onchip_map->list);
kfree(onchip_map);
}
}
#ifdef KERNEL_DMA_FENCE_SUPPORT
void vha_rm_buf_fence(struct vha_session *session, struct vha_buffer *buf)
{
struct vha_buf_sync_info *sync_info = &buf->sync_info;
img_mem_remove_fence(session->mem_ctx, buf->id);
if (sync_info->in_fence) {
if (!dma_fence_is_signaled(sync_info->in_fence))
dma_fence_remove_callback(sync_info->in_fence, &sync_info->in_sync_cb);
if (sync_info->in_sync_file) {
fput(sync_info->in_sync_file);
sync_info->in_sync_file = NULL;
}
sync_info->in_sync_fd = VHA_SYNC_NONE;
dma_fence_put(sync_info->in_fence);
sync_info->in_fence = NULL;
memset(&sync_info->in_sync_cb, 0, sizeof(struct dma_fence_cb));
}
}
#endif
#if defined(VHA_SCF) && defined(CONFIG_HW_MULTICORE)
void vha_start_swd(struct vha_dev *vha, int cmd_idx)
{
if (vha->swd_period) {
schedule_delayed_work(&vha->swd_dwork, msecs_to_jiffies(vha->swd_period));
}
}
#endif
void vha_rm_session(struct vha_session *session)
{
struct vha_dev *vha = session->vha;
struct vha_session *cur_session, *tmp_session;
struct vha_rsp *cur_rsp, *tmp_rsp;
struct vha_buffer *cur_buf, *tmp_buf;
bool reschedule = false;
int ctx_id;
uint8_t pri;
mutex_lock(&vha->lock);
img_pdump_printf("-- FREE_END\n");
session->freeing = false;
img_pdump_printf("-- CLOSE_BEGIN\n");
/* Remove pend/queued session commands. */
reschedule = vha_rm_session_cmds(session);
/* Remove responses for session related commands. */
list_for_each_entry_safe(cur_rsp, tmp_rsp, &session->rsps, list) {
dev_warn(vha->dev,
"Removing a session while the rsp is still pending\n");
list_del(&cur_rsp->list);
kfree(cur_rsp);
}
/* Disable CRC and DEBUG capture. */
#ifdef CONFIG_HW_MULTICORE
vha_dbg_stop_hwbufs(session, vha->full_core_mask);
#else
vha_dbg_stop_hwbufs(session, 0);
#endif
vha_dbg_destroy_hwbufs(session);
list_for_each_entry_safe(cur_buf, tmp_buf, &session->bufs, list) {
dev_warn(vha->dev,
"Removing a session while the buffer wasn't freed\n");
#ifdef KERNEL_DMA_FENCE_SUPPORT
vha_rm_buf_fence(session, cur_buf);
#endif
vha_clean_onchip_maps(session, cur_buf);
list_del(&cur_buf->list);
kfree(cur_buf);
}
/* Remove link from VHA's list. */
list_for_each_entry_safe(cur_session, tmp_session,
&vha->sessions, list) {
if (cur_session == session)
list_del(&cur_session->list);
}
for (pri = 0; pri < VHA_MAX_PRIORITIES; pri++) {
list_for_each_entry_safe(cur_session, tmp_session,
&vha->sched_sessions[pri], sched_list[pri]) {
if (cur_session == session)
list_del(&cur_session->sched_list[pri]);
}
}
/* Reset hardware if required. */
if ((list_empty(&vha->sessions) && !vha->do_calibration)
|| reschedule
)
vha_dev_stop(vha, reschedule);
#ifndef CONFIG_HW_MULTICORE
img_mmu_clear_cache(session->mmu_ctxs[VHA_MMU_REQ_IO_CTXID].ctx);
#endif
/* Delete session's MMU memory contexts. */
for (ctx_id = 0; ctx_id < ARRAY_SIZE(session->mmu_ctxs); ctx_id++) {
img_mmu_ctx_destroy(session->mmu_ctxs[ctx_id].ctx);
if (vha->mmu_mode != VHA_MMU_DISABLED) {
uint8_t hw_ctxid = session->mmu_ctxs[ctx_id].hw_id;
WARN_ON(!vha->mmu_ctxs[hw_ctxid]);
if (vha->mmu_ctxs[hw_ctxid])
vha->mmu_ctxs[hw_ctxid]--;
}
}
/* Update mem stats - max memory usage in this session. */
img_mem_get_usage(session->mem_ctx,
(size_t *)&vha->stats.mem_usage_last, NULL);
{
uint32_t MB = vha->stats.mem_usage_last / (1024 * 1024);
uint32_t bytes = vha->stats.mem_usage_last -
(MB * (1024 * 1024));
uint32_t kB = (bytes * 1000) / (1024 * 1024);
dev_dbg(vha->dev,
"%s: Total user memory used in session: %u.%u MB\n",
__func__, MB, kB);
}
img_mmu_get_usage(session->mem_ctx,
(size_t *)&vha->stats.mmu_usage_last, NULL);
vha->active_mmu_ctx = VHA_INVALID_ID;
img_pdump_printf("-- VHA driver session complete\n");
img_pdump_printf("-- CLOSE_END\n");
/* Used for simulating system level suspend/resume functionality */
if (list_empty(&vha->sessions) && vha->suspend_interval_msec) {
mutex_unlock(&vha->lock);
flush_scheduled_work();
cancel_delayed_work_sync(&vha->suspend_dwork);
mutex_lock(&vha->lock);
}
trace_vha_session_out(session->id, session->kicks);
mutex_unlock(&vha->lock);
/* Reschedule once the session is removed. */
if (reschedule)
vha_chk_cmd_queues(vha, true);
}
static int vha_alloc_common(struct vha_dev *vha)
{
#if 0
img_pdump_printf("-- INIT_BEGIN\n");
img_pdump_printf("-- INIT_END\n");
#endif
return 0;
}
/****************** vha sysfs definition *************************************/
static ssize_t
BVNC_show(struct device *dev, struct device_attribute *attr, char *buf)
{
struct vha_dev *vha = vha_dev_get_drvdata(dev);
struct vha_hw_props *props = &vha->hw_props;
return snprintf(buf, 4*6, "%hu.%hu.%hu.%hu\n",
(unsigned short)(props->core_id >> 48),
(unsigned short)(props->core_id >> 32),
(unsigned short)(props->core_id >> 16),
(unsigned short)props->core_id);
}
static ssize_t log_store(struct device *dev, struct device_attribute *attr,
const char *buf, size_t count)
{
struct vha_dev *vha = vha_dev_get_drvdata(dev);
mutex_lock(&vha->lock);
memset(&vha->stats, 0, sizeof(struct vha_stats));
vha_reset_cnt++;
mutex_unlock(&vha->lock);
return count;
}
static ssize_t log_show(struct device *dev, struct device_attribute *attr, char *buf)
{
ssize_t len = 0;
struct vha_dev *vha = vha_dev_get_drvdata(dev);
struct vha_session *session = NULL;
int ret = 0;
size_t mem_val = 0;
mutex_lock(&vha->lock);
len += scnprintf(buf + len, PAGE_SIZE - len,
"[NPU] Driver Version: " VERSION_STRING "\n"
"---------------------------------------MODULE STATUS--------------------------------------\n"
"DevId DevStatus DevSessionNum DevLoadingAvg_%% TotalTasks CompletedTasks\n"
" %d %d %d %d %lld %lld\n"
"-----------------------------------------MEM INFO-----------------------------------------\n"
"MMU_page_size MMU_mode\n"
" %ld %d\n"
"---------------------------------------INSTANCE INFO--------------------------------------\n"
"AvgHwProcUs LastHwProcUs TotalHwProcUs LastMemUsage LastMmuUsage\n"
" %lld %lld %lld %d %d\n"
"--------------------------------------EXCEPTION INFO--------------------------------------\n"
"total_failures reset\n"
" %lld %d\n",
vha->id, vha->state, vha_session_id_cnt, vha->stats.cnn_utilization/10,
vha->stats.cnn_kicks, vha->stats.cnn_kicks_completed,
mmu_page_size_kb_lut[vha->mmu_page_size], vha->mmu_mode,
vha->stats.cnn_avg_proc_us, vha->stats.last_proc_us, vha->stats.cnn_total_proc_us,
vha->stats.mem_usage_last, vha->stats.mmu_usage_last,
vha->stats.total_failures, vha_reset_cnt);
list_for_each_entry(session, &vha->sessions, list) {
if (!ret++) {
len += scnprintf(buf + len, PAGE_SIZE - len,
"---------------------------------------SESSION INFO--------------------------------------\n"
"SessionId MemUsage Cmds AvgProcUs LastProcUs TotalProcUs\n");
}
img_mem_get_usage(session->mem_ctx, &mem_val, NULL);
len += scnprintf(buf + len, PAGE_SIZE - len,
" %-7d %-12ld %-8lld %-13lld %-13lld %lld\n",
session->id, mem_val, session->kicks, session->avg_proc_us,
session->last_proc_us, session->total_proc_us);
}
mutex_unlock(&vha->lock);
return len;
}
static DEVICE_ATTR_RO(BVNC);
static struct attribute *vha_sysfs_entries[] = {
&dev_attr_BVNC.attr,
NULL,
};
static struct device_attribute dev_attr_log = __ATTR(log, 0664, log_show, log_store);
static struct attribute *vha_sysfs_attrs[] = {
&dev_attr_log.attr,
NULL,
};
static const struct attribute_group vha_attr_group = {
.name = NULL, /* put in device directory */
.attrs = vha_sysfs_entries,
};
static struct attribute_group vha_dev_attr_group = {
.name = "info", /* put in info directory */
.attrs = vha_sysfs_attrs,
};
void vha_sched_apm(struct vha_dev *vha, struct vha_apm_work *apm_work)
{
unsigned long work_at = jiffies + msecs_to_jiffies(apm_work->delay_ms);
int ret;
dev_dbg(vha->dev, "%s: core_mask:%#x delay:%d\n",
__func__, apm_work->core_mask, apm_work->delay_ms);
/*
* Try to queue the work.
*/
ret = schedule_delayed_work(&apm_work->dwork,
work_at - jiffies);
if (!ret) {
/* Work is already in the queue.
* Canceling & rescheduling might be problematic,
* so just modify to postpone.
*/
mod_delayed_work(system_wq, &apm_work->dwork,
work_at - jiffies);
}
}
static void vha_apm_worker(struct work_struct *work)
{
struct vha_apm_work *apm_work =
container_of(work, struct vha_apm_work, dwork.work);
struct vha_dev *vha = apm_work->vha;
mutex_lock(&vha->lock);
dev_dbg(vha->dev, "%s: apm expired! core_mask:%#x\n",
__func__, apm_work->core_mask);
vha_dev_apm_stop(vha, apm_work);
mutex_unlock(&vha->lock);
}
int vha_add_dev(struct device *dev,
const struct heap_config heap_configs[], const int heaps,
void *plat_data, void __iomem *reg_base, uint32_t reg_size)
{
struct vha_dev_common* vha_common;
struct vha_dev *vha;
int ret;
uint8_t id, pri;
/* Validate module params. */
ret = -EINVAL;
if (low_latency > VHA_LL_SELF_KICK) {
dev_err(dev, "%s: Unsupported low latency mode %u!\n", __func__, low_latency);
goto out_validate_params;
} else if ((mmu_mode != VHA_MMU_DISABLED) &&
(mmu_mode != VHA_MMU_DIRECT) &&
(mmu_mode != VHA_MMU_40BIT)) {
dev_err(dev, "%s: Unsupported MMU mode %u!\n", __func__, mmu_mode);
goto out_validate_params;
} else if (mmu_ctx_default >= VHA_MMU_MAX_HW_CTXS) {
dev_err(dev, "%s: Unsupported MMU context id %u!\n", __func__, mmu_ctx_default);
goto out_validate_params;
} else if (mmu_page_size > ARRAY_SIZE(mmu_page_size_kb_lut)) {
dev_err(dev, "%s: Unsupported MMU page size %u!\n", __func__, mmu_page_size);
goto out_validate_params;
}
ret = 0;
vha_common = devm_kzalloc(dev, sizeof(struct vha_dev_common), GFP_KERNEL);
if (!vha_common)
return -ENOMEM;
vha = devm_kzalloc(dev, sizeof(struct vha_dev), GFP_KERNEL);
if (!vha) {
ret = -ENOMEM;
goto out_free_dev;
}
vha_common->vha_dev = vha;
dev_dbg(dev, "%s: allocated vha_dev @ %px\n", __func__, vha);
vha->dev = dev;
vha->reg_base = reg_base;
vha->reg_size = reg_size;
vha->plat_data = plat_data;
vha->fault_inject = fault_inject;
vha->suspend_interval_msec = suspend_interval_msec;
vha->hw_bypass = hw_bypass;
vha->low_latency = low_latency;
vha->no_clock_disable = no_clock_disable;
vha->pm_delay = pm_delay;
vha->mmu_mode = mmu_mode;
vha->mmu_ctx_default = mmu_ctx_default;
vha->mmu_page_size = mmu_page_size;
vha->mmu_base_pf_test = test_mmu_base_pf;
vha->mmu_no_map_count = test_mmu_no_map_count;
vha->ocm_paddr = onchipmem_phys_start;
#ifdef VHA_SCF
vha->parity_disable = parity_disable;
vha->confirm_config_reg = confirm_config_reg;
#endif
vha->cnn_combined_crc_enable = cnn_combined_crc_enable;
vha->active_mmu_ctx = VHA_INVALID_ID;
vha->dump_buff_digest = dump_buff_digest;
/* Enable and configure pm_runtime*/
if (!pm_runtime_enabled(vha->dev))
pm_runtime_enable(vha->dev);
pm_runtime_set_autosuspend_delay(vha->dev, VHA_CORE_SUSPEND_DELAY);
pm_runtime_use_autosuspend(vha->dev);
/* Resume device so that we can read the core props */
if (pm_runtime_status_suspended(vha->dev))
pm_runtime_get_sync(vha->dev);
/* Read HW properties */
ret = vha_dev_get_props(vha, onchipmem_size);
if (ret) {
dev_err(dev, "%s: could not get vha properties at %px\n",
__func__, (__force void *)vha->reg_base);
pm_runtime_put_sync_suspend(vha->dev);
goto out_free_dev;
}
if (test_without_bvnc_check)
vha->hw_props.skip_bvnc_check = true;
mutex_init(&vha->lock);
spin_lock_init(&vha->irq_lock);
INIT_LIST_HEAD(&vha->sessions);
for (pri = 0; pri < VHA_MAX_PRIORITIES; pri++)
INIT_LIST_HEAD(&vha->sched_sessions[pri]);
INIT_LIST_HEAD(&vha->heaps);
ret = vha_init(vha, heap_configs, heaps);
if (ret) {
dev_err(dev, "%s: main component initialisation failed!",
__func__);
goto out_free_dev;
}
/* Initialise command data pump worker */
INIT_WORK(&vha->worker, cmd_worker);
#ifdef CONFIG_VHA_DUMMY_SIMULATE_HW_PROCESSING_TIME
/* Initialise hw processing time simulation worker */
#ifdef CONFIG_HW_MULTICORE
{
for (id = 0; id < vha->hw_props.num_cnn_core_devs; id ++) {
INIT_DELAYED_WORK(&vha->dummy_dworks[id].dummy_dwork,
vha_dummy_worker);
vha->dummy_dworks[id].wm_id = id;
vha->dummy_dworks[id].vha = vha;
}
}
#else
INIT_DELAYED_WORK(&vha->dummy_dwork, vha_dummy_worker);
#endif
#endif
dev_set_drvdata(dev, vha_common);
ret = vha_api_add_dev(dev, vha, drv.num_devs);
if (ret) {
dev_err(dev, "%s: failed to add UM node!", __func__);
goto out_add_dev;
}
vha_dbg_init(vha);
ret = vha_pdump_init(vha, &vha_common->pdump);
if (ret == 0)
vha->hw_props.use_pdump = true;
if (ret == -EPERM)
goto out_alloc_common;
else
ret = 0;
ret = vha_alloc_common(vha);
if (ret) {
dev_err(dev, "%s: failed to allocate common dev buffers!",
__func__);
goto out_alloc_common;
}
#ifdef CONFIG_PM_DEVFREQ
ret = vha_devfreq_init(vha->dev);
if (ret) {
dev_err(vha->dev, "failed to add vha dev to devfreq!\n");
}
#endif
pm_runtime_put_sync_autosuspend(vha->dev);
/* Add device to driver context */
list_add(&vha->list, &drv.devices);
drv.num_devs++;
if (sysfs_create_group(&dev->kobj, &vha_attr_group))
dev_err(dev, "failed to create sysfs entries\n");
if (sysfs_create_group(&dev->kobj, &vha_dev_attr_group))
dev_err(dev, "failed to create info sysfs entries\n");
vha->freq_khz = freq_khz;
#ifndef CONFIG_VHA_DUMMY
if (vha->freq_khz < 0)
vha->do_calibration = true; /* ??? OS0 ? */
if (vha->freq_khz <= 0)
vha->freq_khz = VHA_CORE_CLOCK_MHZ * 1000;
if (vha->do_calibration)
dev_info(dev, "%s: Core freq[kHz]: to be calibrated",
__func__);
else
dev_info(dev, "%s: Core freq[kHz]: %u",
__func__, vha->freq_khz);
#else
# ifdef CONFIG_VHA_DUMMY_SIMULATE_HW_PROCESSING_TIME
vha->freq_khz = VHA_CORE_CLOCK_MHZ * 1000;
dev_info(dev, "%s: Core freq[kHz]: %u (faked for DUMMY device)",
__func__, vha->freq_khz);
# endif
#endif
for (id = 0; id < vha->hw_props.num_cnn_core_devs; id++) {
vha->apm_dworks[id].vha = vha;
vha->apm_dworks[id].core_mask = 1 << id;
vha->apm_dworks[id].delay_ms = vha->pm_delay;
INIT_DELAYED_WORK(&vha->apm_dworks[id].dwork, vha_apm_worker);
}
#if defined(VHA_SCF) && defined(CONFIG_HW_MULTICORE)
/* Initialise the SW wachdog */
INIT_DELAYED_WORK(&vha->swd_dwork, wd_timer_callback);
vha->swd_period = swd_period;
vha->swd_timeout_default = swd_timeout_default;
vha->swd_timeout_m0 = swd_timeout_m0;
vha->swd_timeout_m1 = swd_timeout_m1;
#endif
return ret;
out_alloc_common:
vha_api_rm_dev(dev, vha);
vha_dbg_deinit(vha);
out_add_dev:
dev_set_drvdata(dev, NULL);
vha_deinit();
out_free_dev:
devm_kfree(dev, vha);
devm_kfree(dev, vha_common);
out_validate_params:
return ret;
}
static void vha_free_common(struct vha_dev *vha)
{
if (vha->fp_bufid) {
img_mem_free(drv.mem_ctx, vha->fp_bufid);
vha->fp_bufid = VHA_INVALID_ID;
}
}
void vha_rm_dev(struct device *dev)
{
struct vha_dev *vha;
struct vha_dev_common* vha_common;
int ret;
uint8_t id, pri;
vha_common = dev_get_drvdata(dev);
BUG_ON(vha_common == NULL);
vha = vha_common->vha_dev;
if (!vha) {
pr_err("%s: vha ptr is invalid!\n", __func__);
return;
}
if (dev != vha->dev) {
pr_err("%s: vha->dev is not properly initialised! (%p!=%p)\n", __func__, dev, vha->dev);
return;
}
flush_scheduled_work();
for (id = 0; id < vha->hw_props.num_cnn_core_devs; id++)
cancel_delayed_work_sync(&vha->apm_dworks[id].dwork);
#if defined(VHA_SCF) && defined(CONFIG_HW_MULTICORE)
cancel_delayed_work_sync(&vha->swd_dwork);
#endif
#ifdef CONFIG_VHA_DUMMY_SIMULATE_HW_PROCESSING_TIME
#ifdef CONFIG_HW_MULTICORE
{
for (id = 0; id < vha->hw_props.num_cnn_core_devs; id++)
cancel_delayed_work_sync(&vha->dummy_dworks[id].dummy_dwork);
}
#else
cancel_delayed_work_sync(&vha->dummy_dwork);
#endif
#endif
if (!pm_runtime_status_suspended(vha->dev))
pm_runtime_put_sync_suspend(vha->dev);
pm_runtime_dont_use_autosuspend(vha->dev);
pm_runtime_disable(vha->dev);
#ifdef CONFIG_PM_DEVFREQ
vha_devfreq_term(dev);
#endif
vha_free_common(vha);
#ifdef CONFIG_HW_MULTICORE
vha_dev_scheduler_deinit(vha);
#endif
while (!list_empty(&vha->heaps)) {
struct vha_heap *heap = list_first_entry(&vha->heaps, struct vha_heap, list);
list_del(&heap->list);
if(!heap->global) /* remove only device heaps */
img_mem_del_heap(heap->id);
kfree(heap);
}
ret = vha_api_rm_dev(dev, vha);
if (ret)
dev_err(dev, "%s: failed to remove UM node!\n", __func__);
list_del(&vha->sessions);
for (pri = 0; pri < VHA_MAX_PRIORITIES; pri++)
list_del(&vha->sched_sessions[pri]);
list_del(&vha->list);
list_del(&vha->heaps);
BUG_ON(!drv.num_devs--);
sysfs_remove_group(&dev->kobj, &vha_attr_group);
sysfs_remove_group(&dev->kobj, &vha_dev_attr_group);
vha_dbg_deinit(vha);
vha_pdump_deinit(&vha_common->pdump);
dev_set_drvdata(dev, NULL);
devm_kfree(dev, vha);
devm_kfree(dev, vha_common);
}
/* performs device self test operations */
int vha_dev_calibrate(struct device *dev, uint32_t cycles)
{
int ret = 0;
struct vha_dev *vha = vha_dev_get_drvdata(dev);
if (!vha) {
WARN_ON(1);
return -EFAULT;
}
mutex_lock(&vha->lock);
if (vha->do_calibration) {
vha->calibration_cycles = cycles;
dev_info(dev, "%s: Starting core frequency measurement (%d)...",
__func__, cycles);
ret = vha_dev_start(vha);
if (ret)
goto calib_err;
#if (defined(HW_AX2) || defined(CONFIG_HW_MULTICORE))
vha_cnn_start_calib(vha);
#endif
}
calib_err:
mutex_unlock(&vha->lock);
return ret;
}
/* map buffer into the device */
int vha_map_to_onchip(struct vha_session *session,
uint32_t buf_id, uint64_t virt_addr, uint32_t page_size,
unsigned int num_pages, uint32_t page_idxs[], uint32_t *mapid)
{
struct vha_dev *vha = session->vha;
struct vha_onchip_map *onchip_map = NULL;
struct vha_buffer *buf = NULL;
int map_id = *mapid;
int ret = 0;
int i = 0;
ret = mutex_lock_interruptible(&vha->lock);
if (ret)
return ret;
buf = vha_find_bufid(session, buf_id);
if (!buf) {
dev_err(vha->dev, "%s: buffer id %d not found\n", __func__, buf_id);
ret = -EINVAL;
goto out_unlock;
}
if (map_id == 0) {
onchip_map = kzalloc(sizeof(struct vha_onchip_map), GFP_KERNEL);
if (!onchip_map) {
ret = -ENOMEM;
goto out_unlock;
}
map_id = idr_alloc(&session->onchip_maps, onchip_map,
MIN_ONCHIP_MAP, MAX_ONCHIP_MAP, GFP_KERNEL);
if (map_id < 0) {
dev_err(vha->dev, "%s: idr_alloc failed\n", __func__);
ret = map_id;
goto alloc_id_failed;
}
ret = img_mmu_map(session->mmu_ctxs[VHA_MMU_REQ_IO_CTXID].ctx,
session->mem_ctx, buf_id,
virt_addr, IMG_MMU_PTE_FLAG_NONE);
if (ret) {
dev_err(vha->dev, "%s: map failed!\n", __func__);
ret = -EFAULT;
goto mmu_map_failed;
}
onchip_map->devvirt = virt_addr;
onchip_map->mapid = map_id;
onchip_map->bufid = buf_id;
list_add(&onchip_map->list, &buf->onchip_maps);
*mapid = map_id;
} else {
onchip_map = idr_find(&session->onchip_maps, map_id);
if (!onchip_map) {
dev_err(vha->dev, "%s: idr_find failed\n", __func__);
ret = -EINVAL;
goto out_unlock;
}
}
for (i = 0; i < num_pages; i++) {
ret = img_mmu_move_pg_to_cache(
session->mmu_ctxs[VHA_MMU_REQ_IO_CTXID].ctx,
session->mem_ctx, buf_id,
onchip_map->devvirt, page_size, page_idxs[i]);
if (ret) {
dev_warn(vha->dev, "%s: moving a page to on chip ram failed!\n", __func__);
goto out_unlock;
}
}
dev_dbg(vha->dev, "%s: mapped buf %s (%u) to %#llx, num_pages: %d\n",
__func__, buf->name, buf_id, virt_addr, num_pages);
mutex_unlock(&vha->lock);
return 0;
mmu_map_failed:
idr_remove(&session->onchip_maps, map_id);
alloc_id_failed:
kfree(onchip_map);
out_unlock:
mutex_unlock(&vha->lock);
return ret;
}
/* map buffer into the device */
int vha_map_buffer(struct vha_session *session,
uint32_t buf_id, uint64_t virt_addr,
uint32_t map_flags)
{
struct vha_dev *vha = session->vha;
uint32_t flags = IMG_MMU_PTE_FLAG_NONE;
struct vha_buffer *buf = NULL;
int ret = 0;
ret = mutex_lock_interruptible(&vha->lock);
if (ret)
return ret;
if ((map_flags & (VHA_MAP_FLAG_READ_ONLY|VHA_MAP_FLAG_WRITE_ONLY)) ==
(VHA_MAP_FLAG_READ_ONLY|VHA_MAP_FLAG_WRITE_ONLY)) {
dev_err(vha->dev, "%s: invalid mapping flags combination: 0x%x\n",
__func__, map_flags);
ret = -EINVAL;
goto out_unlock;
}
/* Convert permission flags to internal definitions */
if (map_flags & VHA_MAP_FLAG_READ_ONLY)
flags |= IMG_MMU_PTE_FLAG_READ_ONLY;
/* Note: VHA_MAP_FLAG_WRITE_ONLY is not supported by the mmuv3 hw */
/* Direct 1:1 mappings */
if (vha->mmu_mode == VHA_MMU_DIRECT) {
uint64_t *phys = img_mem_get_page_array(session->mem_ctx,
buf_id);
WARN_ON(!phys);
/* Override virtual address,
* only applicable for physically contiguous memory regions */
if (phys && phys[0]) {
virt_addr = phys[0];
dev_dbg(vha->dev,
"%s: using direct mapping!\n",
__func__);
} else {
dev_err(vha->dev,
"%s: not contiguous memory!\n",
__func__);
}
}
buf = vha_find_bufid(session, buf_id);
#ifdef CONFIG_HW_MULTICORE
if (buf->attr & IMG_MEM_ATTR_OCM) {
uint64_t *phys = img_mem_get_page_array(session->mem_ctx,
buf_id);
/* Virtual == physical */
buf->devvirt = phys[0];
dev_dbg(vha->dev,
"%s: buf %s (%u), is OCM buffer, no MMU mapping needed!\n",
__func__, buf->name, buf_id);
goto out_unlock;
}
#endif
/* force MMU fault after N buffer map operations */
if (vha->mmu_no_map_count != 0) {
int ctx_id;
if (map_flags & VHA_MAP_FLAG_MODEL) {
ctx_id = VHA_MMU_REQ_MODEL_CTXID;
buf->req_type = VHA_REQ_MODEL;
} else if (map_flags & VHA_MAP_FLAG_IO) {
ctx_id = VHA_MMU_REQ_IO_CTXID;
buf->req_type = VHA_REQ_IO;
} else {
WARN_ONCE(1, "No requestor flags!");
ctx_id = VHA_MMU_REQ_IO_CTXID;
buf->req_type = VHA_REQ_IO;
}
ret = img_mmu_map(session->mmu_ctxs[ctx_id].ctx,
session->mem_ctx, buf_id, virt_addr, flags);
if (ret || buf == NULL) {
dev_err(vha->dev, "%s: map failed!\n", __func__);
goto out_unlock;
}
if (vha->mmu_no_map_count >= 0)
--vha->mmu_no_map_count;
} else
dev_info(vha->dev, "Bringup test: MMU no map count = %d\n",
vha->mmu_no_map_count);
buf->devvirt = virt_addr;
dev_dbg(vha->dev, "%s: mapped buf %s (%u) to %#llx, flags: 0x%x\n",
__func__, buf->name, buf_id, virt_addr, map_flags);
out_unlock:
mutex_unlock(&vha->lock);
return ret;
}
/* unmap buffer from the device */
int vha_unmap_buffer(struct vha_session *session,
uint32_t buf_id)
{
struct vha_dev *vha = session->vha;
struct vha_buffer *buf = NULL;
int ret = 0;
int ctx_id;
ret = mutex_lock_interruptible(&vha->lock);
if (ret)
return ret;
buf = vha_find_bufid(session, buf_id);
#ifdef CONFIG_HW_MULTICORE
if (buf->attr & IMG_MEM_ATTR_OCM) {
dev_dbg(vha->dev,
"%s: buf %s (%u) is OCM buffer, no MMU unmapping needed!\n",
__func__, buf->name, buf_id);
buf->devvirt = ~0ULL;
goto out_unlock;
}
#endif
if (buf->req_type == VHA_REQ_MODEL)
ctx_id = VHA_MMU_REQ_MODEL_CTXID;
else
ctx_id = VHA_MMU_REQ_IO_CTXID;
ret = img_mmu_unmap(session->mmu_ctxs[ctx_id].ctx,
session->mem_ctx, buf_id);
if (ret || buf == NULL) {
dev_err(vha->dev, "%s: unmap failed!\n", __func__);
goto out_unlock;
}
buf->devvirt = 0ULL;
vha_clean_onchip_maps(session, buf);
dev_dbg(vha->dev, "%s: unmapped buf %s(%u)\n",
__func__, buf->name, buf_id);
out_unlock:
mutex_unlock(&vha->lock);
return ret;
}
/*
* return either dev virtual address or physical address of buffer
* phys address only applicable if contiguous memory
* virtual address only if MMU enabled
*/
uint64_t vha_buf_addr(struct vha_session *session, struct vha_buffer *buf)
{
struct vha_dev *vha = session->vha;
if (vha->mmu_mode == VHA_MMU_DISABLED) {
uint64_t *phys;
/* no-MMU mode */
if (vha->hw_props.dummy_dev)
return 0; /* no-MMU: dummy hardware */
phys = img_mem_get_page_array(session->mem_ctx, buf->id);
if (phys)
/*
* no-MMU: carveout memory
* Get the address that dev expects.
*/
return img_mem_get_dev_addr(session->mem_ctx,
buf->id, phys[0]);
dev_err(vha->dev, "%s: ERROR: buffer %x is not contiguous\n",
__func__, buf->id);
return 0; /* no-MMU: system memory */
}
/* mmu mode */
if (buf == NULL)
return 0; /* error */
return buf->devvirt; /* MMU mode: virt address */
}
struct vha_buffer *vha_find_bufid(const struct vha_session *session, uint32_t buf_id)
{
struct vha_buffer *buf;
list_for_each_entry(buf, &session->bufs, list) {
if (buf_id == buf->id)
return buf;
}
return NULL;
}
struct vha_buffer *vha_find_bufvaddr(const struct vha_session *session,
uint64_t virt_addr)
{
struct vha_buffer *buf;
list_for_each_entry(buf, &session->bufs, list) {
/* check if virtual address belongs to specific buffer */
if (virt_addr >= buf->devvirt &&
virt_addr < (buf->devvirt + buf->size))
return buf;
}
return NULL;
}
/* when a buffer is allocated or imported, it is added to session.bufs */
int vha_add_buf(struct vha_session *session,
uint32_t buf_id, size_t size, const char *name, enum img_mem_attr attr)
{
struct vha_buffer *buf = kzalloc(sizeof(*buf), GFP_KERNEL);
struct vha_dev *vha = session->vha;
int ret = 0;
dev_dbg(vha->dev, "%s buf '%.*s' id:%d\n", __func__,
(int)(sizeof(buf->name))-1, name, buf_id);
if (buf == NULL)
return -ENOMEM;
buf->id = buf_id;
buf->size = size;
strncpy(buf->name, name, sizeof(buf->name)-1);
buf->attr = attr;
buf->status = VHA_BUF_UNFILLED;
buf->session = session;
#ifdef KERNEL_DMA_FENCE_SUPPORT
buf->sync_info.in_sync_fd = VHA_SYNC_NONE;
#endif
list_add(&buf->list, &session->bufs);
INIT_LIST_HEAD(&buf->onchip_maps);
if (!(attr & IMG_MEM_ATTR_OCM))
img_pdump_printf("-- <-- New buffer name: %s\n", buf->name);
if (zero_buffers && !(buf->attr & IMG_MEM_ATTR_NOMAP)) {
ret = img_mem_map_km(session->mem_ctx, buf_id);
if (ret) {
dev_err(session->vha->dev, "failed to map buff %x to km: %d\n",
buf_id, ret);
ret = -EFAULT;
goto out_err;
}
buf->kptr = img_mem_get_kptr(session->mem_ctx, buf_id);
{
void *ptr = buf->kptr;
int max_chunk = 1 * 1024 * 1024;
while (size) {
int chunk_size = size > max_chunk ?
max_chunk : size;
pr_debug("memset buf chunk %d!\n", chunk_size);
memset(ptr, 0, chunk_size);
ptr += chunk_size;
size -= chunk_size;
schedule();
}
}
ret = img_mem_unmap_km(session->mem_ctx, buf->id);
if (ret) {
dev_err(session->vha->dev,
"%s: failed to unmap buff %x from km: %d\n",
__func__, buf->id, ret);
ret = -EFAULT;
goto out_err;
}
buf->kptr = NULL;
}
return 0;
out_err:
list_del(&buf->list);
kfree(buf);
return ret;
}
/* remove buffer from the session */
int vha_rm_buf(struct vha_session *session, uint32_t buf_id)
{
struct vha_buffer *buf = vha_find_bufid(session, buf_id);
dev_dbg(session->vha->dev, "%s buf_id:%d\n", __func__, buf_id);
if (buf == NULL) {
dev_err(session->vha->dev, "%s: could not find buf %x\n",
__func__, buf_id);
return -EINVAL;
}
#ifdef KERNEL_DMA_FENCE_SUPPORT
vha_rm_buf_fence(session, buf);
#endif
vha_clean_onchip_maps(session, buf);
list_del(&buf->list);
kfree(buf);
return 0;
}
/* process the cmd if everything is ready */
enum do_cmd_status vha_do_cmd(struct vha_cmd *cmd)
{
struct vha_session *session = cmd->session;
struct vha_dev* vha = session->vha;
/* already submitted, wait until processed */
if (cmd->in_hw)
return CMD_IN_HW;
/* check all input buffers are filled and ready to go */
if (vha_is_waiting_for_inputs(session, cmd))
return CMD_WAIT_INBUFS;
#if !defined(CONFIG_VHA_DUMMY) && !defined(CONFIG_HW_MULTICORE)
if (!session->vha->is_ready)
return CMD_HW_BUSY;
#endif
/* check hw availability (if needed) */
#ifdef CONFIG_HW_MULTICORE
/* Attempt to schedule command on available cores. */
if (vha_dev_schedule_cmd(session->vha, cmd) != 0)
#else
/* Check if the core's queue is full. */
if (vha_is_queue_full(session->vha, cmd))
#endif
return CMD_HW_BUSY;
if (cmd->user_cmd.cmd_type == VHA_CMD_CNN_SUBMIT &&
!session->vha->stats.cnn_kicks)
img_pdump_printf("-- ALLOC_END\n");
/* at this point we should be able to process the cmd */
if (vha_do_cnn_cmd(cmd) != 0)
return CMD_DONE;
return CMD_OK;
}
/* check if there is any work to be done */
static void cmd_worker(struct work_struct *work)
{
struct vha_dev *vha = container_of(work, struct vha_dev, worker);
dev_dbg(vha->dev, "%s\n", __func__);
mutex_lock(&vha->lock);
#ifdef CONFIG_FAULT_INJECTION
if (task_pid_nr(current) != vha->irq_bh_pid) {
if (vha->fault_inject & VHA_FI_CMD_WORKER)
current->make_it_fail = true;
else
current->make_it_fail = false;
}
#endif
if (vha->do_calibration) {
/* Postpone any worker tasks. */
dev_dbg(vha->dev, "%s: Postpone worker task!\n", __func__);
goto exit;
}
/* Execute the main scheduling loop. */
vha_scheduler_loop(vha);
exit:
#ifdef CONFIG_FAULT_INJECTION
if (task_pid_nr(current) != vha->irq_bh_pid) {
if (vha->fault_inject & VHA_FI_CMD_WORKER)
current->make_it_fail = false;
}
#endif
mutex_unlock(&vha->lock);
}
/* this is wrapper func for scheduling command worker task */
void vha_chk_cmd_queues(struct vha_dev *vha, bool threaded)
{
dev_dbg(vha->dev, "%s threaded:%u\n", __func__, threaded);
if (threaded) {
/* If work has been already scheduled from other context,
* the below call does nothing (returns false).
* However the worker is only used as command data pump,
* so it is not necessary to do any kind of rescheduling,
* as it will be executed anyway!
*/
schedule_work(&vha->worker); /* call asynchronously */
} else {
/* Direct calls must be always invoked
* with vha_dev.lock == locked
*/
BUG_ON(!mutex_is_locked(&vha->lock));
mutex_unlock(&vha->lock);
cmd_worker(&vha->worker); /* call synchronously */
mutex_lock(&vha->lock);
}
}
#ifdef KERNEL_DMA_FENCE_SUPPORT
/* input buffer sync callback */
static void _vha_in_buf_sync_cb(struct dma_fence *fence,
struct dma_fence_cb *cb)
{
struct vha_buffer *buf = container_of(cb, struct vha_buffer, sync_info.in_sync_cb);
vha_set_buf_status(buf->session, buf->id, VHA_BUF_FILLED_BY_SW,
VHA_SYNC_NONE, false);
fput(buf->sync_info.in_sync_file);
dma_fence_put(fence);
memset(&buf->sync_info, 0, sizeof(struct vha_buf_sync_info));
buf->sync_info.in_sync_fd = VHA_SYNC_NONE;
}
#endif
/* set buffer status per user request: either filled or unfilled */
int vha_set_buf_status(struct vha_session *session, uint32_t buf_id,
enum vha_buf_status status, int in_sync_fd, bool out_sync_sig)
{
struct vha_buffer *buf = vha_find_bufid(session, buf_id);
if (buf == NULL) {
dev_err(session->vha->dev, "%s: invalid buf id:%d\n",
__func__, buf_id);
return -EINVAL;
}
dev_dbg(session->vha->dev, "%s: id:%d curr:%d new:%d sig:%d\n",
__func__, buf->id, buf->status, status, out_sync_sig);
/* If buffer has been filled by HW,
* mark that it probably needs invalidation, not necessarily,
* as it can be the input for the next hw segment,
* and may not be mapped by the UM */
if (buf->status != VHA_BUF_FILLED_BY_HW &&
status == VHA_BUF_FILLED_BY_HW) {
buf->inval = true;
#ifdef KERNEL_DMA_FENCE_SUPPORT
buf->status = status;
#endif
}
/* If buffer has been filled by SW,
* mark that it needs flushing */
if (buf->status == VHA_BUF_UNFILLED &&
status == VHA_BUF_FILLED_BY_SW) {
buf->flush = true;
#ifdef KERNEL_DMA_FENCE_SUPPORT
if (in_sync_fd > 0) {
if (buf->sync_info.in_sync_fd < 0) {
int ret = 0;
struct file *sync_file;
struct dma_fence *fence;
sync_file = fget(in_sync_fd);
if (sync_file == NULL) {
dev_err(session->vha->dev, "%s: could not get file for fd=%d and buf %d\n",
__func__, in_sync_fd, buf_id);
return -EINVAL;
}
fence = sync_file_get_fence(in_sync_fd);
if (!fence) {
fput(sync_file);
dev_err(session->vha->dev, "%s: could not get fence for fd=%d and buf %d\n",
__func__, in_sync_fd, buf_id);
return -EINVAL;
}
ret = dma_fence_add_callback(fence, &buf->sync_info.in_sync_cb,
_vha_in_buf_sync_cb);
if (ret) {
if (dma_fence_is_signaled(fence)) {
dma_fence_put(fence);
buf->status = status;
} else
dev_err(session->vha->dev, "%s: could not set cb for fd=%d and buf %x\n",
__func__, in_sync_fd, buf_id);
fput(sync_file);
return ret;
}
buf->sync_info.in_fence = fence;
buf->sync_info.in_sync_file = sync_file;
buf->sync_info.in_sync_fd = in_sync_fd;
} else if (in_sync_fd != buf->sync_info.in_sync_fd) {
dev_err(session->vha->dev, "%s: buf %d has already assigned sync file fd=%d\n",
__func__, buf_id, in_sync_fd);
return -EINVAL;
}
}
else {
if (out_sync_sig)
img_mem_signal_fence(session->mem_ctx, buf->id);
buf->status = status;
}
#endif
}
/* If buffer has been filled by SW,
* after being filled by the hw, flush it too */
if (buf->status == VHA_BUF_FILLED_BY_HW &&
status == VHA_BUF_FILLED_BY_SW) {
buf->flush = true;
}
#ifdef KERNEL_DMA_FENCE_SUPPORT
if (status != VHA_BUF_FILLED_BY_SW)
#endif
buf->status = status;
/* Poke the command queue only when filled by SW */
if (status == VHA_BUF_FILLED_BY_SW) {
/* We are already locked!
* Run in separate thread
*/
vha_chk_cmd_queues(session->vha, true);
}
return 0;
}
bool vha_buf_needs_inval(struct vha_session *session, uint32_t buf_id)
{
struct vha_buffer *buf = vha_find_bufid(session, buf_id);
bool inval;
if (buf == NULL) {
dev_err(session->vha->dev, "%s: invalid buf id:%d\n",
__func__, buf_id);
return false;
}
/* Buffer that has been allocated as HW access only
* does not need invalidation */
if (buf->attr & (IMG_MEM_ATTR_NOMAP|IMG_MEM_ATTR_NOSYNC)) {
dev_dbg(session->vha->dev, "%s: id:%d (skip)\n",
__func__, buf->id);
return false;
}
dev_dbg(session->vha->dev, "%s: id:%d (%d)\n",
__func__, buf->id, buf->inval);
inval = buf->inval;
buf->inval = false;
return inval;
}
bool vha_buf_needs_flush(struct vha_session *session, uint32_t buf_id)
{
struct vha_buffer *buf = vha_find_bufid(session, buf_id);
bool flush;
if (buf == NULL) {
dev_err(session->vha->dev, "%s: invalid buf id:%d\n",
__func__, buf_id);
return false;
}
dev_dbg(session->vha->dev, "%s: id:%d (%d)\n",
__func__, buf->id, buf->flush);
flush = buf->flush;
buf->flush = false;
return flush;
}
#ifdef KERNEL_DMA_FENCE_SUPPORT
struct vha_sync_cb_data {
struct dma_fence_cb cb;
union {
struct sync_file *sync_file;
struct file *file;
};
};
static void _vha_out_sync_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
{
struct vha_sync_cb_data *cb_data =
container_of(cb, struct vha_sync_cb_data, cb);
fput(cb_data->sync_file->file);
dma_fence_put(fence);
kfree(cb_data);
}
int vha_create_output_sync(struct vha_session *session, uint32_t buf_id_count,
uint32_t *buf_ids)
{
int i;
int ret = -ENOMEM;
int sync_fd = VHA_SYNC_NONE;
struct device *dev = session->vha->dev;
struct dma_fence_array *fence_array = NULL;
struct vha_sync_cb_data *cb_data = NULL;
struct dma_fence **fences =
(struct dma_fence **)kmalloc_array(sizeof(struct buffer_fence*),
buf_id_count, GFP_KERNEL);
if (fences == NULL) {
dev_err(dev, "%s: failed allocating fence container for %u buffers\n",
__func__, buf_id_count);
return -ENOMEM;
}
cb_data = kzalloc(sizeof(struct vha_sync_cb_data), GFP_KERNEL);
if (cb_data == NULL) {
dev_err(dev, "%s: failed allocating fence callback for %u buffers\n",
__func__, buf_id_count);
kfree(fences);
return -ENOMEM;
}
for (i = 0; i < buf_id_count; i++) {
fences[i] = img_mem_add_fence(session->mem_ctx, buf_ids[i]);
if (!fences[i]) {
dev_err(dev, "%s: failed allocating fence for buffer id=%u\n",
__func__, buf_ids[i]);
goto err_fences;
}
}
fence_array = dma_fence_array_create(buf_id_count, fences,
dma_fence_context_alloc(1), 1, false);
if (fence_array == NULL) {
dev_err(dev, "%s: failed allocating fence array for %u buffers\n",
__func__, buf_id_count);
goto err_fences;
}
cb_data->sync_file = sync_file_create(&fence_array->base);
if (cb_data->sync_file == NULL) {
dev_err(dev, "%s: failed creating sync file for %u buffers\n",
__func__, buf_id_count);
goto error_sf;
}
sync_fd = get_unused_fd_flags(O_CLOEXEC);
if (sync_fd < 0) {
dev_err(dev, "%s: failed creating file descriptor for %u buffers\n",
__func__, buf_id_count);
ret = sync_fd;
goto error_fd;
}
ret = dma_fence_add_callback(&fence_array->base, &cb_data->cb,
_vha_out_sync_cb);
if (ret < 0) {
dev_err(dev, "%s: failed adding callback file descriptor for %u buffers\n",
__func__, buf_id_count);
goto error_fd;
}
fd_install(sync_fd, cb_data->sync_file->file);
fget(sync_fd);
return sync_fd;
error_fd:
fput(cb_data->sync_file->file);
dma_fence_put(&fence_array->base);
error_sf:
dma_fence_put(&fence_array->base);
err_fences:
i--;
for (; i >= 0; i--) {
img_mem_remove_fence(session->mem_ctx, buf_ids[i]);
}
kfree(cb_data);
return ret;
}
/* input sync callback */
static void _vha_in_sync_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
{
struct vha_sync_cb_data *cb_data =
container_of(cb, struct vha_sync_cb_data, cb);
fput(cb_data->file);
dma_fence_put(fence);
kfree(cb_data);
}
/* merged input sync callback */
static void _vha_in_merged_sync_cb(struct dma_fence *fence,
struct dma_fence_cb *cb)
{
struct vha_sync_cb_data *cb_data =
container_of(cb, struct vha_sync_cb_data, cb);
fput(cb_data->sync_file->file);
dma_fence_put(fence);
}
int vha_merge_input_syncs(struct vha_session *session, uint32_t in_sync_fd_count,
int *in_sync_fds)
{
struct device *dev = session->vha->dev;
int i, actual_count = 0;
int ret = -ENOMEM;
int sync_fd = VHA_SYNC_NONE;
struct dma_fence_array *fence_array = NULL;
struct vha_sync_cb_data *cb_data = NULL;
struct vha_sync_cb_data *in_sync_cbs = NULL;
struct dma_fence **fences;
void *dma_fence_mem;
struct file *f;
/* Special cases. */
if (in_sync_fd_count == 0) {
dev_err(dev, "%s: requested 0 sync_fds to merge\n", __func__);
return -EINVAL;
} else if (in_sync_fd_count == 1) {
struct file *f;
struct dma_fence *fence;
f = fget(in_sync_fds[0]);
if (f == NULL) {
dev_err(dev, "%s: could not get file for input sync fd=%d\n",
__func__, in_sync_fds[0]);
return -EINVAL;
}
fence = sync_file_get_fence(in_sync_fds[0]);
if (!fence) {
fput(f);
dev_err(dev, "%s: could not get fence for input sync fd=%d\n",
__func__, in_sync_fds[0]);
return -EINVAL;
}
cb_data = kmalloc(sizeof(struct vha_sync_cb_data), GFP_KERNEL);
if (cb_data == NULL) {
fput(f);
dma_fence_put(fence);
dev_err(dev, "%s: failed allocating callback data for input sync fd=%d\n",
__func__, in_sync_fds[0]);
return -ENOMEM;
}
if (dma_fence_add_callback(fence, &cb_data->cb, _vha_in_sync_cb)) {
if (dma_fence_is_signaled(fence)) {
dev_warn(dev, "%s: input sync fd=%d already signalled\n",
__func__, in_sync_fds[0]);
ret = -EINVAL;
} else {
dev_err(dev, "%s: could not add fence callback for input sync fd=%d\n",
__func__, in_sync_fds[0]);
ret = -EFAULT;
}
fput(f);
dma_fence_put(fence);
kfree(cb_data);
return ret;
}
cb_data->file = f;
return in_sync_fds[0];
}
dma_fence_mem =
kmalloc_array(
(sizeof(struct dma_fence*) + sizeof(struct vha_sync_cb_data)),
in_sync_fd_count + sizeof(struct vha_sync_cb_data), GFP_KERNEL);
if (dma_fence_mem == NULL) {
dev_err(dev, "%s: failed allocating fence container for %u buffers\n",
__func__, in_sync_fd_count);
return -ENOMEM;
}
fences = (struct dma_fence**)dma_fence_mem;
in_sync_cbs = (struct vha_sync_cb_data *)(dma_fence_mem +
sizeof(struct dma_fence*) * in_sync_fd_count);
cb_data = (struct vha_sync_cb_data *)(dma_fence_mem +
(sizeof(struct dma_fence*) + sizeof(struct vha_sync_cb_data)) *
in_sync_fd_count);
for (i = 0; i < in_sync_fd_count; i++) {
struct dma_fence *fence;
f = fget(in_sync_fds[i]);
if (f == NULL) {
dev_warn(dev, "%s: could not get file for fd=%d; will not use it\n",
__func__, in_sync_fds[i]);
continue;
}
fence = sync_file_get_fence(in_sync_fds[i]);
if (!fence) {
fput(f);
dev_warn(dev, "%s: could not get fence for fd=%d; will not use it\n",
__func__, in_sync_fds[i]);
continue;
}
if (dma_fence_add_callback(fence, &in_sync_cbs[actual_count].cb,
_vha_in_sync_cb)) {
if (dma_fence_is_signaled(fence)) {
dev_warn(dev, "%s: input sync fd=%d already signalled\n",
__func__, in_sync_fds[i]);
} else {
dev_err(dev, "%s: could not add fence callback for input sync fd=%d;"
" will not use it\n", __func__, in_sync_fds[i]);
}
fput(f);
dma_fence_put(fence);
continue;
}
dma_fence_get(fence); /* should be freed in dma_fence_array_release() */
in_sync_cbs[actual_count].file = f;
fences[actual_count] = fence;
actual_count++;
}
if (actual_count == 0) {
dev_err(dev, "%s: failed merging input fences\n", __func__);
kfree(dma_fence_mem);
return -EINVAL;
}
fence_array = dma_fence_array_create(actual_count, fences,
dma_fence_context_alloc(1), 1, false);
if (fence_array == NULL) {
dev_err(dev, "%s: failed allocating fence array for %u buffers\n",
__func__, in_sync_fd_count);
kfree(dma_fence_mem);
return -ENOMEM;
}
cb_data->sync_file = sync_file_create(&fence_array->base);
if (cb_data->sync_file == NULL) {
dev_err(dev, "%s: failed creating sync file for %u buffers\n",
__func__, in_sync_fd_count);
goto error_sf;
}
sync_fd = get_unused_fd_flags(O_CLOEXEC);
if (sync_fd < 0) {
dev_err(dev, "%s: failed creating file descriptor for %u buffers\n",
__func__, in_sync_fd_count);
ret = sync_fd;
goto error_fd;
}
ret = dma_fence_add_callback(&fence_array->base, &cb_data->cb,
_vha_in_merged_sync_cb);
if (ret < 0) {
dev_err(dev, "%s: failed adding callback file descriptor for %u buffers\n",
__func__, in_sync_fd_count);
goto error_fd;
}
fd_install(sync_fd, cb_data->sync_file->file);
fget(sync_fd);
return sync_fd;
error_fd:
fput(cb_data->sync_file->file);
dma_fence_put(&fence_array->base);
error_sf:
for (i = 0; i < actual_count; i++) {
fput(in_sync_cbs[actual_count].file);
dma_fence_put(fences[actual_count]);
}
dma_fence_put(&fence_array->base);
return ret;
}
int vha_release_syncs(struct vha_session *session, uint32_t buf_id_count,
uint32_t *buf_ids)
{
struct device *dev = session->vha->dev;
int i;
for (i = 0; i < buf_id_count; i++) {
struct vha_buffer *buf = vha_find_bufid(session, buf_ids[i]);
if (buf == NULL) {
dev_warn(dev, "%s: could not find buf %u\n", __func__, buf_ids[i]);
} else {
vha_rm_buf_fence(session, buf);
}
}
return 0;
}
#endif
/* validate and queue a message from a user
* called with mutex locked */
int vha_add_cmd(struct vha_session *session, struct vha_cmd *cmd)
{
uint32_t i;
struct device *dev = session->vha->dev;
struct vha_user_cmd *user_cmd = &cmd->user_cmd;
/* number of words in vha_user_cmd->data[0] */
uint32_t num_params = (cmd->size - sizeof(struct vha_user_cmd))/sizeof(uint32_t);
uint32_t pri_q_count = 1;
#ifdef CONFIG_HW_MULTICORE
if (user_cmd->cmd_type == VHA_CMD_CNN_SUBMIT) {
dev_err(dev, "%s: invalid cmd type 0x%x\n", __func__, user_cmd->cmd_type);
return -EINVAL;
}
#endif
if (user_cmd->num_bufs > num_params * sizeof(uint32_t)) {
dev_err(dev, "%s: invalid number of buffers in message: in:%x total:%x>%lx\n",
__func__, user_cmd->num_inbufs, user_cmd->num_bufs,
num_params * sizeof(uint32_t));
return -EINVAL;
}
if (user_cmd->num_bufs > VHA_MAX_ALT_ADDRS) {
dev_err(dev, "%s: invalid number of buffers in message: %x max:%x\n",
__func__, user_cmd->num_bufs, VHA_MAX_ALT_ADDRS);
return -EINVAL;
}
if (!session->vha->cnn_combined_crc_enable && (cmd->user_cmd.flags & VHA_CHECK_CRC)) {
dev_err(dev, "%s: Trying to perform CRC check while combined CRCs are disabled!,"
" try cnn_combined_crc_enable=1\n", __func__);
return -EINVAL;
}
if (user_cmd->priority >= VHA_MAX_PRIORITIES) {
#if defined(CONFIG_HW_MULTICORE) || (defined(HW_AX3) && defined(VHA_USE_LO_PRI_SUB_SEGMENTS))
dev_warn(dev, "%s: Priority %u too high. Setting to max supported priority: %u.\n",
__func__, user_cmd->priority, VHA_MAX_PRIORITIES - 1);
user_cmd->priority = VHA_MAX_PRIORITIES - 1;
#else
dev_warn_once(dev, "%s: Priorities not supported.\n", __func__);
user_cmd->priority = VHA_DEFAULT_PRI;
#endif
}
switch(cmd->user_cmd.cmd_type) {
case VHA_CMD_CNN_SUBMIT:
{
struct vha_user_cnn_submit_cmd* submit_cmd =
(struct vha_user_cnn_submit_cmd*)user_cmd;
/* subsegments cannot be handled with low latency enabled */
if ((submit_cmd->subseg_num > 1) && (session->vha->low_latency != VHA_LL_DISABLED)) {
dev_err(dev, "%s: Subsegments are not supported with low latency enabled\n", __func__);
return -EINVAL;
}
/* include subsegments in priority counters */
pri_q_count = submit_cmd->subseg_num;
/* check input and output buffers are valid */
for (i = 0; i < user_cmd->num_bufs; i++) {
uint32_t buf_id = user_cmd->data[i];
if (vha_find_bufid(session, buf_id) == NULL) {
dev_err(dev, "%s: unrecognised buf id[%u]:%x\n",
__func__, i, buf_id);
return -EINVAL;
}
}
/* send out a event notifications when submit is enqueued */
if (vha_observers.enqueued)
vha_observers.enqueued(session->vha->id, session->id,
cmd->user_cmd.cmd_id, cmd->user_cmd.priority);
break;
}
case VHA_CMD_CNN_SUBMIT_MULTI:
{
uint32_t num_cmd_bufs = 0;
/* check if command stream buffers are valid */
for (i = 0; i < VHA_MAX_CORES; i++) {
uint32_t buf_id = user_cmd->data[i];
if (buf_id == 0)
break;
if (vha_find_bufid(session, buf_id) == NULL) {
dev_err(dev, "%s: unrecognised cmdstr buf id[%u]:%x\n",
__func__, i, buf_id);
return -EINVAL;
}
num_cmd_bufs++;
}
/* check input and output buffers are valid */
for (i = VHA_MAX_CORES; i < (user_cmd->num_bufs - 1); i++) {
uint32_t buf_id = user_cmd->data[i];
if (vha_find_bufid(session, buf_id) == NULL) {
dev_err(dev, "%s: unrecognised buf id[%u]:%x\n",
__func__, i, buf_id);
return -EINVAL;
}
}
/* send out a event notifications when submit is enqueued */
if (vha_observers.enqueued)
vha_observers.enqueued(session->vha->id, session->id,
cmd->user_cmd.cmd_id, cmd->user_cmd.priority);
break;
}
case VHA_CMD_CNN_PDUMP_MSG:
{
struct pdump_descr* pdump = vha_pdump_dev_get_drvdata(dev);
if (!img_pdump_enabled(pdump)) {
kfree(cmd);
/* Silently ignore this pdump message */
return 0;
}
}
}
/* add the command to the pending list */
list_add_tail(&cmd->list[cmd->user_cmd.priority], &session->cmds[cmd->user_cmd.priority]);
GETNSTIMEOFDAY(&cmd->submit_ts);
session->vha->pri_q_counters[cmd->user_cmd.priority] += pri_q_count;
/* We are already locked!
* Run in separate thread
*/
vha_chk_cmd_queues(session->vha, true);
return 0;
}
int vha_suspend_dev(struct device *dev)
{
struct vha_dev *vha = vha_dev_get_drvdata(dev);
int ret;
mutex_lock(&vha->lock);
#ifdef CONFIG_PM_DEVFREQ
ret = vha_devfreq_suspend(dev);
if (ret)
dev_err(dev, "%s: Failed to suspend the vha devfreq!\n", __func__);
#endif
dev_dbg(dev, "%s: taking a nap!\n", __func__);
ret = vha_dev_suspend_work(vha);
mutex_unlock(&vha->lock);
return ret;
}
int vha_resume_dev(struct device *dev)
{
struct vha_dev *vha = vha_dev_get_drvdata(dev);
int ret;
mutex_lock(&vha->lock);
dev_dbg(dev, "%s: waking up!\n", __func__);
/* Call the worker */
vha_chk_cmd_queues(vha, true);
#ifdef CONFIG_PM_DEVFREQ
ret = vha_devfreq_resume(dev);
if (ret)
dev_err(dev, "%s: Failed to resume the vha devfreq!\n", __func__);
#endif
mutex_unlock(&vha->lock);
return 0;
}
void vha_dump_digest(struct vha_session *session, struct vha_buffer *buf,
struct vha_cmd *cmd)
{
struct vha_dev *vha = session->vha;
int ret;
if (!vha->dump_buff_digest)
return;
if (!(buf->attr & IMG_MEM_ATTR_NOMAP)) {
ret = img_mem_map_km(session->mem_ctx, buf->id);
if (ret) {
dev_err(session->vha->dev, "failed to map buff %x to km: %d\n",
buf->id, ret);
return;
}
buf->kptr = img_mem_get_kptr(session->mem_ctx, buf->id);
dev_info(vha->dev, "%s: buff id:%d name:%s digest is [crc32]:%#x\n",
__func__, buf->id, buf->name, crc32(0, buf->kptr, buf->size));
ret = img_mem_unmap_km(session->mem_ctx, buf->id);
if (ret) {
dev_err(session->vha->dev,
"%s: failed to unmap buff %x from km: %d\n",
__func__, buf->id, ret);
}
buf->kptr = NULL;
}
}
int vha_get_cnntotal_proc_us(struct device *dev, uint64_t *proc_us, uint64_t *cur_proc_us)
{
struct vha_dev *vha = vha_dev_get_drvdata(dev);
if (!vha)
return -EFAULT;
*proc_us = vha->stats.cnn_total_proc_us;
vha_currcmd_exetime_req(vha, cur_proc_us);
return 0;
}
/*
* register event observers.
* only a SINGLE observer for each type of event.
* unregister by passing NULL parameter
*/
void vha_observe_event_enqueue(void (*func)(uint32_t devid,
uint32_t sessionid,
uint32_t cmdid,
uint32_t priority))
{
if (func && vha_observers.enqueued)
pr_warn("%s: vha_observer for ENQUEUED events is already set to '%pf'\n",
__func__, vha_observers.enqueued);
vha_observers.enqueued = func;
}
EXPORT_SYMBOL(vha_observe_event_enqueue);
void vha_observe_event_submit(void (*func)(uint32_t devid,
uint32_t sessionid,
uint32_t cmdid,
bool last_subsegment,
uint32_t priority))
{
if (func && vha_observers.submitted)
pr_warn("%s: vha_observer for SUBMITTED events is already set to '%pf'\n",
__func__, vha_observers.submitted);
vha_observers.submitted = func;
}
EXPORT_SYMBOL(vha_observe_event_submit);
void vha_observe_event_complete(void (*func)(uint32_t devid,
uint32_t sessionid,
uint32_t cmdid,
uint64_t status,
uint64_t cycles,
uint64_t mem_usage,
uint32_t priority))
{
if (func && vha_observers.completed)
pr_warn("%s: vha_observer for COMPLETED events is already set to '%pf'\n",
__func__, vha_observers.completed);
vha_observers.completed = func;
}
EXPORT_SYMBOL(vha_observe_event_complete);
void vha_observe_event_cancel(void (*func)(uint32_t devid,
uint32_t sessionid,
uint32_t cmdid,
uint32_t priority))
{
if (func && vha_observers.canceled)
pr_warn("%s: vha_observer for CANCELED events is already set to '%pf'\n",
__func__, vha_observers.canceled);
vha_observers.canceled = func;
}
EXPORT_SYMBOL(vha_observe_event_cancel);
void vha_observe_event_error(void (*func)(uint32_t devid,
uint32_t sessionid,
uint32_t cmdid,
uint64_t status))
{
if (func && vha_observers.error)
pr_warn("%s: vha_observer for ERROR events is already set to '%pf'\n",
__func__, vha_observers.error);
vha_observers.error = func;
}
EXPORT_SYMBOL(vha_observe_event_error);