mirror of
https://github.com/revyos/thead-kernel.git
synced 2026-06-21 09:12:26 +02:00
2738 lines
76 KiB
C
2738 lines
76 KiB
C
/*
 *****************************************************************************
 * Copyright (c) Imagination Technologies Ltd.
 *
 * The contents of this file are subject to the MIT license as set out below.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 * Alternatively, the contents of this file may be used under the terms of the
 * GNU General Public License Version 2 ("GPL")in which case the provisions of
 * GPL are applicable instead of those above.
 *
 * If you wish to allow use of your version of this file only under the terms
 * of GPL, and not to allow others to use your version of this file under the
 * terms of the MIT license, indicate your decision by deleting the provisions
 * above and replace them with the notice and other provisions required by GPL
 * as set out in the file called "GPLHEADER" included in this distribution. If
 * you do not delete the provisions above, a recipient may use your version of
 * this file under the terms of either the MIT license or GPL.
 *
 * This License is also included in this distribution in the file called
 * "MIT_COPYING".
 *
 *****************************************************************************/
|
|
|
|
#include <linux/slab.h>
#include <linux/device.h>
#include <linux/gfp.h>
#include <linux/sched.h>
#include <linux/moduleparam.h>
#include <linux/jiffies.h>
#include <linux/list.h>
#include <linux/delay.h>
#include <linux/math64.h>

#include <linux/pm_runtime.h>
#include <linux/debugfs.h>

#include <linux/crc32.h>

#include <uapi/vha.h>
#include <uapi/version.h>
#include "vha_common.h"
#include "vha_plat.h"
#include <vha_regs.h>

#ifdef KERNEL_DMA_FENCE_SUPPORT
#include <linux/dma-fence.h>
#include <linux/dma-fence-array.h>
#include <linux/sync_file.h>
#include <linux/file.h>
#include <linux/kernel.h>
#endif
|
|
|
|
/*
 * Build-time sanity check: exactly one hardware series (HW_AX2 or HW_AX3)
 * must be selected.
 *
 * The second branch has to be spelled "#elif". An unknown directive such as
 * "#elseif" is silently ignored inside a skipped conditional group, which
 * made the "both defined" check unreachable.
 */
#if !defined(HW_AX2) && !defined(HW_AX3)
#error No HW architecture series defined. Either HW_AX2 or HW_AX3 must be defined
#elif defined(HW_AX2) && defined(HW_AX3)
#error Invalid HW architecture series define. Only one of HW_AX2 or HW_AX3 must be defined.
#endif
|
|
|
|
/*
 * NOTE(review): "linux" is undefined before the trace header is pulled in,
 * presumably so the compiler's predefined `linux` macro cannot be expanded
 * inside the trace-point include path - confirm against vha_trace_point.h.
 */
#undef linux
/* Ask the trace header to emit the trace point definitions in this file. */
#define CREATE_TRACE_POINTS
#include <vha_trace_point.h>

/*
 * Lower/upper limits for on-chip maps.
 * NOTE(review): not referenced in the part of the file reviewed here;
 * presumably the id range used for session->onchip_maps - confirm.
 */
#define MIN_ONCHIP_MAP   1
#define MAX_ONCHIP_MAP   128
|
|
|
|
static uint8_t mmu_mode = VHA_MMU_40BIT;
|
|
module_param(mmu_mode, byte, 0444);
|
|
MODULE_PARM_DESC(mmu_mode,
|
|
"MMU mode: 0=no-MMU, 1=direct (1:1) mappings or 40=40bit (default)");
|
|
static uint32_t mmu_ctx_default;
|
|
module_param(mmu_ctx_default, uint, 0444);
|
|
MODULE_PARM_DESC(mmu_ctx_default, "MMU default context id(0:31) to be used");
|
|
static uint32_t mmu_page_size; /* 0-4kB */
|
|
module_param(mmu_page_size, uint, 0444);
|
|
MODULE_PARM_DESC(mmu_page_size,
|
|
"MMU page size: 0-4kB, 1-16kB, 2-64kB, 3-256kB, 4-1MB; 5-2MB");
|
|
|
|
static bool no_clock_disable = false;
|
|
module_param(no_clock_disable, bool, 0444);
|
|
MODULE_PARM_DESC(no_clock_disable,
|
|
"if Y, the device is not disabled when inactive, otherwise APM is used");
|
|
|
|
static int pm_delay = 100;
|
|
module_param(pm_delay, int, S_IRUSR | S_IRGRP);
|
|
MODULE_PARM_DESC(pm_delay, "Delay, in ms, before powering off the core that's idle");
|
|
|
|
static int freq_khz = -1;
|
|
module_param(freq_khz, int, 0444);
|
|
MODULE_PARM_DESC(freq_khz,
|
|
"core frequency in kHz, -1=start self measurement during driver load, 0=use platform defined value, otherwise (>0) declared value is used");
|
|
static uint32_t hw_bypass;
|
|
module_param(hw_bypass, uint, 0444);
|
|
MODULE_PARM_DESC(hw_bypass,
|
|
"Number of cnn kicks(segments) to be bypassed within the session, 0=none");
|
|
static uint32_t slc_bypass;
|
|
module_param(slc_bypass, uint, 0444);
|
|
MODULE_PARM_DESC(slc_bypass, "SLC bypass mode");
|
|
#if defined(HW_AX2) || defined(CONFIG_VHA_DUMMY_SIMULATE_HW_PROCESSING_TIME)
|
|
static uint32_t low_latency = VHA_LL_SW_KICK;
|
|
#elif defined(HW_AX3) && defined(VHA_USE_LO_PRI_SUB_SEGMENTS)
|
|
static uint32_t low_latency = VHA_LL_DISABLED;
|
|
#else
|
|
static uint32_t low_latency = VHA_LL_SELF_KICK;
|
|
#endif
|
|
module_param(low_latency, uint, 0444);
|
|
MODULE_PARM_DESC(low_latency, "Low latency mode: 0-disabled, 1-sw kick, 2-self kick");
|
|
|
|
static bool zero_buffers;
|
|
module_param(zero_buffers, bool, 0444);
|
|
MODULE_PARM_DESC(zero_buffers, "fill every allocated buffer with zeros");
|
|
|
|
static bool dump_buff_digest = 0;
|
|
module_param(dump_buff_digest, bool, 0444);
|
|
MODULE_PARM_DESC(dump_buff_digest, "Calculate & dump digest for in/out buffers. This is crc32");
|
|
|
|
static unsigned long onchipmem_phys_start= VHA_OCM_ADDR_START;
|
|
module_param(onchipmem_phys_start, ulong, 0444);
|
|
MODULE_PARM_DESC(onchipmem_phys_start,
|
|
"Physical address of start of on-chip ram. '0xFs' means that ocm is disabled");
|
|
static uint32_t onchipmem_size;
|
|
module_param(onchipmem_size, uint, 0444);
|
|
MODULE_PARM_DESC(onchipmem_size,
|
|
"Size of on-chip memory in bytes");
|
|
|
|
/* bringup test: force MMU fault with MMU base register */
|
|
static bool test_mmu_base_pf;
|
|
module_param(test_mmu_base_pf, bool, 0444);
|
|
MODULE_PARM_DESC(test_mmu_base_pf,
|
|
"Bringup test: force MMU page fault on first access");
|
|
|
|
/* bringup test: do not map into the device after the Nth buffer */
|
|
static int32_t test_mmu_no_map_count = -1;
|
|
module_param(test_mmu_no_map_count, int, 0444);
|
|
MODULE_PARM_DESC(test_mmu_no_map_count,
|
|
"Bringup test: force MMU page faults if count >= 0");
|
|
|
|
#ifdef VHA_SCF
|
|
static bool parity_disable = false;
|
|
module_param(parity_disable, bool, 0444);
|
|
MODULE_PARM_DESC(parity_disable,
|
|
"if Y, the core parity feature will be disabled, if it is supported");
|
|
|
|
static bool confirm_config_reg = false;
|
|
module_param(confirm_config_reg, bool, 0444);
|
|
MODULE_PARM_DESC(confirm_config_reg,
|
|
"Enables confirmation of register writes");
|
|
#endif
|
|
|
|
static bool test_without_bvnc_check;
|
|
module_param(test_without_bvnc_check, bool, 0444);
|
|
MODULE_PARM_DESC(test_without_bvnc_check,
|
|
"When set BVNC check is ignored, allowing to kick the hw");
|
|
|
|
/* Fault inject parameter is only applicable when
|
|
* kernel fault injection feature is enabled
|
|
* in the kernel options -> CONFIG_FAULT_INJECTION=y
|
|
* See Documentation/fault-injection/
|
|
*/
|
|
static uint8_t fault_inject;
|
|
module_param(fault_inject, byte, 0444);
|
|
MODULE_PARM_DESC(fault_inject,
|
|
"Enable fault injection using bitwise value: 1-open,2-read,4-write,8-ioctl,16-mmap,32-cmd worker,64-irq worker,128-user space");
|
|
|
|
/* Interval in milliseconds for testing/simulating system suspend/resume functionality */
|
|
static uint8_t suspend_interval_msec;
|
|
module_param(suspend_interval_msec, byte, 0444);
|
|
MODULE_PARM_DESC(suspend_interval_msec,
|
|
"Test suspend/resume interval, 0=disabled, otherwise defines interval in milliseconds");
|
|
|
|
#ifdef VHA_SCF
|
|
static bool cnn_combined_crc_enable = true;
|
|
#else
|
|
static bool cnn_combined_crc_enable = false;
|
|
#endif
|
|
module_param(cnn_combined_crc_enable, bool, 0444);
|
|
MODULE_PARM_DESC(cnn_combined_crc_enable,
|
|
"Enables the combined CRC feature");
|
|
#ifdef VHA_SCF
|
|
static u32 swd_period = 10;
|
|
module_param(swd_period, uint, 0444);
|
|
MODULE_PARM_DESC(swd_period,
|
|
"The timer expiration period in miliseconds, 0=disable");
|
|
|
|
static unsigned long swd_timeout_default = 0;
|
|
module_param(swd_timeout_default, ulong, 0444);
|
|
MODULE_PARM_DESC(swd_timeout_default,
|
|
"The default expected execution time in us, 0=use MBS values only");
|
|
|
|
static u32 swd_timeout_m0 = 100;
|
|
module_param(swd_timeout_m0, uint, 0444);
|
|
MODULE_PARM_DESC(swd_timeout_m0,
|
|
"The m0 value in the expected execution time equation: T = (T0 * m0)/100 + m1");
|
|
|
|
static u32 swd_timeout_m1 = 10000;
|
|
module_param(swd_timeout_m1, uint, 0444);
|
|
MODULE_PARM_DESC(swd_timeout_m1,
|
|
"The m1 value in the expected execution time equation: T = (T0 * m0)/100 + m1");
|
|
#endif
|
|
|
|
/* Event observers, to be notified when significant events occur */
|
|
struct vha_observers vha_observers;
|
|
|
|
/* Driver context */
|
|
static struct {
|
|
/* Available driver memory heaps. List of <struct vha_heap> */
|
|
struct list_head heaps;
|
|
|
|
/* Memory Management context for driver */
|
|
struct mem_ctx *mem_ctx;
|
|
|
|
/* List of associated <struct vha_dev> */
|
|
struct list_head devices;
|
|
|
|
unsigned int num_devs;
|
|
|
|
int initialised;
|
|
} drv;
|
|
|
|
/* Session id counter. */
|
|
static uint32_t vha_session_id_cnt = 0;
|
|
/* Reset counter. */
|
|
static uint32_t vha_reset_cnt = 0;
|
|
|
|
static void cmd_worker(struct work_struct *work);
|
|
|
|
/*
 * MMU page sizes, indexed by the mmu_page_size selector (0..5).
 * Despite the "_kb_" in the name the entries are in bytes; users divide by
 * 1024 when they need kB.
 */
static const size_t mmu_page_size_kb_lut[] = {
	4 * 1024,		/* 0: 4kB */
	16 * 1024,		/* 1: 16kB */
	64 * 1024,		/* 2: 64kB */
	256 * 1024,		/* 3: 256kB */
	1024 * 1024,		/* 4: 1MB */
	2 * 1024 * 1024,	/* 5: 2MB */
};
|
|
|
|
#ifdef CONFIG_FUNCTION_ERROR_INJECTION
/*
 * Pass-through hook: returns @ret unchanged. It is kept out of line and
 * registered for error injection below, so that an error code can be
 * injected for all IOPOLL_PDUMP* macros.
 */
noinline int __IOPOLL64_RET(int ret)
{
	return ret;
}

#include <asm-generic/error-injection.h>
ALLOW_ERROR_INJECTION(__IOPOLL64_RET, ERRNO);

#ifdef VHA_EVENT_INJECT
/*
 * Hook called in __handle_event_injection().
 * Under normal circumstances it returns 0 and no EVENT is injected;
 * with error injection active it returns -errno instead.
 */
noinline int __EVENT_INJECT(void)
{
	return 0;
}
ALLOW_ERROR_INJECTION(__EVENT_INJECT, ERRNO);
#endif /* VHA_EVENT_INJECT */

#endif /* CONFIG_FUNCTION_ERROR_INJECTION */
|
|
|
|
/* Calculate current timespan for the given timestamp */
|
|
bool get_timespan_us(struct TIMESPEC *from, struct TIMESPEC *to, uint64_t *result)
|
|
{
|
|
long long total = 0;
|
|
|
|
if (!TIMESPEC_VALID(from) || !TIMESPEC_VALID(to))
|
|
return false;
|
|
|
|
if (TIMESPEC_COMPARE(from, to) >= 0)
|
|
return false;
|
|
|
|
total = NSEC_PER_SEC * to->tv_sec +
|
|
to->tv_nsec;
|
|
total -= NSEC_PER_SEC * from->tv_sec +
|
|
from->tv_nsec;
|
|
do_div(total, 1000UL);
|
|
*result = total;
|
|
|
|
return true;
|
|
}
|
|
|
|
/* Used for simulating system level suspend/resume functionality */
|
|
static void suspend_test_worker(struct work_struct *work)
|
|
{
|
|
struct vha_dev *vha = container_of(work, struct vha_dev, suspend_dwork.work);
|
|
int ret;
|
|
|
|
/* Make resume/suspend cycle */
|
|
ret = vha_suspend_dev(vha->dev);
|
|
WARN_ON(ret != 0);
|
|
vha_resume_dev(vha->dev);
|
|
|
|
mutex_lock(&vha->lock);
|
|
/* Retrigger suspend worker */
|
|
schedule_delayed_work(&vha->suspend_dwork,
|
|
msecs_to_jiffies(vha->suspend_interval_msec));
|
|
mutex_unlock(&vha->lock);
|
|
}
|
|
|
|
/*
|
|
* Initialize common platform (driver) memory heaps.
|
|
* device (cluster) heaps are initialized in vha_init()
|
|
*/
|
|
int vha_init_plat_heaps(const struct heap_config heap_configs[], int heaps)
|
|
{
|
|
int i;
|
|
int ret = 0;
|
|
/* Initialise memory management component */
|
|
for (i = 0; i < heaps; i++) {
|
|
struct vha_heap *heap;
|
|
|
|
pr_debug("%s: adding platform heap of type %d\n",
|
|
__func__, heap_configs[i].type);
|
|
|
|
heap = kzalloc(sizeof(struct vha_heap), GFP_KERNEL);
|
|
if (!heap) {
|
|
ret = -ENOMEM;
|
|
goto drv_heap_add_failed;
|
|
}
|
|
heap->global = true;
|
|
ret = img_mem_add_heap(&heap_configs[i], &heap->id);
|
|
if (ret < 0) {
|
|
pr_err("%s: failed to init platform heap (type %d)!\n",
|
|
__func__, heap_configs[i].type);
|
|
kfree(heap);
|
|
goto drv_heap_add_failed;
|
|
}
|
|
list_add(&heap->list, &drv.heaps);
|
|
}
|
|
|
|
return ret;
|
|
|
|
drv_heap_add_failed:
|
|
while (!list_empty(&drv.heaps)) {
|
|
struct vha_heap *heap;
|
|
|
|
heap = list_first_entry(&drv.heaps, struct vha_heap, list);
|
|
list_del(&heap->list);
|
|
img_mem_del_heap(heap->id);
|
|
kfree(heap);
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
int vha_early_init(void)
|
|
{
|
|
int ret;
|
|
INIT_LIST_HEAD(&drv.heaps);
|
|
INIT_LIST_HEAD(&drv.devices);
|
|
|
|
/* Create memory management context for HW buffers */
|
|
ret = img_mem_create_proc_ctx(&drv.mem_ctx);
|
|
if (ret) {
|
|
pr_err("%s: failed to create mem context (err:%d)!\n",
|
|
__func__, ret);
|
|
drv.mem_ctx = NULL;
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
* Lazy intialization of main driver context (when first core is probed)
|
|
*/
|
|
static int vha_init(struct vha_dev *vha,
|
|
const struct heap_config heap_configs[], int heaps)
|
|
{
|
|
struct device *dev = vha->dev;
|
|
int ret, i;
|
|
|
|
#ifdef CONFIG_HW_MULTICORE
|
|
ret = vha_dev_scheduler_init(vha);
|
|
if (ret != 0) {
|
|
dev_err(dev, "%s: failed initializing scheduler!\n", __func__);
|
|
return ret;
|
|
}
|
|
if (!vha_dev_dbg_params_init(vha)) {
|
|
dev_err(dev, "%s: invalid debug params detected!\n", __func__);
|
|
return -EINVAL;
|
|
}
|
|
#endif
|
|
|
|
/* Initialise local device (cluster) heaps */
|
|
for (i = 0; i < heaps; i++) {
|
|
struct vha_heap *heap;
|
|
|
|
dev_dbg(dev, "%s: adding device heap of type %d\n",
|
|
__func__, heap_configs[i].type);
|
|
|
|
heap = kzalloc(sizeof(struct vha_heap), GFP_KERNEL);
|
|
if (!heap) {
|
|
ret = -ENOMEM;
|
|
goto heap_add_failed;
|
|
}
|
|
|
|
ret = img_mem_add_heap(&heap_configs[i], &heap->id);
|
|
if (ret < 0) {
|
|
dev_err(dev, "%s: failed to init device heap (type %d)!\n",
|
|
__func__, heap_configs[i].type);
|
|
kfree(heap);
|
|
goto heap_add_failed;
|
|
}
|
|
list_add(&heap->list, &vha->heaps);
|
|
}
|
|
|
|
/* now copy platform (global) heap id's to device vha_heap list, the global heap id's are
|
|
* not owned by vha_dev anyway (heap->global=true)
|
|
* This is done for vha_ioctl_query_heaps() to be able to report both platform
|
|
* and device heaps easily. */
|
|
{
|
|
struct list_head* pos;
|
|
list_for_each_prev(pos, &drv.heaps) {
|
|
struct vha_heap* heap = list_entry(pos, struct vha_heap, list);
|
|
struct vha_heap* heap_copy = kmemdup(heap, sizeof(*heap), GFP_KERNEL);
|
|
if(!heap_copy) {
|
|
ret = -ENOMEM;
|
|
goto heap_add_failed;
|
|
}
|
|
INIT_LIST_HEAD(&heap_copy->list);
|
|
list_add(&heap_copy->list, &vha->heaps);
|
|
}
|
|
}
|
|
|
|
/* initialize local ocm cluster heaps */
|
|
if (vha->hw_props.locm_size_bytes && onchipmem_phys_start == ~0)
|
|
dev_warn(dev, "%s: Onchip memory physical address not set!\n",
|
|
__func__);
|
|
/* OCM heap type is automatically appended */
|
|
if (vha->hw_props.locm_size_bytes && onchipmem_phys_start != ~0) {
|
|
struct heap_config heap_cfg;
|
|
struct vha_heap *heap;
|
|
|
|
memset(&heap_cfg, 0, sizeof(heap_cfg));
|
|
heap_cfg.type = IMG_MEM_HEAP_TYPE_OCM;
|
|
heap_cfg.options.ocm.phys = onchipmem_phys_start;
|
|
heap_cfg.options.ocm.size = vha->hw_props.locm_size_bytes;
|
|
heap_cfg.options.ocm.hattr = IMG_MEM_HEAP_ATTR_LOCAL;
|
|
|
|
dev_dbg(dev, "%s: adding heap of type %d\n",
|
|
__func__, heap_cfg.type);
|
|
|
|
heap = kzalloc(sizeof(struct vha_heap), GFP_KERNEL);
|
|
if (!heap) {
|
|
ret = -ENOMEM;
|
|
goto heap_add_failed;
|
|
}
|
|
|
|
ret = img_mem_add_heap(&heap_cfg, &heap->id);
|
|
if (ret < 0) {
|
|
dev_err(dev, "%s: failed to init heap (type %d)!\n",
|
|
__func__, heap_cfg.type);
|
|
kfree(heap);
|
|
goto heap_add_failed;
|
|
}
|
|
list_add(&heap->list, &vha->heaps);
|
|
}
|
|
#ifdef CONFIG_HW_MULTICORE
|
|
if (vha->hw_props.socm_size_bytes && onchipmem_phys_start != ~0) {
|
|
struct heap_config heap_cfg;
|
|
struct vha_heap *heap;
|
|
|
|
memset(&heap_cfg, 0, sizeof(heap_cfg));
|
|
heap_cfg.type = IMG_MEM_HEAP_TYPE_OCM;
|
|
heap_cfg.options.ocm.phys = onchipmem_phys_start +
|
|
vha->hw_props.locm_size_bytes + IMG_MEM_VA_GUARD_GAP;
|
|
heap_cfg.options.ocm.size = vha->hw_props.socm_size_bytes;
|
|
heap_cfg.options.ocm.hattr = IMG_MEM_HEAP_ATTR_SHARED;
|
|
|
|
dev_dbg(dev, "%s: adding heap of type %d\n",
|
|
__func__, heap_cfg.type);
|
|
|
|
heap = kzalloc(sizeof(struct vha_heap), GFP_KERNEL);
|
|
if (!heap) {
|
|
ret = -ENOMEM;
|
|
goto heap_add_failed;
|
|
}
|
|
|
|
ret = img_mem_add_heap(&heap_cfg, &heap->id);
|
|
if (ret < 0) {
|
|
dev_err(dev, "%s: failed to init heap (type %d)!\n",
|
|
__func__, heap_cfg.type);
|
|
kfree(heap);
|
|
goto heap_add_failed;
|
|
}
|
|
list_add(&heap->list, &vha->heaps);
|
|
}
|
|
#endif
|
|
|
|
{
|
|
/* now get the last entry and make it responsible for internal allocations
|
|
* use last entry because list_add() inserts at the head
|
|
* When choosing the internal alloc heap, the device local heaps take precedence over
|
|
* global platform heaps */
|
|
struct vha_heap* heap = list_last_entry(&vha->heaps, struct vha_heap, list);
|
|
if(!heap) {
|
|
dev_err(dev, "%s: failed to locate heap for internal alloc\n",
|
|
__func__);
|
|
ret = -EINVAL;
|
|
/* Loop registered heaps just for sanity */
|
|
goto heap_add_failed;
|
|
}
|
|
vha->int_heap_id = heap->id;
|
|
dev_dbg(dev, "%s: using heap %d for internal alloc\n",
|
|
__func__, vha->int_heap_id);
|
|
}
|
|
/* Do not proceed if internal heap not defined */
|
|
|
|
drv.initialised = 1;
|
|
|
|
dev_dbg(dev, "%s: vha drv init done\n", __func__);
|
|
return 0;
|
|
|
|
heap_add_failed:
|
|
while (!list_empty(&vha->heaps)) {
|
|
struct vha_heap *heap;
|
|
|
|
heap = list_first_entry(&vha->heaps, struct vha_heap, list);
|
|
list_del(&heap->list);
|
|
if(!heap->global)
|
|
img_mem_del_heap(heap->id);
|
|
kfree(heap);
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
int vha_deinit(void)
|
|
{
|
|
/* Destroy memory management context */
|
|
if (drv.mem_ctx) {
|
|
size_t mem_usage;
|
|
uint32_t MB, bytes, kB;
|
|
|
|
img_mem_get_usage(drv.mem_ctx, &mem_usage, NULL);
|
|
MB = mem_usage / (1024 * 1024);
|
|
bytes = mem_usage - (MB * (1024 * 1024));
|
|
kB = (bytes * 1000) / (1024 * 1024);
|
|
|
|
pr_debug("%s: Total kernel memory used: %u.%u MB\n",
|
|
__func__, MB, kB);
|
|
|
|
img_mem_destroy_proc_ctx(drv.mem_ctx);
|
|
drv.mem_ctx = NULL;
|
|
}
|
|
|
|
/* Deinitialize memory management component */
|
|
while (!list_empty(&drv.heaps)) {
|
|
struct vha_heap *heap;
|
|
|
|
heap = list_first_entry(&drv.heaps, struct vha_heap, list);
|
|
BUG_ON(!heap->global);
|
|
list_del(&heap->list);
|
|
img_mem_del_heap(heap->id);
|
|
kfree(heap);
|
|
}
|
|
|
|
drv.initialised = 0;
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Returns: true if hardware has required capabilities, false otherwise.
|
|
* Implementation is a simple check of expected BVNC against hw CORE_ID
|
|
*/
|
|
bool vha_dev_check_hw_capab(struct vha_dev* vha, uint64_t expected_hw_capab)
|
|
{
|
|
uint64_t __maybe_unused hw = vha->hw_props.core_id
|
|
& VHA_CR_CORE_ID_BVNC_CLRMSK;
|
|
uint64_t __maybe_unused mbs = expected_hw_capab
|
|
& VHA_CR_CORE_ID_BVNC_CLRMSK;
|
|
|
|
if (!test_without_bvnc_check) {
|
|
img_pdump_printf(
|
|
"IF SKIP_COREID_CHECK\n"
|
|
"COM Skip COREID Check\n"
|
|
"ELSE SKIP_COREID_CHECK\n"
|
|
"COM CHECKING CORE_ID: expecting BVNC:%llu.%llu.%llu.%llu\n",
|
|
core_id_quad(expected_hw_capab));
|
|
IOPOLL64_PDUMP(expected_hw_capab, 1, 1,
|
|
VHA_CR_CORE_ID_BVNC_CLRMSK,
|
|
VHA_CR_CORE_ID);
|
|
img_pdump_printf(
|
|
"FI SKIP_COREID_CHECK\n");
|
|
}
|
|
|
|
if ((expected_hw_capab >> 48) != HW_SERIES) {
|
|
dev_err(vha->dev,
|
|
"%s: network was compiled for incorrect hardware series: expected %llu / found %u\n",
|
|
__func__,
|
|
(expected_hw_capab >> 48), HW_SERIES);
|
|
return false;
|
|
}
|
|
|
|
#ifndef CONFIG_VHA_DUMMY
|
|
if (hw != mbs) {
|
|
dev_warn(vha->dev,
|
|
"%s: network was compiled for an incorrect hardware variant (BVNC): "
|
|
"found %llu.%llu.%llu.%llu, expected %llu.%llu.%llu.%llu\n",
|
|
__func__,
|
|
core_id_quad(vha->hw_props.core_id),
|
|
core_id_quad(expected_hw_capab));
|
|
/* Conditionally allow the hw to be kicked */
|
|
if (test_without_bvnc_check)
|
|
dev_warn(vha->dev, "%s: trying to kick the hw ... ", __func__);
|
|
else {
|
|
dev_err(vha->dev, "%s: can't kick the hardware!", __func__);
|
|
return false;
|
|
}
|
|
}
|
|
#endif
|
|
return true;
|
|
}
|
|
|
|
/* notify the user space if a response msg is ready */
|
|
void vha_cmd_notify(struct vha_cmd *cmd)
|
|
{
|
|
struct vha_session *session = cmd->session;
|
|
struct vha_rsp *rsp = cmd->rsp;
|
|
dev_dbg(session->vha->dev, "%s: 0x%08x/%u\n",
|
|
__func__, cmd->user_cmd.cmd_id, session->id);
|
|
|
|
if (rsp) {
|
|
cmd->rsp = NULL;
|
|
list_add_tail(&rsp->list, &session->rsps);
|
|
}
|
|
wake_up(&session->wq);
|
|
/* we are done with this cmd, let's free it */
|
|
list_del(&cmd->list[cmd->user_cmd.priority]);
|
|
kfree(cmd);
|
|
}
|
|
|
|
static void vha_measure_core_freq(struct vha_dev *vha)
|
|
{
|
|
if (vha->stats.last_proc_us) {
|
|
uint64_t proc = vha->stats.last_proc_us;
|
|
do_div(proc, 1000UL);
|
|
if (proc) {
|
|
uint64_t cycles = vha->calibration_cycles;
|
|
do_div(cycles, proc);
|
|
vha->freq_khz = cycles;
|
|
dev_info(vha->dev,
|
|
"%s: Measured core clock frequency[kHz]: %u\n",
|
|
__func__, vha->freq_khz);
|
|
return;
|
|
}
|
|
}
|
|
|
|
dev_info(vha->dev,
|
|
"%s: Can't measure core clock frequency!\n",
|
|
__func__);
|
|
}
|
|
|
|
bool vha_check_calibration(struct vha_dev *vha)
|
|
{
|
|
if (vha->stats.last_proc_us) {
|
|
/* Core may have been kicked to
|
|
* measure frequency */
|
|
if (vha->do_calibration) {
|
|
vha_dev_stop(vha, true);
|
|
vha_measure_core_freq(vha);
|
|
vha->do_calibration = false;
|
|
/* Something may have been scheduled in
|
|
* the middle so poke the worker */
|
|
vha_chk_cmd_queues(vha, false);
|
|
return true;
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
/*
|
|
* A session represents a single device and a set of buffers
|
|
* to be used for inferences.
|
|
* If required, buffers will be allocated for hardware CRC and DEBUG.
|
|
*/
|
|
int vha_add_session(struct vha_session *session)
|
|
{
|
|
struct vha_dev *vha = session->vha;
|
|
int ret;
|
|
struct mmu_config mmu_config;
|
|
int ctx_id;
|
|
uint8_t pri;
|
|
|
|
img_pdump_printf("-- OPEN_BEGIN\n");
|
|
img_pdump_printf("-- VHA driver session started\n");
|
|
ret = mutex_lock_interruptible(&vha->lock);
|
|
if (ret)
|
|
return ret;
|
|
|
|
#ifdef CONFIG_VHA_DUMMY
|
|
if (list_empty(&vha->sessions) && !vha->do_calibration)
|
|
vha_dev_start(vha);
|
|
#endif
|
|
|
|
idr_init(&session->onchip_maps);
|
|
|
|
memset(&mmu_config, 0, sizeof(mmu_config));
|
|
/* Create a memory context for this session */
|
|
if (vha->mmu_mode == VHA_MMU_DISABLED) {
|
|
/* if MMU is disabled,
|
|
* bypass the mmu hw layer,
|
|
* but still need do the buffer
|
|
* allocation through img_mem api
|
|
*/
|
|
mmu_config.bypass_hw = true;
|
|
#ifdef CONFIG_HW_MULTICORE
|
|
mmu_config.bypass_offset = IMG_MEM_VA_HEAP1_BASE;
|
|
#endif
|
|
}
|
|
|
|
#ifdef VHA_SCF
|
|
/* Do not calculate parity when core does not support it,
|
|
* or we forced the core to disable it */
|
|
if (vha->hw_props.supported.parity &&
|
|
!vha->parity_disable) {
|
|
mmu_config.use_pte_parity = true;
|
|
dev_dbg(vha->dev,
|
|
"%s: Enabling MMU parity protection!\n",
|
|
__func__);
|
|
}
|
|
#endif
|
|
|
|
mmu_config.addr_width = vha->hw_props.mmu_width;
|
|
mmu_config.alloc_attr = IMG_MEM_ATTR_MMU | /* Indicate MMU allocation */
|
|
IMG_MEM_ATTR_WRITECOMBINE;
|
|
mmu_config.page_size = mmu_page_size_kb_lut[vha->mmu_page_size];
|
|
img_pdump_printf("-- MMU context: using %zukB MMU pages, %lukB CPU pages\n",
|
|
mmu_page_size_kb_lut[vha->mmu_page_size]/1024, PAGE_SIZE/1024);
|
|
|
|
/* Update current MMU page size, so that the correct
|
|
* granularity is used when generating virtual addresses */
|
|
vha->hw_props.mmu_pagesize = mmu_config.page_size;
|
|
|
|
/* Update clock frequency stored in props */
|
|
vha->hw_props.clock_freq = vha->freq_khz;
|
|
|
|
for (ctx_id = 0; ctx_id < ARRAY_SIZE(session->mmu_ctxs); ctx_id++) {
|
|
ret = img_mmu_ctx_create(vha->dev, &mmu_config,
|
|
session->mem_ctx, vha->int_heap_id,
|
|
vha_mmu_callback, session,
|
|
&session->mmu_ctxs[ctx_id].ctx);
|
|
if (ret < 0) {
|
|
dev_err(vha->dev, "%s: failed to create sw mmu context%d!\n",
|
|
__func__, ctx_id);
|
|
goto out_unlock;
|
|
}
|
|
|
|
if (vha->mmu_mode != VHA_MMU_DISABLED) {
|
|
/* Store mmu context id */
|
|
session->mmu_ctxs[ctx_id].id = ret;
|
|
|
|
ret = img_mmu_get_pc(session->mmu_ctxs[ctx_id].ctx,
|
|
&session->mmu_ctxs[ctx_id].pc_baddr,
|
|
&session->mmu_ctxs[ctx_id].pc_bufid);
|
|
if (ret) {
|
|
dev_err(vha->dev, "%s: failed to get PC for context%d!\n",
|
|
__func__, ctx_id);
|
|
ret = -EFAULT;
|
|
goto out_free_mmu_ctx;
|
|
}
|
|
}
|
|
}
|
|
|
|
#ifndef CONFIG_HW_MULTICORE
|
|
if (vha->hw_props.locm_size_bytes && onchipmem_phys_start != ~0) {
|
|
/* OCM data is considered as IO (or shared)*/
|
|
ret = img_mmu_init_cache(session->mmu_ctxs[VHA_MMU_REQ_IO_CTXID].ctx,
|
|
onchipmem_phys_start, vha->hw_props.locm_size_bytes
|
|
#if defined(CFG_SYS_VAGUS)
|
|
+ sizeof(uint32_t)
|
|
#endif
|
|
);
|
|
if (ret < 0) {
|
|
dev_err(vha->dev, "%s: failed to create init cache!\n",
|
|
__func__);
|
|
goto out_free_mmu_ctx;
|
|
}
|
|
vha_dev_ocm_configure(vha);
|
|
}
|
|
#endif
|
|
|
|
/* enable CRC and DEBUG registers */
|
|
ret = vha_dbg_create_hwbufs(session);
|
|
if (ret)
|
|
goto out_free_mmu_ctx;
|
|
|
|
img_pdump_printf("-- OPEN_END\n");
|
|
|
|
/* Used for simulating system level suspend/resume functionality */
|
|
if (list_empty(&vha->sessions) && vha->suspend_interval_msec) {
|
|
INIT_DELAYED_WORK(&vha->suspend_dwork, suspend_test_worker);
|
|
/* Start suspend worker */
|
|
schedule_delayed_work(&vha->suspend_dwork,
|
|
msecs_to_jiffies(vha->suspend_interval_msec));
|
|
}
|
|
|
|
/* Assign session id. */
|
|
session->id = vha_session_id_cnt++;
|
|
|
|
list_add_tail(&session->list, &vha->sessions);
|
|
for (pri = 0; pri < VHA_MAX_PRIORITIES; pri++) {
|
|
struct vha_session *aux_head = list_prev_entry(session, list);
|
|
list_add(&session->sched_list[pri], &aux_head->sched_list[pri]);
|
|
}
|
|
|
|
/* All mmu contextes are successfully created,
|
|
it is safe to incremet the counters and assign id. */
|
|
if (vha->mmu_mode != VHA_MMU_DISABLED)
|
|
for (ctx_id = 0; ctx_id < ARRAY_SIZE(session->mmu_ctxs); ctx_id++) {
|
|
uint8_t hw_ctxid = 0;
|
|
/* Assign mmu hardware context */
|
|
hw_ctxid = VHA_MMU_GET_CTXID(session);
|
|
hw_ctxid += (VHA_MMU_AUX_HW_CTX_SHIFT*ctx_id);
|
|
vha->mmu_ctxs[hw_ctxid]++;
|
|
session->mmu_ctxs[ctx_id].hw_id = hw_ctxid;
|
|
}
|
|
|
|
dev_dbg(vha->dev,
|
|
"%s: %p ctxid:%d\n", __func__, session,
|
|
session->mmu_ctxs[VHA_MMU_REQ_MODEL_CTXID].id);
|
|
|
|
trace_vha_session_in(session->id, 0);
|
|
|
|
mutex_unlock(&vha->lock);
|
|
return ret;
|
|
|
|
out_free_mmu_ctx:
|
|
for (ctx_id = 0; ctx_id < ARRAY_SIZE(session->mmu_ctxs); ctx_id++)
|
|
if (session->mmu_ctxs[ctx_id].ctx)
|
|
img_mmu_ctx_destroy(session->mmu_ctxs[ctx_id].ctx);
|
|
out_unlock:
|
|
mutex_unlock(&vha->lock);
|
|
return ret;
|
|
}
|
|
|
|
static void vha_clean_onchip_maps(struct vha_session *session, struct vha_buffer *buf)
|
|
{
|
|
struct vha_onchip_map *onchip_map = NULL, *tmp = NULL;
|
|
|
|
WARN_ON(!buf);
|
|
WARN_ON(!session);
|
|
|
|
list_for_each_entry_safe(onchip_map, tmp, &buf->onchip_maps, list) {
|
|
idr_remove(&session->onchip_maps, onchip_map->mapid);
|
|
list_del(&onchip_map->list);
|
|
kfree(onchip_map);
|
|
}
|
|
}
|
|
|
|
#ifdef KERNEL_DMA_FENCE_SUPPORT
/*
 * Remove the fence state attached to a buffer.
 *
 * The fence registered with the memory manager for buf->id is removed
 * first. If the buffer has an input fence, its callback is removed (unless
 * the fence has already signalled), the sync file reference is dropped, the
 * fence reference is released and the sync info fields are reset.
 */
void vha_rm_buf_fence(struct vha_session *session, struct vha_buffer *buf)
{
	struct vha_buf_sync_info *info = &buf->sync_info;

	img_mem_remove_fence(session->mem_ctx, buf->id);

	if (!info->in_fence)
		return;

	if (!dma_fence_is_signaled(info->in_fence))
		dma_fence_remove_callback(info->in_fence, &info->in_sync_cb);

	if (info->in_sync_file) {
		fput(info->in_sync_file);
		info->in_sync_file = NULL;
	}
	info->in_sync_fd = VHA_SYNC_NONE;

	dma_fence_put(info->in_fence);
	info->in_fence = NULL;
	memset(&info->in_sync_cb, 0, sizeof(struct dma_fence_cb));
}
#endif
|
|
|
|
#if defined(VHA_SCF) && defined(CONFIG_HW_MULTICORE)
/*
 * Schedule the swd delayed work to run after vha->swd_period milliseconds.
 * Does nothing when the period is 0. @cmd_idx is not used.
 */
void vha_start_swd(struct vha_dev *vha, int cmd_idx)
{
	if (!vha->swd_period)
		return;

	schedule_delayed_work(&vha->swd_dwork, msecs_to_jiffies(vha->swd_period));
}
#endif
|
|
|
|
void vha_rm_session(struct vha_session *session)
|
|
{
|
|
struct vha_dev *vha = session->vha;
|
|
struct vha_session *cur_session, *tmp_session;
|
|
struct vha_rsp *cur_rsp, *tmp_rsp;
|
|
struct vha_buffer *cur_buf, *tmp_buf;
|
|
bool reschedule = false;
|
|
int ctx_id;
|
|
uint8_t pri;
|
|
|
|
mutex_lock(&vha->lock);
|
|
|
|
img_pdump_printf("-- FREE_END\n");
|
|
session->freeing = false;
|
|
|
|
img_pdump_printf("-- CLOSE_BEGIN\n");
|
|
|
|
/* Remove pend/queued session commands. */
|
|
reschedule = vha_rm_session_cmds(session);
|
|
|
|
/* Remove responses for session related commands. */
|
|
list_for_each_entry_safe(cur_rsp, tmp_rsp, &session->rsps, list) {
|
|
dev_warn(vha->dev,
|
|
"Removing a session while the rsp is still pending\n");
|
|
list_del(&cur_rsp->list);
|
|
kfree(cur_rsp);
|
|
}
|
|
|
|
/* Disable CRC and DEBUG capture. */
|
|
#ifdef CONFIG_HW_MULTICORE
|
|
vha_dbg_stop_hwbufs(session, vha->full_core_mask);
|
|
#else
|
|
vha_dbg_stop_hwbufs(session, 0);
|
|
#endif
|
|
vha_dbg_destroy_hwbufs(session);
|
|
|
|
list_for_each_entry_safe(cur_buf, tmp_buf, &session->bufs, list) {
|
|
dev_warn(vha->dev,
|
|
"Removing a session while the buffer wasn't freed\n");
|
|
#ifdef KERNEL_DMA_FENCE_SUPPORT
|
|
vha_rm_buf_fence(session, cur_buf);
|
|
#endif
|
|
vha_clean_onchip_maps(session, cur_buf);
|
|
list_del(&cur_buf->list);
|
|
kfree(cur_buf);
|
|
}
|
|
|
|
/* Remove link from VHA's list. */
|
|
list_for_each_entry_safe(cur_session, tmp_session,
|
|
&vha->sessions, list) {
|
|
if (cur_session == session)
|
|
list_del(&cur_session->list);
|
|
}
|
|
for (pri = 0; pri < VHA_MAX_PRIORITIES; pri++) {
|
|
list_for_each_entry_safe(cur_session, tmp_session,
|
|
&vha->sched_sessions[pri], sched_list[pri]) {
|
|
if (cur_session == session)
|
|
list_del(&cur_session->sched_list[pri]);
|
|
}
|
|
}
|
|
|
|
/* Reset hardware if required. */
|
|
if ((list_empty(&vha->sessions) && !vha->do_calibration)
|
|
|| reschedule
|
|
)
|
|
vha_dev_stop(vha, reschedule);
|
|
|
|
#ifndef CONFIG_HW_MULTICORE
|
|
img_mmu_clear_cache(session->mmu_ctxs[VHA_MMU_REQ_IO_CTXID].ctx);
|
|
#endif
|
|
|
|
/* Delete session's MMU memory contexts. */
|
|
for (ctx_id = 0; ctx_id < ARRAY_SIZE(session->mmu_ctxs); ctx_id++) {
|
|
img_mmu_ctx_destroy(session->mmu_ctxs[ctx_id].ctx);
|
|
|
|
if (vha->mmu_mode != VHA_MMU_DISABLED) {
|
|
uint8_t hw_ctxid = session->mmu_ctxs[ctx_id].hw_id;
|
|
WARN_ON(!vha->mmu_ctxs[hw_ctxid]);
|
|
if (vha->mmu_ctxs[hw_ctxid])
|
|
vha->mmu_ctxs[hw_ctxid]--;
|
|
}
|
|
}
|
|
|
|
/* Update mem stats - max memory usage in this session. */
|
|
img_mem_get_usage(session->mem_ctx,
|
|
(size_t *)&vha->stats.mem_usage_last, NULL);
|
|
{
|
|
uint32_t MB = vha->stats.mem_usage_last / (1024 * 1024);
|
|
uint32_t bytes = vha->stats.mem_usage_last -
|
|
(MB * (1024 * 1024));
|
|
uint32_t kB = (bytes * 1000) / (1024 * 1024);
|
|
|
|
dev_dbg(vha->dev,
|
|
"%s: Total user memory used in session: %u.%u MB\n",
|
|
__func__, MB, kB);
|
|
}
|
|
img_mmu_get_usage(session->mem_ctx,
|
|
(size_t *)&vha->stats.mmu_usage_last, NULL);
|
|
|
|
vha->active_mmu_ctx = VHA_INVALID_ID;
|
|
img_pdump_printf("-- VHA driver session complete\n");
|
|
img_pdump_printf("-- CLOSE_END\n");
|
|
|
|
/* Used for simulating system level suspend/resume functionality */
|
|
if (list_empty(&vha->sessions) && vha->suspend_interval_msec) {
|
|
mutex_unlock(&vha->lock);
|
|
flush_scheduled_work();
|
|
cancel_delayed_work_sync(&vha->suspend_dwork);
|
|
mutex_lock(&vha->lock);
|
|
}
|
|
|
|
trace_vha_session_out(session->id, session->kicks);
|
|
|
|
mutex_unlock(&vha->lock);
|
|
|
|
/* Reschedule once the session is removed. */
|
|
if (reschedule)
|
|
vha_chk_cmd_queues(vha, true);
|
|
}
|
|
|
|
/*
 * Hook for allocating buffers shared by the whole device.
 *
 * Nothing is allocated at the moment: the only statements in the body are
 * pdump markers that are compiled out with "#if 0".
 *
 * Return: always 0.
 */
static int vha_alloc_common(struct vha_dev *vha)
{
	int status = 0;

#if 0
	img_pdump_printf("-- INIT_BEGIN\n");

	img_pdump_printf("-- INIT_END\n");
#endif

	return status;
}
|
|
|
|
/****************** vha sysfs definition *************************************/
|
|
static ssize_t
|
|
BVNC_show(struct device *dev, struct device_attribute *attr, char *buf)
|
|
{
|
|
struct vha_dev *vha = vha_dev_get_drvdata(dev);
|
|
struct vha_hw_props *props = &vha->hw_props;
|
|
|
|
return snprintf(buf, 4*6, "%hu.%hu.%hu.%hu\n",
|
|
(unsigned short)(props->core_id >> 48),
|
|
(unsigned short)(props->core_id >> 32),
|
|
(unsigned short)(props->core_id >> 16),
|
|
(unsigned short)props->core_id);
|
|
}
|
|
|
|
/*
 * sysfs "log" store handler.
 *
 * Any write resets the device statistics to zero and increments the
 * reset counter; the written data itself is not inspected.
 * Both updates happen with the device lock held.
 *
 * Return: @count, i.e. the whole input is reported as consumed.
 */
static ssize_t log_store(struct device *dev, struct device_attribute *attr,
		const char *buf, size_t count)
{
	struct vha_dev *vha = vha_dev_get_drvdata(dev);

	mutex_lock(&vha->lock);

	vha_reset_cnt++;
	memset(&vha->stats, 0, sizeof(struct vha_stats));

	mutex_unlock(&vha->lock);

	return count;
}
|
|
|
|
static ssize_t log_show(struct device *dev, struct device_attribute *attr, char *buf)
|
|
{
|
|
ssize_t len = 0;
|
|
struct vha_dev *vha = vha_dev_get_drvdata(dev);
|
|
struct vha_session *session = NULL;
|
|
int ret = 0;
|
|
size_t mem_val = 0;
|
|
|
|
mutex_lock(&vha->lock);
|
|
|
|
len += scnprintf(buf + len, PAGE_SIZE - len,
|
|
"[NPU] Driver Version: " VERSION_STRING "\n"
|
|
"---------------------------------------MODULE STATUS--------------------------------------\n"
|
|
"DevId DevStatus DevSessionNum DevLoadingAvg_%% TotalTasks CompletedTasks\n"
|
|
" %d %d %d %d %lld %lld\n"
|
|
"-----------------------------------------MEM INFO-----------------------------------------\n"
|
|
"MMU_page_size MMU_mode\n"
|
|
" %ld %d\n"
|
|
"---------------------------------------INSTANCE INFO--------------------------------------\n"
|
|
"AvgHwProcUs LastHwProcUs TotalHwProcUs LastMemUsage LastMmuUsage\n"
|
|
" %lld %lld %lld %d %d\n"
|
|
"--------------------------------------EXCEPTION INFO--------------------------------------\n"
|
|
"total_failures reset\n"
|
|
" %lld %d\n",
|
|
vha->id, vha->state, vha_session_id_cnt, vha->stats.cnn_utilization/10,
|
|
vha->stats.cnn_kicks, vha->stats.cnn_kicks_completed,
|
|
mmu_page_size_kb_lut[vha->mmu_page_size], vha->mmu_mode,
|
|
vha->stats.cnn_avg_proc_us, vha->stats.last_proc_us, vha->stats.cnn_total_proc_us,
|
|
vha->stats.mem_usage_last, vha->stats.mmu_usage_last,
|
|
vha->stats.total_failures, vha_reset_cnt);
|
|
|
|
list_for_each_entry(session, &vha->sessions, list) {
|
|
if (!ret++) {
|
|
len += scnprintf(buf + len, PAGE_SIZE - len,
|
|
"---------------------------------------SESSION INFO--------------------------------------\n"
|
|
"SessionId MemUsage Cmds AvgProcUs LastProcUs TotalProcUs\n");
|
|
}
|
|
img_mem_get_usage(session->mem_ctx, &mem_val, NULL);
|
|
len += scnprintf(buf + len, PAGE_SIZE - len,
|
|
" %-7d %-12ld %-8lld %-13lld %-13lld %lld\n",
|
|
session->id, mem_val, session->kicks, session->avg_proc_us,
|
|
session->last_proc_us, session->total_proc_us);
|
|
}
|
|
|
|
mutex_unlock(&vha->lock);
|
|
|
|
return len;
|
|
}
|
|
|
|
static DEVICE_ATTR_RO(BVNC);
|
|
|
|
static struct attribute *vha_sysfs_entries[] = {
|
|
&dev_attr_BVNC.attr,
|
|
NULL,
|
|
};
|
|
|
|
static struct device_attribute dev_attr_log = __ATTR(log, 0664, log_show, log_store);
|
|
|
|
static struct attribute *vha_sysfs_attrs[] = {
|
|
&dev_attr_log.attr,
|
|
NULL,
|
|
};
|
|
|
|
static const struct attribute_group vha_attr_group = {
|
|
.name = NULL, /* put in device directory */
|
|
.attrs = vha_sysfs_entries,
|
|
};
|
|
|
|
static struct attribute_group vha_dev_attr_group = {
|
|
.name = "info", /* put in info directory */
|
|
.attrs = vha_sysfs_attrs,
|
|
};
|
|
|
|
void vha_sched_apm(struct vha_dev *vha, struct vha_apm_work *apm_work)
|
|
{
|
|
unsigned long work_at = jiffies + msecs_to_jiffies(apm_work->delay_ms);
|
|
int ret;
|
|
|
|
dev_dbg(vha->dev, "%s: core_mask:%#x delay:%d\n",
|
|
__func__, apm_work->core_mask, apm_work->delay_ms);
|
|
|
|
/*
|
|
* Try to queue the work.
|
|
*/
|
|
ret = schedule_delayed_work(&apm_work->dwork,
|
|
work_at - jiffies);
|
|
if (!ret) {
|
|
/* Work is already in the queue.
|
|
* Canceling & rescheduling might be problematic,
|
|
* so just modify to postpone.
|
|
*/
|
|
mod_delayed_work(system_wq, &apm_work->dwork,
|
|
work_at - jiffies);
|
|
}
|
|
}
|
|
|
|
static void vha_apm_worker(struct work_struct *work)
|
|
{
|
|
struct vha_apm_work *apm_work =
|
|
container_of(work, struct vha_apm_work, dwork.work);
|
|
struct vha_dev *vha = apm_work->vha;
|
|
|
|
mutex_lock(&vha->lock);
|
|
dev_dbg(vha->dev, "%s: apm expired! core_mask:%#x\n",
|
|
__func__, apm_work->core_mask);
|
|
vha_dev_apm_stop(vha, apm_work);
|
|
mutex_unlock(&vha->lock);
|
|
}
|
|
|
|
int vha_add_dev(struct device *dev,
|
|
const struct heap_config heap_configs[], const int heaps,
|
|
void *plat_data, void __iomem *reg_base, uint32_t reg_size)
|
|
{
|
|
struct vha_dev_common* vha_common;
|
|
struct vha_dev *vha;
|
|
int ret;
|
|
uint8_t id, pri;
|
|
|
|
/* Validate module params. */
|
|
ret = -EINVAL;
|
|
if (low_latency > VHA_LL_SELF_KICK) {
|
|
dev_err(dev, "%s: Unsupported low latency mode %u!\n", __func__, low_latency);
|
|
goto out_validate_params;
|
|
} else if ((mmu_mode != VHA_MMU_DISABLED) &&
|
|
(mmu_mode != VHA_MMU_DIRECT) &&
|
|
(mmu_mode != VHA_MMU_40BIT)) {
|
|
dev_err(dev, "%s: Unsupported MMU mode %u!\n", __func__, mmu_mode);
|
|
goto out_validate_params;
|
|
} else if (mmu_ctx_default >= VHA_MMU_MAX_HW_CTXS) {
|
|
dev_err(dev, "%s: Unsupported MMU context id %u!\n", __func__, mmu_ctx_default);
|
|
goto out_validate_params;
|
|
} else if (mmu_page_size > ARRAY_SIZE(mmu_page_size_kb_lut)) {
|
|
dev_err(dev, "%s: Unsupported MMU page size %u!\n", __func__, mmu_page_size);
|
|
goto out_validate_params;
|
|
}
|
|
ret = 0;
|
|
|
|
vha_common = devm_kzalloc(dev, sizeof(struct vha_dev_common), GFP_KERNEL);
|
|
if (!vha_common)
|
|
return -ENOMEM;
|
|
|
|
vha = devm_kzalloc(dev, sizeof(struct vha_dev), GFP_KERNEL);
|
|
if (!vha) {
|
|
ret = -ENOMEM;
|
|
goto out_free_dev;
|
|
}
|
|
|
|
vha_common->vha_dev = vha;
|
|
|
|
dev_dbg(dev, "%s: allocated vha_dev @ %px\n", __func__, vha);
|
|
vha->dev = dev;
|
|
vha->reg_base = reg_base;
|
|
vha->reg_size = reg_size;
|
|
vha->plat_data = plat_data;
|
|
vha->fault_inject = fault_inject;
|
|
vha->suspend_interval_msec = suspend_interval_msec;
|
|
vha->hw_bypass = hw_bypass;
|
|
vha->low_latency = low_latency;
|
|
vha->no_clock_disable = no_clock_disable;
|
|
vha->pm_delay = pm_delay;
|
|
vha->mmu_mode = mmu_mode;
|
|
vha->mmu_ctx_default = mmu_ctx_default;
|
|
vha->mmu_page_size = mmu_page_size;
|
|
vha->mmu_base_pf_test = test_mmu_base_pf;
|
|
vha->mmu_no_map_count = test_mmu_no_map_count;
|
|
vha->ocm_paddr = onchipmem_phys_start;
|
|
#ifdef VHA_SCF
|
|
vha->parity_disable = parity_disable;
|
|
vha->confirm_config_reg = confirm_config_reg;
|
|
#endif
|
|
vha->cnn_combined_crc_enable = cnn_combined_crc_enable;
|
|
vha->active_mmu_ctx = VHA_INVALID_ID;
|
|
vha->dump_buff_digest = dump_buff_digest;
|
|
|
|
/* Enable and configure pm_runtime*/
|
|
if (!pm_runtime_enabled(vha->dev))
|
|
pm_runtime_enable(vha->dev);
|
|
pm_runtime_set_autosuspend_delay(vha->dev, VHA_CORE_SUSPEND_DELAY);
|
|
pm_runtime_use_autosuspend(vha->dev);
|
|
/* Resume device so that we can read the core props */
|
|
if (pm_runtime_status_suspended(vha->dev))
|
|
pm_runtime_get_sync(vha->dev);
|
|
|
|
/* Read HW properties */
|
|
ret = vha_dev_get_props(vha, onchipmem_size);
|
|
if (ret) {
|
|
dev_err(dev, "%s: could not get vha properties at %px\n",
|
|
__func__, (__force void *)vha->reg_base);
|
|
pm_runtime_put_sync_suspend(vha->dev);
|
|
goto out_free_dev;
|
|
}
|
|
|
|
if (test_without_bvnc_check)
|
|
vha->hw_props.skip_bvnc_check = true;
|
|
|
|
mutex_init(&vha->lock);
|
|
spin_lock_init(&vha->irq_lock);
|
|
INIT_LIST_HEAD(&vha->sessions);
|
|
for (pri = 0; pri < VHA_MAX_PRIORITIES; pri++)
|
|
INIT_LIST_HEAD(&vha->sched_sessions[pri]);
|
|
INIT_LIST_HEAD(&vha->heaps);
|
|
|
|
ret = vha_init(vha, heap_configs, heaps);
|
|
if (ret) {
|
|
dev_err(dev, "%s: main component initialisation failed!",
|
|
__func__);
|
|
goto out_free_dev;
|
|
}
|
|
|
|
/* Initialise command data pump worker */
|
|
INIT_WORK(&vha->worker, cmd_worker);
|
|
|
|
#ifdef CONFIG_VHA_DUMMY_SIMULATE_HW_PROCESSING_TIME
|
|
/* Initialise hw processing time simulation worker */
|
|
#ifdef CONFIG_HW_MULTICORE
|
|
{
|
|
for (id = 0; id < vha->hw_props.num_cnn_core_devs; id ++) {
|
|
INIT_DELAYED_WORK(&vha->dummy_dworks[id].dummy_dwork,
|
|
vha_dummy_worker);
|
|
vha->dummy_dworks[id].wm_id = id;
|
|
vha->dummy_dworks[id].vha = vha;
|
|
}
|
|
}
|
|
#else
|
|
INIT_DELAYED_WORK(&vha->dummy_dwork, vha_dummy_worker);
|
|
#endif
|
|
#endif
|
|
|
|
dev_set_drvdata(dev, vha_common);
|
|
|
|
ret = vha_api_add_dev(dev, vha, drv.num_devs);
|
|
if (ret) {
|
|
dev_err(dev, "%s: failed to add UM node!", __func__);
|
|
goto out_add_dev;
|
|
}
|
|
|
|
vha_dbg_init(vha);
|
|
ret = vha_pdump_init(vha, &vha_common->pdump);
|
|
if (ret == 0)
|
|
vha->hw_props.use_pdump = true;
|
|
if (ret == -EPERM)
|
|
goto out_alloc_common;
|
|
else
|
|
ret = 0;
|
|
|
|
ret = vha_alloc_common(vha);
|
|
if (ret) {
|
|
dev_err(dev, "%s: failed to allocate common dev buffers!",
|
|
__func__);
|
|
goto out_alloc_common;
|
|
}
|
|
|
|
#ifdef CONFIG_PM_DEVFREQ
|
|
ret = vha_devfreq_init(vha->dev);
|
|
if (ret) {
|
|
dev_err(vha->dev, "failed to add vha dev to devfreq!\n");
|
|
}
|
|
#endif
|
|
|
|
pm_runtime_put_sync_autosuspend(vha->dev);
|
|
|
|
/* Add device to driver context */
|
|
list_add(&vha->list, &drv.devices);
|
|
drv.num_devs++;
|
|
|
|
if (sysfs_create_group(&dev->kobj, &vha_attr_group))
|
|
dev_err(dev, "failed to create sysfs entries\n");
|
|
|
|
if (sysfs_create_group(&dev->kobj, &vha_dev_attr_group))
|
|
dev_err(dev, "failed to create info sysfs entries\n");
|
|
|
|
vha->freq_khz = freq_khz;
|
|
#ifndef CONFIG_VHA_DUMMY
|
|
if (vha->freq_khz < 0)
|
|
vha->do_calibration = true; /* ??? OS0 ? */
|
|
|
|
if (vha->freq_khz <= 0)
|
|
vha->freq_khz = VHA_CORE_CLOCK_MHZ * 1000;
|
|
|
|
if (vha->do_calibration)
|
|
dev_info(dev, "%s: Core freq[kHz]: to be calibrated",
|
|
__func__);
|
|
else
|
|
dev_info(dev, "%s: Core freq[kHz]: %u",
|
|
__func__, vha->freq_khz);
|
|
#else
|
|
# ifdef CONFIG_VHA_DUMMY_SIMULATE_HW_PROCESSING_TIME
|
|
vha->freq_khz = VHA_CORE_CLOCK_MHZ * 1000;
|
|
dev_info(dev, "%s: Core freq[kHz]: %u (faked for DUMMY device)",
|
|
__func__, vha->freq_khz);
|
|
# endif
|
|
#endif
|
|
|
|
for (id = 0; id < vha->hw_props.num_cnn_core_devs; id++) {
|
|
vha->apm_dworks[id].vha = vha;
|
|
vha->apm_dworks[id].core_mask = 1 << id;
|
|
vha->apm_dworks[id].delay_ms = vha->pm_delay;
|
|
INIT_DELAYED_WORK(&vha->apm_dworks[id].dwork, vha_apm_worker);
|
|
}
|
|
|
|
#if defined(VHA_SCF) && defined(CONFIG_HW_MULTICORE)
|
|
/* Initialise the SW wachdog */
|
|
INIT_DELAYED_WORK(&vha->swd_dwork, wd_timer_callback);
|
|
|
|
vha->swd_period = swd_period;
|
|
vha->swd_timeout_default = swd_timeout_default;
|
|
vha->swd_timeout_m0 = swd_timeout_m0;
|
|
vha->swd_timeout_m1 = swd_timeout_m1;
|
|
#endif
|
|
|
|
return ret;
|
|
out_alloc_common:
|
|
vha_api_rm_dev(dev, vha);
|
|
vha_dbg_deinit(vha);
|
|
out_add_dev:
|
|
dev_set_drvdata(dev, NULL);
|
|
vha_deinit();
|
|
out_free_dev:
|
|
devm_kfree(dev, vha);
|
|
devm_kfree(dev, vha_common);
|
|
out_validate_params:
|
|
return ret;
|
|
}
|
|
|
|
static void vha_free_common(struct vha_dev *vha)
|
|
{
|
|
if (vha->fp_bufid) {
|
|
img_mem_free(drv.mem_ctx, vha->fp_bufid);
|
|
vha->fp_bufid = VHA_INVALID_ID;
|
|
}
|
|
}
|
|
|
|
void vha_rm_dev(struct device *dev)
|
|
{
|
|
struct vha_dev *vha;
|
|
struct vha_dev_common* vha_common;
|
|
int ret;
|
|
uint8_t id, pri;
|
|
|
|
vha_common = dev_get_drvdata(dev);
|
|
BUG_ON(vha_common == NULL);
|
|
vha = vha_common->vha_dev;
|
|
|
|
if (!vha) {
|
|
pr_err("%s: vha ptr is invalid!\n", __func__);
|
|
return;
|
|
}
|
|
|
|
if (dev != vha->dev) {
|
|
pr_err("%s: vha->dev is not properly initialised! (%p!=%p)\n", __func__, dev, vha->dev);
|
|
return;
|
|
}
|
|
|
|
flush_scheduled_work();
|
|
|
|
for (id = 0; id < vha->hw_props.num_cnn_core_devs; id++)
|
|
cancel_delayed_work_sync(&vha->apm_dworks[id].dwork);
|
|
|
|
#if defined(VHA_SCF) && defined(CONFIG_HW_MULTICORE)
|
|
cancel_delayed_work_sync(&vha->swd_dwork);
|
|
#endif
|
|
|
|
#ifdef CONFIG_VHA_DUMMY_SIMULATE_HW_PROCESSING_TIME
|
|
#ifdef CONFIG_HW_MULTICORE
|
|
{
|
|
for (id = 0; id < vha->hw_props.num_cnn_core_devs; id++)
|
|
cancel_delayed_work_sync(&vha->dummy_dworks[id].dummy_dwork);
|
|
}
|
|
#else
|
|
cancel_delayed_work_sync(&vha->dummy_dwork);
|
|
#endif
|
|
#endif
|
|
if (!pm_runtime_status_suspended(vha->dev))
|
|
pm_runtime_put_sync_suspend(vha->dev);
|
|
pm_runtime_dont_use_autosuspend(vha->dev);
|
|
pm_runtime_disable(vha->dev);
|
|
|
|
#ifdef CONFIG_PM_DEVFREQ
|
|
vha_devfreq_term(dev);
|
|
#endif
|
|
|
|
vha_free_common(vha);
|
|
#ifdef CONFIG_HW_MULTICORE
|
|
vha_dev_scheduler_deinit(vha);
|
|
#endif
|
|
|
|
while (!list_empty(&vha->heaps)) {
|
|
struct vha_heap *heap = list_first_entry(&vha->heaps, struct vha_heap, list);
|
|
list_del(&heap->list);
|
|
if(!heap->global) /* remove only device heaps */
|
|
img_mem_del_heap(heap->id);
|
|
kfree(heap);
|
|
}
|
|
|
|
ret = vha_api_rm_dev(dev, vha);
|
|
if (ret)
|
|
dev_err(dev, "%s: failed to remove UM node!\n", __func__);
|
|
|
|
list_del(&vha->sessions);
|
|
for (pri = 0; pri < VHA_MAX_PRIORITIES; pri++)
|
|
list_del(&vha->sched_sessions[pri]);
|
|
list_del(&vha->list);
|
|
list_del(&vha->heaps);
|
|
BUG_ON(!drv.num_devs--);
|
|
sysfs_remove_group(&dev->kobj, &vha_attr_group);
|
|
sysfs_remove_group(&dev->kobj, &vha_dev_attr_group);
|
|
|
|
vha_dbg_deinit(vha);
|
|
vha_pdump_deinit(&vha_common->pdump);
|
|
dev_set_drvdata(dev, NULL);
|
|
|
|
devm_kfree(dev, vha);
|
|
devm_kfree(dev, vha_common);
|
|
}
|
|
|
|
/* performs device self test operations */
|
|
int vha_dev_calibrate(struct device *dev, uint32_t cycles)
|
|
{
|
|
int ret = 0;
|
|
struct vha_dev *vha = vha_dev_get_drvdata(dev);
|
|
if (!vha) {
|
|
WARN_ON(1);
|
|
return -EFAULT;
|
|
}
|
|
|
|
mutex_lock(&vha->lock);
|
|
if (vha->do_calibration) {
|
|
vha->calibration_cycles = cycles;
|
|
dev_info(dev, "%s: Starting core frequency measurement (%d)...",
|
|
__func__, cycles);
|
|
ret = vha_dev_start(vha);
|
|
if (ret)
|
|
goto calib_err;
|
|
#if (defined(HW_AX2) || defined(CONFIG_HW_MULTICORE))
|
|
vha_cnn_start_calib(vha);
|
|
#endif
|
|
}
|
|
calib_err:
|
|
mutex_unlock(&vha->lock);
|
|
return ret;
|
|
}
|
|
|
|
/* map buffer into the device */
|
|
int vha_map_to_onchip(struct vha_session *session,
|
|
uint32_t buf_id, uint64_t virt_addr, uint32_t page_size,
|
|
unsigned int num_pages, uint32_t page_idxs[], uint32_t *mapid)
|
|
{
|
|
struct vha_dev *vha = session->vha;
|
|
struct vha_onchip_map *onchip_map = NULL;
|
|
struct vha_buffer *buf = NULL;
|
|
int map_id = *mapid;
|
|
int ret = 0;
|
|
int i = 0;
|
|
|
|
ret = mutex_lock_interruptible(&vha->lock);
|
|
if (ret)
|
|
return ret;
|
|
|
|
buf = vha_find_bufid(session, buf_id);
|
|
if (!buf) {
|
|
dev_err(vha->dev, "%s: buffer id %d not found\n", __func__, buf_id);
|
|
ret = -EINVAL;
|
|
goto out_unlock;
|
|
}
|
|
|
|
if (map_id == 0) {
|
|
onchip_map = kzalloc(sizeof(struct vha_onchip_map), GFP_KERNEL);
|
|
if (!onchip_map) {
|
|
ret = -ENOMEM;
|
|
goto out_unlock;
|
|
}
|
|
|
|
map_id = idr_alloc(&session->onchip_maps, onchip_map,
|
|
MIN_ONCHIP_MAP, MAX_ONCHIP_MAP, GFP_KERNEL);
|
|
if (map_id < 0) {
|
|
dev_err(vha->dev, "%s: idr_alloc failed\n", __func__);
|
|
ret = map_id;
|
|
goto alloc_id_failed;
|
|
}
|
|
|
|
ret = img_mmu_map(session->mmu_ctxs[VHA_MMU_REQ_IO_CTXID].ctx,
|
|
session->mem_ctx, buf_id,
|
|
virt_addr, IMG_MMU_PTE_FLAG_NONE);
|
|
if (ret) {
|
|
dev_err(vha->dev, "%s: map failed!\n", __func__);
|
|
ret = -EFAULT;
|
|
goto mmu_map_failed;
|
|
}
|
|
|
|
onchip_map->devvirt = virt_addr;
|
|
onchip_map->mapid = map_id;
|
|
onchip_map->bufid = buf_id;
|
|
list_add(&onchip_map->list, &buf->onchip_maps);
|
|
|
|
*mapid = map_id;
|
|
} else {
|
|
onchip_map = idr_find(&session->onchip_maps, map_id);
|
|
if (!onchip_map) {
|
|
dev_err(vha->dev, "%s: idr_find failed\n", __func__);
|
|
ret = -EINVAL;
|
|
goto out_unlock;
|
|
}
|
|
}
|
|
|
|
for (i = 0; i < num_pages; i++) {
|
|
ret = img_mmu_move_pg_to_cache(
|
|
session->mmu_ctxs[VHA_MMU_REQ_IO_CTXID].ctx,
|
|
session->mem_ctx, buf_id,
|
|
onchip_map->devvirt, page_size, page_idxs[i]);
|
|
if (ret) {
|
|
dev_warn(vha->dev, "%s: moving a page to on chip ram failed!\n", __func__);
|
|
goto out_unlock;
|
|
}
|
|
}
|
|
|
|
dev_dbg(vha->dev, "%s: mapped buf %s (%u) to %#llx, num_pages: %d\n",
|
|
__func__, buf->name, buf_id, virt_addr, num_pages);
|
|
|
|
mutex_unlock(&vha->lock);
|
|
return 0;
|
|
|
|
mmu_map_failed:
|
|
idr_remove(&session->onchip_maps, map_id);
|
|
alloc_id_failed:
|
|
kfree(onchip_map);
|
|
out_unlock:
|
|
mutex_unlock(&vha->lock);
|
|
return ret;
|
|
}
|
|
|
|
/* map buffer into the device */
|
|
int vha_map_buffer(struct vha_session *session,
|
|
uint32_t buf_id, uint64_t virt_addr,
|
|
uint32_t map_flags)
|
|
{
|
|
struct vha_dev *vha = session->vha;
|
|
uint32_t flags = IMG_MMU_PTE_FLAG_NONE;
|
|
struct vha_buffer *buf = NULL;
|
|
int ret = 0;
|
|
|
|
ret = mutex_lock_interruptible(&vha->lock);
|
|
if (ret)
|
|
return ret;
|
|
|
|
if ((map_flags & (VHA_MAP_FLAG_READ_ONLY|VHA_MAP_FLAG_WRITE_ONLY)) ==
|
|
(VHA_MAP_FLAG_READ_ONLY|VHA_MAP_FLAG_WRITE_ONLY)) {
|
|
dev_err(vha->dev, "%s: invalid mapping flags combination: 0x%x\n",
|
|
__func__, map_flags);
|
|
ret = -EINVAL;
|
|
goto out_unlock;
|
|
}
|
|
|
|
/* Convert permission flags to internal definitions */
|
|
if (map_flags & VHA_MAP_FLAG_READ_ONLY)
|
|
flags |= IMG_MMU_PTE_FLAG_READ_ONLY;
|
|
|
|
/* Note: VHA_MAP_FLAG_WRITE_ONLY is not supported by the mmuv3 hw */
|
|
|
|
/* Direct 1:1 mappings */
|
|
if (vha->mmu_mode == VHA_MMU_DIRECT) {
|
|
uint64_t *phys = img_mem_get_page_array(session->mem_ctx,
|
|
buf_id);
|
|
WARN_ON(!phys);
|
|
/* Override virtual address,
|
|
* only applicable for physically contiguous memory regions */
|
|
if (phys && phys[0]) {
|
|
virt_addr = phys[0];
|
|
dev_dbg(vha->dev,
|
|
"%s: using direct mapping!\n",
|
|
__func__);
|
|
} else {
|
|
dev_err(vha->dev,
|
|
"%s: not contiguous memory!\n",
|
|
__func__);
|
|
}
|
|
}
|
|
|
|
buf = vha_find_bufid(session, buf_id);
|
|
|
|
#ifdef CONFIG_HW_MULTICORE
|
|
if (buf->attr & IMG_MEM_ATTR_OCM) {
|
|
uint64_t *phys = img_mem_get_page_array(session->mem_ctx,
|
|
buf_id);
|
|
/* Virtual == physical */
|
|
buf->devvirt = phys[0];
|
|
dev_dbg(vha->dev,
|
|
"%s: buf %s (%u), is OCM buffer, no MMU mapping needed!\n",
|
|
__func__, buf->name, buf_id);
|
|
|
|
goto out_unlock;
|
|
}
|
|
#endif
|
|
|
|
/* force MMU fault after N buffer map operations */
|
|
if (vha->mmu_no_map_count != 0) {
|
|
int ctx_id;
|
|
if (map_flags & VHA_MAP_FLAG_MODEL) {
|
|
ctx_id = VHA_MMU_REQ_MODEL_CTXID;
|
|
buf->req_type = VHA_REQ_MODEL;
|
|
} else if (map_flags & VHA_MAP_FLAG_IO) {
|
|
ctx_id = VHA_MMU_REQ_IO_CTXID;
|
|
buf->req_type = VHA_REQ_IO;
|
|
} else {
|
|
WARN_ONCE(1, "No requestor flags!");
|
|
ctx_id = VHA_MMU_REQ_IO_CTXID;
|
|
buf->req_type = VHA_REQ_IO;
|
|
}
|
|
ret = img_mmu_map(session->mmu_ctxs[ctx_id].ctx,
|
|
session->mem_ctx, buf_id, virt_addr, flags);
|
|
if (ret || buf == NULL) {
|
|
dev_err(vha->dev, "%s: map failed!\n", __func__);
|
|
goto out_unlock;
|
|
}
|
|
if (vha->mmu_no_map_count >= 0)
|
|
--vha->mmu_no_map_count;
|
|
} else
|
|
dev_info(vha->dev, "Bringup test: MMU no map count = %d\n",
|
|
vha->mmu_no_map_count);
|
|
|
|
buf->devvirt = virt_addr;
|
|
dev_dbg(vha->dev, "%s: mapped buf %s (%u) to %#llx, flags: 0x%x\n",
|
|
__func__, buf->name, buf_id, virt_addr, map_flags);
|
|
|
|
out_unlock:
|
|
mutex_unlock(&vha->lock);
|
|
return ret;
|
|
}
|
|
|
|
/* unmap buffer from the device */
|
|
int vha_unmap_buffer(struct vha_session *session,
|
|
uint32_t buf_id)
|
|
{
|
|
struct vha_dev *vha = session->vha;
|
|
struct vha_buffer *buf = NULL;
|
|
int ret = 0;
|
|
int ctx_id;
|
|
|
|
ret = mutex_lock_interruptible(&vha->lock);
|
|
if (ret)
|
|
return ret;
|
|
|
|
buf = vha_find_bufid(session, buf_id);
|
|
|
|
#ifdef CONFIG_HW_MULTICORE
|
|
if (buf->attr & IMG_MEM_ATTR_OCM) {
|
|
dev_dbg(vha->dev,
|
|
"%s: buf %s (%u) is OCM buffer, no MMU unmapping needed!\n",
|
|
__func__, buf->name, buf_id);
|
|
buf->devvirt = ~0ULL;
|
|
goto out_unlock;
|
|
}
|
|
#endif
|
|
|
|
if (buf->req_type == VHA_REQ_MODEL)
|
|
ctx_id = VHA_MMU_REQ_MODEL_CTXID;
|
|
else
|
|
ctx_id = VHA_MMU_REQ_IO_CTXID;
|
|
|
|
ret = img_mmu_unmap(session->mmu_ctxs[ctx_id].ctx,
|
|
session->mem_ctx, buf_id);
|
|
if (ret || buf == NULL) {
|
|
dev_err(vha->dev, "%s: unmap failed!\n", __func__);
|
|
goto out_unlock;
|
|
}
|
|
|
|
buf->devvirt = 0ULL;
|
|
|
|
vha_clean_onchip_maps(session, buf);
|
|
|
|
dev_dbg(vha->dev, "%s: unmapped buf %s(%u)\n",
|
|
__func__, buf->name, buf_id);
|
|
|
|
out_unlock:
|
|
mutex_unlock(&vha->lock);
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
* return either dev virtual address or physical address of buffer
|
|
* phys address only applicable if contiguous memory
|
|
* virtual address only if MMU enabled
|
|
*/
|
|
uint64_t vha_buf_addr(struct vha_session *session, struct vha_buffer *buf)
|
|
{
|
|
struct vha_dev *vha = session->vha;
|
|
|
|
if (vha->mmu_mode == VHA_MMU_DISABLED) {
|
|
uint64_t *phys;
|
|
|
|
/* no-MMU mode */
|
|
if (vha->hw_props.dummy_dev)
|
|
return 0; /* no-MMU: dummy hardware */
|
|
|
|
phys = img_mem_get_page_array(session->mem_ctx, buf->id);
|
|
if (phys)
|
|
/*
|
|
* no-MMU: carveout memory
|
|
* Get the address that dev expects.
|
|
*/
|
|
return img_mem_get_dev_addr(session->mem_ctx,
|
|
buf->id, phys[0]);
|
|
|
|
dev_err(vha->dev, "%s: ERROR: buffer %x is not contiguous\n",
|
|
__func__, buf->id);
|
|
return 0; /* no-MMU: system memory */
|
|
}
|
|
|
|
/* mmu mode */
|
|
if (buf == NULL)
|
|
return 0; /* error */
|
|
|
|
return buf->devvirt; /* MMU mode: virt address */
|
|
}
|
|
|
|
/*
 * Look up a buffer of @session by its id.
 *
 * Return: the first buffer on session->bufs whose id equals @buf_id,
 *         or NULL when there is none.
 */
struct vha_buffer *vha_find_bufid(const struct vha_session *session, uint32_t buf_id)
{
	struct vha_buffer *cur;

	list_for_each_entry(cur, &session->bufs, list) {
		if (cur->id != buf_id)
			continue;
		return cur;
	}

	return NULL;
}
|
|
|
|
struct vha_buffer *vha_find_bufvaddr(const struct vha_session *session,
|
|
uint64_t virt_addr)
|
|
{
|
|
struct vha_buffer *buf;
|
|
|
|
list_for_each_entry(buf, &session->bufs, list) {
|
|
/* check if virtual address belongs to specific buffer */
|
|
if (virt_addr >= buf->devvirt &&
|
|
virt_addr < (buf->devvirt + buf->size))
|
|
return buf;
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
/* when a buffer is allocated or imported, it is added to session.bufs */
|
|
int vha_add_buf(struct vha_session *session,
|
|
uint32_t buf_id, size_t size, const char *name, enum img_mem_attr attr)
|
|
{
|
|
struct vha_buffer *buf = kzalloc(sizeof(*buf), GFP_KERNEL);
|
|
struct vha_dev *vha = session->vha;
|
|
int ret = 0;
|
|
|
|
dev_dbg(vha->dev, "%s buf '%.*s' id:%d\n", __func__,
|
|
(int)(sizeof(buf->name))-1, name, buf_id);
|
|
|
|
if (buf == NULL)
|
|
return -ENOMEM;
|
|
|
|
buf->id = buf_id;
|
|
buf->size = size;
|
|
strncpy(buf->name, name, sizeof(buf->name)-1);
|
|
buf->attr = attr;
|
|
buf->status = VHA_BUF_UNFILLED;
|
|
buf->session = session;
|
|
#ifdef KERNEL_DMA_FENCE_SUPPORT
|
|
buf->sync_info.in_sync_fd = VHA_SYNC_NONE;
|
|
#endif
|
|
list_add(&buf->list, &session->bufs);
|
|
INIT_LIST_HEAD(&buf->onchip_maps);
|
|
if (!(attr & IMG_MEM_ATTR_OCM))
|
|
img_pdump_printf("-- <-- New buffer name: %s\n", buf->name);
|
|
|
|
if (zero_buffers && !(buf->attr & IMG_MEM_ATTR_NOMAP)) {
|
|
ret = img_mem_map_km(session->mem_ctx, buf_id);
|
|
if (ret) {
|
|
dev_err(session->vha->dev, "failed to map buff %x to km: %d\n",
|
|
buf_id, ret);
|
|
ret = -EFAULT;
|
|
goto out_err;
|
|
}
|
|
buf->kptr = img_mem_get_kptr(session->mem_ctx, buf_id);
|
|
|
|
{
|
|
void *ptr = buf->kptr;
|
|
int max_chunk = 1 * 1024 * 1024;
|
|
while (size) {
|
|
int chunk_size = size > max_chunk ?
|
|
max_chunk : size;
|
|
pr_debug("memset buf chunk %d!\n", chunk_size);
|
|
memset(ptr, 0, chunk_size);
|
|
ptr += chunk_size;
|
|
size -= chunk_size;
|
|
schedule();
|
|
}
|
|
}
|
|
ret = img_mem_unmap_km(session->mem_ctx, buf->id);
|
|
if (ret) {
|
|
dev_err(session->vha->dev,
|
|
"%s: failed to unmap buff %x from km: %d\n",
|
|
__func__, buf->id, ret);
|
|
ret = -EFAULT;
|
|
goto out_err;
|
|
}
|
|
buf->kptr = NULL;
|
|
}
|
|
|
|
return 0;
|
|
|
|
out_err:
|
|
list_del(&buf->list);
|
|
kfree(buf);
|
|
return ret;
|
|
}
|
|
|
|
/* remove buffer from the session */
|
|
int vha_rm_buf(struct vha_session *session, uint32_t buf_id)
|
|
{
|
|
struct vha_buffer *buf = vha_find_bufid(session, buf_id);
|
|
|
|
dev_dbg(session->vha->dev, "%s buf_id:%d\n", __func__, buf_id);
|
|
if (buf == NULL) {
|
|
dev_err(session->vha->dev, "%s: could not find buf %x\n",
|
|
__func__, buf_id);
|
|
return -EINVAL;
|
|
}
|
|
|
|
#ifdef KERNEL_DMA_FENCE_SUPPORT
|
|
vha_rm_buf_fence(session, buf);
|
|
#endif
|
|
vha_clean_onchip_maps(session, buf);
|
|
|
|
list_del(&buf->list);
|
|
kfree(buf);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* process the cmd if everything is ready */
|
|
enum do_cmd_status vha_do_cmd(struct vha_cmd *cmd)
|
|
{
|
|
struct vha_session *session = cmd->session;
|
|
struct vha_dev* vha = session->vha;
|
|
|
|
/* already submitted, wait until processed */
|
|
if (cmd->in_hw)
|
|
return CMD_IN_HW;
|
|
|
|
/* check all input buffers are filled and ready to go */
|
|
if (vha_is_waiting_for_inputs(session, cmd))
|
|
return CMD_WAIT_INBUFS;
|
|
|
|
#if !defined(CONFIG_VHA_DUMMY) && !defined(CONFIG_HW_MULTICORE)
|
|
if (!session->vha->is_ready)
|
|
return CMD_HW_BUSY;
|
|
#endif
|
|
|
|
/* check hw availability (if needed) */
|
|
#ifdef CONFIG_HW_MULTICORE
|
|
/* Attempt to schedule command on available cores. */
|
|
if (vha_dev_schedule_cmd(session->vha, cmd) != 0)
|
|
#else
|
|
/* Check if the core's queue is full. */
|
|
if (vha_is_queue_full(session->vha, cmd))
|
|
#endif
|
|
return CMD_HW_BUSY;
|
|
|
|
if (cmd->user_cmd.cmd_type == VHA_CMD_CNN_SUBMIT &&
|
|
!session->vha->stats.cnn_kicks)
|
|
img_pdump_printf("-- ALLOC_END\n");
|
|
|
|
/* at this point we should be able to process the cmd */
|
|
if (vha_do_cnn_cmd(cmd) != 0)
|
|
return CMD_DONE;
|
|
|
|
return CMD_OK;
|
|
}
|
|
|
|
/* check if there is any work to be done */
|
|
static void cmd_worker(struct work_struct *work)
|
|
{
|
|
struct vha_dev *vha = container_of(work, struct vha_dev, worker);
|
|
|
|
dev_dbg(vha->dev, "%s\n", __func__);
|
|
mutex_lock(&vha->lock);
|
|
|
|
#ifdef CONFIG_FAULT_INJECTION
|
|
if (task_pid_nr(current) != vha->irq_bh_pid) {
|
|
if (vha->fault_inject & VHA_FI_CMD_WORKER)
|
|
current->make_it_fail = true;
|
|
else
|
|
current->make_it_fail = false;
|
|
}
|
|
#endif
|
|
|
|
if (vha->do_calibration) {
|
|
/* Postpone any worker tasks. */
|
|
dev_dbg(vha->dev, "%s: Postpone worker task!\n", __func__);
|
|
goto exit;
|
|
}
|
|
|
|
/* Execute the main scheduling loop. */
|
|
vha_scheduler_loop(vha);
|
|
|
|
exit:
|
|
#ifdef CONFIG_FAULT_INJECTION
|
|
if (task_pid_nr(current) != vha->irq_bh_pid) {
|
|
if (vha->fault_inject & VHA_FI_CMD_WORKER)
|
|
current->make_it_fail = false;
|
|
}
|
|
#endif
|
|
mutex_unlock(&vha->lock);
|
|
}
|
|
|
|
/* this is wrapper func for scheduling command worker task */
|
|
void vha_chk_cmd_queues(struct vha_dev *vha, bool threaded)
|
|
{
|
|
dev_dbg(vha->dev, "%s threaded:%u\n", __func__, threaded);
|
|
if (threaded) {
|
|
/* If work has been already scheduled from other context,
|
|
* the below call does nothing (returns false).
|
|
* However the worker is only used as command data pump,
|
|
* so it is not necessary to do any kind of rescheduling,
|
|
* as it will be executed anyway!
|
|
*/
|
|
schedule_work(&vha->worker); /* call asynchronously */
|
|
} else {
|
|
/* Direct calls must be always invoked
|
|
* with vha_dev.lock == locked
|
|
*/
|
|
BUG_ON(!mutex_is_locked(&vha->lock));
|
|
mutex_unlock(&vha->lock);
|
|
cmd_worker(&vha->worker); /* call synchronously */
|
|
mutex_lock(&vha->lock);
|
|
}
|
|
}
|
|
|
|
#ifdef KERNEL_DMA_FENCE_SUPPORT
|
|
/* input buffer sync callback */
|
|
static void _vha_in_buf_sync_cb(struct dma_fence *fence,
|
|
struct dma_fence_cb *cb)
|
|
{
|
|
struct vha_buffer *buf = container_of(cb, struct vha_buffer, sync_info.in_sync_cb);
|
|
|
|
vha_set_buf_status(buf->session, buf->id, VHA_BUF_FILLED_BY_SW,
|
|
VHA_SYNC_NONE, false);
|
|
fput(buf->sync_info.in_sync_file);
|
|
dma_fence_put(fence);
|
|
memset(&buf->sync_info, 0, sizeof(struct vha_buf_sync_info));
|
|
buf->sync_info.in_sync_fd = VHA_SYNC_NONE;
|
|
}
|
|
#endif
|
|
|
|
/* set buffer status per user request: either filled or unfilled */
|
|
int vha_set_buf_status(struct vha_session *session, uint32_t buf_id,
|
|
enum vha_buf_status status, int in_sync_fd, bool out_sync_sig)
|
|
{
|
|
struct vha_buffer *buf = vha_find_bufid(session, buf_id);
|
|
|
|
if (buf == NULL) {
|
|
dev_err(session->vha->dev, "%s: invalid buf id:%d\n",
|
|
__func__, buf_id);
|
|
return -EINVAL;
|
|
}
|
|
|
|
dev_dbg(session->vha->dev, "%s: id:%d curr:%d new:%d sig:%d\n",
|
|
__func__, buf->id, buf->status, status, out_sync_sig);
|
|
/* If buffer has been filled by HW,
|
|
* mark that it probably needs invalidation, not necessarily,
|
|
* as it can be the input for the next hw segment,
|
|
* and may not be mapped by the UM */
|
|
if (buf->status != VHA_BUF_FILLED_BY_HW &&
|
|
status == VHA_BUF_FILLED_BY_HW) {
|
|
buf->inval = true;
|
|
#ifdef KERNEL_DMA_FENCE_SUPPORT
|
|
buf->status = status;
|
|
#endif
|
|
}
|
|
|
|
/* If buffer has been filled by SW,
|
|
* mark that it needs flushing */
|
|
if (buf->status == VHA_BUF_UNFILLED &&
|
|
status == VHA_BUF_FILLED_BY_SW) {
|
|
buf->flush = true;
|
|
#ifdef KERNEL_DMA_FENCE_SUPPORT
|
|
if (in_sync_fd > 0) {
|
|
if (buf->sync_info.in_sync_fd < 0) {
|
|
int ret = 0;
|
|
struct file *sync_file;
|
|
struct dma_fence *fence;
|
|
|
|
sync_file = fget(in_sync_fd);
|
|
if (sync_file == NULL) {
|
|
dev_err(session->vha->dev, "%s: could not get file for fd=%d and buf %d\n",
|
|
__func__, in_sync_fd, buf_id);
|
|
return -EINVAL;
|
|
}
|
|
|
|
fence = sync_file_get_fence(in_sync_fd);
|
|
if (!fence) {
|
|
fput(sync_file);
|
|
dev_err(session->vha->dev, "%s: could not get fence for fd=%d and buf %d\n",
|
|
__func__, in_sync_fd, buf_id);
|
|
return -EINVAL;
|
|
}
|
|
|
|
ret = dma_fence_add_callback(fence, &buf->sync_info.in_sync_cb,
|
|
_vha_in_buf_sync_cb);
|
|
if (ret) {
|
|
if (dma_fence_is_signaled(fence)) {
|
|
dma_fence_put(fence);
|
|
buf->status = status;
|
|
} else
|
|
dev_err(session->vha->dev, "%s: could not set cb for fd=%d and buf %x\n",
|
|
__func__, in_sync_fd, buf_id);
|
|
fput(sync_file);
|
|
return ret;
|
|
}
|
|
buf->sync_info.in_fence = fence;
|
|
buf->sync_info.in_sync_file = sync_file;
|
|
buf->sync_info.in_sync_fd = in_sync_fd;
|
|
} else if (in_sync_fd != buf->sync_info.in_sync_fd) {
|
|
dev_err(session->vha->dev, "%s: buf %d has already assigned sync file fd=%d\n",
|
|
__func__, buf_id, in_sync_fd);
|
|
return -EINVAL;
|
|
}
|
|
}
|
|
else {
|
|
if (out_sync_sig)
|
|
img_mem_signal_fence(session->mem_ctx, buf->id);
|
|
buf->status = status;
|
|
}
|
|
#endif
|
|
}
|
|
|
|
/* If buffer has been filled by SW,
|
|
* after being filled by the hw, flush it too */
|
|
if (buf->status == VHA_BUF_FILLED_BY_HW &&
|
|
status == VHA_BUF_FILLED_BY_SW) {
|
|
buf->flush = true;
|
|
}
|
|
|
|
#ifdef KERNEL_DMA_FENCE_SUPPORT
|
|
if (status != VHA_BUF_FILLED_BY_SW)
|
|
#endif
|
|
buf->status = status;
|
|
|
|
/* Poke the command queue only when filled by SW */
|
|
if (status == VHA_BUF_FILLED_BY_SW) {
|
|
/* We are already locked!
|
|
* Run in separate thread
|
|
*/
|
|
vha_chk_cmd_queues(session->vha, true);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Query-and-clear the "needs invalidation" flag of a buffer.
 *
 * Returns the flag value as it was before the call and resets it to false.
 * Returns false, leaving the flag untouched, when the buffer id is unknown
 * or the buffer carries the NOMAP or NOSYNC attribute.
 */
bool vha_buf_needs_inval(struct vha_session *session, uint32_t buf_id)
{
	struct vha_buffer *buf = vha_find_bufid(session, buf_id);
	bool was_pending;

	if (!buf) {
		dev_err(session->vha->dev, "%s: invalid buf id:%d\n",
				__func__, buf_id);
		return false;
	}

	/* Buffer that has been allocated as HW access only
	 * does not need invalidation */
	if ((buf->attr & (IMG_MEM_ATTR_NOMAP | IMG_MEM_ATTR_NOSYNC)) != 0) {
		dev_dbg(session->vha->dev, "%s: id:%d (skip)\n",
				__func__, buf->id);
		return false;
	}

	dev_dbg(session->vha->dev, "%s: id:%d (%d)\n",
			__func__, buf->id, buf->inval);

	/* consume the flag */
	was_pending = buf->inval;
	buf->inval = false;

	return was_pending;
}
|
|
|
|
/*
 * Query-and-clear the "needs flush" flag of a buffer.
 *
 * Returns the flag value as it was before the call and resets it to false.
 * Returns false when the buffer id is unknown.
 */
bool vha_buf_needs_flush(struct vha_session *session, uint32_t buf_id)
{
	struct vha_buffer *buf = vha_find_bufid(session, buf_id);
	bool was_pending;

	if (!buf) {
		dev_err(session->vha->dev, "%s: invalid buf id:%d\n",
				__func__, buf_id);
		return false;
	}
	dev_dbg(session->vha->dev, "%s: id:%d (%d)\n",
			__func__, buf->id, buf->flush);

	/* consume the flag */
	was_pending = buf->flush;
	buf->flush = false;

	return was_pending;
}
|
|
|
|
#ifdef KERNEL_DMA_FENCE_SUPPORT
|
|
struct vha_sync_cb_data {
|
|
struct dma_fence_cb cb;
|
|
union {
|
|
struct sync_file *sync_file;
|
|
struct file *file;
|
|
};
|
|
};
|
|
|
|
static void _vha_out_sync_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
|
|
{
|
|
struct vha_sync_cb_data *cb_data =
|
|
container_of(cb, struct vha_sync_cb_data, cb);
|
|
fput(cb_data->sync_file->file);
|
|
dma_fence_put(fence);
|
|
kfree(cb_data);
|
|
}
|
|
|
|
/*
 * Create an output sync file covering a set of buffers.
 *
 * A fence is added to every buffer listed in buf_ids, the fences are
 * grouped into a dma_fence_array, the array is wrapped in a sync_file and
 * the sync_file is installed under a new O_CLOEXEC file descriptor.
 * _vha_out_sync_cb() is armed on the array to release the extra file
 * reference, the fence reference and the callback data.
 *
 * Returns the new sync fd on success, a negative errno otherwise.
 */
int vha_create_output_sync(struct vha_session *session, uint32_t buf_id_count,
		uint32_t *buf_ids)
{
	int i;
	int ret = -ENOMEM;
	int sync_fd = VHA_SYNC_NONE;
	struct device *dev = session->vha->dev;
	struct dma_fence_array *fence_array = NULL;
	struct vha_sync_cb_data *cb_data = NULL;
	struct dma_fence **fences;

	fences = kmalloc_array(buf_id_count, sizeof(*fences), GFP_KERNEL);
	if (fences == NULL) {
		dev_err(dev, "%s: failed allocating fence container for %u buffers\n",
				__func__, buf_id_count);
		return -ENOMEM;
	}

	cb_data = kzalloc(sizeof(*cb_data), GFP_KERNEL);
	if (cb_data == NULL) {
		dev_err(dev, "%s: failed allocating fence callback for %u buffers\n",
				__func__, buf_id_count);
		kfree(fences);
		return -ENOMEM;
	}

	for (i = 0; i < buf_id_count; i++) {
		fences[i] = img_mem_add_fence(session->mem_ctx, buf_ids[i]);
		if (!fences[i]) {
			dev_err(dev, "%s: failed allocating fence for buffer id=%u\n",
					__func__, buf_ids[i]);
			goto err_fences;
		}
	}

	/* on success the array takes ownership of the 'fences' container */
	fence_array = dma_fence_array_create(buf_id_count, fences,
			dma_fence_context_alloc(1), 1, false);
	if (fence_array == NULL) {
		dev_err(dev, "%s: failed allocating fence array for %u buffers\n",
				__func__, buf_id_count);
		goto err_fences;
	}

	cb_data->sync_file = sync_file_create(&fence_array->base);
	if (cb_data->sync_file == NULL) {
		dev_err(dev, "%s: failed creating sync file for %u buffers\n",
				__func__, buf_id_count);
		goto error_sf;
	}

	sync_fd = get_unused_fd_flags(O_CLOEXEC);
	if (sync_fd < 0) {
		dev_err(dev, "%s: failed creating file descriptor for %u buffers\n",
				__func__, buf_id_count);
		ret = sync_fd;
		goto error_fd;
	}

	ret = dma_fence_add_callback(&fence_array->base, &cb_data->cb,
			_vha_out_sync_cb);
	if (ret < 0) {
		dev_err(dev, "%s: failed adding callback file descriptor for %u buffers\n",
				__func__, buf_id_count);
		goto error_cb;
	}

	fd_install(sync_fd, cb_data->sync_file->file);
	/* extra file reference, released by _vha_out_sync_cb() */
	fget(sync_fd);

	return sync_fd;

error_cb:
	/* the fd was reserved but never installed */
	put_unused_fd(sync_fd);
error_fd:
	/* releasing the file also drops the fence reference that
	 * sync_file_create() took, so only our own one is left to put */
	fput(cb_data->sync_file->file);
error_sf:
	dma_fence_put(&fence_array->base);
err_fences:
	/* the container is still ours if no array was created around it */
	if (fence_array == NULL)
		kfree(fences);
	for (i--; i >= 0; i--)
		img_mem_remove_fence(session->mem_ctx, buf_ids[i]);
	kfree(cb_data);
	return ret;
}
|
|
|
|
/* input sync callback */
|
|
static void _vha_in_sync_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
|
|
{
|
|
struct vha_sync_cb_data *cb_data =
|
|
container_of(cb, struct vha_sync_cb_data, cb);
|
|
fput(cb_data->file);
|
|
dma_fence_put(fence);
|
|
kfree(cb_data);
|
|
}
|
|
/* merged input sync callback */
|
|
static void _vha_in_merged_sync_cb(struct dma_fence *fence,
|
|
struct dma_fence_cb *cb)
|
|
{
|
|
struct vha_sync_cb_data *cb_data =
|
|
container_of(cb, struct vha_sync_cb_data, cb);
|
|
fput(cb_data->sync_file->file);
|
|
dma_fence_put(fence);
|
|
}
|
|
|
|
int vha_merge_input_syncs(struct vha_session *session, uint32_t in_sync_fd_count,
|
|
int *in_sync_fds)
|
|
{
|
|
struct device *dev = session->vha->dev;
|
|
int i, actual_count = 0;
|
|
int ret = -ENOMEM;
|
|
int sync_fd = VHA_SYNC_NONE;
|
|
struct dma_fence_array *fence_array = NULL;
|
|
struct vha_sync_cb_data *cb_data = NULL;
|
|
struct vha_sync_cb_data *in_sync_cbs = NULL;
|
|
struct dma_fence **fences;
|
|
void *dma_fence_mem;
|
|
struct file *f;
|
|
|
|
/* Special cases. */
|
|
if (in_sync_fd_count == 0) {
|
|
dev_err(dev, "%s: requested 0 sync_fds to merge\n", __func__);
|
|
return -EINVAL;
|
|
} else if (in_sync_fd_count == 1) {
|
|
struct file *f;
|
|
struct dma_fence *fence;
|
|
f = fget(in_sync_fds[0]);
|
|
if (f == NULL) {
|
|
dev_err(dev, "%s: could not get file for input sync fd=%d\n",
|
|
__func__, in_sync_fds[0]);
|
|
return -EINVAL;
|
|
}
|
|
fence = sync_file_get_fence(in_sync_fds[0]);
|
|
if (!fence) {
|
|
fput(f);
|
|
dev_err(dev, "%s: could not get fence for input sync fd=%d\n",
|
|
__func__, in_sync_fds[0]);
|
|
return -EINVAL;
|
|
}
|
|
cb_data = kmalloc(sizeof(struct vha_sync_cb_data), GFP_KERNEL);
|
|
if (cb_data == NULL) {
|
|
fput(f);
|
|
dma_fence_put(fence);
|
|
dev_err(dev, "%s: failed allocating callback data for input sync fd=%d\n",
|
|
__func__, in_sync_fds[0]);
|
|
return -ENOMEM;
|
|
}
|
|
if (dma_fence_add_callback(fence, &cb_data->cb, _vha_in_sync_cb)) {
|
|
if (dma_fence_is_signaled(fence)) {
|
|
dev_warn(dev, "%s: input sync fd=%d already signalled\n",
|
|
__func__, in_sync_fds[0]);
|
|
ret = -EINVAL;
|
|
} else {
|
|
dev_err(dev, "%s: could not add fence callback for input sync fd=%d\n",
|
|
__func__, in_sync_fds[0]);
|
|
ret = -EFAULT;
|
|
}
|
|
fput(f);
|
|
dma_fence_put(fence);
|
|
kfree(cb_data);
|
|
return ret;
|
|
}
|
|
cb_data->file = f;
|
|
return in_sync_fds[0];
|
|
}
|
|
|
|
dma_fence_mem =
|
|
kmalloc_array(
|
|
(sizeof(struct dma_fence*) + sizeof(struct vha_sync_cb_data)),
|
|
in_sync_fd_count + sizeof(struct vha_sync_cb_data), GFP_KERNEL);
|
|
if (dma_fence_mem == NULL) {
|
|
dev_err(dev, "%s: failed allocating fence container for %u buffers\n",
|
|
__func__, in_sync_fd_count);
|
|
return -ENOMEM;
|
|
}
|
|
fences = (struct dma_fence**)dma_fence_mem;
|
|
in_sync_cbs = (struct vha_sync_cb_data *)(dma_fence_mem +
|
|
sizeof(struct dma_fence*) * in_sync_fd_count);
|
|
cb_data = (struct vha_sync_cb_data *)(dma_fence_mem +
|
|
(sizeof(struct dma_fence*) + sizeof(struct vha_sync_cb_data)) *
|
|
in_sync_fd_count);
|
|
|
|
for (i = 0; i < in_sync_fd_count; i++) {
|
|
struct dma_fence *fence;
|
|
f = fget(in_sync_fds[i]);
|
|
if (f == NULL) {
|
|
dev_warn(dev, "%s: could not get file for fd=%d; will not use it\n",
|
|
__func__, in_sync_fds[i]);
|
|
continue;
|
|
}
|
|
fence = sync_file_get_fence(in_sync_fds[i]);
|
|
if (!fence) {
|
|
fput(f);
|
|
dev_warn(dev, "%s: could not get fence for fd=%d; will not use it\n",
|
|
__func__, in_sync_fds[i]);
|
|
continue;
|
|
}
|
|
if (dma_fence_add_callback(fence, &in_sync_cbs[actual_count].cb,
|
|
_vha_in_sync_cb)) {
|
|
if (dma_fence_is_signaled(fence)) {
|
|
dev_warn(dev, "%s: input sync fd=%d already signalled\n",
|
|
__func__, in_sync_fds[i]);
|
|
} else {
|
|
dev_err(dev, "%s: could not add fence callback for input sync fd=%d;"
|
|
" will not use it\n", __func__, in_sync_fds[i]);
|
|
}
|
|
fput(f);
|
|
dma_fence_put(fence);
|
|
continue;
|
|
}
|
|
dma_fence_get(fence); /* should be freed in dma_fence_array_release() */
|
|
in_sync_cbs[actual_count].file = f;
|
|
fences[actual_count] = fence;
|
|
actual_count++;
|
|
}
|
|
if (actual_count == 0) {
|
|
dev_err(dev, "%s: failed merging input fences\n", __func__);
|
|
kfree(dma_fence_mem);
|
|
return -EINVAL;
|
|
}
|
|
|
|
fence_array = dma_fence_array_create(actual_count, fences,
|
|
dma_fence_context_alloc(1), 1, false);
|
|
if (fence_array == NULL) {
|
|
dev_err(dev, "%s: failed allocating fence array for %u buffers\n",
|
|
__func__, in_sync_fd_count);
|
|
kfree(dma_fence_mem);
|
|
return -ENOMEM;
|
|
}
|
|
|
|
cb_data->sync_file = sync_file_create(&fence_array->base);
|
|
if (cb_data->sync_file == NULL) {
|
|
dev_err(dev, "%s: failed creating sync file for %u buffers\n",
|
|
__func__, in_sync_fd_count);
|
|
goto error_sf;
|
|
}
|
|
|
|
sync_fd = get_unused_fd_flags(O_CLOEXEC);
|
|
if (sync_fd < 0) {
|
|
dev_err(dev, "%s: failed creating file descriptor for %u buffers\n",
|
|
__func__, in_sync_fd_count);
|
|
ret = sync_fd;
|
|
goto error_fd;
|
|
}
|
|
|
|
ret = dma_fence_add_callback(&fence_array->base, &cb_data->cb,
|
|
_vha_in_merged_sync_cb);
|
|
if (ret < 0) {
|
|
dev_err(dev, "%s: failed adding callback file descriptor for %u buffers\n",
|
|
__func__, in_sync_fd_count);
|
|
goto error_fd;
|
|
}
|
|
|
|
fd_install(sync_fd, cb_data->sync_file->file);
|
|
fget(sync_fd);
|
|
|
|
return sync_fd;
|
|
|
|
error_fd:
|
|
fput(cb_data->sync_file->file);
|
|
dma_fence_put(&fence_array->base);
|
|
error_sf:
|
|
for (i = 0; i < actual_count; i++) {
|
|
fput(in_sync_cbs[actual_count].file);
|
|
dma_fence_put(fences[actual_count]);
|
|
}
|
|
dma_fence_put(&fence_array->base);
|
|
return ret;
|
|
}
|
|
|
|
/*
 * Remove the fence of every buffer listed in buf_ids.
 * Unknown buffer ids are reported with a warning and skipped.
 * Always returns 0.
 */
int vha_release_syncs(struct vha_session *session, uint32_t buf_id_count,
		uint32_t *buf_ids)
{
	struct device *dev = session->vha->dev;
	int idx;

	for (idx = 0; idx < buf_id_count; idx++) {
		struct vha_buffer *buf = vha_find_bufid(session, buf_ids[idx]);

		if (!buf) {
			dev_warn(dev, "%s: could not find buf %u\n", __func__, buf_ids[idx]);
			continue;
		}
		vha_rm_buf_fence(session, buf);
	}

	return 0;
}
|
|
#endif
|
|
|
|
/* validate and queue a message from a user
|
|
* called with mutex locked */
|
|
int vha_add_cmd(struct vha_session *session, struct vha_cmd *cmd)
|
|
{
|
|
uint32_t i;
|
|
struct device *dev = session->vha->dev;
|
|
struct vha_user_cmd *user_cmd = &cmd->user_cmd;
|
|
/* number of words in vha_user_cmd->data[0] */
|
|
uint32_t num_params = (cmd->size - sizeof(struct vha_user_cmd))/sizeof(uint32_t);
|
|
uint32_t pri_q_count = 1;
|
|
|
|
#ifdef CONFIG_HW_MULTICORE
|
|
if (user_cmd->cmd_type == VHA_CMD_CNN_SUBMIT) {
|
|
dev_err(dev, "%s: invalid cmd type 0x%x\n", __func__, user_cmd->cmd_type);
|
|
return -EINVAL;
|
|
}
|
|
#endif
|
|
|
|
if (user_cmd->num_bufs > num_params * sizeof(uint32_t)) {
|
|
dev_err(dev, "%s: invalid number of buffers in message: in:%x total:%x>%lx\n",
|
|
__func__, user_cmd->num_inbufs, user_cmd->num_bufs,
|
|
num_params * sizeof(uint32_t));
|
|
return -EINVAL;
|
|
}
|
|
|
|
if (user_cmd->num_bufs > VHA_MAX_ALT_ADDRS) {
|
|
dev_err(dev, "%s: invalid number of buffers in message: %x max:%x\n",
|
|
__func__, user_cmd->num_bufs, VHA_MAX_ALT_ADDRS);
|
|
return -EINVAL;
|
|
}
|
|
|
|
if (!session->vha->cnn_combined_crc_enable && (cmd->user_cmd.flags & VHA_CHECK_CRC)) {
|
|
dev_err(dev, "%s: Trying to perform CRC check while combined CRCs are disabled!,"
|
|
" try cnn_combined_crc_enable=1\n", __func__);
|
|
return -EINVAL;
|
|
}
|
|
|
|
if (user_cmd->priority >= VHA_MAX_PRIORITIES) {
|
|
#if defined(CONFIG_HW_MULTICORE) || (defined(HW_AX3) && defined(VHA_USE_LO_PRI_SUB_SEGMENTS))
|
|
dev_warn(dev, "%s: Priority %u too high. Setting to max supported priority: %u.\n",
|
|
__func__, user_cmd->priority, VHA_MAX_PRIORITIES - 1);
|
|
user_cmd->priority = VHA_MAX_PRIORITIES - 1;
|
|
#else
|
|
dev_warn_once(dev, "%s: Priorities not supported.\n", __func__);
|
|
user_cmd->priority = VHA_DEFAULT_PRI;
|
|
#endif
|
|
}
|
|
|
|
switch(cmd->user_cmd.cmd_type) {
|
|
case VHA_CMD_CNN_SUBMIT:
|
|
{
|
|
struct vha_user_cnn_submit_cmd* submit_cmd =
|
|
(struct vha_user_cnn_submit_cmd*)user_cmd;
|
|
|
|
/* subsegments cannot be handled with low latency enabled */
|
|
if ((submit_cmd->subseg_num > 1) && (session->vha->low_latency != VHA_LL_DISABLED)) {
|
|
dev_err(dev, "%s: Subsegments are not supported with low latency enabled\n", __func__);
|
|
return -EINVAL;
|
|
}
|
|
/* include subsegments in priority counters */
|
|
pri_q_count = submit_cmd->subseg_num;
|
|
|
|
/* check input and output buffers are valid */
|
|
for (i = 0; i < user_cmd->num_bufs; i++) {
|
|
uint32_t buf_id = user_cmd->data[i];
|
|
|
|
if (vha_find_bufid(session, buf_id) == NULL) {
|
|
dev_err(dev, "%s: unrecognised buf id[%u]:%x\n",
|
|
__func__, i, buf_id);
|
|
return -EINVAL;
|
|
}
|
|
}
|
|
/* send out a event notifications when submit is enqueued */
|
|
if (vha_observers.enqueued)
|
|
vha_observers.enqueued(session->vha->id, session->id,
|
|
cmd->user_cmd.cmd_id, cmd->user_cmd.priority);
|
|
break;
|
|
}
|
|
case VHA_CMD_CNN_SUBMIT_MULTI:
|
|
{
|
|
uint32_t num_cmd_bufs = 0;
|
|
|
|
/* check if command stream buffers are valid */
|
|
for (i = 0; i < VHA_MAX_CORES; i++) {
|
|
uint32_t buf_id = user_cmd->data[i];
|
|
|
|
if (buf_id == 0)
|
|
break;
|
|
if (vha_find_bufid(session, buf_id) == NULL) {
|
|
dev_err(dev, "%s: unrecognised cmdstr buf id[%u]:%x\n",
|
|
__func__, i, buf_id);
|
|
return -EINVAL;
|
|
}
|
|
num_cmd_bufs++;
|
|
}
|
|
/* check input and output buffers are valid */
|
|
for (i = VHA_MAX_CORES; i < (user_cmd->num_bufs - 1); i++) {
|
|
uint32_t buf_id = user_cmd->data[i];
|
|
|
|
if (vha_find_bufid(session, buf_id) == NULL) {
|
|
dev_err(dev, "%s: unrecognised buf id[%u]:%x\n",
|
|
__func__, i, buf_id);
|
|
return -EINVAL;
|
|
}
|
|
}
|
|
/* send out a event notifications when submit is enqueued */
|
|
if (vha_observers.enqueued)
|
|
vha_observers.enqueued(session->vha->id, session->id,
|
|
cmd->user_cmd.cmd_id, cmd->user_cmd.priority);
|
|
break;
|
|
}
|
|
case VHA_CMD_CNN_PDUMP_MSG:
|
|
{
|
|
struct pdump_descr* pdump = vha_pdump_dev_get_drvdata(dev);
|
|
if (!img_pdump_enabled(pdump)) {
|
|
kfree(cmd);
|
|
/* Silently ignore this pdump message */
|
|
return 0;
|
|
}
|
|
}
|
|
}
|
|
/* add the command to the pending list */
|
|
list_add_tail(&cmd->list[cmd->user_cmd.priority], &session->cmds[cmd->user_cmd.priority]);
|
|
GETNSTIMEOFDAY(&cmd->submit_ts);
|
|
session->vha->pri_q_counters[cmd->user_cmd.priority] += pri_q_count;
|
|
|
|
/* We are already locked!
|
|
* Run in separate thread
|
|
*/
|
|
vha_chk_cmd_queues(session->vha, true);
|
|
|
|
return 0;
|
|
}
|
|
|
|
int vha_suspend_dev(struct device *dev)
|
|
{
|
|
struct vha_dev *vha = vha_dev_get_drvdata(dev);
|
|
int ret;
|
|
mutex_lock(&vha->lock);
|
|
|
|
#ifdef CONFIG_PM_DEVFREQ
|
|
ret = vha_devfreq_suspend(dev);
|
|
if (ret)
|
|
dev_err(dev, "%s: Failed to suspend the vha devfreq!\n", __func__);
|
|
#endif
|
|
|
|
dev_dbg(dev, "%s: taking a nap!\n", __func__);
|
|
|
|
ret = vha_dev_suspend_work(vha);
|
|
|
|
mutex_unlock(&vha->lock);
|
|
|
|
return ret;
|
|
}
|
|
|
|
int vha_resume_dev(struct device *dev)
|
|
{
|
|
struct vha_dev *vha = vha_dev_get_drvdata(dev);
|
|
int ret;
|
|
|
|
mutex_lock(&vha->lock);
|
|
dev_dbg(dev, "%s: waking up!\n", __func__);
|
|
/* Call the worker */
|
|
vha_chk_cmd_queues(vha, true);
|
|
|
|
#ifdef CONFIG_PM_DEVFREQ
|
|
ret = vha_devfreq_resume(dev);
|
|
if (ret)
|
|
dev_err(dev, "%s: Failed to resume the vha devfreq!\n", __func__);
|
|
#endif
|
|
|
|
mutex_unlock(&vha->lock);
|
|
|
|
return 0;
|
|
}
|
|
|
|
void vha_dump_digest(struct vha_session *session, struct vha_buffer *buf,
|
|
struct vha_cmd *cmd)
|
|
{
|
|
struct vha_dev *vha = session->vha;
|
|
int ret;
|
|
|
|
if (!vha->dump_buff_digest)
|
|
return;
|
|
|
|
if (!(buf->attr & IMG_MEM_ATTR_NOMAP)) {
|
|
ret = img_mem_map_km(session->mem_ctx, buf->id);
|
|
if (ret) {
|
|
dev_err(session->vha->dev, "failed to map buff %x to km: %d\n",
|
|
buf->id, ret);
|
|
return;
|
|
}
|
|
buf->kptr = img_mem_get_kptr(session->mem_ctx, buf->id);
|
|
|
|
dev_info(vha->dev, "%s: buff id:%d name:%s digest is [crc32]:%#x\n",
|
|
__func__, buf->id, buf->name, crc32(0, buf->kptr, buf->size));
|
|
|
|
ret = img_mem_unmap_km(session->mem_ctx, buf->id);
|
|
if (ret) {
|
|
dev_err(session->vha->dev,
|
|
"%s: failed to unmap buff %x from km: %d\n",
|
|
__func__, buf->id, ret);
|
|
}
|
|
buf->kptr = NULL;
|
|
}
|
|
}
|
|
|
|
/*
 * Read the CNN processing time statistics of a device.
 *
 * *proc_us receives vha->stats.cnn_total_proc_us; cur_proc_us is handed to
 * vha_currcmd_exetime_req() to be filled in.
 * Returns 0, or -EFAULT when the device has no driver data.
 */
int vha_get_cnntotal_proc_us(struct device *dev, uint64_t *proc_us, uint64_t *cur_proc_us)
{
	struct vha_dev *vha = vha_dev_get_drvdata(dev);

	if (vha == NULL)
		return -EFAULT;

	*proc_us = vha->stats.cnn_total_proc_us;
	vha_currcmd_exetime_req(vha, cur_proc_us);

	return 0;
}
|
|
|
|
|
|
/*
|
|
* register event observers.
|
|
* only a SINGLE observer for each type of event.
|
|
* unregister by passing NULL parameter
|
|
*/
|
|
void vha_observe_event_enqueue(void (*func)(uint32_t devid,
|
|
uint32_t sessionid,
|
|
uint32_t cmdid,
|
|
uint32_t priority))
|
|
{
|
|
if (func && vha_observers.enqueued)
|
|
pr_warn("%s: vha_observer for ENQUEUED events is already set to '%pf'\n",
|
|
__func__, vha_observers.enqueued);
|
|
vha_observers.enqueued = func;
|
|
}
|
|
EXPORT_SYMBOL(vha_observe_event_enqueue);
|
|
|
|
void vha_observe_event_submit(void (*func)(uint32_t devid,
|
|
uint32_t sessionid,
|
|
uint32_t cmdid,
|
|
bool last_subsegment,
|
|
uint32_t priority))
|
|
{
|
|
if (func && vha_observers.submitted)
|
|
pr_warn("%s: vha_observer for SUBMITTED events is already set to '%pf'\n",
|
|
__func__, vha_observers.submitted);
|
|
vha_observers.submitted = func;
|
|
}
|
|
EXPORT_SYMBOL(vha_observe_event_submit);
|
|
|
|
void vha_observe_event_complete(void (*func)(uint32_t devid,
|
|
uint32_t sessionid,
|
|
uint32_t cmdid,
|
|
uint64_t status,
|
|
uint64_t cycles,
|
|
uint64_t mem_usage,
|
|
uint32_t priority))
|
|
{
|
|
if (func && vha_observers.completed)
|
|
pr_warn("%s: vha_observer for COMPLETED events is already set to '%pf'\n",
|
|
__func__, vha_observers.completed);
|
|
vha_observers.completed = func;
|
|
}
|
|
EXPORT_SYMBOL(vha_observe_event_complete);
|
|
|
|
/*
 * Set (or clear, with NULL) the observer for CANCELED events.
 * Replacing an existing observer is allowed but logged as a warning.
 */
void vha_observe_event_cancel(void (*func)(uint32_t devid,
						uint32_t sessionid,
						uint32_t cmdid,
						uint32_t priority))
{
	/* %ps prints the symbol name of the currently installed observer */
	if (func && vha_observers.canceled)
		pr_warn("%s: vha_observer for CANCELED events is already set to '%ps'\n",
				__func__, vha_observers.canceled);
	vha_observers.canceled = func;
}
EXPORT_SYMBOL(vha_observe_event_cancel);
|
|
|
|
void vha_observe_event_error(void (*func)(uint32_t devid,
|
|
uint32_t sessionid,
|
|
uint32_t cmdid,
|
|
uint64_t status))
|
|
{
|
|
if (func && vha_observers.error)
|
|
pr_warn("%s: vha_observer for ERROR events is already set to '%pf'\n",
|
|
__func__, vha_observers.error);
|
|
vha_observers.error = func;
|
|
}
|
|
EXPORT_SYMBOL(vha_observe_event_error);
|