Skip to content

Commit 95244d4

Browse files
bisingha-xilinx and Bikash Singha authored
Fix crash in xdna_aie_array initialization (amd#990)
Signed-off-by: Bikash Singha <bisingha@xcobisingha50x.amd.com> Co-authored-by: Bikash Singha <bisingha@xcobisingha50x.amd.com>
1 parent f10e887 commit 95244d4

File tree

5 files changed

+82
-20
lines changed

5 files changed

+82
-20
lines changed

src/driver/amdxdna/ve2_debug.c

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include <linux/completion.h>
99
#include <linux/jiffies.h>
1010
#include <linux/sched.h>
11+
#include <linux/fdtable.h>
1112

1213
#include "ve2_fw.h"
1314
#include "ve2_of.h"
@@ -675,6 +676,52 @@ static int ve2_get_array_async_error(struct amdxdna_dev *xdna, struct amdxdna_dr
675676
return 0;
676677
}
677678

679+
static int ve2_get_aie_part_fd(struct amdxdna_client *client,
680+
struct amdxdna_drm_get_array *args)
681+
{
682+
struct amdxdna_dev *xdna = client->xdna;
683+
struct amdxdna_ctx_priv *nhwctx;
684+
struct amdxdna_ctx *ctx;
685+
u32 hwctx_handle;
686+
int srcu_idx;
687+
int ret = 0;
688+
int aie_fd;
689+
690+
hwctx_handle = args->num_element;
691+
srcu_idx = srcu_read_lock(&client->ctx_srcu);
692+
ctx = xa_load(&client->ctx_xa, hwctx_handle);
693+
if (!ctx) {
694+
XDNA_ERR(xdna, "Failed to get ctx %u", hwctx_handle);
695+
ret = -EINVAL;
696+
goto unlock;
697+
}
698+
699+
nhwctx = ctx->priv;
700+
if (!nhwctx || !nhwctx->aie_dev) {
701+
XDNA_ERR(xdna, "AIE partition not available for hwctx %p", ctx);
702+
ret = -ENODEV;
703+
goto unlock;
704+
}
705+
706+
aie_fd = aie_partition_get_fd(nhwctx->aie_dev);
707+
if (aie_fd < 0) {
708+
XDNA_ERR(xdna, "Failed to get AIE partition FD: %d", aie_fd);
709+
ret = aie_fd;
710+
goto unlock;
711+
}
712+
713+
if (copy_to_user(u64_to_user_ptr(args->buffer), &aie_fd, sizeof(aie_fd))) {
714+
XDNA_ERR(xdna, "Failed to copy AIE partition FD to user");
715+
close_fd(aie_fd);
716+
ret = -EFAULT;
717+
goto unlock;
718+
}
719+
720+
unlock:
721+
srcu_read_unlock(&client->ctx_srcu, srcu_idx);
722+
return ret;
723+
}
724+
678725
int ve2_get_array(struct amdxdna_client *client, struct amdxdna_drm_get_array *args)
679726
{
680727
struct amdxdna_dev *xdna = client->xdna;
@@ -699,6 +746,9 @@ int ve2_get_array(struct amdxdna_client *client, struct amdxdna_drm_get_array *a
699746
case DRM_AMDXDNA_HW_LAST_ASYNC_ERR:
700747
ret = ve2_get_array_async_error(xdna, args);
701748
break;
749+
case DRM_AMDXDNA_HWCTX_AIE_PART_FD:
750+
ret = ve2_get_aie_part_fd(client, args);
751+
break;
702752
default:
703753
XDNA_ERR(xdna, "Not supported request parameter %u", args->param);
704754
ret = -EOPNOTSUPP;

src/include/uapi/drm_local/amdxdna_accel.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -837,6 +837,7 @@ struct amdxdna_drm_get_array {
837837
#define DRM_AMDXDNA_FW_LOG_CONFIG 7
838838
#define DRM_AMDXDNA_FW_TRACE_CONFIG 8
839839
#define DRM_AMDXDNA_AIE_TILE_READ 9
840+
#define DRM_AMDXDNA_HWCTX_AIE_PART_FD 10
840841
__u32 param; /* in */
841842
__u32 element_size; /* in/out */
842843
#define AMDXDNA_MAX_NUM_ELEMENT 1024

src/shim_ve2/xdna_aie_array.cpp

Lines changed: 23 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,24 @@ get_driver_config(const pt::ptree& aie_meta)
4949
return driver_config;
5050
}
5151

52+
int
53+
xdna_aie_array::
54+
get_aie_partition_fd(const xdna_hwctx* hwctx_obj)
55+
{
56+
int aie_fd = -1;
57+
auto dev = const_cast<xdna_hwctx*>(hwctx_obj)->get_device();
58+
59+
amdxdna_drm_get_array arg = {};
60+
arg.param = DRM_AMDXDNA_HWCTX_AIE_PART_FD;
61+
arg.element_size = sizeof(aie_fd);
62+
arg.num_element = hwctx_obj->get_slotidx(); /* hwctx handle passed via num_element */
63+
arg.buffer = reinterpret_cast<uintptr_t>(&aie_fd);
64+
65+
dev->get_edev()->ioctl(DRM_IOCTL_AMDXDNA_GET_ARRAY, &arg);
66+
67+
return aie_fd;
68+
}
69+
5270
xdna_aie_array::
5371
xdna_aie_array(const xrt_core::device* device)
5472
{
@@ -72,7 +90,7 @@ xdna_aie_array(const xrt_core::device* device)
7290
int RC = XAie_GetPartitionFdList(&dev_inst_obj);
7391

7492
if (RC != XAIE_OK)
75-
throw xrt_core::error(RC,"XAie_GetPartitionFdList failed \n");
93+
throw xrt_core::error(RC, "XAie_GetPartitionFdList failed\n");
7694

7795
XAie_List *NodePtr;
7896
XAie_PartitionList *ListNode;
@@ -83,10 +101,8 @@ xdna_aie_array(const xrt_core::device* device)
83101

84102
int aie_part_fd = ListNode->PartitionFd;
85103

86-
//int aie_part_fd = fd;
87-
88104
if (aie_part_fd < 0)
89-
throw xrt_core::error(aie_part_fd,"fd is NEGATIVE\n");
105+
throw xrt_core::error(aie_part_fd, "fd is NEGATIVE\n");
90106

91107
fd = aie_part_fd;
92108
ConfigPtr.PartProp.Handle = fd;
@@ -125,22 +141,10 @@ xdna_aie_array(const xrt_core::device* device, const xdna_hwctx* hwctx_obj)
125141
throw xrt_core::error(-EINVAL, "Failed to setup AIE Partition: " + std::to_string(rc1));
126142
}
127143

128-
int RC = XAie_GetPartitionFdList(&dev_inst_obj);
129-
130-
if (RC != XAIE_OK)
131-
throw xrt_core::error(RC,"XAie_GetPartitionFdList failed \n");
132-
133-
XAie_List *NodePtr;
134-
XAie_PartitionList *ListNode;
135-
136-
NodePtr = (XAie_List *)&dev_inst_obj.PartitionList.Next->Next;
137-
138-
ListNode = (XAie_PartitionList *)XAIE_CONTAINER_OF(NodePtr, XAie_PartitionList, Node);
139-
140-
int aie_part_fd = ListNode->PartitionFd;
141-
144+
// Get AIE partition FD from kernel via ioctl
145+
int aie_part_fd = get_aie_partition_fd(hwctx_obj);
142146
if (aie_part_fd < 0)
143-
throw xrt_core::error(aie_part_fd,"fd is NEGATIVE\n");
147+
throw xrt_core::error(aie_part_fd, "Failed to get AIE partition FD\n");
144148

145149
fd = aie_part_fd;
146150
ConfigPtr.PartProp.Handle = fd;

src/shim_ve2/xdna_aie_array.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ class xdna_aie_array {
2828
XAie_DevInst *get_dev();
2929
adf::driver_config get_driver_config_hwctx(const xrt_core::device* device, const xdna_hwctx* hwctx);
3030
private:
31+
int get_aie_partition_fd(const xdna_hwctx* hwctx_obj);
3132
int num_cols;
3233
int fd;
3334
XAie_DevInst* dev_inst; // AIE Device Instance pointer

src/shim_ve2/xdna_hwctx.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,10 +47,11 @@ get_partition_info_main(const xrt_core::device* device,const pt::ptree& aie_meta
4747
info.base_address = aie_meta.get<uint64_t>("aie_metadata.driver_config.base_address");
4848

4949
bool partinfo_found = false;
50+
pid_t pid = getpid();
5051
auto data = xrt_core::device_query_default<xrt_core::query::aie_partition_info>(device, {});
5152

5253
for (const auto& entry : data) {
53-
if ( std::stoi(entry.metadata.id) == hw_context_id) {
54+
if (entry.pid == pid && std::stoi(entry.metadata.id) == hw_context_id) {
5455
info.num_columns = entry.num_cols;
5556
info.start_column = entry.start_col;
5657
info.partition_id = (entry.num_cols << 8U) | (entry.start_col & 0xffU);
@@ -152,6 +153,11 @@ xdna_hwctx::
152153
return;
153154

154155
m_hwq->unbind_hwctx();
156+
157+
// Explicitly destroy the aie_array before destroying the hw context
158+
if(m_aie_array)
159+
m_aie_array.reset();
160+
155161
struct amdxdna_drm_destroy_hwctx arg = {};
156162
arg.handle = m_handle;
157163
m_device->get_edev()->ioctl(DRM_IOCTL_AMDXDNA_DESTROY_HWCTX, &arg);

0 commit comments

Comments
 (0)