diff options
Diffstat (limited to 'tests/amdgpu/basic_tests.c')
-rw-r--r-- | tests/amdgpu/basic_tests.c | 264 |
1 files changed, 264 insertions, 0 deletions
diff --git a/tests/amdgpu/basic_tests.c b/tests/amdgpu/basic_tests.c index e7f48e39..a78cf521 100644 --- a/tests/amdgpu/basic_tests.c +++ b/tests/amdgpu/basic_tests.c @@ -50,6 +50,7 @@ static void amdgpu_command_submission_multi_fence(void); static void amdgpu_command_submission_sdma(void); static void amdgpu_userptr_test(void); static void amdgpu_semaphore_test(void); +static void amdgpu_sync_dependency_test(void); static void amdgpu_command_submission_write_linear_helper(unsigned ip_type); static void amdgpu_command_submission_const_fill_helper(unsigned ip_type); @@ -63,6 +64,7 @@ CU_TestInfo basic_tests[] = { { "Command submission Test (Multi-Fence)", amdgpu_command_submission_multi_fence }, { "Command submission Test (SDMA)", amdgpu_command_submission_sdma }, { "SW semaphore Test", amdgpu_semaphore_test }, + { "Sync dependency Test", amdgpu_sync_dependency_test }, CU_TEST_INFO_NULL, }; #define BUFFER_SIZE (8 * 1024) @@ -226,6 +228,60 @@ CU_TestInfo basic_tests[] = { */ # define PACKET3_DMA_DATA_SI_CP_SYNC (1 << 31) + +#define PKT3_CONTEXT_CONTROL 0x28 +#define CONTEXT_CONTROL_LOAD_ENABLE(x) (((unsigned)(x) & 0x1) << 31) +#define CONTEXT_CONTROL_LOAD_CE_RAM(x) (((unsigned)(x) & 0x1) << 28) +#define CONTEXT_CONTROL_SHADOW_ENABLE(x) (((unsigned)(x) & 0x1) << 31) + +#define PKT3_CLEAR_STATE 0x12 + +#define PKT3_SET_SH_REG 0x76 +#define PACKET3_SET_SH_REG_START 0x00002c00 + +#define PACKET3_DISPATCH_DIRECT 0x15 + + +/* gfx 8 */ +#define mmCOMPUTE_PGM_LO 0x2e0c +#define mmCOMPUTE_PGM_RSRC1 0x2e12 +#define mmCOMPUTE_TMPRING_SIZE 0x2e18 +#define mmCOMPUTE_USER_DATA_0 0x2e40 +#define mmCOMPUTE_USER_DATA_1 0x2e41 +#define mmCOMPUTE_RESOURCE_LIMITS 0x2e15 +#define mmCOMPUTE_NUM_THREAD_X 0x2e07 + + + +#define SWAP_32(num) ((num>>24)&0xff) | \ + ((num<<8)&0xff0000) | \ + ((num>>8)&0xff00) | \ + ((num<<24)&0xff000000) + + +/* Shader code + * void main() +{ + + float x = some_input; + for (unsigned i = 0; i < 1000000; i++) + x = sin(x); + + u[0] = 42u; +} +*/ + +static uint32_t shader_bin[] = { + SWAP_32(0x800082be), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0x040085bf), + SWAP_32(0x02810281), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0xfcff84bf), + SWAP_32(0xff0083be), SWAP_32(0x00f00000), SWAP_32(0xc10082be), SWAP_32(0xaa02007e), + SWAP_32(0x000070e0), SWAP_32(0x00000080), SWAP_32(0x000081bf) +}; + +#define CODE_OFFSET 512 +#define DATA_OFFSET 1024 + + int suite_basic_tests_init(void) { struct amdgpu_gpu_info gpu_info = {0}; @@ -1386,3 +1442,211 @@ static void amdgpu_userptr_test(void) wait(NULL); } + +static void amdgpu_sync_dependency_test(void) +{ + amdgpu_context_handle context_handle[2]; + amdgpu_bo_handle ib_result_handle; + void *ib_result_cpu; + uint64_t ib_result_mc_address; + struct amdgpu_cs_request ibs_request; + struct amdgpu_cs_ib_info ib_info; + struct amdgpu_cs_fence fence_status; + uint32_t expired; + int i, j, r, instance; + amdgpu_bo_list_handle bo_list; + amdgpu_va_handle va_handle; + static uint32_t *ptr; + uint64_t seq_no; + + CU_ASSERT_EQUAL(r, 0); + + r = amdgpu_cs_ctx_create(device_handle, &context_handle[0]); + CU_ASSERT_EQUAL(r, 0); + r = amdgpu_cs_ctx_create(device_handle, &context_handle[1]); + CU_ASSERT_EQUAL(r, 0); + + r = amdgpu_bo_alloc_and_map(device_handle, 8192, 4096, + AMDGPU_GEM_DOMAIN_GTT, 0, + &ib_result_handle, &ib_result_cpu, + &ib_result_mc_address, &va_handle); + CU_ASSERT_EQUAL(r, 0); + + r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL, + &bo_list); + CU_ASSERT_EQUAL(r, 0); + + ptr = ib_result_cpu; + i = 0; + + memcpy(ptr + CODE_OFFSET , shader_bin, sizeof(shader_bin)); + + /* Dispatch minimal init config and verify it's executed */ + ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1); + ptr[i++] = 0x80000000; + ptr[i++] = 0x80000000; + + ptr[i++] = PACKET3(PKT3_CLEAR_STATE, 0); + ptr[i++] = 0x80000000; + + + /* Program compute regs */ + ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2); + ptr[i++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START; + ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 8; + ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 40; + + + ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2); + ptr[i++] = mmCOMPUTE_PGM_RSRC1 - PACKET3_SET_SH_REG_START; + /* + * 002c0040 COMPUTE_PGM_RSRC1 <- VGPRS = 0 + SGPRS = 1 + PRIORITY = 0 + FLOAT_MODE = 192 (0xc0) + PRIV = 0 + DX10_CLAMP = 1 + DEBUG_MODE = 0 + IEEE_MODE = 0 + BULKY = 0 + CDBG_USER = 0 + * + */ + ptr[i++] = 0x002c0040; + + + /* + * 00000010 COMPUTE_PGM_RSRC2 <- SCRATCH_EN = 0 + USER_SGPR = 8 + TRAP_PRESENT = 0 + TGID_X_EN = 0 + TGID_Y_EN = 0 + TGID_Z_EN = 0 + TG_SIZE_EN = 0 + TIDIG_COMP_CNT = 0 + EXCP_EN_MSB = 0 + LDS_SIZE = 0 + EXCP_EN = 0 + * + */ + ptr[i++] = 0x00000010; + + +/* + * 00000100 COMPUTE_TMPRING_SIZE <- WAVES = 256 (0x100) + WAVESIZE = 0 + * + */ + ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); + ptr[i++] = mmCOMPUTE_TMPRING_SIZE - PACKET3_SET_SH_REG_START; + ptr[i++] = 0x00000100; + + ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2); + ptr[i++] = mmCOMPUTE_USER_DATA_0 - PACKET3_SET_SH_REG_START; + ptr[i++] = 0xffffffff & (ib_result_mc_address + DATA_OFFSET * 4); + ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32; + + ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); + ptr[i++] = mmCOMPUTE_RESOURCE_LIMITS - PACKET3_SET_SH_REG_START; + ptr[i++] = 0; + + ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3); + ptr[i++] = mmCOMPUTE_NUM_THREAD_X - PACKET3_SET_SH_REG_START; + ptr[i++] = 1; + ptr[i++] = 1; + ptr[i++] = 1; + + + /* Dispatch */ + ptr[i++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); + ptr[i++] = 1; + ptr[i++] = 1; + ptr[i++] = 1; + ptr[i++] = 0x00000045; /* DISPATCH DIRECT field */ + + + while (i & 7) + ptr[i++] = 0xffff1000; /* type3 nop packet */ + + memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info)); + ib_info.ib_mc_address = ib_result_mc_address; + ib_info.size = i; + + memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request)); + ibs_request.ip_type = AMDGPU_HW_IP_GFX; + ibs_request.ring = 0; + ibs_request.number_of_ibs = 1; + ibs_request.ibs = &ib_info; + ibs_request.resources = bo_list; + ibs_request.fence_info.handle = NULL; + + r = amdgpu_cs_submit(context_handle[1], 0,&ibs_request, 1); + CU_ASSERT_EQUAL(r, 0); + seq_no = ibs_request.seq_no; + + + + /* Prepare second command with dependency on the first */ + j = i; + ptr[i++] = PACKET3(PACKET3_WRITE_DATA, 3); + ptr[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM; + ptr[i++] = 0xfffffffc & ib_result_mc_address + DATA_OFFSET * 4; + ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32; + ptr[i++] = 99; + + while (i & 7) + ptr[i++] = 0xffff1000; /* type3 nop packet */ + + memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info)); + ib_info.ib_mc_address = ib_result_mc_address + j * 4; + ib_info.size = i - j; + + memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request)); + ibs_request.ip_type = AMDGPU_HW_IP_GFX; + ibs_request.ring = 0; + ibs_request.number_of_ibs = 1; + ibs_request.ibs = &ib_info; + ibs_request.resources = bo_list; + ibs_request.fence_info.handle = NULL; + + ibs_request.number_of_dependencies = 1; + + ibs_request.dependencies = calloc(1, sizeof(*ibs_request.dependencies)); + ibs_request.dependencies[0].context = context_handle[1]; + ibs_request.dependencies[0].ip_instance = 0; + ibs_request.dependencies[0].ring = 0; + ibs_request.dependencies[0].fence = seq_no; + + + r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request, 1); + CU_ASSERT_EQUAL(r, 0); + + + memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence)); + fence_status.context = context_handle[0]; + fence_status.ip_type = AMDGPU_HW_IP_GFX; + fence_status.ip_instance = 0; + fence_status.ring = 0; + fence_status.fence = ibs_request.seq_no; + + r = amdgpu_cs_query_fence_status(&fence_status, + AMDGPU_TIMEOUT_INFINITE,0, &expired); + CU_ASSERT_EQUAL(r, 0); + + /* Expect the second command to wait for shader to complete */ + CU_ASSERT_EQUAL(ptr[DATA_OFFSET], 99); + + r = amdgpu_bo_list_destroy(bo_list); + CU_ASSERT_EQUAL(r, 0); + + r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, + ib_result_mc_address, 4096); + CU_ASSERT_EQUAL(r, 0); + + r = amdgpu_cs_ctx_free(context_handle[0]); + CU_ASSERT_EQUAL(r, 0); + r = amdgpu_cs_ctx_free(context_handle[1]); + CU_ASSERT_EQUAL(r, 0); + + free(ibs_request.dependencies); +} |