diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd')
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h | 487 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm | 52 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 73 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 1 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_device.c | 27 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 33 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_module.c | 1 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c | 11 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 21 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c | 3 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 4 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_process.c | 75 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 4 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 10 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 10 |
15 files changed, 752 insertions, 60 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h index 0c4c5499bb5c..73ca9aebf086 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h @@ -2936,3 +2936,490 @@ static const uint32_t cwsr_trap_gfx11_hex[] = { 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, 0x00000000, }; + +static const uint32_t cwsr_trap_gfx9_4_3_hex[] = { + 0xbf820001, 0xbf8202d6, + 0xb8f8f802, 0x89788678, + 0xb8fbf803, 0x866eff78, + 0x00002000, 0xbf840009, + 0x866eff6d, 0x00ff0000, + 0xbf85001a, 0x866eff7b, + 0x00000400, 0xbf85004d, + 0xbf8e0010, 0xb8fbf803, + 0xbf82fffa, 0x866eff7b, + 0x03c00900, 0xbf850011, + 0x866eff7b, 0x000071ff, + 0xbf840008, 0x866fff7b, + 0x00007080, 0xbf840001, + 0xbeee1a87, 0xb8eff801, + 0x8e6e8c6e, 0x866e6f6e, + 0xbf850006, 0x866eff6d, + 0x00ff0000, 0xbf850003, + 0x866eff7b, 0x00000400, + 0xbf850036, 0xb8faf807, + 0x867aff7a, 0x001f8000, + 0x8e7a8b7a, 0x8979ff79, + 0xfc000000, 0x87797a79, + 0xba7ff807, 0x00000000, + 0xb8faf812, 0xb8fbf813, + 0x8efa887a, 0xc0031bbd, + 0x00000010, 0xbf8cc07f, + 0x8e6e976e, 0x8979ff79, + 0x00800000, 0x87796e79, + 0xc0071bbd, 0x00000000, + 0xbf8cc07f, 0xc0071ebd, + 0x00000008, 0xbf8cc07f, + 0x86ee6e6e, 0xbf840001, + 0xbe801d6e, 0x866eff6d, + 0x01ff0000, 0xbf850005, + 0x8778ff78, 0x00002000, + 0x80ec886c, 0x82ed806d, + 0xbf820005, 0x866eff6d, + 0x01000000, 0xbf850002, + 0x806c846c, 0x826d806d, + 0x866dff6d, 0x0000ffff, + 0x8f7a8b79, 0x867aff7a, + 0x001f8000, 0xb97af807, + 0x86fe7e7e, 0x86ea6a6a, + 0x8f6e8378, 0xb96ee0c2, + 0xbf800002, 0xb9780002, + 0xbe801f6c, 0x866dff6d, + 0x0000ffff, 0xbefa0080, + 0xb97a0283, 0xb8faf807, + 0x867aff7a, 0x001f8000, + 0x8e7a8b7a, 0x8979ff79, + 0xfc000000, 0x87797a79, + 0xba7ff807, 0x00000000, + 0xbeee007e, 0xbeef007f, + 0xbefe0180, 0xbf900004, + 0x877a8478, 0xb97af802, + 0xbf8e0002, 0xbf88fffe, + 0xb8fa2985, 0x807a817a, + 0x8e7a8a7a, 0x8e7a817a, + 0xb8fb1605, 0x807b817b, + 0x8e7b867b, 0x807a7b7a, + 0x807a7e7a, 0x827b807f, + 0x867bff7b, 0x0000ffff, + 0xc04b1c3d, 0x00000050, + 0xbf8cc07f, 0xc04b1d3d, + 0x00000060, 0xbf8cc07f, + 0xc0431e7d, 0x00000074, + 0xbf8cc07f, 0xbef4007e, + 0x8675ff7f, 0x0000ffff, + 0x8775ff75, 0x00040000, + 0xbef60080, 0xbef700ff, + 0x00807fac, 0xbef1007c, + 0xbef00080, 0xb8f02985, + 0x80708170, 0x8e708a70, + 0x8e708170, 0xb8fa1605, + 0x807a817a, 0x8e7a867a, + 0x80707a70, 0xbef60084, + 0xbef600ff, 0x01000000, + 0xbefe007c, 0xbefc0070, + 0xc0611c7a, 0x0000007c, + 0xbf8cc07f, 0x80708470, + 0xbefc007e, 0xbefe007c, + 0xbefc0070, 0xc0611b3a, + 0x0000007c, 0xbf8cc07f, + 0x80708470, 0xbefc007e, + 0xbefe007c, 0xbefc0070, + 0xc0611b7a, 0x0000007c, + 0xbf8cc07f, 0x80708470, + 0xbefc007e, 0xbefe007c, + 0xbefc0070, 0xc0611bba, + 0x0000007c, 0xbf8cc07f, + 0x80708470, 0xbefc007e, + 0xbefe007c, 0xbefc0070, + 0xc0611bfa, 0x0000007c, + 0xbf8cc07f, 0x80708470, + 0xbefc007e, 0xbefe007c, + 0xbefc0070, 0xc0611e3a, + 0x0000007c, 0xbf8cc07f, + 0x80708470, 0xbefc007e, + 0xb8fbf803, 0xbefe007c, + 0xbefc0070, 0xc0611efa, + 0x0000007c, 0xbf8cc07f, + 0x80708470, 0xbefc007e, + 0xbefe007c, 0xbefc0070, + 0xc0611a3a, 0x0000007c, + 0xbf8cc07f, 0x80708470, + 0xbefc007e, 0xbefe007c, + 0xbefc0070, 0xc0611a7a, + 0x0000007c, 0xbf8cc07f, + 0x80708470, 0xbefc007e, + 0xb8f1f801, 0xbefe007c, + 0xbefc0070, 0xc0611c7a, + 0x0000007c, 0xbf8cc07f, + 0x80708470, 0xbefc007e, + 0x867aff7f, 0x04000000, + 0xbeef0080, 0x876f6f7a, + 0xb8f02985, 0x80708170, + 0x8e708a70, 0x8e708170, + 0xb8fb1605, 0x807b817b, + 0x8e7b847b, 0x8e76827b, + 0xbef600ff, 0x01000000, + 0xbef20174, 0x80747074, + 0x82758075, 0xbefc0080, + 0xbf800000, 0xbe802b00, + 0xbe822b02, 0xbe842b04, + 0xbe862b06, 0xbe882b08, + 0xbe8a2b0a, 0xbe8c2b0c, + 0xbe8e2b0e, 0xc06b003a, + 0x00000000, 0xbf8cc07f, + 0xc06b013a, 0x00000010, + 0xbf8cc07f, 0xc06b023a, + 0x00000020, 0xbf8cc07f, + 0xc06b033a, 0x00000030, + 0xbf8cc07f, 0x8074c074, + 0x82758075, 0x807c907c, + 0xbf0a7b7c, 0xbf85ffe7, + 0xbef40172, 0xbef00080, + 0xbefe00c1, 0xbeff00c1, + 0xbee80080, 0xbee90080, + 0xbef600ff, 0x01000000, + 0x867aff78, 0x00400000, + 0xbf850003, 0xb8faf803, + 0x897a7aff, 0x10000000, + 0xbf85004d, 0xbe840080, + 0xd2890000, 0x00000900, + 0x80048104, 0xd2890001, + 0x00000900, 0x80048104, + 0xd2890002, 0x00000900, + 0x80048104, 0xd2890003, + 0x00000900, 0x80048104, + 0xc069003a, 0x00000070, + 0xbf8cc07f, 0x80709070, + 0xbf06c004, 0xbf84ffee, + 0xbe840080, 0xd2890000, + 0x00000901, 0x80048104, + 0xd2890001, 0x00000901, + 0x80048104, 0xd2890002, + 0x00000901, 0x80048104, + 0xd2890003, 0x00000901, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0xbe840080, + 0xd2890000, 0x00000902, + 0x80048104, 0xd2890001, + 0x00000902, 0x80048104, + 0xd2890002, 0x00000902, + 0x80048104, 0xd2890003, + 0x00000902, 0x80048104, + 0xc069003a, 0x00000070, + 0xbf8cc07f, 0x80709070, + 0xbf06c004, 0xbf84ffee, + 0xbe840080, 0xd2890000, + 0x00000903, 0x80048104, + 0xd2890001, 0x00000903, + 0x80048104, 0xd2890002, + 0x00000903, 0x80048104, + 0xd2890003, 0x00000903, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0xbf820008, + 0xe0724000, 0x701d0000, + 0xe0724100, 0x701d0100, + 0xe0724200, 0x701d0200, + 0xe0724300, 0x701d0300, + 0xbefe00c1, 0xbeff00c1, + 0xb8fb4306, 0x867bc17b, + 0xbf840064, 0xbf8a0000, + 0x867aff6f, 0x04000000, + 0xbf840060, 0x8e7b867b, + 0x8e7b827b, 0xbef6007b, + 0xb8f02985, 0x80708170, + 0x8e708a70, 0x8e708170, + 0xb8fa1605, 0x807a817a, + 0x8e7a867a, 0x80707a70, + 0x8070ff70, 0x00000080, + 0xbef600ff, 0x01000000, + 0xbefc0080, 0xd28c0002, + 0x000100c1, 0xd28d0003, + 0x000204c1, 0x867aff78, + 0x00400000, 0xbf850003, + 0xb8faf803, 0x897a7aff, + 0x10000000, 0xbf850030, + 0x24040682, 0xd86e4000, + 0x00000002, 0xbf8cc07f, + 0xbe840080, 0xd2890000, + 0x00000900, 0x80048104, + 0xd2890001, 0x00000900, + 0x80048104, 0xd2890002, + 0x00000900, 0x80048104, + 0xd2890003, 0x00000900, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0xbe840080, + 0xd2890000, 0x00000901, + 0x80048104, 0xd2890001, + 0x00000901, 0x80048104, + 0xd2890002, 0x00000901, + 0x80048104, 0xd2890003, + 0x00000901, 0x80048104, + 0xc069003a, 0x00000070, + 0xbf8cc07f, 0x80709070, + 0xbf06c004, 0xbf84ffee, + 0x680404ff, 0x00000200, + 0xd0c9006a, 0x0000f702, + 0xbf87ffd2, 0xbf820015, + 0xd1060002, 0x00011103, + 0x7e0602ff, 0x00000200, + 0xbefc00ff, 0x00010000, + 0xbe800077, 0x8677ff77, + 0xff7fffff, 0x8777ff77, + 0x00058000, 0xd8ec0000, + 0x00000002, 0xbf8cc07f, + 0xe0765000, 0x701d0002, + 0x68040702, 0xd0c9006a, + 0x0000f702, 0xbf87fff7, + 0xbef70000, 0xbef000ff, + 0x00000400, 0xbefe00c1, + 0xbeff00c1, 0xb8fb2b05, + 0x807b817b, 0x8e7b827b, + 0xbef600ff, 0x01000000, + 0xbefc0084, 0xbf0a7b7c, + 0xbf84006d, 0xbf11017c, + 0x807bff7b, 0x00001000, + 0x867aff78, 0x00400000, + 0xbf850003, 0xb8faf803, + 0x897a7aff, 0x10000000, + 0xbf850051, 0xbe840080, + 0xd2890000, 0x00000900, + 0x80048104, 0xd2890001, + 0x00000900, 0x80048104, + 0xd2890002, 0x00000900, + 0x80048104, 0xd2890003, + 0x00000900, 0x80048104, + 0xc069003a, 0x00000070, + 0xbf8cc07f, 0x80709070, + 0xbf06c004, 0xbf84ffee, + 0xbe840080, 0xd2890000, + 0x00000901, 0x80048104, + 0xd2890001, 0x00000901, + 0x80048104, 0xd2890002, + 0x00000901, 0x80048104, + 0xd2890003, 0x00000901, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0xbe840080, + 0xd2890000, 0x00000902, + 0x80048104, 0xd2890001, + 0x00000902, 0x80048104, + 0xd2890002, 0x00000902, + 0x80048104, 0xd2890003, + 0x00000902, 0x80048104, + 0xc069003a, 0x00000070, + 0xbf8cc07f, 0x80709070, + 0xbf06c004, 0xbf84ffee, + 0xbe840080, 0xd2890000, + 0x00000903, 0x80048104, + 0xd2890001, 0x00000903, + 0x80048104, 0xd2890002, + 0x00000903, 0x80048104, + 0xd2890003, 0x00000903, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0x807c847c, + 0xbf0a7b7c, 0xbf85ffb1, + 0xbf9c0000, 0xbf820012, + 0x7e000300, 0x7e020301, + 0x7e040302, 0x7e060303, + 0xe0724000, 0x701d0000, + 0xe0724100, 0x701d0100, + 0xe0724200, 0x701d0200, + 0xe0724300, 0x701d0300, + 0x807c847c, 0x8070ff70, + 0x00000400, 0xbf0a7b7c, + 0xbf85ffef, 0xbf9c0000, + 0xb8fb2985, 0x807b817b, + 0x8e7b837b, 0xb8fa2b05, + 0x807a817a, 0x8e7a827a, + 0x80fb7a7b, 0x867b7b7b, + 0xbf84007a, 0x807bff7b, + 0x00001000, 0xbefc0080, + 0xbf11017c, 0x867aff78, + 0x00400000, 0xbf850003, + 0xb8faf803, 0x897a7aff, + 0x10000000, 0xbf850059, + 0xd3d84000, 0x18000100, + 0xd3d84001, 0x18000101, + 0xd3d84002, 0x18000102, + 0xd3d84003, 0x18000103, + 0xbe840080, 0xd2890000, + 0x00000900, 0x80048104, + 0xd2890001, 0x00000900, + 0x80048104, 0xd2890002, + 0x00000900, 0x80048104, + 0xd2890003, 0x00000900, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0xbe840080, + 0xd2890000, 0x00000901, + 0x80048104, 0xd2890001, + 0x00000901, 0x80048104, + 0xd2890002, 0x00000901, + 0x80048104, 0xd2890003, + 0x00000901, 0x80048104, + 0xc069003a, 0x00000070, + 0xbf8cc07f, 0x80709070, + 0xbf06c004, 0xbf84ffee, + 0xbe840080, 0xd2890000, + 0x00000902, 0x80048104, + 0xd2890001, 0x00000902, + 0x80048104, 0xd2890002, + 0x00000902, 0x80048104, + 0xd2890003, 0x00000902, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0xbe840080, + 0xd2890000, 0x00000903, + 0x80048104, 0xd2890001, + 0x00000903, 0x80048104, + 0xd2890002, 0x00000903, + 0x80048104, 0xd2890003, + 0x00000903, 0x80048104, + 0xc069003a, 0x00000070, + 0xbf8cc07f, 0x80709070, + 0xbf06c004, 0xbf84ffee, + 0x807c847c, 0xbf0a7b7c, + 0xbf85ffa9, 0xbf9c0000, + 0xbf820016, 0xd3d84000, + 0x18000100, 0xd3d84001, + 0x18000101, 0xd3d84002, + 0x18000102, 0xd3d84003, + 0x18000103, 0xe0724000, + 0x701d0000, 0xe0724100, + 0x701d0100, 0xe0724200, + 0x701d0200, 0xe0724300, + 0x701d0300, 0x807c847c, + 0x8070ff70, 0x00000400, + 0xbf0a7b7c, 0xbf85ffeb, + 0xbf9c0000, 0xbf8200ee, + 0xbef4007e, 0x8675ff7f, + 0x0000ffff, 0x8775ff75, + 0x00040000, 0xbef60080, + 0xbef700ff, 0x00807fac, + 0x866eff7f, 0x04000000, + 0xbf84001f, 0xbefe00c1, + 0xbeff00c1, 0xb8ef4306, + 0x866fc16f, 0xbf84001a, + 0x8e6f866f, 0x8e6f826f, + 0xbef6006f, 0xb8f82985, + 0x80788178, 0x8e788a78, + 0x8e788178, 0xb8ee1605, + 0x806e816e, 0x8e6e866e, + 0x80786e78, 0x8078ff78, + 0x00000080, 0xbef600ff, + 0x01000000, 0xbefc0080, + 0xe0510000, 0x781d0000, + 0xe0510100, 0x781d0000, + 0x807cff7c, 0x00000200, + 0x8078ff78, 0x00000200, + 0xbf0a6f7c, 0xbf85fff6, + 0xbefe00c1, 0xbeff00c1, + 0xbef600ff, 0x01000000, + 0xb8ef2b05, 0x806f816f, + 0x8e6f826f, 0x806fff6f, + 0x00008000, 0xbef80080, + 0xbeee0078, 0x8078ff78, + 0x00000400, 0xbefc0084, + 0xbf11087c, 0xe0524000, + 0x781d0000, 0xe0524100, + 0x781d0100, 0xe0524200, + 0x781d0200, 0xe0524300, + 0x781d0300, 0xbf8c0f70, + 0x7e000300, 0x7e020301, + 0x7e040302, 0x7e060303, + 0x807c847c, 0x8078ff78, + 0x00000400, 0xbf0a6f7c, + 0xbf85ffee, 0xb8ef2985, + 0x806f816f, 0x8e6f836f, + 0xb8f92b05, 0x80798179, + 0x8e798279, 0x80ef796f, + 0x866f6f6f, 0xbf84001a, + 0x806fff6f, 0x00008000, + 0xbefc0080, 0xbf11087c, + 0xe0524000, 0x781d0000, + 0xe0524100, 0x781d0100, + 0xe0524200, 0x781d0200, + 0xe0524300, 0x781d0300, + 0xbf8c0f70, 0xd3d94000, + 0x18000100, 0xd3d94001, + 0x18000101, 0xd3d94002, + 0x18000102, 0xd3d94003, + 0x18000103, 0x807c847c, + 0x8078ff78, 0x00000400, + 0xbf0a6f7c, 0xbf85ffea, + 0xbf9c0000, 0xe0524000, + 0x6e1d0000, 0xe0524100, + 0x6e1d0100, 0xe0524200, + 0x6e1d0200, 0xe0524300, + 0x6e1d0300, 0xbf8c0f70, + 0xb8f82985, 0x80788178, + 0x8e788a78, 0x8e788178, + 0xb8ee1605, 0x806e816e, + 0x8e6e866e, 0x80786e78, + 0x80f8c078, 0xb8ef1605, + 0x806f816f, 0x8e6f846f, + 0x8e76826f, 0xbef600ff, + 0x01000000, 0xbefc006f, + 0xc031003a, 0x00000078, + 0x80f8c078, 0xbf8cc07f, + 0x80fc907c, 0xbf800000, + 0xbe802d00, 0xbe822d02, + 0xbe842d04, 0xbe862d06, + 0xbe882d08, 0xbe8a2d0a, + 0xbe8c2d0c, 0xbe8e2d0e, + 0xbf06807c, 0xbf84fff0, + 0xb8f82985, 0x80788178, + 0x8e788a78, 0x8e788178, + 0xb8ee1605, 0x806e816e, + 0x8e6e866e, 0x80786e78, + 0xbef60084, 0xbef600ff, + 0x01000000, 0xc0211bfa, + 0x00000078, 0x80788478, + 0xc0211b3a, 0x00000078, + 0x80788478, 0xc0211b7a, + 0x00000078, 0x80788478, + 0xc0211c3a, 0x00000078, + 0x80788478, 0xc0211c7a, + 0x00000078, 0x80788478, + 0xc0211eba, 0x00000078, + 0x80788478, 0xc0211efa, + 0x00000078, 0x80788478, + 0xc0211a3a, 0x00000078, + 0x80788478, 0xc0211a7a, + 0x00000078, 0x80788478, + 0xc0211cfa, 0x00000078, + 0x80788478, 0xbf8cc07f, + 0xbefc006f, 0xbefe0070, + 0xbeff0071, 0x866f7bff, + 0x000003ff, 0xb96f4803, + 0x866f7bff, 0xfffff800, + 0x8f6f8b6f, 0xb96fa2c3, + 0xb973f801, 0xb8ee2985, + 0x806e816e, 0x8e6e8a6e, + 0x8e6e816e, 0xb8ef1605, + 0x806f816f, 0x8e6f866f, + 0x806e6f6e, 0x806e746e, + 0x826f8075, 0x866fff6f, + 0x0000ffff, 0xc00b1c37, + 0x00000050, 0xc00b1d37, + 0x00000060, 0xc0031e77, + 0x00000074, 0xbf8cc07f, + 0x8f6e8b79, 0x866eff6e, + 0x001f8000, 0xb96ef807, + 0x866dff6d, 0x0000ffff, + 0x86fe7e7e, 0x86ea6a6a, + 0x8f6e837a, 0xb96ee0c2, + 0xbf800002, 0xb97a0002, + 0xbf8a0000, 0xbe801f6c, + 0xbf810000, 0x00000000, +}; diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm index 6770cbe3250a..f2087cc2e89d 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm @@ -33,15 +33,20 @@ * aldebaran: * cpp -DASIC_FAMILY=CHIP_ALDEBARAN cwsr_trap_handler_gfx9.asm -P -o aldebaran.sp3 * sp3 aldebaran.sp3 -hex aldebaran.hex + * + * gc_9_4_3: + * cpp -DASIC_FAMILY=GC_9_4_3 cwsr_trap_handler_gfx9.asm -P -o gc_9_4_3.sp3 + * sp3 gc_9_4_3.sp3 -hex gc_9_4_3.hex */ #define CHIP_VEGAM 18 #define CHIP_ARCTURUS 23 #define CHIP_ALDEBARAN 25 +#define CHIP_GC_9_4_3 26 var ACK_SQC_STORE = 1 //workaround for suspected SQC store bug causing incorrect stores under concurrency var SAVE_AFTER_XNACK_ERROR = 1 //workaround for TCP store failure after XNACK error when ALLOW_REPLAY=0, for debugger -var SINGLE_STEP_MISSED_WORKAROUND = 1 //workaround for lost MODE.DEBUG_EN exception when SAVECTX raised +var SINGLE_STEP_MISSED_WORKAROUND = (ASIC_FAMILY <= CHIP_ALDEBARAN) //workaround for lost MODE.DEBUG_EN exception when SAVECTX raised /**************************************************************************/ /* variables */ @@ -77,6 +82,10 @@ var SQ_WAVE_TRAPSTS_ADDR_WATCH_MASK = 0x80 var SQ_WAVE_TRAPSTS_ADDR_WATCH_SHIFT = 7 var SQ_WAVE_TRAPSTS_MEM_VIOL_MASK = 0x100 var SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT = 8 +var SQ_WAVE_TRAPSTS_HOST_TRAP_MASK = 0x400000 +var SQ_WAVE_TRAPSTS_WAVE_BEGIN_MASK = 0x800000 +var SQ_WAVE_TRAPSTS_WAVE_END_MASK = 0x1000000 +var SQ_WAVE_TRAPSTS_TRAP_AFTER_INST_MASK = 0x2000000 var SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK = 0x3FF var SQ_WAVE_TRAPSTS_PRE_SAVECTX_SHIFT = 0x0 var SQ_WAVE_TRAPSTS_PRE_SAVECTX_SIZE = 10 @@ -95,10 +104,10 @@ var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK = 0x1F8000 var SQ_WAVE_MODE_DEBUG_EN_MASK = 0x800 -var TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT = 26 // bits [31:26] unused by SPI debug data -var TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK = 0xFC000000 -var TTMP11_DEBUG_TRAP_ENABLED_SHIFT = 23 -var TTMP11_DEBUG_TRAP_ENABLED_MASK = 0x800000 +var TTMP_SAVE_RCNT_FIRST_REPLAY_SHIFT = 26 // bits [31:26] unused by SPI debug data +var TTMP_SAVE_RCNT_FIRST_REPLAY_MASK = 0xFC000000 +var TTMP_DEBUG_TRAP_ENABLED_SHIFT = 23 +var TTMP_DEBUG_TRAP_ENABLED_MASK = 0x800000 /* Save */ var S_SAVE_BUF_RSRC_WORD1_STRIDE = 0x00040000 //stride is 4 bytes @@ -129,6 +138,11 @@ var s_save_alloc_size = s_save_trapsts //conflict var s_save_m0 = ttmp5 var s_save_ttmps_lo = s_save_tmp //no conflict var s_save_ttmps_hi = s_save_trapsts //no conflict +#if ASIC_FAMILY >= CHIP_GC_9_4_3 +var s_save_ib_sts = ttmp13 +#else +var s_save_ib_sts = ttmp11 +#endif /* Restore */ var S_RESTORE_BUF_RSRC_WORD1_STRIDE = S_SAVE_BUF_RSRC_WORD1_STRIDE @@ -215,9 +229,15 @@ L_NOT_HALTED: // Any concurrent SAVECTX will be handled upon re-entry once halted. // Check non-maskable exceptions. memory_violation, illegal_instruction - // and xnack_error exceptions always cause the wave to enter the trap - // handler. - s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK|SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK + // and debugger (host trap, wave start/end, trap after instruction) + // exceptions always cause the wave to enter the trap handler. + s_and_b32 ttmp2, s_save_trapsts, \ + SQ_WAVE_TRAPSTS_MEM_VIOL_MASK | \ + SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK | \ + SQ_WAVE_TRAPSTS_HOST_TRAP_MASK | \ + SQ_WAVE_TRAPSTS_WAVE_BEGIN_MASK | \ + SQ_WAVE_TRAPSTS_WAVE_END_MASK | \ + SQ_WAVE_TRAPSTS_TRAP_AFTER_INST_MASK s_cbranch_scc1 L_FETCH_2ND_TRAP // Check for maskable exceptions in trapsts.excp and trapsts.excp_hi. @@ -265,9 +285,9 @@ L_FETCH_2ND_TRAP: s_load_dword ttmp2, [ttmp14, ttmp15], 0x10 glc:1 // debug trap enabled flag s_waitcnt lgkmcnt(0) - s_lshl_b32 ttmp2, ttmp2, TTMP11_DEBUG_TRAP_ENABLED_SHIFT - s_andn2_b32 ttmp11, ttmp11, TTMP11_DEBUG_TRAP_ENABLED_MASK - s_or_b32 ttmp11, ttmp11, ttmp2 + s_lshl_b32 ttmp2, ttmp2, TTMP_DEBUG_TRAP_ENABLED_SHIFT + s_andn2_b32 s_save_ib_sts, s_save_ib_sts, TTMP_DEBUG_TRAP_ENABLED_MASK + s_or_b32 s_save_ib_sts, s_save_ib_sts, ttmp2 s_load_dwordx2 [ttmp2, ttmp3], [ttmp14, ttmp15], 0x0 glc:1 // second-level TBA s_waitcnt lgkmcnt(0) @@ -1058,17 +1078,17 @@ function set_status_without_spi_prio(status, tmp) end function save_and_clear_ib_sts(tmp) - // Save IB_STS.FIRST_REPLAY[15] and IB_STS.RCNT[20:16] into unused space ttmp11[31:26]. + // Save IB_STS.FIRST_REPLAY[15] and IB_STS.RCNT[20:16] into unused space s_save_ib_sts[31:26]. s_getreg_b32 tmp, hwreg(HW_REG_IB_STS) s_and_b32 tmp, tmp, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK - s_lshl_b32 tmp, tmp, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT) - s_andn2_b32 ttmp11, ttmp11, TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK - s_or_b32 ttmp11, ttmp11, tmp + s_lshl_b32 tmp, tmp, (TTMP_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT) + s_andn2_b32 s_save_ib_sts, s_save_ib_sts, TTMP_SAVE_RCNT_FIRST_REPLAY_MASK + s_or_b32 s_save_ib_sts, s_save_ib_sts, tmp s_setreg_imm32_b32 hwreg(HW_REG_IB_STS), 0x0 end function restore_ib_sts(tmp) - s_lshr_b32 tmp, ttmp11, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT) + s_lshr_b32 tmp, s_save_ib_sts, (TTMP_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT) s_and_b32 tmp, tmp, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK s_setreg_b32 hwreg(HW_REG_IB_STS), tmp end diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index a0e30f21e12e..1b54a9aaae70 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -93,7 +93,7 @@ int kfd_chardev_init(void) if (err < 0) goto err_register_chrdev; - kfd_class = class_create(THIS_MODULE, kfd_dev_name); + kfd_class = class_create(kfd_dev_name); err = PTR_ERR(kfd_class); if (IS_ERR(kfd_class)) goto err_class_create; @@ -1312,14 +1312,14 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep, args->n_success = i+1; } - mutex_unlock(&p->mutex); - err = amdgpu_amdkfd_gpuvm_sync_memory(dev->adev, (struct kgd_mem *) mem, true); if (err) { pr_debug("Sync memory failed, wait interrupted by user signal\n"); goto sync_memory_failed; } + mutex_unlock(&p->mutex); + /* Flush TLBs after waiting for the page table updates to complete */ for (i = 0; i < args->n_devices; i++) { peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]); @@ -1335,9 +1335,9 @@ get_process_device_data_failed: bind_process_to_device_failed: get_mem_obj_from_handle_failed: map_memory_to_gpu_failed: +sync_memory_failed: mutex_unlock(&p->mutex); copy_from_user_failed: -sync_memory_failed: kfree(devices_arr); return err; @@ -1351,6 +1351,7 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep, void *mem; long err = 0; uint32_t *devices_arr = NULL, i; + bool flush_tlb; if (!args->n_devices) { pr_debug("Device IDs array empty\n"); @@ -1403,16 +1404,19 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep, } args->n_success = i+1; } - mutex_unlock(&p->mutex); - if (kfd_flush_tlb_after_unmap(pdd->dev)) { + flush_tlb = kfd_flush_tlb_after_unmap(pdd->dev); + if (flush_tlb) { err = amdgpu_amdkfd_gpuvm_sync_memory(pdd->dev->adev, (struct kgd_mem *) mem, true); if (err) { pr_debug("Sync memory failed, wait interrupted by user signal\n"); goto sync_memory_failed; } + } + mutex_unlock(&p->mutex); + if (flush_tlb) { /* Flush TLBs after waiting for the page table updates to complete */ for (i = 0; i < args->n_devices; i++) { peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]); @@ -1428,9 +1432,9 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep, bind_process_to_device_failed: get_mem_obj_from_handle_failed: unmap_memory_from_gpu_failed: +sync_memory_failed: mutex_unlock(&p->mutex); copy_from_user_failed: -sync_memory_failed: kfree(devices_arr); return err; } @@ -1586,6 +1590,58 @@ err_unlock: return r; } +static int kfd_ioctl_export_dmabuf(struct file *filep, + struct kfd_process *p, void *data) +{ + struct kfd_ioctl_export_dmabuf_args *args = data; + struct kfd_process_device *pdd; + struct dma_buf *dmabuf; + struct kfd_dev *dev; + void *mem; + int ret = 0; + + dev = kfd_device_by_id(GET_GPU_ID(args->handle)); + if (!dev) + return -EINVAL; + + mutex_lock(&p->mutex); + + pdd = kfd_get_process_device_data(dev, p); + if (!pdd) { + ret = -EINVAL; + goto err_unlock; + } + + mem = kfd_process_device_translate_handle(pdd, + GET_IDR_HANDLE(args->handle)); + if (!mem) { + ret = -EINVAL; + goto err_unlock; + } + + ret = amdgpu_amdkfd_gpuvm_export_dmabuf(mem, &dmabuf); + mutex_unlock(&p->mutex); + if (ret) + goto err_out; + + ret = dma_buf_fd(dmabuf, args->flags); + if (ret < 0) { + dma_buf_put(dmabuf); + goto err_out; + } + /* dma_buf_fd assigns the reference count to the fd, no need to + * put the reference here. + */ + args->dmabuf_fd = ret; + + return 0; + +err_unlock: + mutex_unlock(&p->mutex); +err_out: + return ret; +} + /* Handle requests for watching SMI events */ static int kfd_ioctl_smi_events(struct file *filep, struct kfd_process *p, void *data) @@ -2768,6 +2824,9 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { AMDKFD_IOCTL_DEF(AMDKFD_IOC_AVAILABLE_MEMORY, kfd_ioctl_get_available_memory, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_EXPORT_DMABUF, + kfd_ioctl_export_dmabuf, 0), }; #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index 3251f4783ba1..475e47027354 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -1462,6 +1462,7 @@ int kfd_get_gpu_cache_info(struct kfd_dev *kdev, struct kfd_gpu_cache_info **pca num_of_cache_types = ARRAY_SIZE(vega20_cache_info); break; case IP_VERSION(9, 4, 2): + case IP_VERSION(9, 4, 3): *pcache_info = aldebaran_cache_info; num_of_cache_types = ARRAY_SIZE(aldebaran_cache_info); break; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 3de7f616a001..00f528eb9812 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -59,6 +59,7 @@ static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size, unsigned int chunk_size); static void kfd_gtt_sa_fini(struct kfd_dev *kfd); +static int kfd_resume_iommu(struct kfd_dev *kfd); static int kfd_resume(struct kfd_dev *kfd); static void kfd_device_info_set_sdma_info(struct kfd_dev *kfd) @@ -203,6 +204,14 @@ static void kfd_device_info_init(struct kfd_dev *kfd, /* Navi1x+ */ if (gc_version >= IP_VERSION(10, 1, 1)) kfd->device_info.needs_pci_atomics = true; + } else if (gc_version < IP_VERSION(12, 0, 0)) { + /* + * PCIe atomics support acknowledgment in GFX11 RS64 CPFW requires + * MEC version >= 509. Prior RS64 CPFW versions (and all F32) require + * PCIe atomics support. + */ + kfd->device_info.needs_pci_atomics = true; + kfd->device_info.no_atomic_fw_version = kfd->adev->gfx.rs64_enable ? 509 : 0; } } else { kfd->device_info.doorbell_size = 4; @@ -317,6 +326,10 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf) gfx_target_version = 90010; f2g = &aldebaran_kfd2kgd; break; + case IP_VERSION(9, 4, 3): + gfx_target_version = 90400; + f2g = &aldebaran_kfd2kgd; + break; /* Navi10 */ case IP_VERSION(10, 1, 10): gfx_target_version = 100100; @@ -449,6 +462,10 @@ static void kfd_cwsr_init(struct kfd_dev *kfd) BUILD_BUG_ON(sizeof(cwsr_trap_aldebaran_hex) > PAGE_SIZE); kfd->cwsr_isa = cwsr_trap_aldebaran_hex; kfd->cwsr_isa_size = sizeof(cwsr_trap_aldebaran_hex); + } else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3)) { + BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_4_3_hex) > PAGE_SIZE); + kfd->cwsr_isa = cwsr_trap_gfx9_4_3_hex; + kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_4_3_hex); } else if (KFD_GC_VERSION(kfd) < IP_VERSION(10, 1, 1)) { BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex) > PAGE_SIZE); kfd->cwsr_isa = cwsr_trap_gfx9_hex; @@ -624,7 +641,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, svm_migrate_init(kfd->adev); - if (kgd2kfd_resume_iommu(kfd)) + if (kfd_resume_iommu(kfd)) goto device_iommu_error; if (kfd_resume(kfd)) @@ -773,6 +790,14 @@ int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm) int kgd2kfd_resume_iommu(struct kfd_dev *kfd) { + if (!kfd->init_complete) + return 0; + + return kfd_resume_iommu(kfd); +} + +static int kfd_resume_iommu(struct kfd_dev *kfd) +{ int err = 0; err = kfd_iommu_resume(kfd); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index de8ce72344fc..54933903bcb8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -289,7 +289,7 @@ static unsigned long svm_migrate_unsuccessful_pages(struct migrate_vma *migrate) static int svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange, struct migrate_vma *migrate, struct dma_fence **mfence, - dma_addr_t *scratch) + dma_addr_t *scratch, uint64_t ttm_res_offset) { uint64_t npages = migrate->npages; struct device *dev = adev->dev; @@ -299,19 +299,13 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange, uint64_t i, j; int r; - pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start, - prange->last); + pr_debug("svms 0x%p [0x%lx 0x%lx 0x%llx]\n", prange->svms, prange->start, + prange->last, ttm_res_offset); src = scratch; dst = (uint64_t *)(scratch + npages); - r = svm_range_vram_node_new(adev, prange, true); - if (r) { - dev_dbg(adev->dev, "fail %d to alloc vram\n", r); - goto out; - } - - amdgpu_res_first(prange->ttm_res, prange->offset << PAGE_SHIFT, + amdgpu_res_first(prange->ttm_res, ttm_res_offset, npages << PAGE_SHIFT, &cursor); for (i = j = 0; i < npages; i++) { struct page *spage; @@ -391,14 +385,14 @@ out_free_vram_pages: migrate->dst[i + 3] = 0; } #endif -out: + return r; } static long svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange, struct vm_area_struct *vma, uint64_t start, - uint64_t end, uint32_t trigger) + uint64_t end, uint32_t trigger, uint64_t ttm_res_offset) { struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms); uint64_t npages = (end - start) >> PAGE_SHIFT; @@ -451,7 +445,7 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange, else pr_debug("0x%lx pages migrated\n", cpages); - r = svm_migrate_copy_to_vram(adev, prange, &migrate, &mfence, scratch); + r = svm_migrate_copy_to_vram(adev, prange, &migrate, &mfence, scratch, ttm_res_offset); migrate_vma_pages(&migrate); pr_debug("successful/cpages/npages 0x%lx/0x%lx/0x%lx\n", @@ -499,6 +493,7 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, unsigned long addr, start, end; struct vm_area_struct *vma; struct amdgpu_device *adev; + uint64_t ttm_res_offset; unsigned long cpages = 0; long r = 0; @@ -520,6 +515,13 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, start = prange->start << PAGE_SHIFT; end = (prange->last + 1) << PAGE_SHIFT; + r = svm_range_vram_node_new(adev, prange, true); + if (r) { + dev_dbg(adev->dev, "fail %ld to alloc vram\n", r); + return r; + } + ttm_res_offset = prange->offset << PAGE_SHIFT; + for (addr = start; addr < end;) { unsigned long next; @@ -528,18 +530,21 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, break; next = min(vma->vm_end, end); - r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next, trigger); + r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next, trigger, ttm_res_offset); if (r < 0) { pr_debug("failed %ld to migrate\n", r); break; } else { cpages += r; } + ttm_res_offset += next - addr; addr = next; } if (cpages) prange->actual_loc = best_loc; + else + svm_range_vram_node_free(prange); return r < 0 ? r : 0; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c index 09b966dc3768..aee2212e52f6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c @@ -77,6 +77,7 @@ err_ioctl: static void kfd_exit(void) { + kfd_cleanup_processes(); kfd_debugfs_fini(); kfd_process_destroy_wq(); kfd_procfs_shutdown(); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c index 4a9af800b1f1..5aa75f72caa1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c @@ -143,6 +143,13 @@ static void init_mqd(struct mqd_manager *mm, void **mqd, 1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT | 1 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT; + /* + * GFX11 RS64 CPFW version >= 509 supports PCIe atomics support + * acknowledgment. + */ + if (amdgpu_amdkfd_have_atomics_support(mm->dev->adev)) + m->cp_hqd_hq_status0 |= 1 << 29; + if (q->format == KFD_QUEUE_FORMAT_AQL) { m->cp_hqd_aql_control = 1 << CP_HQD_AQL_CONTROL__CONTROL0__SHIFT; @@ -350,6 +357,10 @@ static void update_mqd_sdma(struct mqd_manager *mm, void *mqd, m->sdmax_rlcx_doorbell_offset = q->doorbell_off << SDMA0_QUEUE0_DOORBELL_OFFSET__OFFSET__SHIFT; + m->sdmax_rlcx_sched_cntl = (amdgpu_sdma_phase_quantum + << SDMA0_QUEUE0_SCHEDULE_CNTL__CONTEXT_QUANTUM__SHIFT) + & SDMA0_QUEUE0_SCHEDULE_CNTL__CONTEXT_QUANTUM_MASK; + m->sdma_engine_id = q->sdma_engine_id; m->sdma_queue_id = q->sdma_queue_id; m->sdmax_rlcx_dummy_reg = SDMA_RLC_DUMMY_DEFAULT; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index 0778e587a2d6..fdbfd725841f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -135,6 +135,7 @@ static void init_mqd(struct mqd_manager *mm, void **mqd, { uint64_t addr; struct v9_mqd *m; + struct amdgpu_device *adev = (struct amdgpu_device *)mm->dev->adev; m = (struct v9_mqd *) mqd_mem_obj->cpu_ptr; addr = mqd_mem_obj->gpu_addr; @@ -167,6 +168,20 @@ static void init_mqd(struct mqd_manager *mm, void **mqd, if (q->format == KFD_QUEUE_FORMAT_AQL) { m->cp_hqd_aql_control = 1 << CP_HQD_AQL_CONTROL__CONTROL0__SHIFT; + if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) { + /* On GC 9.4.3, DW 41 is re-purposed as + * compute_tg_chunk_size. + * TODO: review this setting when active CUs in the + * partition play a role + */ + m->compute_static_thread_mgmt_se6 = 1; + } + } else { + /* PM4 queue */ + if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) { + m->compute_static_thread_mgmt_se6 = 0; + /* TODO: program pm4_target_xcc */ + } } if (q->tba_addr) { @@ -209,6 +224,7 @@ static void update_mqd(struct mqd_manager *mm, void *mqd, struct queue_properties *q, struct mqd_update_info *minfo) { + struct amdgpu_device *adev = (struct amdgpu_device *)mm->dev->adev; struct v9_mqd *m; m = get_mqd(mqd); @@ -254,10 +270,13 @@ static void update_mqd(struct mqd_manager *mm, void *mqd, m->cp_hqd_vmid = q->vmid; if (q->format == KFD_QUEUE_FORMAT_AQL) { - m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK | + m->cp_hqd_pq_control |= 2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT | 1 << CP_HQD_PQ_CONTROL__QUEUE_FULL_EN__SHIFT | 1 << CP_HQD_PQ_CONTROL__WPP_CLAMP_EN__SHIFT; + if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 3)) + m->cp_hqd_pq_control |= + CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK; m->cp_hqd_pq_doorbell_control |= 1 << CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_BIF_DROP__SHIFT; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index ed02b6d8bf63..f612325241aa 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -238,7 +238,8 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm) pm->pmf = &kfd_vi_pm_funcs; break; default: - if (KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 2)) + if (KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 2) || + KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 3)) pm->pmf = &kfd_aldebaran_pm_funcs; else if (KFD_GC_VERSION(dqm->dev) >= IP_VERSION(9, 0, 1)) pm->pmf = &kfd_v9_pm_funcs; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index bfa30d12406b..94a438956868 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -207,7 +207,8 @@ enum cache_policy { #define KFD_GC_VERSION(dev) ((dev)->adev->ip_versions[GC_HWIP][0]) #define KFD_IS_SOC15(dev) ((KFD_GC_VERSION(dev)) >= (IP_VERSION(9, 0, 1))) #define KFD_SUPPORT_XNACK_PER_PROCESS(dev)\ - (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2)) + ((KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2)) || \ + (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3))) struct kfd_event_interrupt_class { bool (*interrupt_isr)(struct kfd_dev *dev, @@ -928,6 +929,7 @@ bool kfd_dev_is_large_bar(struct kfd_dev *dev); int kfd_process_create_wq(void); void kfd_process_destroy_wq(void); +void kfd_cleanup_processes(void); struct kfd_process *kfd_create_process(struct file *filep); struct kfd_process *kfd_get_process(const struct task_struct *task); struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 7acd55a814b2..07a9eaf9b7d8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -344,7 +344,7 @@ static const struct sysfs_ops kfd_procfs_ops = { .show = kfd_procfs_show, }; -static struct kobj_type procfs_type = { +static const struct kobj_type procfs_type = { .release = kfd_procfs_kobj_release, .sysfs_ops = &kfd_procfs_ops, }; @@ -469,7 +469,7 @@ static const struct sysfs_ops procfs_queue_ops = { .show = kfd_procfs_queue_show, }; -static struct kobj_type procfs_queue_type = { +static const struct kobj_type procfs_queue_type = { .sysfs_ops = &procfs_queue_ops, .default_groups = procfs_queue_groups, }; @@ -478,7 +478,7 @@ static const struct sysfs_ops procfs_stats_ops = { .show = kfd_procfs_stats_show, }; -static struct kobj_type procfs_stats_type = { +static const struct kobj_type procfs_stats_type = { .sysfs_ops = &procfs_stats_ops, .release = kfd_procfs_kobj_release, }; @@ -487,7 +487,7 @@ static const struct sysfs_ops sysfs_counters_ops = { .show = kfd_sysfs_counters_show, }; -static struct kobj_type sysfs_counters_type = { +static const struct kobj_type sysfs_counters_type = { .sysfs_ops = &sysfs_counters_ops, .release = kfd_procfs_kobj_release, }; @@ -1167,6 +1167,17 @@ static void kfd_process_free_notifier(struct mmu_notifier *mn) kfd_unref_process(container_of(mn, struct kfd_process, mmu_notifier)); } +static void kfd_process_notifier_release_internal(struct kfd_process *p) +{ + cancel_delayed_work_sync(&p->eviction_work); + cancel_delayed_work_sync(&p->restore_work); + + /* Indicate to other users that MM is no longer valid */ + p->mm = NULL; + + mmu_notifier_put(&p->mmu_notifier); +} + static void kfd_process_notifier_release(struct mmu_notifier *mn, struct mm_struct *mm) { @@ -1181,17 +1192,22 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn, return; mutex_lock(&kfd_processes_mutex); + /* + * Do early return if table is empty. + * + * This could potentially happen if this function is called concurrently + * by mmu_notifier and by kfd_cleanup_pocesses. + * + */ + if (hash_empty(kfd_processes_table)) { + mutex_unlock(&kfd_processes_mutex); + return; + } hash_del_rcu(&p->kfd_processes); mutex_unlock(&kfd_processes_mutex); synchronize_srcu(&kfd_processes_srcu); - cancel_delayed_work_sync(&p->eviction_work); - cancel_delayed_work_sync(&p->restore_work); - - /* Indicate to other users that MM is no longer valid */ - p->mm = NULL; - - mmu_notifier_put(&p->mmu_notifier); + kfd_process_notifier_release_internal(p); } static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = { @@ -1200,6 +1216,43 @@ static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = { .free_notifier = kfd_process_free_notifier, }; +/* + * This code handles the case when driver is being unloaded before all + * mm_struct are released. We need to safely free the kfd_process and + * avoid race conditions with mmu_notifier that might try to free them. + * + */ +void kfd_cleanup_processes(void) +{ + struct kfd_process *p; + struct hlist_node *p_temp; + unsigned int temp; + HLIST_HEAD(cleanup_list); + + /* + * Move all remaining kfd_process from the process table to a + * temp list for processing. Once done, callback from mmu_notifier + * release will not see the kfd_process in the table and do early return, + * avoiding double free issues. + */ + mutex_lock(&kfd_processes_mutex); + hash_for_each_safe(kfd_processes_table, temp, p_temp, p, kfd_processes) { + hash_del_rcu(&p->kfd_processes); + synchronize_srcu(&kfd_processes_srcu); + hlist_add_head(&p->kfd_processes, &cleanup_list); + } + mutex_unlock(&kfd_processes_mutex); + + hlist_for_each_entry_safe(p, p_temp, &cleanup_list, kfd_processes) + kfd_process_notifier_release_internal(p); + + /* + * Ensures that all outstanding free_notifier get called, triggering + * the release of the kfd_process struct. + */ + mmu_notifier_synchronize(); +} + static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep) { unsigned long offset; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 5137476ec18e..4236539d9f93 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -218,8 +218,8 @@ static int init_user_queue(struct process_queue_manager *pqm, return 0; cleanup: - if (dev->shared_resources.enable_mes) - uninit_queue(*q); + uninit_queue(*q); + *q = NULL; return retval; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index dc6fd6967050..96a138a39515 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -2172,7 +2172,15 @@ restart: pr_debug("drain retry fault gpu %d svms %p\n", i, svms); amdgpu_ih_wait_on_checkpoint_process_ts(pdd->dev->adev, - &pdd->dev->adev->irq.ih1); + pdd->dev->adev->irq.retry_cam_enabled ? + &pdd->dev->adev->irq.ih : + &pdd->dev->adev->irq.ih1); + + if (pdd->dev->adev->irq.retry_cam_enabled) + amdgpu_ih_wait_on_checkpoint_process_ts(pdd->dev->adev, + &pdd->dev->adev->irq.ih_soft); + + pr_debug("drain retry fault gpu %d svms 0x%p done\n", i, svms); } if (atomic_cmpxchg(&svms->drain_pagefaults, drain, 0) != drain) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 3fdaba56be6f..8e4124dcb6e4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -278,7 +278,7 @@ static const struct sysfs_ops sysprops_ops = { .show = sysprops_show, }; -static struct kobj_type sysprops_type = { +static const struct kobj_type sysprops_type = { .release = kfd_topology_kobj_release, .sysfs_ops = &sysprops_ops, }; @@ -318,7 +318,7 @@ static const struct sysfs_ops iolink_ops = { .show = iolink_show, }; -static struct kobj_type iolink_type = { +static const struct kobj_type iolink_type = { .release = kfd_topology_kobj_release, .sysfs_ops = &iolink_ops, }; @@ -350,7 +350,7 @@ static const struct sysfs_ops mem_ops = { .show = mem_show, }; -static struct kobj_type mem_type = { +static const struct kobj_type mem_type = { .release = kfd_topology_kobj_release, .sysfs_ops = &mem_ops, }; @@ -395,7 +395,7 @@ static const struct sysfs_ops cache_ops = { .show = kfd_cache_show, }; -static struct kobj_type cache_type = { +static const struct kobj_type cache_type = { .release = kfd_topology_kobj_release, .sysfs_ops = &cache_ops, }; @@ -566,7 +566,7 @@ static const struct sysfs_ops node_ops = { .show = node_show, }; -static struct kobj_type node_type = { +static const struct kobj_type node_type = { .release = kfd_topology_kobj_release, .sysfs_ops = &node_ops, }; |
