diff options
Diffstat (limited to 'dev-util/hip/files/hip-5.7.1-extend-isa-compatibility-check.patch')
-rw-r--r-- | dev-util/hip/files/hip-5.7.1-extend-isa-compatibility-check.patch | 236 |
1 files changed, 236 insertions, 0 deletions
diff --git a/dev-util/hip/files/hip-5.7.1-extend-isa-compatibility-check.patch b/dev-util/hip/files/hip-5.7.1-extend-isa-compatibility-check.patch new file mode 100644 index 000000000000..7853de693cde --- /dev/null +++ b/dev-util/hip/files/hip-5.7.1-extend-isa-compatibility-check.patch @@ -0,0 +1,236 @@ +Combined with matching changes within rocr-runtime ebuild, this patch allows +to load compatible kernels whenever possible. +For example if AMDGPU_TARGETS is set to gfx1030 and some application +was started on gfx1036, it loads gfx1030 kernel. + +Author: Cordell Bloor <cgmb@slerp.xyz> +https://salsa.debian.org/rocm-team/rocm-hipamd/-/blob/master/debian/patches/0025-improve-rocclr-isa-compatibility-check.patch +https://salsa.debian.org/rocm-team/rocm-hipamd/-/blob/master/debian/patches/0026-extend-hip-isa-compatibility-check.patch +--- a/hipamd/src/hip_code_object.cpp ++++ b/hipamd/src/hip_code_object.cpp +@@ -390,47 +390,123 @@ static bool getTripleTargetID(std::string bundled_co_entry_id, const void* code_ + return true; + } + +-static bool isCodeObjectCompatibleWithDevice(std::string co_triple_target_id, +- std::string agent_triple_target_id) { ++struct GfxPattern { ++ std::string root; ++ std::string suffixes; ++}; ++ ++static bool matches(const GfxPattern& p, const std::string& s) { ++ if (p.root.size() + 1 != s.size()) { ++ return false; ++ } ++ if (0 != std::memcmp(p.root.data(), s.data(), p.root.size())) { ++ return false; ++ } ++ return p.suffixes.find(s[p.root.size()]) != std::string::npos; ++} ++ ++static bool isGfx900EquivalentProcessor(const std::string& processor) { ++ return matches(GfxPattern{"gfx90", "029c"}, processor); ++} ++ ++static bool isGfx900SupersetProcessor(const std::string& processor) { ++ return matches(GfxPattern{"gfx90", "0269c"}, processor); ++} ++ ++static bool isGfx1030EquivalentProcessor(const std::string& processor) { ++ return matches(GfxPattern{"gfx103", "0123456"}, processor); ++} ++ ++static bool isGfx1010EquivalentProcessor(const std::string& processor) { ++ return matches(GfxPattern{"gfx101", "02"}, processor); ++} ++ ++static bool isGfx1010SupersetProcessor(const std::string& processor) { ++ return matches(GfxPattern{"gfx101", "0123"}, processor); ++} ++ ++enum CompatibilityScore { ++ CS_EXACT_MATCH = 1 << 4, ++ CS_PROCESSOR_MATCH = 1 << 3, ++ CS_PROCESSOR_COMPATIBLE = 1 << 2, ++ CS_XNACK_SPECIALIZED = 1 << 1, ++ CS_SRAM_ECC_SPECIALIZED = 1 << 0, ++ CS_INCOMPATIBLE = 0, ++}; ++ ++static int getProcessorCompatibilityScore(const std::string& co_processor, ++ const std::string& agent_processor) { ++ if (co_processor == agent_processor) ++ return CS_PROCESSOR_MATCH; ++ ++ if (isGfx900SupersetProcessor(agent_processor)) ++ return isGfx900EquivalentProcessor(co_processor) ? CS_PROCESSOR_COMPATIBLE : CS_INCOMPATIBLE; ++ ++ if (isGfx1010SupersetProcessor(agent_processor)) ++ return isGfx1010EquivalentProcessor(co_processor) ? CS_PROCESSOR_COMPATIBLE : CS_INCOMPATIBLE; ++ ++ if (isGfx1030EquivalentProcessor(agent_processor)) ++ return isGfx1030EquivalentProcessor(co_processor) ? CS_PROCESSOR_COMPATIBLE : CS_INCOMPATIBLE; ++ ++ return CS_INCOMPATIBLE; ++} ++ ++static int getCompatiblityScore(std::string co_triple_target_id, ++ std::string agent_triple_target_id) { + // Primitive Check +- if (co_triple_target_id == agent_triple_target_id) return true; ++ if (co_triple_target_id == agent_triple_target_id) return CS_EXACT_MATCH; + + // Parse code object triple target id + if (!consume(co_triple_target_id, std::string(AMDGCN_TARGET_TRIPLE) + '-')) { +- return false; ++ return CS_INCOMPATIBLE; + } + + std::string co_processor; + char co_sram_ecc, co_xnack; + if (!getTargetIDValue(co_triple_target_id, co_processor, co_sram_ecc, co_xnack)) { +- return false; ++ return CS_INCOMPATIBLE; + } + +- if (!co_triple_target_id.empty()) return false; ++ if (!co_triple_target_id.empty()) return CS_INCOMPATIBLE; + + // Parse agent isa triple target id + if (!consume(agent_triple_target_id, std::string(AMDGCN_TARGET_TRIPLE) + '-')) { +- return false; ++ return CS_INCOMPATIBLE; + } + + std::string agent_isa_processor; + char isa_sram_ecc, isa_xnack; + if (!getTargetIDValue(agent_triple_target_id, agent_isa_processor, isa_sram_ecc, isa_xnack)) { +- return false; ++ return CS_INCOMPATIBLE; + } + +- if (!agent_triple_target_id.empty()) return false; ++ if (!agent_triple_target_id.empty()) return CS_INCOMPATIBLE; + + // Check for compatibility +- if (agent_isa_processor != co_processor) return false; +- if (co_sram_ecc != ' ') { +- if (co_sram_ecc != isa_sram_ecc) return false; ++ int processor_score = getProcessorCompatibilityScore(co_processor, agent_isa_processor); ++ if (processor_score == CS_INCOMPATIBLE) { ++ return CS_INCOMPATIBLE; + } +- if (co_xnack != ' ') { +- if (co_xnack != isa_xnack) return false; ++ ++ int xnack_bonus; ++ if (co_xnack == ' ') { ++ xnack_bonus = 0; ++ } else if (co_xnack == isa_xnack) { ++ xnack_bonus = CS_XNACK_SPECIALIZED; ++ } else { ++ return CS_INCOMPATIBLE; + } + +- return true; ++ int sram_ecc_bonus; ++ if (co_sram_ecc == ' ') { ++ sram_ecc_bonus = 0; ++ } else if (co_sram_ecc == isa_sram_ecc) { ++ sram_ecc_bonus = CS_SRAM_ECC_SPECIALIZED; ++ } else { ++ return CS_INCOMPATIBLE; ++ } ++ ++ return processor_score + xnack_bonus + sram_ecc_bonus; + } + + // This will be moved to COMGR eventually +@@ -483,6 +559,7 @@ hipError_t CodeObject::extractCodeObjectFromFatBinary( + for (size_t i = 0; i < agent_triple_target_ids.size(); i++) { + code_objs.push_back(std::make_pair(nullptr, 0)); + } ++ std::vector<int> compatibility_score(agent_triple_target_ids.size()); + + const auto obheader = reinterpret_cast<const __ClangOffloadBundleHeader*>(data); + const auto* desc = &obheader->desc[0]; +@@ -495,17 +572,19 @@ hipError_t CodeObject::extractCodeObjectFromFatBinary( + reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(obheader) + desc->offset); + const size_t image_size = desc->size; + +- if (num_code_objs == 0) break; + std::string bundleEntryId{desc->bundleEntryId, desc->bundleEntryIdSize}; + + std::string co_triple_target_id; + if (!getTripleTargetID(bundleEntryId, image, co_triple_target_id)) continue; + + for (size_t dev = 0; dev < agent_triple_target_ids.size(); ++dev) { +- if (code_objs[dev].first) continue; +- if (isCodeObjectCompatibleWithDevice(co_triple_target_id, agent_triple_target_ids[dev])) { ++ if (compatibility_score[dev] >= CS_PROCESSOR_MATCH) continue; ++ int score = getCompatiblityScore(co_triple_target_id, agent_triple_target_ids[dev]); ++ if (score > compatibility_score[dev]) { ++ compatibility_score[dev] = score; ++ if (!code_objs[dev].first) ++ --num_code_objs; + code_objs[dev] = std::make_pair(image, image_size); +- --num_code_objs; + } + } + } +--- a/rocclr/device/device.cpp ++++ b/rocclr/device/device.cpp +@@ -232,10 +232,49 @@ std::string Isa::isaName() const { + return std::string(hsaIsaNamePrefix) + targetId(); + } + ++template <class T, std::size_t N> ++static bool Contains(const std::array<T, N>& arr, const T& value) { ++ return std::find(std::begin(arr), std::end(arr), value) != std::end(arr); ++} ++ ++static bool IsVersionCompatible(const Isa &codeObjectIsa, ++ const Isa &agentIsa) { ++ if (codeObjectIsa.versionMajor() == agentIsa.versionMajor() && ++ codeObjectIsa.versionMinor() == agentIsa.versionMinor()) { ++ ++ if (codeObjectIsa.versionStepping() == agentIsa.versionStepping()) { ++ return true; // exact match ++ } ++ ++ // The code object and the agent may sometimes be compatible if ++ // they differ only by stepping version. ++ if (codeObjectIsa.versionMajor() == 9 && ++ codeObjectIsa.versionMinor() == 0) { ++ const std::array<uint32_t, 4> gfx900_equivalent = { 0, 2, 9, 12 }; ++ const std::array<uint32_t, 5> gfx900_superset = { 0, 2, 6, 9, 12 }; ++ if (Contains(gfx900_equivalent, codeObjectIsa.versionStepping()) && ++ Contains(gfx900_superset, agentIsa.versionStepping())) { ++ return true; // gfx900 compatible object and agent ++ } ++ } else if (codeObjectIsa.versionMajor() == 10) { ++ if (codeObjectIsa.versionMinor() == 1) { ++ const std::array<uint32_t, 2> gfx1010_equivalent = { 0, 2 }; ++ const std::array<uint32_t, 4> gfx1010_superset = { 0, 1, 2, 3 }; ++ if (Contains(gfx1010_equivalent, codeObjectIsa.versionStepping()) && ++ Contains(gfx1010_superset, agentIsa.versionStepping())) { ++ return true; // gfx1010 compatible object and agent ++ } ++ } else if (codeObjectIsa.versionMinor() == 3) { ++ return true; // gfx1030 compatible object and agent ++ } ++ } ++ } ++ ++ return false; ++} ++ + bool Isa::isCompatible(const Isa &codeObjectIsa, const Isa &agentIsa) { +- if (codeObjectIsa.versionMajor() != agentIsa.versionMajor() || +- codeObjectIsa.versionMinor() != agentIsa.versionMinor() || +- codeObjectIsa.versionStepping() != agentIsa.versionStepping()) ++ if (!IsVersionCompatible(codeObjectIsa, agentIsa)) + return false; + + assert(codeObjectIsa.isSrameccSupported() == agentIsa.isSrameccSupported() && |