summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'dev-util/hip/files/hip-5.7.1-extend-isa-compatibility-check.patch')
-rw-r--r--dev-util/hip/files/hip-5.7.1-extend-isa-compatibility-check.patch236
1 files changed, 236 insertions, 0 deletions
diff --git a/dev-util/hip/files/hip-5.7.1-extend-isa-compatibility-check.patch b/dev-util/hip/files/hip-5.7.1-extend-isa-compatibility-check.patch
new file mode 100644
index 000000000000..7853de693cde
--- /dev/null
+++ b/dev-util/hip/files/hip-5.7.1-extend-isa-compatibility-check.patch
@@ -0,0 +1,236 @@
+Combined with matching changes in the rocr-runtime ebuild, this patch allows
+compatible kernels to be loaded whenever possible.
+For example, if AMDGPU_TARGETS is set to gfx1030 and an application
+is started on gfx1036, it loads the gfx1030 kernel.
+
+Author: Cordell Bloor <cgmb@slerp.xyz>
+https://salsa.debian.org/rocm-team/rocm-hipamd/-/blob/master/debian/patches/0025-improve-rocclr-isa-compatibility-check.patch
+https://salsa.debian.org/rocm-team/rocm-hipamd/-/blob/master/debian/patches/0026-extend-hip-isa-compatibility-check.patch
+--- a/hipamd/src/hip_code_object.cpp
++++ b/hipamd/src/hip_code_object.cpp
+@@ -390,47 +390,123 @@ static bool getTripleTargetID(std::string bundled_co_entry_id, const void* code_
+ return true;
+ }
+
+-static bool isCodeObjectCompatibleWithDevice(std::string co_triple_target_id,
+- std::string agent_triple_target_id) {
++struct GfxPattern {  // Name pattern checked by matches(): a fixed root plus one trailing character.
++ std::string root;  // leading characters that must match exactly
++ std::string suffixes;  // each character in this string is an accepted final character
++};
++
++static bool matches(const GfxPattern& p, const std::string& s) {  // True when s is p.root followed by exactly one character found in p.suffixes.
++ if (p.root.size() + 1 != s.size()) {  // s must be exactly one character longer than the root
++ return false;
++ }
++ if (0 != std::memcmp(p.root.data(), s.data(), p.root.size())) {  // byte-wise prefix comparison; the length check above keeps the read in bounds
++ return false;
++ }
++ return p.suffixes.find(s[p.root.size()]) != std::string::npos;  // the final character of s must be one of the accepted suffixes
++}
++
++static bool isGfx900EquivalentProcessor(const std::string& processor) {  // true for gfx900, gfx902, gfx909 and gfx90c
++ return matches(GfxPattern{"gfx90", "029c"}, processor);
++}
++
++static bool isGfx900SupersetProcessor(const std::string& processor) {  // true for gfx900, gfx902, gfx906, gfx909 and gfx90c
++ return matches(GfxPattern{"gfx90", "0269c"}, processor);
++}
++
++static bool isGfx1030EquivalentProcessor(const std::string& processor) {  // true for gfx1030 through gfx1036
++ return matches(GfxPattern{"gfx103", "0123456"}, processor);
++}
++
++static bool isGfx1010EquivalentProcessor(const std::string& processor) {  // true for gfx1010 and gfx1012
++ return matches(GfxPattern{"gfx101", "02"}, processor);
++}
++
++static bool isGfx1010SupersetProcessor(const std::string& processor) {  // true for gfx1010 through gfx1013
++ return matches(GfxPattern{"gfx101", "0123"}, processor);
++}
++
++enum CompatibilityScore {  // Score components; when candidates are compared, the larger total wins.
++ CS_EXACT_MATCH = 1 << 4,  // the two target-id strings are identical
++ CS_PROCESSOR_MATCH = 1 << 3,  // processor names are identical
++ CS_PROCESSOR_COMPATIBLE = 1 << 2,  // processor names differ but belong to a compatible group
++ CS_XNACK_SPECIALIZED = 1 << 1,  // bonus: code object names an xnack setting equal to the agent's
++ CS_SRAM_ECC_SPECIALIZED = 1 << 0,  // bonus: code object names an sram-ecc setting equal to the agent's
++ CS_INCOMPATIBLE = 0,  // no compatibility; also the value a zero-initialised score starts from
++};
++
++static int getProcessorCompatibilityScore(const std::string& co_processor,  // co_processor: processor name from the code object
++ const std::string& agent_processor) {  // agent_processor: processor name of the device; returns CS_PROCESSOR_MATCH, CS_PROCESSOR_COMPATIBLE or CS_INCOMPATIBLE
++ if (co_processor == agent_processor)  // identical names need no group lookup
++ return CS_PROCESSOR_MATCH;
++
++ if (isGfx900SupersetProcessor(agent_processor))  // agent in the gfx900 superset: only gfx900-equivalent code objects are accepted
++ return isGfx900EquivalentProcessor(co_processor) ? CS_PROCESSOR_COMPATIBLE : CS_INCOMPATIBLE;
++
++ if (isGfx1010SupersetProcessor(agent_processor))  // agent in the gfx1010 superset: only gfx1010-equivalent code objects are accepted
++ return isGfx1010EquivalentProcessor(co_processor) ? CS_PROCESSOR_COMPATIBLE : CS_INCOMPATIBLE;
++
++ if (isGfx1030EquivalentProcessor(agent_processor))  // gfx103x: agent and code object must both be in the same set
++ return isGfx1030EquivalentProcessor(co_processor) ? CS_PROCESSOR_COMPATIBLE : CS_INCOMPATIBLE;
++
++ return CS_INCOMPATIBLE;  // agent is in none of the known groups and the names differ
++}
++
++static int getCompatiblityScore(std::string co_triple_target_id,  // Scores how well a code object target id fits an agent target id; CS_INCOMPATIBLE means no fit.
++ std::string agent_triple_target_id) {  // both ids are taken by value; the parsing helpers below appear to consume them in place (see the empty() checks)
+ // Primitive Check
+- if (co_triple_target_id == agent_triple_target_id) return true;
++ if (co_triple_target_id == agent_triple_target_id) return CS_EXACT_MATCH;  // identical strings get the highest score
+
+ // Parse code object triple target id
+ if (!consume(co_triple_target_id, std::string(AMDGCN_TARGET_TRIPLE) + '-')) {
+- return false;
++ return CS_INCOMPATIBLE;  // code object id does not start with the expected triple prefix
+ }
+
+ std::string co_processor;
+ char co_sram_ecc, co_xnack;
+ if (!getTargetIDValue(co_triple_target_id, co_processor, co_sram_ecc, co_xnack)) {
+- return false;
++ return CS_INCOMPATIBLE;  // code object id could not be parsed
+ }
+
+- if (!co_triple_target_id.empty()) return false;
++ if (!co_triple_target_id.empty()) return CS_INCOMPATIBLE;  // leftover text after parsing is treated as a mismatch
+
+ // Parse agent isa triple target id
+ if (!consume(agent_triple_target_id, std::string(AMDGCN_TARGET_TRIPLE) + '-')) {
+- return false;
++ return CS_INCOMPATIBLE;  // agent id does not start with the expected triple prefix
+ }
+
+ std::string agent_isa_processor;
+ char isa_sram_ecc, isa_xnack;
+ if (!getTargetIDValue(agent_triple_target_id, agent_isa_processor, isa_sram_ecc, isa_xnack)) {
+- return false;
++ return CS_INCOMPATIBLE;  // agent id could not be parsed
+ }
+
+- if (!agent_triple_target_id.empty()) return false;
++ if (!agent_triple_target_id.empty()) return CS_INCOMPATIBLE;  // leftover text after parsing is treated as a mismatch
+
+ // Check for compatibility
+- if (agent_isa_processor != co_processor) return false;
+- if (co_sram_ecc != ' ') {
+- if (co_sram_ecc != isa_sram_ecc) return false;
++ int processor_score = getProcessorCompatibilityScore(co_processor, agent_isa_processor);  // base score from the processor names alone
++ if (processor_score == CS_INCOMPATIBLE) {
++ return CS_INCOMPATIBLE;
+ }
+- if (co_xnack != ' ') {
+- if (co_xnack != isa_xnack) return false;
++
++ int xnack_bonus;
++ if (co_xnack == ' ') {  // NOTE(review): ' ' presumably means the code object does not specify xnack; confirm against getTargetIDValue
++ xnack_bonus = 0;
++ } else if (co_xnack == isa_xnack) {
++ xnack_bonus = CS_XNACK_SPECIALIZED;  // a specified setting that equals the agent's earns the bonus
++ } else {
++ return CS_INCOMPATIBLE;  // a specified setting that differs from the agent's rejects the code object
+ }
+
+- return true;
++ int sram_ecc_bonus;
++ if (co_sram_ecc == ' ') {  // NOTE(review): ' ' presumably means the code object does not specify sram-ecc; confirm against getTargetIDValue
++ sram_ecc_bonus = 0;
++ } else if (co_sram_ecc == isa_sram_ecc) {
++ sram_ecc_bonus = CS_SRAM_ECC_SPECIALIZED;  // a specified setting that equals the agent's earns the bonus
++ } else {
++ return CS_INCOMPATIBLE;  // a specified setting that differs from the agent's rejects the code object
++ }
++
++ return processor_score + xnack_bonus + sram_ecc_bonus;  // the components are distinct bits, so the sum equals their bitwise OR
+ }
+
+ // This will be moved to COMGR eventually
+@@ -483,6 +559,7 @@ hipError_t CodeObject::extractCodeObjectFromFatBinary(
+ for (size_t i = 0; i < agent_triple_target_ids.size(); i++) {
+ code_objs.push_back(std::make_pair(nullptr, 0));
+ }
++ std::vector<int> compatibility_score(agent_triple_target_ids.size());
+
+ const auto obheader = reinterpret_cast<const __ClangOffloadBundleHeader*>(data);
+ const auto* desc = &obheader->desc[0];
+@@ -495,17 +572,19 @@ hipError_t CodeObject::extractCodeObjectFromFatBinary(
+ reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(obheader) + desc->offset);
+ const size_t image_size = desc->size;
+
+- if (num_code_objs == 0) break;
+ std::string bundleEntryId{desc->bundleEntryId, desc->bundleEntryIdSize};
+
+ std::string co_triple_target_id;
+ if (!getTripleTargetID(bundleEntryId, image, co_triple_target_id)) continue;
+
+ for (size_t dev = 0; dev < agent_triple_target_ids.size(); ++dev) {
+- if (code_objs[dev].first) continue;
+- if (isCodeObjectCompatibleWithDevice(co_triple_target_id, agent_triple_target_ids[dev])) {
++ if (compatibility_score[dev] >= CS_PROCESSOR_MATCH) continue;
++ int score = getCompatiblityScore(co_triple_target_id, agent_triple_target_ids[dev]);
++ if (score > compatibility_score[dev]) {
++ compatibility_score[dev] = score;
++ if (!code_objs[dev].first)
++ --num_code_objs;
+ code_objs[dev] = std::make_pair(image, image_size);
+- --num_code_objs;
+ }
+ }
+ }
+--- a/rocclr/device/device.cpp
++++ b/rocclr/device/device.cpp
+@@ -232,10 +232,49 @@ std::string Isa::isaName() const {
+ return std::string(hsaIsaNamePrefix) + targetId();
+ }
+
++template <class T, std::size_t N>  // Membership test for a fixed-size std::array.
++static bool Contains(const std::array<T, N>& arr, const T& value) {  // returns true when some element of arr compares equal to value
++ return std::find(std::begin(arr), std::end(arr), value) != std::end(arr);
++}
++
++static bool IsVersionCompatible(const Isa &codeObjectIsa,  // Compares only the major/minor/stepping version numbers of the two ISAs.
++ const Isa &agentIsa) {  // returns true when the code object's version is accepted for the agent's version
++ if (codeObjectIsa.versionMajor() == agentIsa.versionMajor() &&  // every accepted case requires equal major and minor versions
++ codeObjectIsa.versionMinor() == agentIsa.versionMinor()) {
++
++ if (codeObjectIsa.versionStepping() == agentIsa.versionStepping()) {
++ return true; // exact match
++ }
++
++ // The code object and the agent may sometimes be compatible if
++ // they differ only by stepping version.
++ if (codeObjectIsa.versionMajor() == 9 &&
++ codeObjectIsa.versionMinor() == 0) {
++ const std::array<uint32_t, 4> gfx900_equivalent = { 0, 2, 9, 12 };  // steppings accepted for the code object; NOTE(review): 12 presumably corresponds to the 'c' in gfx90c
++ const std::array<uint32_t, 5> gfx900_superset = { 0, 2, 6, 9, 12 };  // steppings accepted for the agent
++ if (Contains(gfx900_equivalent, codeObjectIsa.versionStepping()) &&
++ Contains(gfx900_superset, agentIsa.versionStepping())) {
++ return true; // gfx900 compatible object and agent
++ }
++ } else if (codeObjectIsa.versionMajor() == 10) {
++ if (codeObjectIsa.versionMinor() == 1) {
++ const std::array<uint32_t, 2> gfx1010_equivalent = { 0, 2 };  // steppings accepted for the code object
++ const std::array<uint32_t, 4> gfx1010_superset = { 0, 1, 2, 3 };  // steppings accepted for the agent
++ if (Contains(gfx1010_equivalent, codeObjectIsa.versionStepping()) &&
++ Contains(gfx1010_superset, agentIsa.versionStepping())) {
++ return true; // gfx1010 compatible object and agent
++ }
++ } else if (codeObjectIsa.versionMinor() == 3) {  // version 10.3: any pair of steppings is accepted, with no list check
++ return true; // gfx1030 compatible object and agent
++ }
++ }
++ }
++
++ return false;  // major/minor differ, or the stepping pair is not in an accepted group
++}
++
+ bool Isa::isCompatible(const Isa &codeObjectIsa, const Isa &agentIsa) {
+- if (codeObjectIsa.versionMajor() != agentIsa.versionMajor() ||
+- codeObjectIsa.versionMinor() != agentIsa.versionMinor() ||
+- codeObjectIsa.versionStepping() != agentIsa.versionStepping())
++ if (!IsVersionCompatible(codeObjectIsa, agentIsa))
+ return false;
+
+ assert(codeObjectIsa.isSrameccSupported() == agentIsa.isSrameccSupported() &&