summaryrefslogtreecommitdiff
blob: dd1aefe4df4fd79b97d981eb5c1092d5bb287bfc (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
From 3a4d533a1e2a179ad873c480dc4a42ea23681263 Mon Sep 17 00:00:00 2001
From: Mike Li <Tianxinmike.Li@amd.com>
Date: Wed, 17 Aug 2022 11:44:09 -0400
Subject: [PATCH 1/2] Check permission and handle PermissionError exception

Signed-off-by: Mike Li <Tianxinmike.Li@amd.com>
Change-Id: If7cb8464d0b761e4be45c85eb7147ceed609da61
---
 rocm_agent_enumerator | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/rocm_agent_enumerator b/rocm_agent_enumerator
index 6264a5f..ceb9e11 100755
--- a/rocm_agent_enumerator
+++ b/rocm_agent_enumerator
@@ -195,10 +195,15 @@ def readFromKFD():
       node_path = os.path.join(topology_dir, node)
       if os.path.isdir(node_path):
         prop_path = node_path + '/properties'
-        if os.path.isfile(prop_path):
+        if os.path.isfile(prop_path) and os.access(prop_path, os.R_OK):
           target_search_term = re.compile("gfx_target_version.+")
           with open(prop_path) as f:
-            line = f.readline()
+            try:
+              line = f.readline()
+            except PermissionError:
+              # We may have a subsystem (e.g. scheduler) limiting device visibility which
+              # could cause a permission error.
+              line = ''
             while line != '' :
               search_result = target_search_term.search(line)
               if search_result is not None:

From 94b4b3f0a66eb70912177ca7076b4267f8b5449b Mon Sep 17 00:00:00 2001
From: Johannes Dieterich <johannes.dieterich@amd.com>
Date: Mon, 21 Nov 2022 18:09:55 +0000
Subject: [PATCH 2/2] Fix rocminfo when run within docker environments

Currently, rocminfo will fail when executed inside a docker container
due to being unable to lsmod inside docker. This has impacts on
rocprofiler use.

Fix this behavior by querying initstate of the amdgpu module from
/sys/module/amdgpu instead. If initstate is marked "live" everything if
fine - error out with either "not loaded" (initstate file does not
exist) or "not live" (initstate file does not contain "live" string).

Change-Id: I6f2e9655942fd4cf840fd3f56b7d69e893fa84d7
---
 rocminfo.cc | 30 ++++++++++++++++++++++++------
 1 file changed, 24 insertions(+), 6 deletions(-)

diff --git a/rocminfo.cc b/rocminfo.cc
index 0842d57..8ed9111 100755
--- a/rocminfo.cc
+++ b/rocminfo.cc
@@ -51,6 +51,7 @@
 #include <unistd.h>
 #include <pwd.h>
 
+#include <fstream>
 #include <vector>
 #include <string>
 #include <sstream>
@@ -1039,16 +1040,33 @@ AcquireAndDisplayAgentInfo(hsa_agent_t agent, void* data) {
 
 int CheckInitialState(void) {
   // Check kernel module for ROCk is loaded
-  FILE *fd = popen("lsmod | grep amdgpu", "r");
-  char buf[16];
-  if (fread (buf, 1, sizeof (buf), fd) == 0) {
+
+  std::ifstream amdgpu_initstate("/sys/module/amdgpu/initstate");
+  if (amdgpu_initstate){
+    std::stringstream buffer;
+    buffer << amdgpu_initstate.rdbuf();
+    amdgpu_initstate.close();
+
+    std::string line;
+    bool is_live = false;
+    while (std::getline(buffer, line)){
+      if (line.find( "live" ) != std::string::npos){
+        is_live = true;
+        break;
+      }
+    }
+    if (is_live){
+      printf("%sROCk module is loaded%s\n", COL_WHT, COL_RESET);
+    } else {
+      printf("%sROCk module is NOT live, possibly no GPU devices%s\n",
+                                                          COL_RED, COL_RESET);
+      return -1;
+    }
+  } else {
     printf("%sROCk module is NOT loaded, possibly no GPU devices%s\n",
                                                           COL_RED, COL_RESET);
     return -1;
-  } else {
-    printf("%sROCk module is loaded%s\n", COL_WHT, COL_RESET);
   }
-  pclose(fd);
 
   // Check if user belongs to the group for /dev/kfd (e.g. "video" or
   // "render")