diff options
Diffstat (limited to 'app-admin/rasdaemon/files')
5 files changed, 260 insertions, 33 deletions
diff --git a/app-admin/rasdaemon/files/rasdaemon-0.8.0-bashisms-configure.patch b/app-admin/rasdaemon/files/rasdaemon-0.8.0-bashisms-configure.patch new file mode 100644 index 000000000000..e227df6724b2 --- /dev/null +++ b/app-admin/rasdaemon/files/rasdaemon-0.8.0-bashisms-configure.patch @@ -0,0 +1,40 @@ +https://github.com/mchehab/rasdaemon/pull/89 + +From 9461d22e334686f76c99477386072f4673cd0a72 Mon Sep 17 00:00:00 2001 +From: Sam James <sam@gentoo.org> +Date: Sun, 19 Feb 2023 18:33:20 +0000 +Subject: [PATCH] configure.ac: fix bashisms + +configure scripts need to be runnable with a POSIX-compliant /bin/sh. + +On many (but not all!) systems, /bin/sh is provided by Bash, so errors +like this aren't spotted. Notably Debian defaults to /bin/sh provided +by dash which doesn't tolerate such bashisms as '=='. + +This retains compatibility with bash. + +Fixes configure warnings/errors like: +``` +checking for libtraceevent... yes +./configure: 13430: test: x: unexpected operator +./configure: 13439: test: x: unexpected operator +``` + +Signed-off-by: Sam James <sam@gentoo.org> +--- a/configure.ac ++++ b/configure.ac +@@ -170,11 +170,11 @@ AM_COND_IF([WITH_AMP_NS_DECODE], [USE_AMP_NS_DECODE="yes"], [USE_AMP_NS_DECODE=" + AC_ARG_ENABLE([cpu_fault_isolation], + AS_HELP_STRING([--enable-cpu-fault-isolation], [enable cpu online fault isolation])) + +-AS_IF([test "x$enable_cpu_fault_isolation" = "xyes" || test "x$enable_all" == "xyes"], [ ++AS_IF([test "x$enable_cpu_fault_isolation" = "xyes" || test "x$enable_all" = "xyes"], [ + AC_DEFINE(HAVE_CPU_FAULT_ISOLATION,1,"have cpu online fault isolation") + AC_SUBST([WITH_CPU_FAULT_ISOLATION]) + ]) +-AM_CONDITIONAL([WITH_CPU_FAULT_ISOLATION], [test x$enable_cpu_fault_isolation = xyes || test x$enable_all == xyes]) ++AM_CONDITIONAL([WITH_CPU_FAULT_ISOLATION], [test x$enable_cpu_fault_isolation = xyes || test x$enable_all = xyes]) + AM_COND_IF([WITH_CPU_FAULT_ISOLATION], [USE_CPU_FAULT_ISOLATION="yes"], [USE_CPU_FAULT_ISOLATION="no"]) + + test "$sysconfdir" = '${prefix}/etc' && sysconfdir=/etc + diff --git a/app-admin/rasdaemon/files/rasdaemon-0.8.0-check-online-cpus-not-configured.patch b/app-admin/rasdaemon/files/rasdaemon-0.8.0-check-online-cpus-not-configured.patch new file mode 100644 index 000000000000..0d3e60976659 --- /dev/null +++ b/app-admin/rasdaemon/files/rasdaemon-0.8.0-check-online-cpus-not-configured.patch @@ -0,0 +1,40 @@ +https://bugs.gentoo.org/890286 +https://github.com/mchehab/rasdaemon/issues/77 +https://github.com/mchehab/rasdaemon/commit/f1ea76375281001cdf4a048c1a4a24d86c6fbe48 + +From f1ea76375281001cdf4a048c1a4a24d86c6fbe48 Mon Sep 17 00:00:00 2001 +From: Zeph / Liz Loss-Cutler-Hull <warp-spam_git@aehallh.com> +Date: Sun, 9 Jul 2023 04:57:19 -0700 +Subject: [PATCH] Check CPUs online, not configured. + +When the number of CPUs detected is greater than the number of CPUs in +the system, rasdaemon will crash when it receives some events. + +Looking deeper, we also fail to use the poll method for similar reasons +in this case. + +All of this can be prevented by checking to see how many CPUs are +currently online (sysconf(_SC_NPROCESSORS_ONLN)) instead of how many +CPUs the current kernel was configured to support +(sysconf(_SC_NPROCESSORS_CONF)). + +For the kernel side of the discussion, see https://lore.kernel.org/lkml/CAM6Wdxft33zLeeXHhmNX5jyJtfGTLiwkQSApc=10fqf+rQh9DA@mail.gmail.com/T/ +Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org> +--- + ras-events.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/ras-events.c b/ras-events.c +index a82dab2..5935163 100644 +--- a/ras-events.c ++++ b/ras-events.c +@@ -350,7 +350,7 @@ static void parse_ras_data(struct pthread_data *pdata, struct kbuffer *kbuf, + + static int get_num_cpus(struct ras_events *ras) + { +- return sysconf(_SC_NPROCESSORS_CONF); ++ return sysconf(_SC_NPROCESSORS_ONLN); + #if 0 + char fname[MAX_PATH + 1]; + int num_cpus = 0; + diff --git a/app-admin/rasdaemon/files/rasdaemon-0.8.0-table-create-offline-cpus.patch b/app-admin/rasdaemon/files/rasdaemon-0.8.0-table-create-offline-cpus.patch new file mode 100644 index 000000000000..4eb3977930c6 --- /dev/null +++ b/app-admin/rasdaemon/files/rasdaemon-0.8.0-table-create-offline-cpus.patch @@ -0,0 +1,179 @@ +https://bugs.gentoo.org/890286 +https://github.com/mchehab/rasdaemon/issues/77 +https://github.com/mchehab/rasdaemon/commit/6f7851f72d8464c7a20a248d4abf4362de8f0ba9 + +From 6f7851f72d8464c7a20a248d4abf4362de8f0ba9 Mon Sep 17 00:00:00 2001 +From: Shiju Jose <shiju.jose@huawei.com> +Date: Sun, 5 Mar 2023 23:14:42 +0000 +Subject: [PATCH] rasdaemon: fix table create if some cpus are offline + +Fix for regression in ras_mc_create_table() if some cpus are offline +at the system start + +Issue: + +Regression in the ras_mc_create_table() if some of the cpus are offline +at the system start when run the rasdaemon. + +This issue is reproducible in ras_mc_create_table() with decode and +record non-standard events and reproducible sometimes with +ras_mc_create_table() for the standard events. + +Also in the multi thread way, there is memory leak in ras_mc_event_opendb() +as struct sqlite3_priv *priv and sqlite3 *db allocated/initialized per +thread, but stored in the common struct ras_events ras in pthread data, +which is shared across the threads. + +Reason: + +when the system starts with some of the cpus offline and then run +the rasdaemon, read_ras_event_all_cpus() exit with error and switch to +the multi thread way. However read() in read_ras_event() return error in +threads for each of the offline CPUs and does clean up including calling +ras_mc_event_closedb(). + +Since the 'struct ras_events ras' passed in the pthread_data to each of the +threads is common, struct sqlite3_priv *priv and sqlite3 *db allocated/ +initialized per thread and stored in the common 'struct ras_events ras', +are getting overwritten in each ras_mc_event_opendb()(which called from +pthread per cpu), result memory leak. + +Also when ras_mc_event_closedb() is called in the above error case from +the threads corresponding to the offline cpus, close the sqlite3 *db and +free sqlite3_priv *priv stored in the common 'struct ras_events ras', +result regression when accessing priv->db in the ras_mc_create_table() +from another context later. + +Solution: + +In ras_mc_event_opendb(), allocate struct sqlite3_priv *priv, +init sqlite3 *db and create tables common for the threads with shared +'struct ras_events ras' based on a reference count and free them in the +same way. + +Also protect critical code ras_mc_event_opendb() and ras_mc_event_closedb() +using mutex in the multi thread case from any regression caused by the +thread pre-emption. + +Reported-by: Lei Feng <fenglei47@h-partners.com> +Signed-off-by: Shiju Jose <shiju.jose@huawei.com> +Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org> +--- + ras-events.c | 16 +++++++++++++++- + ras-events.h | 4 +++- + ras-record.c | 12 ++++++++++++ + 3 files changed, 30 insertions(+), 2 deletions(-) + +diff --git a/ras-events.c b/ras-events.c +index 49e4f9a..5fe8e19 100644 +--- a/ras-events.c ++++ b/ras-events.c +@@ -625,19 +625,25 @@ static void *handle_ras_events_cpu(void *priv) + + log(TERM, LOG_INFO, "Listening to events on cpu %d\n", pdata->cpu); + if (pdata->ras->record_events) { ++ pthread_mutex_lock(&pdata->ras->db_lock); + if (ras_mc_event_opendb(pdata->cpu, pdata->ras)) { ++ pthread_mutex_unlock(&pdata->ras->db_lock); + log(TERM, LOG_ERR, "Can't open database\n"); + close(fd); + kbuffer_free(kbuf); + free(page); + return 0; + } ++ pthread_mutex_unlock(&pdata->ras->db_lock); + } + + read_ras_event(fd, pdata, kbuf, page); + +- if (pdata->ras->record_events) ++ if (pdata->ras->record_events) { ++ pthread_mutex_lock(&pdata->ras->db_lock); + ras_mc_event_closedb(pdata->cpu, pdata->ras); ++ pthread_mutex_unlock(&pdata->ras->db_lock); ++ } + + close(fd); + kbuffer_free(kbuf); +@@ -993,6 +999,11 @@ int handle_ras_events(int record_events) + + /* Poll doesn't work on this kernel. Fallback to pthread way */ + if (rc == -255) { ++ if (pthread_mutex_init(&ras->db_lock, NULL) != 0) { ++ log(SYSLOG, LOG_INFO, "sqlite db lock init has failed\n"); ++ goto err; ++ } ++ + log(SYSLOG, LOG_INFO, + "Opening one thread per cpu (%d threads)\n", cpus); + for (i = 0; i < cpus; i++) { +@@ -1005,6 +1016,8 @@ int handle_ras_events(int record_events) + i); + while (--i) + pthread_cancel(data[i].thread); ++ ++ pthread_mutex_destroy(&ras->db_lock); + goto err; + } + } +@@ -1012,6 +1025,7 @@ int handle_ras_events(int record_events) + /* Wait for all threads to complete */ + for (i = 0; i < cpus; i++) + pthread_join(data[i].thread, NULL); ++ pthread_mutex_destroy(&ras->db_lock); + } + + log(SYSLOG, LOG_INFO, "Huh! something got wrong. Aborting.\n"); +diff --git a/ras-events.h b/ras-events.h +index 6c9f507..649b0c0 100644 +--- a/ras-events.h ++++ b/ras-events.h +@@ -56,7 +56,9 @@ struct ras_events { + time_t uptime_diff; + + /* For ras-record */ +- void *db_priv; ++ void *db_priv; ++ int db_ref_count; ++ pthread_mutex_t db_lock; + + /* For the mce handler */ + struct mce_priv *mce_priv; +diff --git a/ras-record.c b/ras-record.c +index a367939..adc97a4 100644 +--- a/ras-record.c ++++ b/ras-record.c +@@ -763,6 +763,10 @@ int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras) + + printf("Calling %s()\n", __FUNCTION__); + ++ ras->db_ref_count++; ++ if (ras->db_ref_count > 1) ++ return 0; ++ + ras->db_priv = NULL; + + priv = calloc(1, sizeof(*priv)); +@@ -912,6 +916,13 @@ int ras_mc_event_closedb(unsigned int cpu, struct ras_events *ras) + + printf("Calling %s()\n", __func__); + ++ if (ras->db_ref_count > 0) ++ ras->db_ref_count--; ++ else ++ return -1; ++ if (ras->db_ref_count > 0) ++ return 0; ++ + if (!priv) + return -1; + +@@ -1018,6 +1029,7 @@ int ras_mc_event_closedb(unsigned int cpu, struct ras_events *ras) + log(TERM, LOG_ERR, + "cpu %u: Failed to shutdown sqlite: error = %d\n", cpu, rc); + free(priv); ++ ras->db_priv = NULL; + + return 0; + } + diff --git a/app-admin/rasdaemon/files/rasdaemon.confd b/app-admin/rasdaemon/files/rasdaemon.confd index 3241ef438dab..413f9f742cd4 100644 --- a/app-admin/rasdaemon/files/rasdaemon.confd +++ b/app-admin/rasdaemon/files/rasdaemon.confd @@ -1,2 +1,2 @@ # pass the --record option if sqlite is enabled and you wish store events in sqlite -#RASDAEMON_ARGS=--record +RASDAEMON_ARGS=--record diff --git a/app-admin/rasdaemon/files/sysconfig-fix-0.6.7.patch b/app-admin/rasdaemon/files/sysconfig-fix-0.6.7.patch deleted file mode 100644 index ce4f5ce92bc2..000000000000 --- a/app-admin/rasdaemon/files/sysconfig-fix-0.6.7.patch +++ /dev/null @@ -1,32 +0,0 @@ -From 2379c720a7e490854a2f352ca53af6fbd99c0832 Mon Sep 17 00:00:00 2001 -From: Matt Whitlock <whitslack@users.noreply.github.com> -Date: Wed, 9 Jun 2021 10:25:18 -0400 -Subject: [PATCH] configure.ac: fix SYSCONFDEFDIR default value - -configure.ac was using AC_ARG_WITH incorrectly, yielding a generated configure script like: - - # Check whether --with-sysconfdefdir was given. - if test "${with_sysconfdefdir+set}" = set; then : - withval=$with_sysconfdefdir; SYSCONFDEFDIR=$withval - else - "/etc/sysconfig" - fi - -This commit fixes the default case so that the SYSCONFDEFDIR variable is assigned the value "/etc/sysconfig" rather than trying to execute "/etc/sysconfig" as a command. ---- - configure.ac | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/configure.ac b/configure.ac -index f7d1947..33b81fe 100644 ---- a/configure.ac -+++ b/configure.ac -@@ -172,7 +172,7 @@ AC_SUBST([RASSTATEDIR]) - AC_ARG_WITH(sysconfdefdir, - AC_HELP_STRING([--with-sysconfdefdir=DIR], [rasdaemon environment file dir]), - [SYSCONFDEFDIR=$withval], -- ["/etc/sysconfig"]) -+ [SYSCONFDEFDIR=/etc/sysconfig]) - AC_SUBST([SYSCONFDEFDIR]) - - AC_DEFINE([RAS_DB_FNAME], ["ras-mc_event.db"], [ras events database]) |