From: "Luis R. Rodriguez" mcgrof [ at ] suse.com
The purpose of commit e64fae55 (January 2012) on systemd was
to introduce a timeout for drivers that are not using asynch
firmware loading. That commit actually would not have triggered
in full effect on udev's usage of kmod for module loading until
commit 786235ee was merged on Linux (Nov 2013).
As it is today [ systemd e64fae55 + kernel e64fae55 ] will trigger
a SIGKILL to udev's usage of kmod for module loading after a 30
second timeout. Hannes modified systemd through commit 9719859c
to enable a custom timeout. A different timeout value can only
prevent a kill after a maximum amount of time is known to be
required for a system though.
Penalizing a device driver for not using asynch firmware loading
by killing it and preventing it from loading *might* have originally
been reasonable but its not the only reason why some drivers might
take more than 30 seconds to load. Some drivers might actually
require to take over 30 seconds on just writing the firmware to the
hardware. The worst case scenario however would be to run into
storage drivers which might go over the timeout value in which
case currently the system would simply be unbootable. Fixing
drivers should be our *top priority* but the current state of
affairs has proven to make it very difficult to debug why a
driver is failing to load.
Instead of always forcing a kill lets only warn for workers
handling kmod. This should enable easier methods for determining
which drivers need fixing and the logic would only be used on
workers dealing with kmod module loading. This would also pave
the way for us to eventually customize behaviour for the different
enum udev_builtin_cmd.
Cc: Greg Kroah-Hartman gregkh [ at ] linuxfoundation.org
Cc: Tetsuo Handa penguin-kernel [ at ] I-love.SAKURA.ne.jp
Cc: Joseph Salisbury joseph.salisbury [ at ] canonical.com
Cc: Kay Sievers kay [ at ] vrfy.org
Cc: One Thousand Gnomes gnomes [ at ] lxorguk.ukuu.org.uk
Cc: Tim Gardner tim.gardner [ at ] canonical.com
Cc: Pierre Fersing pierre-fersing [ at ] pierref.org
Cc: Andrew Morton akpm [ at ] linux-foundation.org
Cc: Oleg Nesterov oleg [ at ] redhat.com
Cc: Benjamin Poirier bpoirier [ at ] suse.de
Cc: Greg Kroah-Hartman gregkh [ at ] linuxfoundation.org
Cc: Nagalakshmi Nandigama nagalakshmi.nandigama [ at ] avagotech.com
Cc: Praveen Krishnamoorthy praveen.krishnamoorthy [ at ] avagotech.com
Cc: Sreekanth Reddy sreekanth.reddy [ at ] avagotech.com
Cc: Abhijit Mahajan abhijit.mahajan [ at ] avagotech.com
Cc: Hariprasad S hariprasad [ at ] chelsio.com
Cc: Santosh Rastapur santosh [ at ] chelsio.com
Cc: Hannes Reinecke hare [ at ] suse.de
This v2 modifies the language slightly and adds a bit more description
about the intent to allow this to help grow customizing the timeout
handling for the different enum udev_builtin_cmd.
src/udev/udev-event.c | 20 ++++++++++++++++++++
src/udev/udev.h | 1 +
src/udev/udevd.c | 9 +++++++++
3 files changed, 30 insertions(+)
diff --git a/src/udev/udev-event.c b/src/udev/udev-event.c
index 00cd6d4..8ddc911 100644
--- a/src/udev/udev-event.c
+++ b/src/udev/udev-event.c
@@ -880,6 +880,25 @@ void udev_event_execute_rules(struct udev_event *event,
+#ifdef HAVE_KMOD
+static void udev_builtin_set_timeout_pref(enum udev_builtin_cmd builtin_cmd,
+ struct udev_event *event)
+{
+ switch (builtin_cmd) {
+ case UDEV_BUILTIN_KMOD:
+ event->warn_timeout = true; /* only warn on timeout */
+ default:
+ event->warn_timeout = false; /* kill otherwise */
+ }
+}
+#else
+void udev_builtin_set_timeout_pref(enum udev_builtin_cmd builtin_cmd,
+ struct udev_event *event)
+{
+ return;
+}
+#endif
void udev_event_execute_run(struct udev_event *event, usec_t timeout_usec, const sigset_t *sigmask) {
struct udev_list_entry *list_entry;
@@ -890,6 +909,7 @@ void udev_event_execute_run(struct udev_event *event, usec_t timeout_usec, const
if (builtin_cmd < UDEV_BUILTIN_MAX) {
char command[UTIL_PATH_SIZE];
+ udev_builtin_set_timeout_pref(builtin_cmd, event);
udev_event_apply_format(event, cmd, command, sizeof(command));
udev_builtin_run(event->dev, builtin_cmd, command, false);
} else {
diff --git a/src/udev/udev.h b/src/udev/udev.h
index 4aca70b..c17feae 100644
--- a/src/udev/udev.h
+++ b/src/udev/udev.h
@@ -58,6 +58,7 @@ struct udev_event {
bool name_final;
bool devlink_final;
bool run_final;
+ bool warn_timeout;
};
struct udev_watch {
diff --git a/src/udev/udevd.c b/src/udev/udevd.c
index f882cfb..d7ee4d2 100644
--- a/src/udev/udevd.c
+++ b/src/udev/udevd.c
@@ -100,6 +100,7 @@ struct event {
dev_t devnum;
int ifindex;
bool is_block;
+ bool warn_on_timeout;
#ifdef HAVE_FIRMWARE
bool nodelay;
#endif
@@ -304,6 +305,9 @@ static void worker_new(struct event *event) {
+ if (udev_event->warn_timeout)
+ event->warn_on_timeout = true;
/* apply rules, create node, symlinks */
udev_event_execute_rules(udev_event, event_timeout_usec, rules, 눨♪_orig);
@@ -1393,6 +1397,11 @@ int main(int argc, char *argv[]) {
continue;
if ((now(CLOCK_MONOTONIC) - worker->event_start_usec) > event_timeout_usec) {
+ if (worker->event->warn_on_timeout) {
+ log_error("worker [%u] %s timeout hit; fix caller!",
+ worker->pid, worker->event->devpath);
+ continue;
+ }
log_error("worker [%u] %s timeout; kill it", worker->pid, worker->event->devpath);
kill(worker->pid, SIGKILL);
worker->state = WORKER_KILLED;
The purpose of commit e64fae55 (January 2012) on systemd was
to introduce a timeout for drivers that are not using asynch
firmware loading. That commit actually would not have triggered
in full effect on udev's usage of kmod for module loading until
commit 786235ee was merged on Linux (Nov 2013).
As it is today [ systemd e64fae55 + kernel e64fae55 ] will trigger
a SIGKILL to udev's usage of kmod for module loading after a 30
second timeout. Hannes modified systemd through commit 9719859c
to enable a custom timeout. A different timeout value can only
prevent a kill after a maximum amount of time is known to be
required for a system though.
Penalizing a device driver for not using asynch firmware loading
by killing it and preventing it from loading *might* have originally
been reasonable but its not the only reason why some drivers might
take more than 30 seconds to load. Some drivers might actually
require to take over 30 seconds on just writing the firmware to the
hardware. The worst case scenario however would be to run into
storage drivers which might go over the timeout value in which
case currently the system would simply be unbootable. Fixing
drivers should be our *top priority* but the current state of
affairs has proven to make it very difficult to debug why a
driver is failing to load.
Instead of always forcing a kill lets only warn for workers
handling kmod. This should enable easier methods for determining
which drivers need fixing and the logic would only be used on
workers dealing with kmod module loading. This would also pave
the way for us to eventually customize behaviour for the different
enum udev_builtin_cmd.
Cc: Greg Kroah-Hartman gregkh [ at ] linuxfoundation.org
Cc: Tetsuo Handa penguin-kernel [ at ] I-love.SAKURA.ne.jp
Cc: Joseph Salisbury joseph.salisbury [ at ] canonical.com
Cc: Kay Sievers kay [ at ] vrfy.org
Cc: One Thousand Gnomes gnomes [ at ] lxorguk.ukuu.org.uk
Cc: Tim Gardner tim.gardner [ at ] canonical.com
Cc: Pierre Fersing pierre-fersing [ at ] pierref.org
Cc: Andrew Morton akpm [ at ] linux-foundation.org
Cc: Oleg Nesterov oleg [ at ] redhat.com
Cc: Benjamin Poirier bpoirier [ at ] suse.de
Cc: Greg Kroah-Hartman gregkh [ at ] linuxfoundation.org
Cc: Nagalakshmi Nandigama nagalakshmi.nandigama [ at ] avagotech.com
Cc: Praveen Krishnamoorthy praveen.krishnamoorthy [ at ] avagotech.com
Cc: Sreekanth Reddy sreekanth.reddy [ at ] avagotech.com
Cc: Abhijit Mahajan abhijit.mahajan [ at ] avagotech.com
Cc: Hariprasad S hariprasad [ at ] chelsio.com
Cc: Santosh Rastapur santosh [ at ] chelsio.com
Cc: Hannes Reinecke hare [ at ] suse.de
This v2 modifies the language slightly and adds a bit more description
about the intent to allow this to help grow customizing the timeout
handling for the different enum udev_builtin_cmd.
src/udev/udev-event.c | 20 ++++++++++++++++++++
src/udev/udev.h | 1 +
src/udev/udevd.c | 9 +++++++++
3 files changed, 30 insertions(+)
diff --git a/src/udev/udev-event.c b/src/udev/udev-event.c
index 00cd6d4..8ddc911 100644
--- a/src/udev/udev-event.c
+++ b/src/udev/udev-event.c
@@ -880,6 +880,25 @@ void udev_event_execute_rules(struct udev_event *event,
+#ifdef HAVE_KMOD
+static void udev_builtin_set_timeout_pref(enum udev_builtin_cmd builtin_cmd,
+ struct udev_event *event)
+{
+ switch (builtin_cmd) {
+ case UDEV_BUILTIN_KMOD:
+ event->warn_timeout = true; /* only warn on timeout */
+ default:
+ event->warn_timeout = false; /* kill otherwise */
+ }
+}
+#else
+void udev_builtin_set_timeout_pref(enum udev_builtin_cmd builtin_cmd,
+ struct udev_event *event)
+{
+ return;
+}
+#endif
void udev_event_execute_run(struct udev_event *event, usec_t timeout_usec, const sigset_t *sigmask) {
struct udev_list_entry *list_entry;
@@ -890,6 +909,7 @@ void udev_event_execute_run(struct udev_event *event, usec_t timeout_usec, const
if (builtin_cmd < UDEV_BUILTIN_MAX) {
char command[UTIL_PATH_SIZE];
+ udev_builtin_set_timeout_pref(builtin_cmd, event);
udev_event_apply_format(event, cmd, command, sizeof(command));
udev_builtin_run(event->dev, builtin_cmd, command, false);
} else {
diff --git a/src/udev/udev.h b/src/udev/udev.h
index 4aca70b..c17feae 100644
--- a/src/udev/udev.h
+++ b/src/udev/udev.h
@@ -58,6 +58,7 @@ struct udev_event {
bool name_final;
bool devlink_final;
bool run_final;
+ bool warn_timeout;
};
struct udev_watch {
diff --git a/src/udev/udevd.c b/src/udev/udevd.c
index f882cfb..d7ee4d2 100644
--- a/src/udev/udevd.c
+++ b/src/udev/udevd.c
@@ -100,6 +100,7 @@ struct event {
dev_t devnum;
int ifindex;
bool is_block;
+ bool warn_on_timeout;
#ifdef HAVE_FIRMWARE
bool nodelay;
#endif
@@ -304,6 +305,9 @@ static void worker_new(struct event *event) {
+ if (udev_event->warn_timeout)
+ event->warn_on_timeout = true;
/* apply rules, create node, symlinks */
udev_event_execute_rules(udev_event, event_timeout_usec, rules, 눨♪_orig);
@@ -1393,6 +1397,11 @@ int main(int argc, char *argv[]) {
continue;
if ((now(CLOCK_MONOTONIC) - worker->event_start_usec) > event_timeout_usec) {
+ if (worker->event->warn_on_timeout) {
+ log_error("worker [%u] %s timeout hit; fix caller!",
+ worker->pid, worker->event->devpath);
+ continue;
+ }
log_error("worker [%u] %s timeout; kill it", worker->pid, worker->event->devpath);
kill(worker->pid, SIGKILL);
worker->state = WORKER_KILLED;