[ptest-runner][PATCH 3/3] utils.c: add system data collection when a test gets stuck.


Alexander Kanavin
 

Currently, ptest-runner simply kills a stuck test without further ado,
which is not at all helpful when trying to figure out why the hang happens
(especially if such hangs are intermittent and rare). This patch adds a script
that is executed before the test is killed; ideas on what else it should
collect are welcome.

Signed-off-by: Alexander Kanavin <alex@...>
---
Makefile | 2 +-
ptest-runner-collect-system-data | 6 ++++++
utils.c | 24 ++++++++++++++++++++++++
3 files changed, 31 insertions(+), 1 deletion(-)
create mode 100755 ptest-runner-collect-system-data

diff --git a/Makefile b/Makefile
index a6372de..168cf5a 100644
--- a/Makefile
+++ b/Makefile
@@ -43,7 +43,7 @@ $(TEST_EXECUTABLE): $(TEST_OBJECTS)
$(CC) $(LDFLAGS) $(TEST_OBJECTS) -o $@ $(TEST_LIBSTATIC) $(TEST_LDFLAGS)

check: $(TEST_EXECUTABLE)
- ./$(TEST_EXECUTABLE) -d $(TEST_DATA)
+ PATH=.:$(PATH) ./$(TEST_EXECUTABLE) -d $(TEST_DATA)

.c.o:
$(CC) $(CFLAGS) -c $< -o $@
diff --git a/ptest-runner-collect-system-data b/ptest-runner-collect-system-data
new file mode 100755
index 0000000..ba335da
--- /dev/null
+++ b/ptest-runner-collect-system-data
@@ -0,0 +1,6 @@
+#!/bin/sh
+# Other ideas on what to do when a ptest gets stuck welcome.
+dmesg         # kernel log messages
+pstree -a -l  # process tree with command-line arguments, long lines not truncated
+df            # filesystem space usage
+free          # memory and swap usage
diff --git a/utils.c b/utils.c
index 58c3aa1..a67ac11 100644
--- a/utils.c
+++ b/utils.c
@@ -281,6 +281,27 @@ close_fds(void)
}
}

+/* Dump the output of ptest-runner-collect-system-data (found via PATH) to fout. */
+static void
+collect_system_state(FILE* fout)
+{
+ const char *cmd = "ptest-runner-collect-system-data";
+ char buf[1024];
+ FILE *fp;
+
+ if ((fp = popen(cmd, "r")) == NULL) {
+ fprintf(fout, "Error opening pipe!\n");
+ return; /* fp is NULL: fgets()/pclose() below must not be reached */
+ }
+ while (fgets(buf, sizeof(buf), fp) != NULL) {
+ fputs(buf, fout);
+ }
+ /* nonzero covers pclose() failure as well as a failing or missing script */
+ if (pclose(fp)) {
+ fprintf(fout, "Command not found or exited with error status\n");
+ }
+}
+
static void *
read_child(void *arg)
{
@@ -313,6 +334,9 @@ read_child(void *arg)
}

} else if (r == 0) {
+ // no output from the test after a timeout; the test is stuck, so collect
+ // as much data from the system as possible and kill the test
+ collect_system_state(_child_reader.fps[0]);
_child_reader.timeouted = 1;
kill(-_child_reader.pid, SIGKILL);
}
--
2.33.0

Join yocto@lists.yoctoproject.org to automatically receive all group messages.