Debug data for ltp qemu hang on debian10-ty-2


Richard Purdie
 

Hi All,

We had a hanging qemu process:

https://autobuilder.yoctoproject.org/typhoon/#/builders/95/builds/1898

This is on debian10-ty-2.yocto.io.

I was able to ssh into the image on the worker with ssh root@....2
and it looks to be hung during the controllers ltp tests. I've dumped some
output below; I'm not too familiar with ltp, so I'm struggling to understand
where it has got to.

The processes are still around so we may be able to save data from
them, but the servers will be rebooted tomorrow during maintenance, at which
point we'd lose this unless we ask Michael to defer.

Cheers,

Richard


root@qemux86-64:/opt/ltp/results# ps ax
PID TTY STAT TIME COMMAND
1 ? Ss 0:03 init [5]
2 ? S 0:00 [kthreadd]
3 ? I< 0:00 [rcu_gp]
4 ? I< 0:00 [rcu_par_gp]
6 ? I< 0:00 [kworker/0:0H-events_highpri]
8 ? I< 0:00 [mm_percpu_wq]
9 ? S 0:00 [rcu_tasks_kthre]
10 ? S 0:00 [rcu_tasks_rude_]
11 ? S 0:00 [rcu_tasks_trace]
12 ? S 0:00 [ksoftirqd/0]
13 ? I 0:06 [rcu_preempt]
14 ? S 0:00 [migration/0]
15 ? S 0:00 [cpuhp/0]
16 ? S 0:00 [cpuhp/1]
17 ? S 0:00 [migration/1]
18 ? S 0:00 [ksoftirqd/1]
20 ? I< 0:00 [kworker/1:0H-events_highpri]
21 ? S 0:00 [cpuhp/2]
22 ? S 0:00 [migration/2]
23 ? S 0:00 [ksoftirqd/2]
25 ? I< 0:00 [kworker/2:0H-kblockd]
26 ? S 0:00 [cpuhp/3]
27 ? S 0:00 [migration/3]
28 ? S 0:00 [ksoftirqd/3]
30 ? I< 0:00 [kworker/3:0H-kblockd]
31 ? S 0:00 [kdevtmpfs]
32 ? I< 0:00 [netns]
34 ? S 0:00 [oom_reaper]
35 ? I< 0:00 [writeback]
36 ? S 0:07 [kcompactd0]
73 ? I< 0:00 [kblockd]
74 ? I< 0:00 [blkcg_punt_bio]
75 ? I< 0:00 [ata_sff]
76 ? I< 0:00 [md]
78 ? I< 0:00 [kworker/0:1H-kblockd]
79 ? I< 0:00 [rpciod]
80 ? I< 0:00 [kworker/u9:0]
81 ? I< 0:00 [xprtiod]
82 ? S 0:00 [kswapd0]
83 ? I< 0:00 [nfsiod]
84 ? I< 0:00 [cifsiod]
85 ? I< 0:00 [smb3decryptd]
86 ? I< 0:00 [cifsfileinfoput]
87 ? I< 0:00 [cifsoplockd]
89 ? I< 0:00 [acpi_thermal_pm]
91 ? S 0:00 [hwrng]
92 ? I< 0:00 [ttm_swap]
93 ? I< 0:00 [nvme-wq]
94 ? I< 0:00 [nvme-reset-wq]
95 ? I< 0:00 [nvme-delete-wq]
96 ? S 0:00 [scsi_eh_0]
97 ? I< 0:00 [scsi_tmf_0]
98 ? S 0:00 [scsi_eh_1]
99 ? I< 0:00 [scsi_tmf_1]
100 ? S 0:00 [scsi_eh_2]
101 ? I< 0:00 [scsi_tmf_2]
102 ? S 0:00 [scsi_eh_3]
103 ? I< 0:00 [scsi_tmf_3]
104 ? S 0:00 [scsi_eh_4]
105 ? I< 0:00 [scsi_tmf_4]
106 ? S 0:00 [scsi_eh_5]
107 ? I< 0:00 [scsi_tmf_5]
114 ? I< 0:00 [raid5wq]
115 ? I< 0:00 [dm_bufio_cache]
116 ? I< 0:00 [ipv6_addrconf]
124 ? S 0:03 [jbd2/vda-8]
125 ? I< 0:00 [ext4-rsv-conver]
160 ? Ss 0:00 /sbin/udevd -d
358 ? I< 0:00 [kworker/2:1H-kblockd]
525 ? Ss 0:00 /usr/bin/dbus-daemon --system
533 ? Ss 0:05 /usr/sbin/connmand
539 ? S 0:00 xinit /etc/X11/Xsession -- /usr/bin/Xorg :0 -br -pn
545 ? S 0:01 /usr/sbin/wpa_supplicant -u
548 ? S<sl 0:12 /usr/bin/Xorg :0 -br -pn
551 ? S 0:00 matchbox-window-manager -theme Sato -use_cursor no
556 ? S 0:00 dbus-launch --sh-syntax --exit-with-sessionsh-keygen -f "/home/pokybuild/.ssh/known_hosts" -R "192.168.7.2"
557 ? Ss 0:00 /usr/bin/dbus-daemon --syslog --fork --print-pid 5 --print-address 7 --session
576 ? Sl 0:00 /usr/libexec/at-spi-bus-launcher --launch-immediately
580 ? Sl 0:00 connman-applet
602 ? S 0:00 /usr/bin/dbus-daemon --config-file=/usr/share/defaults/at-spi2/accessibility.conf --nofork --print-address 3
612 ? Sl 0:00 matchbox-desktop
613 ? Sl 0:06 matchbox-panel --start-applets showdesktop,windowselector --end-applets clock,,systray,startup-notify,notify
616 ? S 0:00 /usr/libexec/gconfd-2
620 ? Ss 0:00 /usr/sbin/dropbear -r /etc/dropbear/dropbear_rsa_host_key -p 22 -B
621 ? S 0:00 /usr/bin/settings-daemon
628 ? Sl 0:00 /usr/sbin/console-kit-daemon --no-daemon
629 ? Ss 0:00 /usr/sbin/rpcbind
700 ? S 0:00 /usr/libexec/bluetooth/bluetoothd
712 ? Sl 0:00 /usr/libexec/at-spi2-registryd --use-gnome-session
732 ? S 0:00 avahi-daemon: running [qemux86-64.local]
733 ? S 0:00 avahi-daemon: chroot helper
737 ? Ss 0:00 /usr/sbin/ofonod
741 ? Ss 0:00 /usr/sbin/crond
750 ? Ss 0:00 /bin/sh /bin/start_getty 115200 ttyS0 vt102
751 ? Ss 0:00 /bin/sh /bin/start_getty 115200 ttyS1 vt102
752 tty1 Ss+ 0:00 /sbin/getty 38400 tty1
755 ttyS0 Ss+ 0:00 /sbin/getty -L 115200 ttyS0 vt102
756 ttyS1 Ss+ 0:00 -sh
760 ? I< 0:00 [kworker/1:1H-kblockd]
1087 ? I< 0:00 [kworker/3:1H-kblockd]
3886 ? D 0:00 cgroup_xattr
3887 ? R 0:00 /usr/sbin/dropbear -r /etc/dropbear/dropbear_rsa_host_key -p 22 -B
3888 pts/0 Ss 0:00 -sh
3892 ? I 0:00 [kworker/u8:2-events_unbound]
3906 ? I 0:00 [kworker/u8:0-events_unbound]
3911 ? S 0:00 /usr/sbin/dropbear -r /etc/dropbear/dropbear_rsa_host_key -p 22 -B
3912 pts/1 Ss+ 0:00 -sh
3918 pts/0 R+ 0:00 ps ax
4451 ? I 0:00 [kworker/2:1-mm_percpu_wq]
4454 ? I 0:00 [kworker/1:0-rcu_gp]
4569 ? I 0:00 [kworker/2:4-rcu_gp]
4620 ? I 0:00 [kworker/0:2-rcu_gp]
5157 ? S 0:00 /usr/sbin/dropbear -r /etc/dropbear/dropbear_rsa_host_key -p 22 -B
5158 ? S 0:00 /bin/sh /opt/ltp/runltp -f controllers -p -q -r /opt/ltp -l /opt/ltp/results/controllers -I 1 -d /opt/ltp
5200 ? S 0:00 /opt/ltp/bin/ltp-pan -q -e -S -a 5158 -n 5158 -p -f /opt/ltp/ltp-qYESo05pbN/alltests -l /opt/ltp/results/controllers -C /opt/ltp/output/LTP_RUN_ON-controllers.failed -T /opt/ltp/output/LTP_RUN_ON-controllers.tconf
12503 ? I 0:00 [kworker/u8:1-events_unbound]
12921 ? I 0:00 [kworker/1:2-mm_percpu_wq]
12961 ? I 0:00 [kworker/0:0-events]
13462 ? I 0:00 [kworker/3:3-rcu_gp]
14966 ? S 0:00 /sbin/syslogd -n -O /var/log/messages
14969 ? S 0:01 /sbin/klogd -n
21420 ? I 0:00 [kworker/3:0-mm_percpu_wq]

root@qemux86-64:/opt/ltp/results# ls -la
total 172
drwxr-xr-x 2 root root 4096 May 11 10:55 .
drwxr-xr-x 13 root root 4096 May 11 10:55 ..
-rw-r--r-- 1 root root 6207 May 11 10:55 containers
-rw-r--r-- 1 root root 20494 May 11 10:58 controllers
-rw-r--r-- 1 root root 2466 May 11 10:46 dio
-rw-r--r-- 1 root root 561 May 11 10:46 io
-rw-r--r-- 1 root root 969 May 11 10:53 ipc
-rw-r--r-- 1 root root 1106 May 11 10:22 math
-rw-r--r-- 1 root root 5255 May 11 10:53 mm
-rw-r--r-- 1 root root 493 May 11 10:55 nptl
-rw-r--r-- 1 root root 901 May 11 10:55 pty
-rw-r--r-- 1 root root 1106 May 11 10:55 sched
-rw-r--r-- 1 root root 91210 May 11 10:40 syscalls

root@qemux86-64:/opt/ltp/results# tail controllers
cpuset_exclusive CONF 32
cpuset_hierarchy CONF 32
cpuset_syscall CONF 32
cpuset_sched_domains CONF 32
cpuset_load_balance CONF 32
cpuset_hotplug CONF 32
cpuset_memory CONF 32
cpuset_memory_pressure CONF 32
cpuset_memory_spread CONF 32
cpuset_regression_test FAIL 6


root@qemux86-64:/opt/ltp/results# tail ../output/LTP_RUN_ON-controllers.failed
cgroup_fj_stress_net_prio_10_3_each cgroup_fj_stress.sh net_prio 10 3 each
cgroup_fj_stress_net_prio_1_200_each cgroup_fj_stress.sh net_prio 1 200 each
cgroup_fj_stress_net_prio_200_1_each cgroup_fj_stress.sh net_prio 200 1 each
cpuacct_1_1 cpuacct.sh 1 1
cpuacct_1_10 cpuacct.sh 1 10
cpuacct_10_10 cpuacct.sh 10 10
cpuacct_1_100 cpuacct.sh 1 100
cpuacct_100_1 cpuacct.sh 100 1
cpuacct_100_100 cpuacct.sh 100 100
cpuset_regression_test cpuset_regression_test.sh