[ptest-runner][PATCH] util.c: remove the usage of pipe for parent/child communication


Yi Fan Yu
 

If the output of the testing script is very large,
the fwrite called by the parent in
wait_child would start blocking. That would slow down
the parent speed in clearing the pipe buffer.

This is an issue because the pipe is Non-Blocking.
so the child(ex: a bash script)
who was designed to output to what he thinks is a
is stdout, might error out due to EAGAIN (pipe full).

The proposed fix is to remove pipes and just let the child
write directly to stdout (same his parent)

This would mean that the parent no longer control the output
of the child.

Signed-off-by: Yi Fan Yu <yifan.yu@windriver.com>
---
utils.c | 53 ++++++++---------------------------------------------
1 file changed, 8 insertions(+), 45 deletions(-)

diff --git a/utils.c b/utils.c
index a4e190e..5d62ced 100644
--- a/utils.c
+++ b/utils.c
@@ -274,12 +274,14 @@ run_child(char *run_ptest, int fd_stdout, int fd_stderr)
argv[0] = run_ptest;
argv[1] = NULL;

+ /* since it isn't use by the child, close(fd_stderr) ? */
+ close(fd_stderr);
+
dup2(fd_stdout, STDOUT_FILENO);
// XXX: Redirect stderr to stdout to avoid buffer ordering problems.
dup2(fd_stdout, STDERR_FILENO);

- /* since it isn't use by the child, close(fd_stderr) ? */
- close(fd_stderr); /* try using to see if this fixes bash run-read. rwm todo */
+
close_fds();

execv(run_ptest, argv);
@@ -290,20 +292,13 @@ run_child(char *run_ptest, int fd_stdout, int fd_stderr)
static inline int
wait_child(pid_t pid, int timeout, int *fds, FILE **fps, int *timeouted)
{
- struct pollfd pfds[2];
struct timespec sentinel;
clockid_t clock = CLOCK_MONOTONIC;
int looping = 1;
- int r;

int status = -1;
int waitflags;

- pfds[0].fd = fds[0];
- pfds[0].events = POLLIN;
- pfds[1].fd = fds[1];
- pfds[1].events = POLLIN;
-
if (clock_gettime(clock, &sentinel) == -1) {
clock = CLOCK_REALTIME;
clock_gettime(clock, &sentinel);
@@ -328,29 +323,10 @@ wait_child(pid_t pid, int timeout, int *fds, FILE **fps, int *timeouted)
if (waitpid(pid, &status, waitflags) == pid)
looping = 0;

- r = poll(pfds, 2, WAIT_CHILD_POLL_TIMEOUT_MS);
- if (r > 0) {
- char buf[WAIT_CHILD_BUF_MAX_SIZE];
- ssize_t n;
-
- if (pfds[0].revents != 0) {
- while ((n = read(fds[0], buf, WAIT_CHILD_BUF_MAX_SIZE)) > 0)
- fwrite(buf, (size_t)n, 1, fps[0]);
- }
-
- if (pfds[1].revents != 0) {
- while ((n = read(fds[1], buf, WAIT_CHILD_BUF_MAX_SIZE)) > 0) {
- fflush(fps[0]);
- fwrite(buf, (size_t)n, 1, fps[1]);
- fflush(fps[1]);
- }
- }
-
- clock_gettime(clock, &sentinel);
- }
- }
+ }

fflush(fps[0]);
+
return status;
}

@@ -412,8 +388,6 @@ run_ptests(struct ptest_list *head, const struct ptest_options opts,
char stime[GET_STIME_BUF_SIZE];

pid_t child;
- int pipefd_stdout[2];
- int pipefd_stderr[2];
int timeouted;
time_t sttime, entime;
time_t duration;
@@ -428,14 +402,6 @@ run_ptests(struct ptest_list *head, const struct ptest_options opts,

do
{
- if ((rc = pipe2(pipefd_stdout, O_NONBLOCK)) == -1)
- break;
-
- if ((rc = pipe2(pipefd_stderr, O_NONBLOCK)) == -1) {
- close(pipefd_stdout[0]);
- close(pipefd_stdout[1]);
- break;
- }
fprintf(fp, "START: %s\n", progname);
if (isatty(0) && ioctl(0, TIOCNOTTY) == -1) {
fprintf(fp, "ERROR: Unable to detach from controlling tty, %s\n", strerror(errno));
@@ -474,11 +440,10 @@ run_ptests(struct ptest_list *head, const struct ptest_options opts,
fprintf(fp, "ERROR: Unable to attach to controlling tty, %s\n", strerror(errno));
}

- run_child(p->run_ptest, pipefd_stdout[1], pipefd_stderr[1]);
+ run_child(p->run_ptest, STDOUT_FILENO, STDERR_FILENO);

} else {
int status;
- int fds[2]; fds[0] = pipefd_stdout[0]; fds[1] = pipefd_stderr[0];
FILE *fps[2]; fps[0] = fp; fps[1] = fp_stderr;

if (setpgid(child, pgid) == -1) {
@@ -489,7 +454,7 @@ run_ptests(struct ptest_list *head, const struct ptest_options opts,
fprintf(fp, "%s\n", get_stime(stime, GET_STIME_BUF_SIZE, sttime));
fprintf(fp, "BEGIN: %s\n", ptest_dir);

- status = wait_child(child, opts.timeout, fds, fps, &timeouted);
+ status = wait_child(child, opts.timeout, NULL, fps, &timeouted);
entime = time(NULL);
duration = entime - sttime;

@@ -510,8 +475,6 @@ run_ptests(struct ptest_list *head, const struct ptest_options opts,
PTEST_LIST_ITERATE_END
fprintf(fp, "STOP: %s\n", progname);

- close(pipefd_stdout[0]); close(pipefd_stdout[1]);
- close(pipefd_stderr[0]); close(pipefd_stderr[1]);
} while (0);

if (rc == -1)
--
2.25.1


Yi Fan Yu
 

i didn't spend too much time getting the patch to look nice.
It is just a proposal because this seems to be the root cause of a seemingly ...
unrelated bug.

https://bugzilla.yoctoproject.org/show_bug.cgi?id=14170

On 2/3/21 6:44 PM, Yi Fan Yu wrote:
If the output of the testing script is very large,
the fwrite called by the parent in
wait_child would start blocking. That would slow down
the parent speed in clearing the pipe buffer.

This is an issue because the pipe is Non-Blocking.
so the child(ex: a bash script)
who was designed to output to what he thinks is a
is stdout, might error out due to EAGAIN (pipe full).

The proposed fix is to remove pipes and just let the child
write directly to stdout (same his parent)

This would mean that the parent no longer control the output
of the child.

Signed-off-by: Yi Fan Yu <yifan.yu@windriver.com>
---
utils.c | 53 ++++++++---------------------------------------------
1 file changed, 8 insertions(+), 45 deletions(-)

diff --git a/utils.c b/utils.c
index a4e190e..5d62ced 100644
--- a/utils.c
+++ b/utils.c
@@ -274,12 +274,14 @@ run_child(char *run_ptest, int fd_stdout, int fd_stderr)
argv[0] = run_ptest;
argv[1] = NULL;
+ /* since it isn't use by the child, close(fd_stderr) ? */
+ close(fd_stderr);
+
dup2(fd_stdout, STDOUT_FILENO);
// XXX: Redirect stderr to stdout to avoid buffer ordering problems.
dup2(fd_stdout, STDERR_FILENO);
- /* since it isn't use by the child, close(fd_stderr) ? */
- close(fd_stderr); /* try using to see if this fixes bash run-read. rwm todo */
+
close_fds();
execv(run_ptest, argv);
@@ -290,20 +292,13 @@ run_child(char *run_ptest, int fd_stdout, int fd_stderr)
static inline int
wait_child(pid_t pid, int timeout, int *fds, FILE **fps, int *timeouted)
{
- struct pollfd pfds[2];
struct timespec sentinel;
clockid_t clock = CLOCK_MONOTONIC;
int looping = 1;
- int r;
int status = -1;
int waitflags;
- pfds[0].fd = fds[0];
- pfds[0].events = POLLIN;
- pfds[1].fd = fds[1];
- pfds[1].events = POLLIN;
-
if (clock_gettime(clock, &sentinel) == -1) {
clock = CLOCK_REALTIME;
clock_gettime(clock, &sentinel);
@@ -328,29 +323,10 @@ wait_child(pid_t pid, int timeout, int *fds, FILE **fps, int *timeouted)
if (waitpid(pid, &status, waitflags) == pid)
looping = 0;
- r = poll(pfds, 2, WAIT_CHILD_POLL_TIMEOUT_MS);
- if (r > 0) {
- char buf[WAIT_CHILD_BUF_MAX_SIZE];
- ssize_t n;
-
- if (pfds[0].revents != 0) {
- while ((n = read(fds[0], buf, WAIT_CHILD_BUF_MAX_SIZE)) > 0)
- fwrite(buf, (size_t)n, 1, fps[0]);
- }
-
- if (pfds[1].revents != 0) {
- while ((n = read(fds[1], buf, WAIT_CHILD_BUF_MAX_SIZE)) > 0) {
- fflush(fps[0]);
- fwrite(buf, (size_t)n, 1, fps[1]);
- fflush(fps[1]);
- }
- }
-
- clock_gettime(clock, &sentinel);
- }
- }
+ }
fflush(fps[0]);
+
return status;
}
@@ -412,8 +388,6 @@ run_ptests(struct ptest_list *head, const struct ptest_options opts,
char stime[GET_STIME_BUF_SIZE];
pid_t child;
- int pipefd_stdout[2];
- int pipefd_stderr[2];
int timeouted;
time_t sttime, entime;
time_t duration;
@@ -428,14 +402,6 @@ run_ptests(struct ptest_list *head, const struct ptest_options opts,
do
{
- if ((rc = pipe2(pipefd_stdout, O_NONBLOCK)) == -1)
- break;
-
- if ((rc = pipe2(pipefd_stderr, O_NONBLOCK)) == -1) {
- close(pipefd_stdout[0]);
- close(pipefd_stdout[1]);
- break;
- }
fprintf(fp, "START: %s\n", progname);
if (isatty(0) && ioctl(0, TIOCNOTTY) == -1) {
fprintf(fp, "ERROR: Unable to detach from controlling tty, %s\n", strerror(errno));
@@ -474,11 +440,10 @@ run_ptests(struct ptest_list *head, const struct ptest_options opts,
fprintf(fp, "ERROR: Unable to attach to controlling tty, %s\n", strerror(errno));
}
- run_child(p->run_ptest, pipefd_stdout[1], pipefd_stderr[1]);
+ run_child(p->run_ptest, STDOUT_FILENO, STDERR_FILENO);
} else {
int status;
- int fds[2]; fds[0] = pipefd_stdout[0]; fds[1] = pipefd_stderr[0];
FILE *fps[2]; fps[0] = fp; fps[1] = fp_stderr;
if (setpgid(child, pgid) == -1) {
@@ -489,7 +454,7 @@ run_ptests(struct ptest_list *head, const struct ptest_options opts,
fprintf(fp, "%s\n", get_stime(stime, GET_STIME_BUF_SIZE, sttime));
fprintf(fp, "BEGIN: %s\n", ptest_dir);
- status = wait_child(child, opts.timeout, fds, fps, &timeouted);
+ status = wait_child(child, opts.timeout, NULL, fps, &timeouted);
entime = time(NULL);
duration = entime - sttime;
@@ -510,8 +475,6 @@ run_ptests(struct ptest_list *head, const struct ptest_options opts,
PTEST_LIST_ITERATE_END
fprintf(fp, "STOP: %s\n", progname);
- close(pipefd_stdout[0]); close(pipefd_stdout[1]);
- close(pipefd_stderr[0]); close(pipefd_stderr[1]);
} while (0);
if (rc == -1)


Randy MacLeod
 

On 2021-02-03 6:47 p.m., Yi Fan Yu wrote:
i didn't spend too much time getting the patch to look nice.
It is just a proposal because this seems to be the root cause of a seemingly ...
unrelated bug.
https://bugzilla.yoctoproject.org/show_bug.cgi?id=14170
On 2/3/21 6:44 PM, Yi Fan Yu wrote:
If the output of the testing script is very large,
the fwrite called by the parent in
wait_child would start blocking. That would slow down
the parent speed in clearing the pipe buffer.

This is an issue because the pipe is Non-Blocking.
so the child(ex: a bash script)
who was designed to output to what he thinks is a
is stdout, might error out due to EAGAIN (pipe full).

The proposed fix is to remove pipes and just let the child
write directly to stdout (same his parent)

This would mean that the parent no longer control the output
of the child.
Thanks Yi Fan.

I'll have to look at this more closely later this week but
sharing file descriptors between parent and child seems like
a poor design. I have two main ideas and a work-around to mention.

First the work-around that we discussed for the failing
test in glib-2.0 codegen, is to limit the output until we fix
ptest-runner. I think you said that's a one line change and
it would give us more time to fix ptest-runner should we need it.

To fix ptest-runner I believe that it should be able to
handle arbitrary input or at least a few MB of input from
the child. I'll need to read the code more carefully but can't we:
1. add some internal buffering so that we can drain the
pipe before checking for timeouts, etc.
2. We could run ptest-runner as a higher priority task
than the code under test so that the parent has more
time to drain the pipe.
Neither of these will fix the problem completely but together
these changes may prevent all but the most verbose tests from
failing due to a full pipe.

Since we don't often see this problem, we could hide the issue
in the ptest-runner script for glib using some command line
buffer executable that already exists (*) as a linux utility.

I've CCed Anibal since he designed and wrote ptest-runner.

All for now,
../Randy

*
A quick search turned up:

https://unix.stackexchange.com/questions/188175/completely-buffer-command-output-before-piping-to-another-command

$ sudo apt install moreutils
$ file `which sponge`
/usr/bin/sponge: ELF 64-bit LSB shared object, x86-64, version 1 (SYSV), dynamically linked, interpreter /lib64/ld-linux-x86-64.so.2, BuildID[sha1]=f72063a4a10044e902905de1a8c1640f611394f9, for GNU/Linux 3.2.0, stripped

but maybe there's a better alternative since we don't want to buffer
the full input of the pipe.

There's also mbuffer but I'll leave it to you to investigate.



Signed-off-by: Yi Fan Yu <yifan.yu@windriver.com>
---
  utils.c | 53 ++++++++---------------------------------------------
  1 file changed, 8 insertions(+), 45 deletions(-)

diff --git a/utils.c b/utils.c
index a4e190e..5d62ced 100644
--- a/utils.c
+++ b/utils.c
@@ -274,12 +274,14 @@ run_child(char *run_ptest, int fd_stdout, int fd_stderr)
      argv[0] = run_ptest;
      argv[1] = NULL;
+    /* since it isn't use by the child, close(fd_stderr) ? */
+    close(fd_stderr);
+
      dup2(fd_stdout, STDOUT_FILENO);
      // XXX: Redirect stderr to stdout to avoid buffer ordering problems.
      dup2(fd_stdout, STDERR_FILENO);
-    /* since it isn't use by the child, close(fd_stderr) ? */
-    close(fd_stderr); /* try using to see if this fixes bash run-read. rwm todo */
+
      close_fds();
      execv(run_ptest, argv);
@@ -290,20 +292,13 @@ run_child(char *run_ptest, int fd_stdout, int fd_stderr)
  static inline int
  wait_child(pid_t pid, int timeout, int *fds, FILE **fps, int *timeouted)
  {
-    struct pollfd pfds[2];
      struct timespec sentinel;
      clockid_t clock = CLOCK_MONOTONIC;
      int looping = 1;
-    int r;
      int status = -1;
      int waitflags;
-    pfds[0].fd = fds[0];
-    pfds[0].events = POLLIN;
-    pfds[1].fd = fds[1];
-    pfds[1].events = POLLIN;
-
      if (clock_gettime(clock, &sentinel) == -1) {
          clock = CLOCK_REALTIME;
          clock_gettime(clock, &sentinel);
@@ -328,29 +323,10 @@ wait_child(pid_t pid, int timeout, int *fds, FILE **fps, int *timeouted)
          if (waitpid(pid, &status, waitflags) == pid)
              looping = 0;
-        r = poll(pfds, 2, WAIT_CHILD_POLL_TIMEOUT_MS);
-        if (r > 0) {
-            char buf[WAIT_CHILD_BUF_MAX_SIZE];
-            ssize_t n;
-
-            if (pfds[0].revents != 0) {
-                while ((n = read(fds[0], buf, WAIT_CHILD_BUF_MAX_SIZE)) > 0)
-                    fwrite(buf, (size_t)n, 1, fps[0]);
-            }
-
-            if (pfds[1].revents != 0) {
-                while ((n = read(fds[1], buf, WAIT_CHILD_BUF_MAX_SIZE)) > 0) {
-                    fflush(fps[0]);
-                    fwrite(buf, (size_t)n, 1, fps[1]);
-                    fflush(fps[1]);
-                }
-            }
-
-            clock_gettime(clock, &sentinel);
-        }
-    }
+        }
      fflush(fps[0]);
+
      return status;
  }
@@ -412,8 +388,6 @@ run_ptests(struct ptest_list *head, const struct ptest_options opts,
      char stime[GET_STIME_BUF_SIZE];
      pid_t child;
-    int pipefd_stdout[2];
-    int pipefd_stderr[2];
      int timeouted;
      time_t sttime, entime;
      time_t duration;
@@ -428,14 +402,6 @@ run_ptests(struct ptest_list *head, const struct ptest_options opts,
      do
      {
-        if ((rc = pipe2(pipefd_stdout, O_NONBLOCK)) == -1)
-            break;
-
-        if ((rc = pipe2(pipefd_stderr, O_NONBLOCK)) == -1) {
-            close(pipefd_stdout[0]);
-            close(pipefd_stdout[1]);
-            break;
-        }
          fprintf(fp, "START: %s\n", progname);
          if (isatty(0) && ioctl(0, TIOCNOTTY) == -1) {
              fprintf(fp, "ERROR: Unable to detach from controlling tty, %s\n", strerror(errno));
@@ -474,11 +440,10 @@ run_ptests(struct ptest_list *head, const struct ptest_options opts,
                      fprintf(fp, "ERROR: Unable to attach to controlling tty, %s\n", strerror(errno));
                  }
-                run_child(p->run_ptest, pipefd_stdout[1], pipefd_stderr[1]);
+                run_child(p->run_ptest, STDOUT_FILENO, STDERR_FILENO);
              } else {
                  int status;
-                int fds[2]; fds[0] = pipefd_stdout[0]; fds[1] = pipefd_stderr[0];
                  FILE *fps[2]; fps[0] = fp; fps[1] = fp_stderr;
                  if (setpgid(child, pgid) == -1) {
@@ -489,7 +454,7 @@ run_ptests(struct ptest_list *head, const struct ptest_options opts,
                  fprintf(fp, "%s\n", get_stime(stime, GET_STIME_BUF_SIZE, sttime));
                  fprintf(fp, "BEGIN: %s\n", ptest_dir);
-                status = wait_child(child, opts.timeout, fds, fps, &timeouted);
+                status = wait_child(child, opts.timeout, NULL, fps, &timeouted);
                  entime = time(NULL);
                  duration = entime - sttime;
@@ -510,8 +475,6 @@ run_ptests(struct ptest_list *head, const struct ptest_options opts,
          PTEST_LIST_ITERATE_END
          fprintf(fp, "STOP: %s\n", progname);
-        close(pipefd_stdout[0]); close(pipefd_stdout[1]);
-        close(pipefd_stderr[0]); close(pipefd_stderr[1]);
      } while (0);
      if (rc == -1)


--
# Randy MacLeod
# Wind River Linux