From 9ada7d9aad8cbb4a491fc0bae4fdc2c58958be3a Mon Sep 17 00:00:00 2001 From: Fabian Homborg Date: Tue, 14 Dec 2021 20:32:09 +0100 Subject: [PATCH] read: Also read in chunks when directly redirected We can't always read in chunks because we often can't bear to overread: ```fish echo foo\nbar | begin read -l foo read -l bar end ``` needs to have the first read read `foo` and the second read `bar`. So here we can only read one byte at a time. However, when we are directly redirected: ```fish echo foo | read foo ``` we can, because the data is only for us anyway. The stream will be closed after, so anything not read just goes away. Nobody else is there to read. This dramatically speeds up `read` of long lines through a pipe. How much depends on the length of the line. With lines of 5000 characters it's about 15x, with lines of 50 characters about 2x, lines of 5 characters about 1.07x. See #8542. --- src/builtins/read.cpp | 15 +++++++++++---- tests/checks/read.fish | 9 +++++++++ 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/src/builtins/read.cpp b/src/builtins/read.cpp index a72e66de3..914b62b08 100644 --- a/src/builtins/read.cpp +++ b/src/builtins/read.cpp @@ -262,7 +262,7 @@ static int read_interactive(parser_t &parser, wcstring &buff, int nchars, bool s /// of chars. /// /// Returns an exit status. -static int read_in_chunks(int fd, wcstring &buff, bool split_null) { +static int read_in_chunks(int fd, wcstring &buff, bool split_null, bool do_seek) { int exit_res = STATUS_CMD_OK; std::string str; bool eof = false; @@ -284,7 +284,7 @@ static int read_in_chunks(int fd, wcstring &buff, bool split_null) { if (bytes_consumed < bytes_read) { // We found a splitter. The +1 because we need to treat the splitter as consumed, but // not append it to the string. - if (lseek(fd, bytes_consumed - bytes_read + 1, SEEK_CUR) == -1) { + if (do_seek && lseek(fd, bytes_consumed - bytes_read + 1, SEEK_CUR) == -1) { wperror(L"lseek"); return STATUS_CMD_ERROR; } @@ -501,8 +501,15 @@ maybe_t builtin_read(parser_t &parser, io_streams_t &streams, const wchar_t read_interactive(parser, buff, opts.nchars, opts.shell, opts.silent, opts.prompt, opts.right_prompt, opts.commandline, streams.stdin_fd); } else if (!opts.nchars && !stream_stdin_is_a_tty && - lseek(streams.stdin_fd, 0, SEEK_CUR) != -1) { - exit_res = read_in_chunks(streams.stdin_fd, buff, opts.split_null); + (streams.stdin_is_directly_redirected || lseek(streams.stdin_fd, 0, SEEK_CUR) != -1)) { + // We read in chunks when we either can seek (so we put the bytes back), + // or we have the bytes to ourselves (because it's directly redirected). + // + // Note we skip seeking back even if we're directly redirected to a seekable stream, + // under the assumption that the stream will be closed soon anyway. + // You don't rewind VHS tapes before throwing them in the trash. + // TODO: Do this when nchars is set by seeking back. + exit_res = read_in_chunks(streams.stdin_fd, buff, opts.split_null, !streams.stdin_is_directly_redirected); } else { exit_res = read_one_char_at_a_time(streams.stdin_fd, buff, opts.nchars, opts.split_null); diff --git a/tests/checks/read.fish b/tests/checks/read.fish index 8801b6246..2ed2925c8 100644 --- a/tests/checks/read.fish +++ b/tests/checks/read.fish @@ -366,3 +366,12 @@ function-scoped-read # CHECK: $skamtebord[1]: |bar| # CHECK: $craaab: set in local scope, unexported, with 1 elements # CHECK: $craaab[1]: |baz| + +echo foo\nbar\nbaz | begin + read -l foo + read -l bar + cat + # CHECK: baz + echo $bar + # CHECK: bar +end