From 90a4af511235fcb5959913bbe18c835fcd89c38a Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Tue, 29 May 2018 21:11:34 -0700 Subject: [PATCH] Add separated_buffer_t and adopt it in output_stream_t separated_buffer_t encapsulates the logic around discarding (which was previously duplicated between output_stream_t and io_buffer_t), and will also encapsulate the logic around explicitly separated output. --- src/exec.cpp | 16 ++-- src/fish_tests.cpp | 4 +- src/io.cpp | 4 +- src/io.h | 192 +++++++++++++++++++++++++++++----------- src/parse_execution.cpp | 2 +- 5 files changed, 154 insertions(+), 64 deletions(-) diff --git a/src/exec.cpp b/src/exec.cpp index a12c33c9e..95d7ec573 100644 --- a/src/exec.cpp +++ b/src/exec.cpp @@ -938,7 +938,7 @@ void exec_job(parser_t &parser, job_t *j) { const bool stdout_is_to_buffer = stdout_io && stdout_io->io_mode == IO_BUFFER; const bool no_stdout_output = stdout_stream.empty(); const bool no_stderr_output = stderr_stream.empty(); - const bool stdout_discarded = stdout_stream.output_discarded(); + const bool stdout_discarded = stdout_stream.buffer().discarded(); if (!stdout_discarded && no_stdout_output && no_stderr_output) { // The builtin produced no output and is not inside of a pipeline. No @@ -950,6 +950,12 @@ void exec_job(parser_t &parser, job_t *j) { // The builtin produced no stderr, and its stdout is going to an // internal buffer. There is no need to fork. This helps out the // performance quite a bit in complex completion code. + // TODO: we're sloppy about handling explicitly separated output. + // Theoretically we could have explicitly separated output on stdout and + // also stderr output; in that case we ought to thread the exp-sep output + // through to the io buffer. We're getting away with this because the only + // thing that can output exp-sep output is `string split0` which doesn't + // also produce stderr. debug(3, L"Skipping fork: buffered output for internal builtin '%ls'", p->argv0()); @@ -960,8 +966,8 @@ void exec_job(parser_t &parser, job_t *j) { // We are writing to normal stdout and stderr. Just do it - no need to fork. debug(3, L"Skipping fork: ordinary output for internal builtin '%ls'", p->argv0()); - const std::string outbuff = wcs2string(stdout_stream.buffer()); - const std::string errbuff = wcs2string(stderr_stream.buffer()); + const std::string outbuff = wcs2string(stdout_stream.contents()); + const std::string errbuff = wcs2string(stderr_stream.contents()); bool builtin_io_done = do_builtin_io(outbuff.data(), outbuff.size(), errbuff.data(), errbuff.size()); if (!builtin_io_done && errno != EPIPE) { @@ -990,11 +996,11 @@ void exec_job(parser_t &parser, job_t *j) { // in the child. // // These strings may contain embedded nulls, so don't treat them as C strings. - const std::string outbuff_str = wcs2string(stdout_stream.buffer()); + const std::string outbuff_str = wcs2string(stdout_stream.contents()); const char *outbuff = outbuff_str.data(); size_t outbuff_len = outbuff_str.size(); - const std::string errbuff_str = wcs2string(stderr_stream.buffer()); + const std::string errbuff_str = wcs2string(stderr_stream.contents()); const char *errbuff = errbuff_str.data(); size_t errbuff_len = errbuff_str.size(); diff --git a/src/fish_tests.cpp b/src/fish_tests.cpp index 070e5705c..65d3f55bb 100644 --- a/src/fish_tests.cpp +++ b/src/fish_tests.cpp @@ -4107,10 +4107,10 @@ static void run_one_string_test(const wchar_t *const *argv, int expected_rc, if (rc != expected_rc) { err(L"Test failed on line %lu: [%ls]: expected return code %d but got %d", __LINE__, args.c_str(), expected_rc, rc); - } else if (streams.out.buffer() != expected_out) { + } else if (streams.out.contents() != expected_out) { err(L"Test failed on line %lu: [%ls]: expected [%ls] but got [%ls]", __LINE__, args.c_str(), escape_string(expected_out, ESCAPE_ALL).c_str(), - escape_string(streams.out.buffer(), ESCAPE_ALL).c_str()); + escape_string(streams.out.contents(), ESCAPE_ALL).c_str()); } } diff --git a/src/io.cpp b/src/io.cpp index e18be8f75..d1573a9fb 100644 --- a/src/io.cpp +++ b/src/io.cpp @@ -34,11 +34,11 @@ void io_buffer_t::print() const { void io_buffer_t::append_from_stream(const output_stream_t &stream) { if (output_discarded()) return; - if (stream.output_discarded()) { + if (stream.buffer().discarded()) { set_discard(); return; } - const std::string str = wcs2string(stream.buffer()); + const std::string str = wcs2string(stream.contents()); out_buffer_append(str.data(), str.size()); } diff --git a/src/io.h b/src/io.h index 7b789fb67..a58ccf362 100644 --- a/src/io.h +++ b/src/io.h @@ -22,6 +22,127 @@ using std::tr1::shared_ptr; #include "common.h" #include "env.h" +/// separated_buffer_t is composed of a sequence of elements, some of which may be explicitly +/// separated (e.g. through string spit0) and some of which the separation is inferred. This enum +/// tracks the type. +enum class separation_type_t { + /// This element's separation should be inferred, e.g. through IFS. + inferred, + /// This element was explicitly separated and should not be separated further. + explicitly +}; + +/// A separated_buffer_t contains a list of elements, some of which may be separated explicitly and +/// others which must be separated further by the user (e.g. via IFS). +template +class separated_buffer_t { + struct element_t { + StringType contents; + separation_type_t separation; + + element_t(StringType contents, separation_type_t sep) + : contents(std::move(contents)), separation(sep) {} + + bool is_explicitly_separated() const { return separation == separation_type_t::explicitly; } + }; + + /// Limit on how much data we'll buffer. Zero means no limit. + size_t buffer_limit_; + + /// Current size of all contents. + size_t contents_size_{0}; + + /// List of buffer elements. + std::vector elements_; + + /// True if we're discarding input because our buffer_limit has been exceeded. + bool discard = false; + + /// Mark that we are about to add the given size \p delta to the buffer. \return true if we + /// succeed, false if we exceed buffer_limit. + bool try_add_size(size_t delta) { + if (discard) return false; + contents_size_ += delta; + if (contents_size_ < delta) { + // Overflow! + set_discard(); + return false; + } + if (buffer_limit_ > 0 && contents_size_ > buffer_limit_) { + set_discard(); + return false; + } + return true; + } + + /// separated_buffer_t may not be copied. + separated_buffer_t(const separated_buffer_t &) = delete; + void operator=(const separated_buffer_t &) = delete; + +public: + /// Construct a separated_buffer_t with the given buffer limit \p limit, or 0 for no limit. + separated_buffer_t(size_t limit) : buffer_limit_(limit) {} + + /// \return the buffer limit size, or 0 for no limit. + size_t limit() const { return buffer_limit_; } + + /// \return the contents size. + size_t size() const { return contents_size_; } + + /// \return whether the output has been discarded. + bool discarded() const { return discard; } + + /// Mark the contents as discarded. + void set_discard() { + elements_.clear(); + contents_size_ = 0; + discard = true; + } + + /// Serialize the contents to a single string, where explicitly separated elements have a + /// newline appended. + StringType newline_serialized() const { + StringType result; + result.reserve(size()); + for (const auto &elem : elements_) { + result.append(elem.contents); + if (elem.is_explicitly_separated()) { + result.push_back('\n'); + } + } + return result; + } + + /// \return the list of elements. + const std::vector &elements() const { return elements_; } + + /// Append an element with range [begin, end) and the given separation type \p sep. + template + void append(Iterator begin, Iterator end, separation_type_t sep = separation_type_t::inferred) { + if (!try_add_size(std::distance(begin, end))) return; + // Try merging with the last element. + if (sep == separation_type_t::inferred && !elements_.empty() && !elements_.back().is_explicitly_separated()) { + elements_.back().contents.append(begin, end); + } else { + elements_.emplace_back(StringType(begin, end), sep); + } + } + + /// Append a string \p str with the given separation type \p sep. + void append(const StringType &str, separation_type_t sep = separation_type_t::inferred) { + append(str.begin(), str.end(), sep); + } + + // Given that this is a narrow stream, convert a wide stream \p rhs to narrow and then append + // it. + template + void append_wide_buffer(const separated_buffer_t &rhs) { + for (const auto &rhs_elem : rhs.elements()) { + append(wcs2string(rhs_elem.contents), rhs_elem.separation); + } + } +}; + /// Describes what type of IO operation an io_data_t represents. enum io_mode_t { IO_FILE, IO_PIPE, IO_FD, IO_BUFFER, IO_CLOSE }; @@ -194,77 +315,40 @@ bool pipe_avoid_conflicts_with_io_chain(int fds[2], const io_chain_t &ios); /// Class representing the output that a builtin can generate. class output_stream_t { private: - /// Limit on how much data we'll buffer. Zero means no limit. - size_t buffer_limit; - /// True if we're discarding input. - bool discard; + /// Storage for our data. + separated_buffer_t buffer_; + // No copying. - output_stream_t(const output_stream_t &s); - void operator=(const output_stream_t &s); - - wcstring buffer_; - - void check_for_overflow() { - if (buffer_limit && buffer_.size() > buffer_limit) { - discard = true; - buffer_.clear(); - } - } + output_stream_t(const output_stream_t &s) = delete; + void operator=(const output_stream_t &s) = delete; public: - output_stream_t(size_t buffer_limit_) : buffer_limit(buffer_limit_), discard(false) {} + output_stream_t(size_t buffer_limit) : buffer_(buffer_limit) {} - void append(const wcstring &s) { - if (discard) return; - buffer_.append(s); - check_for_overflow(); - } + void append(const wcstring &s) { buffer_.append(s.begin(), s.end()); } - void append(const wchar_t *s) { - if (discard) return; - buffer_.append(s); - check_for_overflow(); - } + const separated_buffer_t &buffer() const { return buffer_; } - void append(wchar_t s) { - if (discard) return; - buffer_.push_back(s); - check_for_overflow(); - } + void append(const wchar_t *s) { append(s, wcslen(s)); } - void append(const wchar_t *s, size_t amt) { - if (discard) return; - buffer_.append(s, amt); - check_for_overflow(); - } + void append(wchar_t s) { append(&s, 1); } - void push_back(wchar_t c) { - if (discard) return; - buffer_.push_back(c); - check_for_overflow(); - } + void append(const wchar_t *s, size_t amt) { buffer_.append(s, s + amt); } + + void push_back(wchar_t c) { append(c); } void append_format(const wchar_t *format, ...) { - if (discard) return; va_list va; va_start(va, format); - ::append_formatv(buffer_, format, va); + append_formatv(format, va); va_end(va); - check_for_overflow(); } - void append_formatv(const wchar_t *format, va_list va_orig) { - if (discard) return; - ::append_formatv(buffer_, format, va_orig); - check_for_overflow(); - } + void append_formatv(const wchar_t *format, va_list va) { append(vformat_string(format, va)); } - const wcstring &buffer() const { return buffer_; } + bool empty() const { return buffer_.size() == 0; } - /// Function that returns true if we discarded the input because there was too much data. - bool output_discarded() const { return discard; } - - bool empty() const { return buffer_.empty(); } + wcstring contents() const { return buffer_.newline_serialized(); } }; struct io_streams_t { diff --git a/src/parse_execution.cpp b/src/parse_execution.cpp index b728435ec..15289ba56 100644 --- a/src/parse_execution.cpp +++ b/src/parse_execution.cpp @@ -328,7 +328,7 @@ parse_execution_result_t parse_execution_context_t::run_function_statement( proc_set_last_status(err); if (!streams.err.empty()) { - this->report_error(header, L"%ls", streams.err.buffer().c_str()); + this->report_error(header, L"%ls", streams.err.contents().c_str()); result = parse_execution_errored; }