From 90a4af511235fcb5959913bbe18c835fcd89c38a Mon Sep 17 00:00:00 2001
From: ridiculousfish <corydoras@ridiculousfish.com>
Date: Tue, 29 May 2018 21:11:34 -0700
Subject: [PATCH] Add separated_buffer_t and adopt it in output_stream_t

separated_buffer_t encapsulates the logic around discarding (which
was previously duplicated between output_stream_t and io_buffer_t),
and will also encapsulate the logic around explicitly separated
output.
---
 src/exec.cpp            |  16 ++--
 src/fish_tests.cpp      |   4 +-
 src/io.cpp              |   4 +-
 src/io.h                | 192 +++++++++++++++++++++++++++++-----------
 src/parse_execution.cpp |   2 +-
 5 files changed, 154 insertions(+), 64 deletions(-)

diff --git a/src/exec.cpp b/src/exec.cpp
index a12c33c9e..95d7ec573 100644
--- a/src/exec.cpp
+++ b/src/exec.cpp
@@ -938,7 +938,7 @@ void exec_job(parser_t &parser, job_t *j) {
                     const bool stdout_is_to_buffer = stdout_io && stdout_io->io_mode == IO_BUFFER;
                     const bool no_stdout_output = stdout_stream.empty();
                     const bool no_stderr_output = stderr_stream.empty();
-                    const bool stdout_discarded = stdout_stream.output_discarded();
+                    const bool stdout_discarded = stdout_stream.buffer().discarded();
 
                     if (!stdout_discarded && no_stdout_output && no_stderr_output) {
                         // The builtin produced no output and is not inside of a pipeline. No
@@ -950,6 +950,12 @@ void exec_job(parser_t &parser, job_t *j) {
                         // The builtin produced no stderr, and its stdout is going to an
                         // internal buffer. There is no need to fork. This helps out the
                         // performance quite a bit in complex completion code.
+                        // TODO: we're sloppy about handling explicitly separated output.
+                        // Theoretically we could have explicitly separated output on stdout and
+                        // also stderr output; in that case we ought to thread the exp-sep output
+                        // through to the io buffer. We're getting away with this because the only
+                        // thing that can output exp-sep output is `string split0` which doesn't
+                        // also produce stderr.
                         debug(3, L"Skipping fork: buffered output for internal builtin '%ls'",
                               p->argv0());
 
@@ -960,8 +966,8 @@ void exec_job(parser_t &parser, job_t *j) {
                         // We are writing to normal stdout and stderr. Just do it - no need to fork.
                         debug(3, L"Skipping fork: ordinary output for internal builtin '%ls'",
                               p->argv0());
-                        const std::string outbuff = wcs2string(stdout_stream.buffer());
-                        const std::string errbuff = wcs2string(stderr_stream.buffer());
+                        const std::string outbuff = wcs2string(stdout_stream.contents());
+                        const std::string errbuff = wcs2string(stderr_stream.contents());
                         bool builtin_io_done = do_builtin_io(outbuff.data(), outbuff.size(),
                                                              errbuff.data(), errbuff.size());
                         if (!builtin_io_done && errno != EPIPE) {
@@ -990,11 +996,11 @@ void exec_job(parser_t &parser, job_t *j) {
                     // in the child.
                     //
                     // These strings may contain embedded nulls, so don't treat them as C strings.
-                    const std::string outbuff_str = wcs2string(stdout_stream.buffer());
+                    const std::string outbuff_str = wcs2string(stdout_stream.contents());
                     const char *outbuff = outbuff_str.data();
                     size_t outbuff_len = outbuff_str.size();
 
-                    const std::string errbuff_str = wcs2string(stderr_stream.buffer());
+                    const std::string errbuff_str = wcs2string(stderr_stream.contents());
                     const char *errbuff = errbuff_str.data();
                     size_t errbuff_len = errbuff_str.size();
 
diff --git a/src/fish_tests.cpp b/src/fish_tests.cpp
index 070e5705c..65d3f55bb 100644
--- a/src/fish_tests.cpp
+++ b/src/fish_tests.cpp
@@ -4107,10 +4107,10 @@ static void run_one_string_test(const wchar_t *const *argv, int expected_rc,
     if (rc != expected_rc) {
         err(L"Test failed on line %lu: [%ls]: expected return code %d but got %d", __LINE__,
             args.c_str(), expected_rc, rc);
-    } else if (streams.out.buffer() != expected_out) {
+    } else if (streams.out.contents() != expected_out) {
         err(L"Test failed on line %lu: [%ls]: expected [%ls] but got [%ls]", __LINE__, args.c_str(),
             escape_string(expected_out, ESCAPE_ALL).c_str(),
-            escape_string(streams.out.buffer(), ESCAPE_ALL).c_str());
+            escape_string(streams.out.contents(), ESCAPE_ALL).c_str());
     }
 }
 
diff --git a/src/io.cpp b/src/io.cpp
index e18be8f75..d1573a9fb 100644
--- a/src/io.cpp
+++ b/src/io.cpp
@@ -34,11 +34,11 @@ void io_buffer_t::print() const {
 void io_buffer_t::append_from_stream(const output_stream_t &stream) {
     if (output_discarded())
         return;
-    if (stream.output_discarded()) {
+    if (stream.buffer().discarded()) {
         set_discard();
         return;
     }
-    const std::string str = wcs2string(stream.buffer());
+    const std::string str = wcs2string(stream.contents());
     out_buffer_append(str.data(), str.size());
 }
 
diff --git a/src/io.h b/src/io.h
index 7b789fb67..a58ccf362 100644
--- a/src/io.h
+++ b/src/io.h
@@ -22,6 +22,127 @@ using std::tr1::shared_ptr;
 #include "common.h"
 #include "env.h"
 
+/// separated_buffer_t is composed of a sequence of elements, some of which may be explicitly
+/// separated (e.g. through string spit0) and some of which the separation is inferred. This enum
+/// tracks the type.
+enum class separation_type_t {
+    /// This element's separation should be inferred, e.g. through IFS.
+    inferred,
+    /// This element was explicitly separated and should not be separated further.
+    explicitly
+};
+
+/// A separated_buffer_t contains a list of elements, some of which may be separated explicitly and
+/// others which must be separated further by the user (e.g. via IFS).
+template <typename StringType>
+class separated_buffer_t {
+    struct element_t {
+        StringType contents;
+        separation_type_t separation;
+
+        element_t(StringType contents, separation_type_t sep)
+            : contents(std::move(contents)), separation(sep) {}
+
+        bool is_explicitly_separated() const { return separation == separation_type_t::explicitly; }
+    };
+
+    /// Limit on how much data we'll buffer. Zero means no limit.
+    size_t buffer_limit_;
+
+    /// Current size of all contents.
+    size_t contents_size_{0};
+
+    /// List of buffer elements.
+    std::vector<element_t> elements_;
+
+    /// True if we're discarding input because our buffer_limit has been exceeded.
+    bool discard = false;
+
+    /// Mark that we are about to add the given size \p delta to the buffer. \return true if we
+    /// succeed, false if we exceed buffer_limit.
+    bool try_add_size(size_t delta) {
+        if (discard) return false;
+        contents_size_ += delta;
+        if (contents_size_ < delta) {
+            // Overflow!
+            set_discard();
+            return false;
+        }
+        if (buffer_limit_ > 0 && contents_size_ > buffer_limit_) {
+            set_discard();
+            return false;
+        }
+        return true;
+    }
+
+    /// separated_buffer_t may not be copied.
+    separated_buffer_t(const separated_buffer_t &) = delete;
+    void operator=(const separated_buffer_t &) = delete;
+
+public:
+ /// Construct a separated_buffer_t with the given buffer limit \p limit, or 0 for no limit.
+ separated_buffer_t(size_t limit) : buffer_limit_(limit) {}
+
+ /// \return the buffer limit size, or 0 for no limit.
+ size_t limit() const { return buffer_limit_; }
+
+ /// \return the contents size.
+ size_t size() const { return contents_size_; }
+
+ /// \return whether the output has been discarded.
+ bool discarded() const { return discard; }
+
+ /// Mark the contents as discarded.
+ void set_discard() {
+     elements_.clear();
+     contents_size_ = 0;
+     discard = true;
+ }
+
+ /// Serialize the contents to a single string, where explicitly separated elements have a
+ /// newline appended.
+ StringType newline_serialized() const {
+     StringType result;
+     result.reserve(size());
+     for (const auto &elem : elements_) {
+         result.append(elem.contents);
+         if (elem.is_explicitly_separated()) {
+             result.push_back('\n');
+         }
+     }
+     return result;
+    }
+
+    /// \return the list of elements.
+    const std::vector<element_t> &elements() const { return elements_; }
+
+    /// Append an element with range [begin, end) and the given separation type \p sep.
+    template <typename Iterator>
+    void append(Iterator begin, Iterator end, separation_type_t sep = separation_type_t::inferred) {
+        if (!try_add_size(std::distance(begin, end))) return;
+        // Try merging with the last element.
+        if (sep == separation_type_t::inferred && !elements_.empty() && !elements_.back().is_explicitly_separated()) {
+            elements_.back().contents.append(begin, end);
+        } else {
+            elements_.emplace_back(StringType(begin, end), sep);
+        }
+    }
+
+    /// Append a string \p str with the given separation type \p sep.
+    void append(const StringType &str, separation_type_t sep = separation_type_t::inferred) {
+        append(str.begin(), str.end(), sep);
+    }
+
+    // Given that this is a narrow stream, convert a wide stream \p rhs to narrow and then append
+    // it.
+    template <typename RHSStringType>
+    void append_wide_buffer(const separated_buffer_t<RHSStringType> &rhs) {
+        for (const auto &rhs_elem : rhs.elements()) {
+            append(wcs2string(rhs_elem.contents), rhs_elem.separation);
+        }
+    }
+};
+
 /// Describes what type of IO operation an io_data_t represents.
 enum io_mode_t { IO_FILE, IO_PIPE, IO_FD, IO_BUFFER, IO_CLOSE };
 
@@ -194,77 +315,40 @@ bool pipe_avoid_conflicts_with_io_chain(int fds[2], const io_chain_t &ios);
 /// Class representing the output that a builtin can generate.
 class output_stream_t {
    private:
-    /// Limit on how much data we'll buffer. Zero means no limit.
-    size_t buffer_limit;
-    /// True if we're discarding input.
-    bool discard;
+    /// Storage for our data.
+    separated_buffer_t<wcstring> buffer_;
+
     // No copying.
-    output_stream_t(const output_stream_t &s);
-    void operator=(const output_stream_t &s);
-
-    wcstring buffer_;
-
-    void check_for_overflow() {
-        if (buffer_limit && buffer_.size() > buffer_limit) {
-            discard = true;
-            buffer_.clear();
-        }
-    }
+    output_stream_t(const output_stream_t &s) = delete;
+    void operator=(const output_stream_t &s) = delete;
 
    public:
-    output_stream_t(size_t buffer_limit_) : buffer_limit(buffer_limit_), discard(false) {}
+    output_stream_t(size_t buffer_limit) : buffer_(buffer_limit) {}
 
-    void append(const wcstring &s) {
-        if (discard) return;
-        buffer_.append(s);
-        check_for_overflow();
-    }
+    void append(const wcstring &s) { buffer_.append(s.begin(), s.end()); }
 
-    void append(const wchar_t *s) {
-        if (discard) return;
-        buffer_.append(s);
-        check_for_overflow();
-    }
+    const separated_buffer_t<wcstring> &buffer() const { return buffer_; }
 
-    void append(wchar_t s) {
-        if (discard) return;
-        buffer_.push_back(s);
-        check_for_overflow();
-    }
+    void append(const wchar_t *s) { append(s, wcslen(s)); }
 
-    void append(const wchar_t *s, size_t amt) {
-        if (discard) return;
-        buffer_.append(s, amt);
-        check_for_overflow();
-    }
+    void append(wchar_t s) { append(&s, 1); }
 
-    void push_back(wchar_t c) {
-        if (discard) return;
-        buffer_.push_back(c);
-        check_for_overflow();
-    }
+    void append(const wchar_t *s, size_t amt) { buffer_.append(s, s + amt); }
+
+    void push_back(wchar_t c) { append(c); }
 
     void append_format(const wchar_t *format, ...) {
-        if (discard) return;
         va_list va;
         va_start(va, format);
-        ::append_formatv(buffer_, format, va);
+        append_formatv(format, va);
         va_end(va);
-        check_for_overflow();
     }
 
-    void append_formatv(const wchar_t *format, va_list va_orig) {
-        if (discard) return;
-        ::append_formatv(buffer_, format, va_orig);
-        check_for_overflow();
-    }
+    void append_formatv(const wchar_t *format, va_list va) { append(vformat_string(format, va)); }
 
-    const wcstring &buffer() const { return buffer_; }
+    bool empty() const { return buffer_.size() == 0; }
 
-    /// Function that returns true if we discarded the input because there was too much data.
-    bool output_discarded() const { return discard; }
-
-    bool empty() const { return buffer_.empty(); }
+    wcstring contents() const { return buffer_.newline_serialized(); }
 };
 
 struct io_streams_t {
diff --git a/src/parse_execution.cpp b/src/parse_execution.cpp
index b728435ec..15289ba56 100644
--- a/src/parse_execution.cpp
+++ b/src/parse_execution.cpp
@@ -328,7 +328,7 @@ parse_execution_result_t parse_execution_context_t::run_function_statement(
     proc_set_last_status(err);
 
     if (!streams.err.empty()) {
-        this->report_error(header, L"%ls", streams.err.buffer().c_str());
+        this->report_error(header, L"%ls", streams.err.contents().c_str());
         result = parse_execution_errored;
     }