Reflect newlines after pipes in fish grammar

The previous attempt to support newlines after pipes changed the lexer to swallow newlines after encountering a pipe. This has two problems that are difficult to fix: (1) comments cannot be placed after the pipe, and (2) fish_indent won't know about the newlines, so it will erase them. Address these problems by removing the lexer behavior and replacing it with a new parser symbol, "optional_newlines", allowing the newlines to be reflected directly in the fish grammar.
2026-06-11 13:51:16 -03:00 · 2018-02-18 14:37:44 -08:00
parent ea4e997dc9
commit fd2a0dffa9
12 changed files with 50 additions and 80 deletions
--- a/src/tokenizer.cpp
+++ b/src/tokenizer.cpp
@@ -102,16 +102,7 @@ bool tokenizer_t::next(struct tok_t *result) {
    }

    assert(this->buff >= this->orig_buff);
-    if (this->last_type == TOK_PIPE) {
-        // Ignore subsequent whitespaces or a newline after a pipe (#1285).
-        int pipe_pos = current_pos - 1;
-        while (this->orig_buff[pipe_pos] != L'|') {
-            pipe_pos--;
-        }
-        result->length = pipe_pos - this->last_pos + 1;
-    } else {
-        result->length = current_pos >= this->last_pos ? current_pos - this->last_pos : 0;
-    }
+    result->length = current_pos >= this->last_pos ? current_pos - this->last_pos : 0;

    this->tok_next();
    return true;
@@ -538,12 +529,14 @@ void tokenizer_t::tok_next() {
            this->last_type = TOK_END;
            // fwprintf( stderr, L"End of string\n" );
            this->has_next = false;
+            this->last_token.clear();
            break;
        }
        case L'\r':  // carriage-return
        case L'\n':  // newline
        case L';': {
            this->last_type = TOK_END;
+            this->last_token.assign(1, *this->buff);
            this->buff++;
            // Hack: when we get a newline, swallow as many as we can. This compresses multiple
            // subsequent newlines into a single one.
@@ -553,7 +546,6 @@ void tokenizer_t::tok_next() {
                    this->buff++;
                }
            }
-            this->last_token.clear();
            break;
        }
        case L'&': {
@@ -565,7 +557,6 @@ void tokenizer_t::tok_next() {
            this->last_token = L"1";
            this->last_type = TOK_PIPE;
            this->buff++;
-            skip_newline_after_pipe();
            break;
        }
        case L'>':
@@ -580,9 +571,6 @@ void tokenizer_t::tok_next() {
                TOK_CALL_ERROR(this, TOK_OTHER, REDIRECT_ERROR, this->buff);
            } else {
                this->buff += consumed;
-                if (mode == TOK_PIPE) {
-                    skip_newline_after_pipe();
-                }
                this->last_type = mode;
                this->last_token = to_string(fd);
            }
@@ -606,9 +594,6 @@ void tokenizer_t::tok_next() {
                    TOK_CALL_ERROR(this, TOK_OTHER, PIPE_ERROR, error_location);
                } else {
                    this->buff += consumed;
-                    if (mode == TOK_PIPE) {
-                        skip_newline_after_pipe();
-                    }
                    this->last_type = mode;
                    this->last_token = to_string(fd);
                }
@@ -621,20 +606,6 @@ void tokenizer_t::tok_next() {
    }
 }

-/// If the line ends with pipe, continue to the next line (#1285).
-void tokenizer_t::skip_newline_after_pipe() {
-    while (1) {
-        if (this->buff[0] == L'\n') {
-            this->buff++;
-            break;
-        } else if (my_iswspace(this->buff[0])) {
-            this->buff++;
-        } else {
-            break;
-        }
-    }
-}
-
 wcstring tok_first(const wcstring &str) {
    wcstring result;
    tokenizer_t t(str.c_str(), TOK_SQUASH_ERRORS);