From 41e562ebc22d3e4dfc444e14b579236b82e08b1f Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sun, 13 Oct 2013 13:26:52 -0700 Subject: [PATCH] Clean up redirection parsing in the tokenizer. --- tokenizer.cpp | 166 ++++++++++++++++++++++++++++++++------------------ 1 file changed, 106 insertions(+), 60 deletions(-) diff --git a/tokenizer.cpp b/tokenizer.cpp index 1ef0bf5dc..8a6fe58a8 100644 --- a/tokenizer.cpp +++ b/tokenizer.cpp @@ -50,7 +50,7 @@ segments. /** Error string for when trying to pipe from fd 0 */ -#define PIPE_ERROR _( L"Can not use fd 0 as pipe output" ) +#define PIPE_ERROR _( L"Cannot use stdin (fd 0) as pipe output" ) /** Characters that separate tokens. They are ordered by frequency of occurrence to increase parsing speed. @@ -435,66 +435,92 @@ static void read_comment(tokenizer_t *tok) tok->last_type = TOK_COMMENT; } -/** - Read a FD redirection. +/* Reads a redirection or an "fd pipe" (like 2>|) from a string. Returns how many characters were consumed. If zero, then this string was not a redirection. + + Also returns by reference the redirection mode, and the fd to redirection. */ -static void read_redirect(tokenizer_t *tok, int fd) +static size_t read_redirection_or_fd_pipe(const wchar_t *buff, enum token_type *out_redirection_mode, int *out_fd) { + bool errored = false; + int fd = 0; enum token_type redirection_mode = TOK_NONE; - if ((*tok->buff == L'>') || - (*tok->buff == L'^')) + size_t idx = 0; + + /* Determine the fd. This may be specified as a prefix like '2>...' or it may be implicit like '>' or '^'. Try parsing out a number; if we did not get any digits then infer it from the first character */ + for (; iswdigit(buff[idx]); idx++) { - tok->buff++; - if (*tok->buff == *(tok->buff-1)) + int digit = buff[idx] - L'0'; + fd = fd * 10 + digit; + } + + if (idx == 0) + { + /* We did not find a leading digit, so there's no explicit fd. Infer it from the type */ + switch (buff[idx]) { - tok->buff++; - redirection_mode = TOK_REDIRECT_APPEND; - } - else - { - redirection_mode = TOK_REDIRECT_OUT; - } - - if (*tok->buff == L'|') - { - if (fd == 0) - { - TOK_CALL_ERROR(tok, TOK_OTHER, PIPE_ERROR); - return; - } - tok->buff++; - tok->last_token = to_string(fd); - tok->last_type = TOK_PIPE; - return; + case L'>': fd = STDOUT_FILENO; break; + case L'<': fd = STDIN_FILENO; break; + case L'^': fd = STDERR_FILENO; break; + default: errored = true; break; } } - else if (*tok->buff == L'<') + + /* Either way we should have ended on the redirection character itself like '>' */ + wchar_t redirect_char = buff[idx++]; //note increment of idx + if (redirect_char == L'>' || redirect_char == L'^') + { + redirection_mode = TOK_REDIRECT_OUT; + if (buff[idx] == redirect_char) + { + /* Doubled up like ^^ or >>. That means append */ + redirection_mode = TOK_REDIRECT_APPEND; + idx++; + } + } + else if (redirect_char == L'<') { - tok->buff++; redirection_mode = TOK_REDIRECT_IN; } else { - TOK_CALL_ERROR(tok, TOK_OTHER, REDIRECT_ERROR); + /* Something else */ + errored = true; } - - tok->last_token = to_string(fd); - - if (*tok->buff == L'&') + + /* Optional characters like & or ?, or the pipe char | */ + wchar_t opt_char = buff[idx]; + if (opt_char == L'&') { - tok->buff++; - tok->last_type = TOK_REDIRECT_FD; + redirection_mode = TOK_REDIRECT_FD; + idx++; } - else if (*tok->buff == L'?') + else if (opt_char == L'?') { - tok->buff++; - tok->last_type = TOK_REDIRECT_NOCLOB; + redirection_mode = TOK_REDIRECT_NOCLOB; + idx++; } - else + else if (opt_char == L'|') { - tok->last_type = redirection_mode; + /* So the string looked like '2>|'. This is not a redirection - it's a pipe! That gets handled elsewhere. */ + redirection_mode = TOK_PIPE; + idx++; } + + /* Don't return valid-looking stuff on error */ + if (errored) + { + idx = 0; + redirection_mode = TOK_NONE; + } + + /* Return stuff */ + if (out_redirection_mode != NULL) + *out_redirection_mode = redirection_mode; + if (out_fd != NULL) + *out_fd = fd; + + return idx; } wchar_t tok_last_quote(tokenizer_t *tok) @@ -606,36 +632,56 @@ void tok_next(tokenizer_t *tok) break; case L'>': - read_redirect(tok, 1); - return; case L'<': - read_redirect(tok, 0); - return; case L'^': - read_redirect(tok, 2); - return; + { + /* There's some duplication with the code in the default case below. The key difference here is that we must never parse these as a string; a failed redirection is an error! */ + enum token_type mode = TOK_NONE; + int fd = -1; + size_t consumed = read_redirection_or_fd_pipe(tok->buff, &mode, &fd); + if (consumed == 0) + { + TOK_CALL_ERROR(tok, TOK_OTHER, REDIRECT_ERROR); + } + else + { + tok->buff += consumed; + tok->last_type = mode; + tok->last_token = to_string(fd); + } + } + break; default: { + /* Maybe a redirection like '2>&1', maybe a pipe like 2>|, maybe just a string */ + size_t consumed = 0; + enum token_type mode = TOK_NONE; + int fd = -1; if (iswdigit(*tok->buff)) + consumed = read_redirection_or_fd_pipe(tok->buff, &mode, &fd); + + if (consumed > 0) { - const wchar_t *orig = tok->buff; - int fd = 0; - while (iswdigit(*tok->buff)) - fd = (fd*10) + (*(tok->buff++) - L'0'); - - switch (*(tok->buff)) + /* It looks like a redirection or a pipe. But we don't support piping fd 0. */ + if (mode == TOK_PIPE && fd == 0) { - case L'^': - case L'>': - case L'<': - read_redirect(tok, fd); - return; + TOK_CALL_ERROR(tok, TOK_OTHER, PIPE_ERROR); + } + else + { + tok->buff += consumed; + tok->last_type = mode; + tok->last_token = to_string(fd); } - tok->buff = orig; } - read_string(tok); + else + { + /* Not a redirection or pipe, so just a stirng */ + read_string(tok); + } } + break; }