diff --git a/fish-rust/src/redirection.rs b/fish-rust/src/redirection.rs index 2acd152eb..b27dd35a1 100644 --- a/fish-rust/src/redirection.rs +++ b/fish-rust/src/redirection.rs @@ -15,6 +15,7 @@ mod redirection_ffi { type wcharz_t = super::wcharz_t; } + #[derive(Debug)] enum RedirectionMode { overwrite, // normal redirection: > file.txt append, // appending redirection: >> file.txt diff --git a/fish-rust/src/tests/mod.rs b/fish-rust/src/tests/mod.rs index 547614301..091baf458 100644 --- a/fish-rust/src/tests/mod.rs +++ b/fish-rust/src/tests/mod.rs @@ -3,3 +3,5 @@ mod fd_monitor; #[cfg(test)] mod string_escape; +#[cfg(test)] +mod tokenizer; diff --git a/fish-rust/src/tests/tokenizer.rs b/fish-rust/src/tests/tokenizer.rs new file mode 100644 index 000000000..ec901504e --- /dev/null +++ b/fish-rust/src/tests/tokenizer.rs @@ -0,0 +1,141 @@ +use crate::redirection::RedirectionMode; +use crate::tokenizer::{PipeOrRedir, TokFlags, TokenType, Tokenizer, TokenizerError}; +use crate::wchar::prelude::*; +use libc::{STDERR_FILENO, STDOUT_FILENO}; + +#[test] +fn test_tokenizer() { + { + let s = L!("alpha beta"); + let mut t = Tokenizer::new(s, TokFlags(0)); + + let token = t.next(); // alpha + assert!(token.is_some()); + let token = token.unwrap(); + assert_eq!(token.type_, TokenType::string); + assert_eq!(token.length, 5); + assert_eq!(t.text_of(&token), "alpha"); + + let token = t.next(); // beta + assert!(token.is_some()); + let token = token.unwrap(); + assert_eq!(token.type_, TokenType::string); + assert_eq!(token.offset, 6); + assert_eq!(token.length, 4); + assert_eq!(t.text_of(&token), "beta"); + + assert!(t.next().is_none()); + } + + let s = L!(concat!( + "string &1 'nested \"quoted\" '(string containing subshells ", + "){and,brackets}$as[$well (as variable arrays)] not_a_redirect^ ^ ^^is_a_redirect ", + "&| &> ", + "&&& ||| ", + "&& || & |", + "Compress_Newlines\n \n\t\n \nInto_Just_One", + )); + type tt = TokenType; + #[rustfmt::skip] + let types = [ + tt::string, tt::redirect, tt::string, tt::redirect, tt::string, tt::string, tt::string, + tt::string, tt::string, tt::pipe, tt::redirect, tt::andand, tt::background, tt::oror, + tt::pipe, tt::andand, tt::oror, tt::background, tt::pipe, tt::string, tt::end, tt::string, + ]; + + { + let t = Tokenizer::new(s, TokFlags(0)); + let mut actual_types = vec![]; + for token in t { + actual_types.push(token.type_); + } + assert_eq!(&actual_types[..], types); + } + + // Test some errors. + + { + let mut t = Tokenizer::new(L!("abc\\"), TokFlags(0)); + let token = t.next().unwrap(); + assert_eq!(token.type_, TokenType::error); + assert_eq!(token.error, TokenizerError::unterminated_escape); + assert_eq!(token.error_offset_within_token, 3); + } + + { + let mut t = Tokenizer::new(L!("abc )defg(hij"), TokFlags(0)); + let _token = t.next().unwrap(); + let token = t.next().unwrap(); + assert_eq!(token.type_, TokenType::error); + assert_eq!(token.error, TokenizerError::closing_unopened_subshell); + assert_eq!(token.offset, 4); + assert_eq!(token.error_offset_within_token, 0); + } + + { + let mut t = Tokenizer::new(L!("abc defg(hij (klm)"), TokFlags(0)); + let _token = t.next().unwrap(); + let token = t.next().unwrap(); + assert_eq!(token.type_, TokenType::error); + assert_eq!(token.error, TokenizerError::unterminated_subshell); + assert_eq!(token.error_offset_within_token, 4); + } + + { + let mut t = Tokenizer::new(L!("abc defg[hij (klm)"), TokFlags(0)); + let _token = t.next().unwrap(); + let token = t.next().unwrap(); + assert_eq!(token.type_, TokenType::error); + assert_eq!(token.error, TokenizerError::unterminated_slice); + assert_eq!(token.error_offset_within_token, 4); + } + + // Test some redirection parsing. + macro_rules! pipe_or_redir { + ($s:literal) => { + PipeOrRedir::try_from(L!($s)).unwrap() + }; + } + + assert!(pipe_or_redir!("|").is_pipe); + assert!(pipe_or_redir!("0>|").is_pipe); + assert_eq!(pipe_or_redir!("0>|").fd, 0); + assert!(pipe_or_redir!("2>|").is_pipe); + assert_eq!(pipe_or_redir!("2>|").fd, 2); + assert!(pipe_or_redir!(">|").is_pipe); + assert_eq!(pipe_or_redir!(">|").fd, STDOUT_FILENO); + assert!(!pipe_or_redir!(">").is_pipe); + assert_eq!(pipe_or_redir!(">").fd, STDOUT_FILENO); + assert_eq!(pipe_or_redir!("2>").fd, STDERR_FILENO); + assert_eq!(pipe_or_redir!("9999999999999>").fd, -1); + assert_eq!(pipe_or_redir!("9999999999999>&2").fd, -1); + assert_eq!(pipe_or_redir!("9999999999999>&2").is_valid(), false); + assert_eq!(pipe_or_redir!("9999999999999>&2").is_valid(), false); + + assert!(pipe_or_redir!("&|").is_pipe); + assert!(pipe_or_redir!("&|").stderr_merge); + assert!(!pipe_or_redir!("&>").is_pipe); + assert!(pipe_or_redir!("&>").stderr_merge); + assert!(pipe_or_redir!("&>>").stderr_merge); + assert!(pipe_or_redir!("&>?").stderr_merge); + + macro_rules! get_redir_mode { + ($s:literal) => { + pipe_or_redir!($s).mode + }; + } + + assert_eq!(get_redir_mode!("<"), RedirectionMode::input); + assert_eq!(get_redir_mode!(">"), RedirectionMode::overwrite); + assert_eq!(get_redir_mode!("2>"), RedirectionMode::overwrite); + assert_eq!(get_redir_mode!(">>"), RedirectionMode::append); + assert_eq!(get_redir_mode!("2>>"), RedirectionMode::append); + assert_eq!(get_redir_mode!("2>?"), RedirectionMode::noclob); + assert_eq!( + get_redir_mode!("9999999999999999>?"), + RedirectionMode::noclob + ); + assert_eq!(get_redir_mode!("2>&3"), RedirectionMode::fd); + assert_eq!(get_redir_mode!("3<&0"), RedirectionMode::fd); + assert_eq!(get_redir_mode!("3 c_int { // \return if we are "valid". Here "valid" means only that the source fd did not overflow. // For example 99999999999> is invalid. - fn is_valid(&self) -> bool { + pub fn is_valid(&self) -> bool { self.fd >= 0 } diff --git a/src/fish_tests.cpp b/src/fish_tests.cpp index 722aeb8af..5db84ccff 100644 --- a/src/fish_tests.cpp +++ b/src/fish_tests.cpp @@ -530,169 +530,6 @@ static void test_convert_nulls() { } } -/// Test the tokenizer. -static void test_tokenizer() { - say(L"Testing tokenizer"); - { - const wchar_t *str = L"alpha beta"; - auto t = new_tokenizer(str, 0); - std::unique_ptr token{}; - - token = t->next(); // alpha - do_test(token); - do_test(token->type_ == token_type_t::string); - do_test(token->offset == 0); - do_test(token->length == 5); - do_test(*t->text_of(*token) == L"alpha"); - - token = t->next(); // beta - do_test(token); - do_test(token->type_ == token_type_t::string); - do_test(token->offset == 6); - do_test(token->length == 4); - do_test(*t->text_of(*token) == L"beta"); - - token = t->next(); - do_test(!token); - } - - const wchar_t *str = - L"string &1 'nested \"quoted\" '(string containing subshells " - L"){and,brackets}$as[$well (as variable arrays)] not_a_redirect^ ^ ^^is_a_redirect " - L"&| &> " - L"&&& ||| " - L"&& || & |" - L"Compress_Newlines\n \n\t\n \nInto_Just_One"; - using tt = token_type_t; - const token_type_t types[] = { - tt::string, tt::redirect, tt::string, tt::redirect, tt::string, tt::string, - tt::string, tt::string, tt::string, tt::pipe, tt::redirect, tt::andand, - tt::background, tt::oror, tt::pipe, tt::andand, tt::oror, tt::background, - tt::pipe, tt::string, tt::end, tt::string}; - - say(L"Test correct tokenization"); - - { - auto t = new_tokenizer(str, 0); - size_t i = 0; - while (auto token = t->next()) { - if (i >= sizeof types / sizeof *types) { - err(L"Too many tokens returned from tokenizer"); - std::fwprintf(stdout, L"Got excess token type %ld\n", (long)token->type_); - break; - } - if (types[i] != token->type_) { - err(L"Tokenization error:"); - std::fwprintf( - stdout, - L"Token number %zu of string \n'%ls'\n, expected type %ld, got token type " - L"%ld\n", - i + 1, str, (long)types[i], (long)token->type_); - } - i++; - } - if (i < sizeof types / sizeof *types) { - err(L"Too few tokens returned from tokenizer"); - } - } - - // Test some errors. - { - auto t = new_tokenizer(L"abc\\", 0); - auto token = t->next(); - do_test(token); - do_test(token->type_ == token_type_t::error); - do_test(token->error == tokenizer_error_t::unterminated_escape); - do_test(token->error_offset_within_token == 3); - } - - { - auto t = new_tokenizer(L"abc )defg(hij", 0); - auto token = t->next(); - do_test(token); - token = t->next(); - do_test(token); - do_test(token->type_ == token_type_t::error); - do_test(token->error == tokenizer_error_t::closing_unopened_subshell); - do_test(token->offset == 4); - do_test(token->error_offset_within_token == 0); - } - - { - auto t = new_tokenizer(L"abc defg(hij (klm)", 0); - auto token = t->next(); - do_test(token); - token = t->next(); - do_test(token); - do_test(token->type_ == token_type_t::error); - do_test(token->error == tokenizer_error_t::unterminated_subshell); - do_test(token->error_offset_within_token == 4); - } - - { - auto t = new_tokenizer(L"abc defg[hij (klm)", 0); - auto token = t->next(); - do_test(token); - token = t->next(); - do_test(token); - do_test(token->type_ == token_type_t::error); - do_test(token->error == tokenizer_error_t::unterminated_slice); - do_test(token->error_offset_within_token == 4); - } - - // Test some redirection parsing. - auto pipe_or_redir = [](const wchar_t *s) { return pipe_or_redir_from_string(s); }; - do_test(pipe_or_redir(L"|")->is_pipe); - do_test(pipe_or_redir(L"0>|")->is_pipe); - do_test(pipe_or_redir(L"0>|")->fd == 0); - do_test(pipe_or_redir(L"2>|")->is_pipe); - do_test(pipe_or_redir(L"2>|")->fd == 2); - do_test(pipe_or_redir(L">|")->is_pipe); - do_test(pipe_or_redir(L">|")->fd == STDOUT_FILENO); - do_test(!pipe_or_redir(L">")->is_pipe); - do_test(pipe_or_redir(L">")->fd == STDOUT_FILENO); - do_test(pipe_or_redir(L"2>")->fd == STDERR_FILENO); - do_test(pipe_or_redir(L"9999999999999>")->fd == -1); - do_test(pipe_or_redir(L"9999999999999>&2")->fd == -1); - do_test(pipe_or_redir(L"9999999999999>&2")->is_valid() == false); - do_test(pipe_or_redir(L"9999999999999>&2")->is_valid() == false); - - do_test(pipe_or_redir(L"&|")->is_pipe); - do_test(pipe_or_redir(L"&|")->stderr_merge); - do_test(!pipe_or_redir(L"&>")->is_pipe); - do_test(pipe_or_redir(L"&>")->stderr_merge); - do_test(pipe_or_redir(L"&>>")->stderr_merge); - do_test(pipe_or_redir(L"&>?")->stderr_merge); - - auto get_redir_mode = [](const wchar_t *s) -> maybe_t { - if (auto redir = pipe_or_redir_from_string(s)) { - return redir->mode; - } - return none(); - }; - - if (get_redir_mode(L"<") != redirection_mode_t::input) - err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); - if (get_redir_mode(L">") != redirection_mode_t::overwrite) - err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); - if (get_redir_mode(L"2>") != redirection_mode_t::overwrite) - err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); - if (get_redir_mode(L">>") != redirection_mode_t::append) - err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); - if (get_redir_mode(L"2>>") != redirection_mode_t::append) - err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); - if (get_redir_mode(L"2>?") != redirection_mode_t::noclob) - err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); - if (get_redir_mode(L"9999999999999999>?") != redirection_mode_t::noclob) - err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); - if (get_redir_mode(L"2>&3") != redirection_mode_t::fd) - err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); - if (get_redir_mode(L"3<&0") != redirection_mode_t::fd) - err(L"redirection_type_for_string failed on line %ld", (long)__LINE__); - if (get_redir_mode(L"3 shared_int{0}; @@ -5552,7 +5389,6 @@ static const test_t s_tests[]{ {TEST_GROUP("convert_ascii"), test_convert_ascii}, {TEST_GROUP("perf_convert_ascii"), perf_convert_ascii, true}, {TEST_GROUP("convert_nulls"), test_convert_nulls}, - {TEST_GROUP("tokenizer"), test_tokenizer}, {TEST_GROUP("iothread"), test_iothread}, {TEST_GROUP("pthread"), test_pthread}, {TEST_GROUP("debounce"), test_debounce},