Clean up old syntax highlighting code now that the new parser seems to work

This commit is contained in:
ridiculousfish
2014-02-09 21:21:59 -08:00
parent 77dbaf3aef
commit bbd784a2e8

View File

@@ -409,296 +409,6 @@ rgb_color_t highlight_get_color(highlight_spec_t highlight, bool is_background)
}
/**
   Highlight operators (such as $, ~, %), as well as escaped characters,
   inside a single parameter.

   buffstr is the parameter text. colors holds one entry per character and is
   updated in place; it must contain at least buffstr.size() + 1 entries,
   because the color that follows a highlighted character is written one past
   that character. colors.at(0) is taken as the "normal" color to switch back
   to after an operator or escape. The error parameter is not used here.

   Only the position where a color starts is written; later positions are
   left untouched.
*/
static void highlight_parameter(const wcstring &buffstr, std::vector<highlight_spec_t> &colors, wcstring_list_t *error)
{
    const wchar_t * const buff = buffstr.c_str();
    enum {e_unquoted, e_single_quoted, e_double_quoted} mode = e_unquoted;
    size_t in_pos, len = buffstr.size();
    int bracket_count=0;
    int normal_status = colors.at(0);

    for (in_pos=0; in_pos<len; in_pos++)
    {
        wchar_t c = buffstr.at(in_pos);
        switch (mode)
        {
            /*
              Mode 0 means unquoted string
            */
            case e_unquoted:
            {
                if (c == L'\\')
                {
                    size_t start_pos = in_pos;
                    in_pos++;

                    /*
                      A trailing backslash has nothing to escape. This must be
                      checked explicitly: wcschr() treats the terminating null
                      as part of the searched string, so the tests below would
                      otherwise match L'\0' and index colors past its end.
                    */
                    if (buff[in_pos] == L'\0')
                    {
                        return;
                    }

                    if (wcschr(L"~%", buff[in_pos]))
                    {
                        /* \~ and \% are only escapes at the very start of the parameter */
                        if (in_pos == 1)
                        {
                            colors.at(start_pos) = highlight_spec_escape;
                            colors.at(in_pos+1) = normal_status;
                        }
                    }
                    else if (buff[in_pos]==L',')
                    {
                        /* \, is only an escape inside braces */
                        if (bracket_count)
                        {
                            colors.at(start_pos) = highlight_spec_escape;
                            colors.at(in_pos+1) = normal_status;
                        }
                    }
                    else if (wcschr(L"abefnrtv*?$(){}[]'\"<>^ \\#;|&", buff[in_pos]))
                    {
                        colors.at(start_pos)= highlight_spec_escape;
                        colors.at(in_pos+1)=normal_status;
                    }
                    else if (wcschr(L"c", buff[in_pos]))
                    {
                        /* \c is followed by one more character, so normal color resumes two positions later */
                        colors.at(start_pos) = highlight_spec_escape;
                        if (in_pos+2 < colors.size())
                            colors.at(in_pos+2)=normal_status;
                    }
                    else if (wcschr(L"uUxX01234567", buff[in_pos]))
                    {
                        /* Numeric escape: \uXXXX, \UXXXXXXXX, \xXX, \XXX or octal \ooo */
                        int i;
                        long long res=0;
                        int chars=2;
                        int base=16;
                        wchar_t max_val = ASCII_MAX;

                        switch (buff[in_pos])
                        {
                            case L'u':
                            {
                                chars=4;
                                max_val = UCS2_MAX;
                                break;
                            }
                            case L'U':
                            {
                                chars=8;
                                max_val = WCHAR_MAX;
                                break;
                            }
                            case L'x':
                            {
                                break;
                            }
                            case L'X':
                            {
                                max_val = BYTE_MAX;
                                break;
                            }
                            default:
                            {
                                /* Octal: the current character is already the first digit, so step back onto the backslash */
                                base=8;
                                chars=3;
                                in_pos--;
                                break;
                            }
                        }

                        for (i=0; i<chars; i++)
                        {
                            long d = convert_digit(buff[++in_pos],base);
                            if (d < 0)
                            {
                                /* Not a digit in this base: un-consume it and stop */
                                in_pos--;
                                break;
                            }
                            res=(res*base)|d;
                        }

                        /* A value above the maximum for this escape kind is flagged as an error */
                        if ((res <= max_val))
                        {
                            colors.at(start_pos) = highlight_spec_escape;
                            colors.at(in_pos+1) = normal_status;
                        }
                        else
                        {
                            colors.at(start_pos) = highlight_spec_error;
                            colors.at(in_pos+1) = normal_status;
                        }
                    }
                }
                else
                {
                    switch (buff[in_pos])
                    {
                        case L'~':
                        case L'%':
                        {
                            /* Only an operator as the first character of the parameter */
                            if (in_pos == 0)
                            {
                                colors.at(in_pos) = highlight_spec_operator;
                                colors.at(in_pos+1) = normal_status;
                            }
                            break;
                        }
                        case L'$':
                        {
                            /* A $ must be followed by another $ or a variable-name character */
                            wchar_t n = buff[in_pos+1];
                            colors.at(in_pos) = (n==L'$'||wcsvarchr(n))? highlight_spec_operator:highlight_spec_error;
                            colors.at(in_pos+1) = normal_status;
                            break;
                        }
                        case L'*':
                        case L'?':
                        case L'(':
                        case L')':
                        {
                            colors.at(in_pos) = highlight_spec_operator;
                            colors.at(in_pos+1) = normal_status;
                            break;
                        }
                        case L'{':
                        {
                            colors.at(in_pos) = highlight_spec_operator;
                            colors.at(in_pos+1) = normal_status;
                            bracket_count++;
                            break;
                        }
                        case L'}':
                        {
                            colors.at(in_pos) = highlight_spec_operator;
                            colors.at(in_pos+1) = normal_status;
                            bracket_count--;
                            break;
                        }
                        case L',':
                        {
                            /* A comma is only an operator inside braces */
                            if (bracket_count)
                            {
                                colors.at(in_pos) = highlight_spec_operator;
                                colors.at(in_pos+1) = normal_status;
                            }
                            break;
                        }
                        case L'\'':
                        {
                            colors.at(in_pos) = highlight_spec_quote;
                            mode = e_single_quoted;
                            break;
                        }
                        case L'\"':
                        {
                            colors.at(in_pos) = highlight_spec_quote;
                            mode = e_double_quoted;
                            break;
                        }
                    }
                }
                break;
            }

            /*
              Mode 1 means single quoted string, i.e 'foo'
            */
            case e_single_quoted:
            {
                if (c == L'\\')
                {
                    size_t start_pos = in_pos;
                    switch (buff[++in_pos])
                    {
                        case '\\':
                        case L'\'':
                        {
                            colors.at(start_pos) = highlight_spec_escape;
                            colors.at(in_pos+1) = highlight_spec_quote;
                            break;
                        }
                        case 0:
                        {
                            /* Trailing backslash: end of string reached */
                            return;
                        }
                    }
                }
                /* c is the character at the start of this iteration, so an escaped quote does not end the string */
                if (c == L'\'')
                {
                    mode = e_unquoted;
                    colors.at(in_pos+1) = normal_status;
                }
                break;
            }

            /*
              Mode 2 means double quoted string, i.e. "foo"
            */
            case e_double_quoted:
            {
                switch (c)
                {
                    case '"':
                    {
                        mode = e_unquoted;
                        colors.at(in_pos+1) = normal_status;
                        break;
                    }
                    case '\\':
                    {
                        size_t start_pos = in_pos;
                        switch (buff[++in_pos])
                        {
                            case L'\0':
                            {
                                /* Trailing backslash: end of string reached */
                                return;
                            }
                            case '\\':
                            case L'$':
                            case '"':
                            {
                                colors.at(start_pos) = highlight_spec_escape;
                                colors.at(in_pos+1) = highlight_spec_quote;
                                break;
                            }
                        }
                        break;
                    }
                    case '$':
                    {
                        /* Variables are highlighted inside double quotes; the quote color resumes afterwards */
                        wchar_t n = buff[in_pos+1];
                        colors.at(in_pos) = (n==L'$'||wcsvarchr(n))? highlight_spec_operator:highlight_spec_error;
                        colors.at(in_pos+1) = highlight_spec_quote;
                        break;
                    }
                }
                break;
            }
        }
    }
}
static bool has_expand_reserved(const wcstring &str)
{
bool result = false;
@@ -875,524 +585,6 @@ bool autosuggest_validate_from_history(const history_item_t &item, file_detectio
return suggestionOK;
}
/**
Tokenize buff and record a highlight color at the starting offset of each token.
Every entry of color is first reset to highlight_spec_invalid; afterwards only the
offsets reported by tok_get_pos() are written, except that string parameters also
receive per-character colors from highlight_parameter.

buff: the command line to highlight; nothing is done if it is empty.
color: output vector of colors, indexed by character offset into buff.
pos: not read by this function.
error: optional list receiving error descriptions; it is NULL-checked wherever this function appends to it directly.
working_directory: directory used for the cd-path checks.
vars: environment snapshot used for function and command lookups.
*/
// This function does I/O
static void tokenize(const wchar_t * const buff, std::vector<highlight_spec_t> &color, const size_t pos, wcstring_list_t *error, const wcstring &working_directory, const env_vars_snapshot_t &vars)
{
ASSERT_IS_BACKGROUND_THREAD();
wcstring cmd;
/* Non-zero once the command of the current statement has been seen; reset at pipes, '&' and statement ends */
int had_cmd=0;
wcstring last_cmd;
/* Cleared after a literal "--", so later dash-prefixed parameters are not validated as options */
int accept_switches = 1;
/* Which kinds of command are allowed in command position; narrowed by 'builtin', 'command' and 'exec' */
int use_function = 1;
int use_command = 1;
int use_builtin = 1;
/* NOTE(review): CHECK presumably returns early when buff is NULL - confirm against the macro definition */
CHECK(buff,);
if (buff[0] == L'\0')
return;
std::fill(color.begin(), color.end(), (highlight_spec_t)highlight_spec_invalid);
tokenizer_t tok(buff, TOK_SHOW_COMMENTS | TOK_SQUASH_ERRORS);
for (; tok_has_next(&tok); tok_next(&tok))
{
int last_type = tok_last_type(&tok);
switch (last_type)
{
case TOK_STRING:
{
if (had_cmd)
{
/*Parameter */
const wchar_t *param = tok_last(&tok);
if (param[0] == L'-')
{
if (wcscmp(param, L"--") == 0)
{
accept_switches = 0;
color.at(tok_get_pos(&tok)) = highlight_spec_param;
}
else if (accept_switches)
{
if (complete_is_valid_option(last_cmd, param, error, false /* no autoload */))
color.at(tok_get_pos(&tok)) = highlight_spec_param;
else
color.at(tok_get_pos(&tok)) = highlight_spec_error;
}
else
{
color.at(tok_get_pos(&tok)) = highlight_spec_param;
}
}
else
{
color.at(tok_get_pos(&tok)) = highlight_spec_param;
}
/* For cd, flag the argument as an error unless it is a help switch or a potential cd path */
if (cmd == L"cd")
{
wcstring dir = tok_last(&tok);
if (expand_one(dir, EXPAND_SKIP_CMDSUBST))
{
int is_help = string_prefixes_string(dir, L"--help") || string_prefixes_string(dir, L"-h");
if (!is_help && ! is_potential_cd_path(dir, working_directory, PATH_EXPAND_TILDE, NULL))
{
color.at(tok_get_pos(&tok)) = highlight_spec_error;
}
}
}
/* Highlight the parameter. highlight_parameter wants to write one more color than we have characters (hysterical raisins) so allocate one more in the vector. But don't copy it back. */
const wcstring param_str = param;
size_t tok_pos = tok_get_pos(&tok);
std::vector<highlight_spec_t>::const_iterator where = color.begin() + tok_pos;
std::vector<highlight_spec_t> subcolors(where, where + param_str.size());
subcolors.push_back(highlight_spec_invalid);
highlight_parameter(param_str, subcolors, error);
/* Copy the subcolors back into our colors array */
std::copy(subcolors.begin(), subcolors.begin() + param_str.size(), color.begin() + tok_pos);
}
else
{
/*
Command. First check that the command actually exists.
*/
cmd = tok_last(&tok);
bool expanded = expand_one(cmd, EXPAND_SKIP_CMDSUBST | EXPAND_SKIP_VARIABLES | EXPAND_SKIP_JOBS);
if (! expanded || has_expand_reserved(cmd))
{
color.at(tok_get_pos(&tok)) = highlight_spec_error;
}
else
{
bool is_cmd = false;
int is_subcommand = 0;
/* Tokenizer position to rewind to after the one-token look-ahead below */
int mark = tok_get_pos(&tok);
color.at(tok_get_pos(&tok)) = use_builtin ? highlight_spec_command : highlight_spec_error;
if (parser_keywords_is_subcommand(cmd))
{
int sw;
if (cmd == L"builtin")
{
use_function = 0;
use_command = 0;
use_builtin = 1;
}
else if (cmd == L"command" || cmd == L"exec")
{
use_command = 1;
use_function = 0;
use_builtin = 0;
}
/* Peek at the next token to see whether the keyword is followed by a switch */
tok_next(&tok);
sw = parser_keywords_is_switch(tok_last(&tok));
if (!parser_keywords_is_block(cmd) &&
sw == ARG_SWITCH)
{
/*
The 'builtin' and 'command' builtins
are normally followed by another
command, but if they are invoked
with a switch, they aren't.
*/
use_command = 1;
use_function = 1;
/* use_builtin is only ever tested for being non-zero in this function, so 2 acts like 1 here */
use_builtin = 2;
}
else
{
if (sw == ARG_SKIP)
{
/* The skipped token is colored as a parameter and becomes the new rewind point */
color.at(tok_get_pos(&tok)) = highlight_spec_param;
mark = tok_get_pos(&tok);
}
is_subcommand = 1;
}
/* Undo the look-ahead (or stay on the skipped token) so the main loop continues from mark */
tok_set_pos(&tok, mark);
}
if (!is_subcommand)
{
/*
OK, this is a command, it has been
successfully expanded and everything
looks ok. Lets check if the command
exists.
*/
/*
First check if it is a builtin or
function, since we don't have to stat
any files for that
*/
if (! is_cmd && use_builtin)
is_cmd = builtin_exists(cmd);
if (! is_cmd && use_function)
is_cmd = function_exists_no_autoload(cmd, vars);
if (! is_cmd)
is_cmd = expand_abbreviation(cmd, NULL);
/*
Moving on to expensive tests
*/
/*
Check if this is a regular command
*/
if (! is_cmd && use_command)
{
is_cmd = path_get_path(cmd, NULL, vars);
}
/* Maybe it is a path for a implicit cd command. */
if (! is_cmd)
{
if (use_builtin || (use_function && function_exists_no_autoload(L"cd", vars)))
is_cmd = path_can_be_implicit_cd(cmd, NULL, working_directory.c_str(), vars);
}
if (is_cmd)
{
color.at(tok_get_pos(&tok)) = highlight_spec_command;
}
else
{
if (error)
{
error->push_back(format_string(L"Unknown command \'%ls\'", cmd.c_str()));
}
color.at(tok_get_pos(&tok)) = (highlight_spec_error);
}
had_cmd = 1;
}
if (had_cmd)
{
/* Remember the command token; it is what option validation of later parameters is checked against */
last_cmd = tok_last(&tok);
}
}
}
break;
}
case TOK_REDIRECT_NOCLOB:
case TOK_REDIRECT_OUT:
case TOK_REDIRECT_IN:
case TOK_REDIRECT_APPEND:
case TOK_REDIRECT_FD:
{
if (!had_cmd)
{
color.at(tok_get_pos(&tok)) = highlight_spec_error;
if (error)
error->push_back(L"Redirection without a command");
break;
}
wcstring target_str;
/* Stays NULL unless the redirection target is a string token that expands successfully */
const wchar_t *target=NULL;
color.at(tok_get_pos(&tok)) = highlight_spec_redirection;
/* Advance to the redirection target; the checks below color that token, not the redirection operator */
tok_next(&tok);
/*
Check that we are redirecting into a file
*/
switch (tok_last_type(&tok))
{
case TOK_STRING:
{
target_str = tok_last(&tok);
if (expand_one(target_str, EXPAND_SKIP_CMDSUBST))
{
target = target_str.c_str();
}
/*
Redirect filename may contain a cmdsubst.
If so, it will be ignored/not flagged.
*/
}
break;
default:
{
/* This local pos shadows the function parameter; the bounds check guards the color write */
size_t pos = tok_get_pos(&tok);
if (pos < color.size())
{
color.at(pos) = highlight_spec_error;
}
if (error)
error->push_back(L"Invalid redirection");
}
}
if (target != 0)
{
wcstring dir = target;
size_t slash_idx = dir.find_last_of(L'/');
struct stat buff;
/*
If file is in directory other than '.', check
that the directory exists.
*/
if (slash_idx != wcstring::npos)
{
dir.resize(slash_idx);
if (wstat(dir, &buff) == -1)
{
color.at(tok_get_pos(&tok)) = highlight_spec_error;
if (error)
error->push_back(format_string(L"Directory \'%ls\' does not exist", dir.c_str()));
}
}
/*
If the file is read from or appended to, check
if it exists.
*/
if (last_type == TOK_REDIRECT_IN ||
last_type == TOK_REDIRECT_APPEND)
{
if (wstat(target, &buff) == -1)
{
color.at(tok_get_pos(&tok)) = highlight_spec_error;
if (error)
error->push_back(format_string(L"File \'%ls\' does not exist", target));
}
}
/* A no-clobber redirection is an error if the target already exists */
if (last_type == TOK_REDIRECT_NOCLOB)
{
if (wstat(target, &buff) != -1)
{
color.at(tok_get_pos(&tok)) = highlight_spec_error;
if (error)
error->push_back(format_string(L"File \'%ls\' exists", target));
}
}
}
break;
}
case TOK_PIPE:
case TOK_BACKGROUND:
{
if (had_cmd)
{
/* A new statement starts after the pipe or '&': reset all per-statement state */
color.at(tok_get_pos(&tok)) = highlight_spec_statement_terminator;
had_cmd = 0;
use_command = 1;
use_function = 1;
use_builtin = 1;
accept_switches = 1;
}
else
{
color.at(tok_get_pos(&tok)) = highlight_spec_error;
if (error)
error->push_back(L"No job to put in background");
}
break;
}
case TOK_END:
{
/* End of statement: reset all per-statement state */
color.at(tok_get_pos(&tok)) = highlight_spec_statement_terminator;
had_cmd = 0;
use_command = 1;
use_function = 1;
use_builtin = 1;
accept_switches = 1;
break;
}
case TOK_COMMENT:
{
color.at(tok_get_pos(&tok)) = highlight_spec_comment;
break;
}
case TOK_ERROR:
default:
{
/*
If the tokenizer reports an error, highlight it as such.
*/
if (error)
error->push_back(tok_last(&tok));
color.at(tok_get_pos(&tok)) = highlight_spec_error;
break;
}
}
}
}
/**
   Entry point for shell syntax highlighting. All arguments are forwarded
   unchanged to highlight_shell_new_parser().

   This used to be an `if (1) ... else ...` toggle between the new parser and
   highlight_shell_classic(); the else branch could never run, so only the
   live call is kept.
*/
void highlight_shell(const wcstring &buff, std::vector<highlight_spec_t> &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars)
{
    highlight_shell_new_parser(buff, color, pos, error, vars);
}
/**
Tokenizer-based syntax highlighter for a whole command line.

buff: the command line; color must have exactly buff.size() entries (asserted) and is overwritten.
pos: cursor position, used to pick the token that is checked for being a potential path.
error: passed through to tokenize() and to the recursive highlighting of command substitutions.
vars: environment snapshot passed through to tokenize() and highlight_shell().

Steps: tokenize and color the top level, recursively highlight each command
substitution, fill the gaps between color changes, mark the token under the cursor
if it is a potential path, apply highlight_universal_internal, and finally clear
the color of whitespace.
*/
// PCA This function does I/O, (calls is_potential_path, path_get_path, maybe others) and so ought to only run on a background thread
void highlight_shell_classic(const wcstring &buff, std::vector<highlight_spec_t> &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars)
{
ASSERT_IS_BACKGROUND_THREAD();
const size_t length = buff.size();
assert(buff.size() == color.size());
if (length == 0)
return;
std::fill(color.begin(), color.end(), (highlight_spec_t)highlight_spec_invalid);
/* Do something sucky and get the current working directory on this background thread. This should really be passed in. */
const wcstring working_directory = env_get_pwd_slash();
/* Tokenize the string */
tokenize(buff.c_str(), color, pos, error, working_directory, vars);
/* Locate and syntax highlight cmdsubsts recursively */
/* Work on a private, writable copy because each substitution is null-terminated in place below; released by free() after the loop */
wchar_t * const subbuff = wcsdup(buff.c_str());
wchar_t * subpos = subbuff;
bool done = false;
while (1)
{
wchar_t *begin, *end;
if (parse_util_locate_cmdsubst(subpos, &begin, &end, true) <= 0)
{
break;
}
/* Note: This *end = 0 writes into subbuff! */
/* If end already points at the terminator, this substitution runs to the end of the buffer and is the last one */
if (!*end)
done = true;
else
*end = 0;
//our subcolors start at color + (begin-subbuff)+1
size_t start = begin - subbuff + 1, len = wcslen(begin + 1);
std::vector<highlight_spec_t> subcolors(len, highlight_spec_invalid);
/* pos is a size_t, so -1 becomes its maximum value; presumably this means "no cursor" for the nested text - confirm against the callee */
highlight_shell(begin+1, subcolors, -1, error, vars);
// insert subcolors
std::copy(subcolors.begin(), subcolors.end(), color.begin() + start);
// highlight the end of the subcommand
assert(end >= subbuff);
if ((size_t)(end - subbuff) < length)
{
color.at(end-subbuff)=highlight_spec_operator;
}
if (done)
break;
subpos = end+1;
}
free(subbuff);
/*
The highlighting code only changes the first element when the
color changes. This fills in the rest.
*/
int last_val=0;
for (size_t i=0; i < buff.size(); i++)
{
highlight_spec_t &current_val = color.at(i);
if (current_val != highlight_spec_invalid)
{
last_val = current_val;
}
else
{
current_val = last_val; //note - this writes into the vector
}
}
/*
Color potentially valid paths in a special path color if they
are the current token.
For reasons that I don't yet understand, it's required that pos be allowed to be length (e.g. when backspacing).
*/
if (pos <= length)
{
const wchar_t *cbuff = buff.c_str();
const wchar_t *tok_begin, *tok_end;
parse_util_token_extent(cbuff, pos, &tok_begin, &tok_end, 0, 0);
if (tok_begin && tok_end)
{
wcstring token(tok_begin, tok_end-tok_begin);
if (unescape_string_in_place(&token, UNESCAPE_SPECIAL))
{
/* Big hack: is_potential_path expects a tilde, but unescape_string gives us HOME_DIRECTORY. Put it back. */
if (! token.empty() && token.at(0) == HOME_DIRECTORY)
token.at(0) = L'~';
const wcstring_list_t working_directory_list(1, working_directory);
if (is_potential_path(token, working_directory_list, PATH_EXPAND_TILDE))
{
/* Add the valid-path modifier to every character of the token under the cursor */
for (ptrdiff_t i=tok_begin-cbuff; i < (tok_end-cbuff); i++)
{
// Don't color highlight_spec_error because it looks dorky. For example, trying to cd into a non-directory would show an underline and also red.
if (highlight_get_primary(color.at(i)) != highlight_spec_error)
{
color.at(i) |= highlight_modifier_valid_path;
}
}
}
}
}
}
highlight_universal_internal(buff, color, pos);
/*
Spaces should not be highlighted at all, since it makes cursor look funky in some terminals
*/
for (size_t i=0; i < buff.size(); i++)
{
if (iswspace(buff.at(i)))
{
color.at(i)=0;
}
}
}
/* Highlights the variable starting with 'in', setting colors within the 'colors' array. Returns the number of characters consumed. */
static size_t color_variable(const wchar_t *in, size_t in_len, std::vector<highlight_spec_t>::iterator colors)
{
@@ -2227,7 +1419,7 @@ const highlighter_t::color_array_t & highlighter_t::highlight()
return color_array;
}
void highlight_shell_new_parser(const wcstring &buff, std::vector<highlight_spec_t> &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars)
void highlight_shell(const wcstring &buff, std::vector<highlight_spec_t> &color, size_t pos, wcstring_list_t *error, const env_vars_snapshot_t &vars)
{
/* Do something sucky and get the current working directory on this background thread. This should really be passed in. */
const wcstring working_directory = env_get_pwd_slash();