mirror of
https://github.com/fish-shell/fish-shell.git
synced 2026-06-01 21:21:15 -03:00
Initial revision
darcs-hash:20050920132639-ac50b-fa3b476891e1f5f67207cf4cc7bf623834cc5edc.gz
This commit is contained in:
146
tokenizer.h
Normal file
146
tokenizer.h
Normal file
@@ -0,0 +1,146 @@
|
||||
/** \file tokenizer.h
|
||||
|
||||
A specialized tokenizer for tokenizing the fish language. In the
|
||||
future, the tokenizer should be extended to support marks,
|
||||
tokenizing multiple strings and disposing of unused string
|
||||
segments.
|
||||
*/
|
||||
|
||||
/**
|
||||
Token types
|
||||
*/
|
||||
enum token_type
|
||||
{
|
||||
TOK_NONE, /**< Tokenizer not yet constructed */
|
||||
TOK_ERROR, /**< Error reading token */
|
||||
TOK_INVALID,/**< Invalid token */
|
||||
TOK_STRING,/**< String token */
|
||||
TOK_PIPE,/**< Pipe token */
|
||||
TOK_END,/**< End token */
|
||||
TOK_REDIRECT_OUT, /**< redirection token */
|
||||
TOK_REDIRECT_APPEND,/**< redirection append token */
|
||||
TOK_REDIRECT_IN,/**< input redirection token */
|
||||
TOK_REDIRECT_FD,/**< redirection to new fd token */
|
||||
TOK_BACKGROUND,/**< send job to bg token */
|
||||
TOK_COMMENT/**< comment token */
|
||||
}
|
||||
;
|
||||
|
||||
/**
|
||||
Flag telling the tokenizer to accept incomplete parameters,
|
||||
i.e. parameters with mismatching paranthesis, etc. This is useful
|
||||
for tab-completion.
|
||||
*/
|
||||
#define TOK_ACCEPT_UNFINISHED 1
|
||||
|
||||
/**
|
||||
Flag telling the tokenizer not to remove comments. Useful for
|
||||
syntax highlighting.
|
||||
*/
|
||||
#define TOK_SHOW_COMMENTS 2
|
||||
|
||||
|
||||
/**
|
||||
The tokenizer struct.
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
/** A pointer into the original string, showing where the next token begins */
|
||||
wchar_t *buff;
|
||||
/** A copy of the original string */
|
||||
wchar_t *orig_buff;
|
||||
/** A pointer to the last token*/
|
||||
wchar_t *last;
|
||||
|
||||
/** Type of last token*/
|
||||
int last_type;
|
||||
/** Length of last token*/
|
||||
int last_len;
|
||||
/** Offset of last token*/
|
||||
int last_pos;
|
||||
/** Whether there are more tokens*/
|
||||
int has_next;
|
||||
/** Whether incomplete tokens are accepted*/
|
||||
int accept_unfinished;
|
||||
/** Whether commants should be returned*/
|
||||
int show_comments;
|
||||
/** Type of last quote, can be either ' or ".*/
|
||||
wchar_t last_quote;
|
||||
}
|
||||
tokenizer;
|
||||
|
||||
/**
|
||||
Initialize the tokenizer. b is the string that is to be
|
||||
tokenized. It is not copied, and should not be freed by the caller
|
||||
until after the tokenizer is destroyed.
|
||||
|
||||
\param tok The tokenizer to initialize
|
||||
\param b The string to tokenize
|
||||
\param flags Flags to the tokenizer. Setting TOK_ACCEPT_UNFINISHED will cause the tokenizer
|
||||
to accept incomplete tokens, such as a subshell without a closing
|
||||
parenthesis, as a valid token. Setting TOK_SHOW_COMMENTS will return comments as tokens
|
||||
|
||||
*/
|
||||
void tok_init( tokenizer *tok, const wchar_t *b, int flags );
|
||||
|
||||
/**
|
||||
Jump to the next token.
|
||||
*/
|
||||
void tok_next( tokenizer *tok );
|
||||
|
||||
/**
|
||||
Returns the type of the last token. Must be one of the values in the token_type enum.
|
||||
*/
|
||||
int tok_last_type( tokenizer *tok );
|
||||
|
||||
/**
|
||||
Returns the last token string. The string should not be freed by the caller.
|
||||
*/
|
||||
wchar_t *tok_last( tokenizer *tok );
|
||||
|
||||
/**
|
||||
Returns the type of quote from the last TOK_QSTRING
|
||||
*/
|
||||
wchar_t tok_last_quote( tokenizer *tok );
|
||||
|
||||
/**
|
||||
Returns true as long as there are more tokens left
|
||||
*/
|
||||
int tok_has_next( tokenizer *tok );
|
||||
|
||||
/**
|
||||
Returns the position of the beginning of the current token in the original string
|
||||
*/
|
||||
int tok_get_pos( tokenizer *tok );
|
||||
|
||||
/**
|
||||
Destroy the tokenizer and free asociated memory
|
||||
*/
|
||||
void tok_destroy( tokenizer *tok );
|
||||
|
||||
|
||||
/**
|
||||
Returns the original string to tokenizer
|
||||
*/
|
||||
wchar_t *tok_string( tokenizer *tok );
|
||||
|
||||
|
||||
/**
|
||||
Returns only the first token from the specified string. This is a
|
||||
convenience function, used to retrieve the first token of a
|
||||
string. This can be useful for error messages, etc.
|
||||
|
||||
The string should be freed. After use.
|
||||
*/
|
||||
wchar_t *tok_first( const wchar_t *str );
|
||||
|
||||
/**
|
||||
Move tokenizer position
|
||||
*/
|
||||
void tok_set_pos( tokenizer *tok, int pos );
|
||||
|
||||
/**
|
||||
Returns a string description of the specified token type
|
||||
*/
|
||||
const wchar_t *tok_get_desc( int type );
|
||||
|
||||
Reference in New Issue
Block a user