From 10ec6292d997ac18803df92469d2ab4ee03166e7 Mon Sep 17 00:00:00 2001 From: Charles Cabergs Date: Sun, 13 Sep 2020 17:49:48 +0200 Subject: Refactoring lexer to understand it --- src/lexer/lexer.c | 181 ++++++++++++++++++------------------------------------ 1 file changed, 61 insertions(+), 120 deletions(-) (limited to 'src/lexer/lexer.c') diff --git a/src/lexer/lexer.c b/src/lexer/lexer.c index 9d4b9bd..907dc70 100644 --- a/src/lexer/lexer.c +++ b/src/lexer/lexer.c @@ -6,179 +6,101 @@ /* By: nahaddac +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2020/07/16 08:18:25 by nahaddac #+# #+# */ -/* Updated: 2020/09/13 10:54:43 by nahaddac ### ########.fr */ +/* Updated: 2020/09/13 17:45:30 by charles ### ########.fr */ /* */ /* ************************************************************************** */ #include "lexer.h" +// len until meaningful character for non quoted str int len_until_sep(char *input) { int i; i = -1; - while(input[++i]) + while (input[++i]) { if (input[i] == '\\') { i += 2; - if (input[i] == ' ' || input[i] == '\t') - { - while(ft_isblank(input[++i])) - ; - return i; - } - else if (input[i] != lexer_sep(input[i]) || input[i] != 39 || input[i] != '"') + if (ft_isblank(input[i])) + return (i + 1 + lexer_space(&input[i + 1])); + else if (input[i] != '\'' || input[i] != '"') i += len_until_sep(&input[i]); - return i; + return (i); } if (lexer_sep(input[i])) - return(i); + return (i); if (input[i] == '\'' || input[i] == '"') - return(i); - if (ft_isblank(input[i])) - { - while(ft_isblank(input[++i])) - ; return (i); - } + if (ft_isblank(input[i])) + return (i + 1 + lexer_space(&input[i + 1])); } - return(i); + return (i); } -int check_input(char *input) +// token content length +int tok_len(char *input) { - int i; - int op; + int i; i = 0; - op = 1; if (input[i] == '\\' && lexer_sep(input[i + 1])) { i += 2; return (i + lexer_space(&input[i])); } if (input[i] == '(' || input[i] == ')') + return (i + 1); + if (lexer_sep(input[i])) // fucked on & alone { - i +=1; - if(ft_isblank(input[i])) - while(ft_isblank(input[i++]) != 1) - ; - return (i); - } - if (lexer_sep(input[i])) - { - if (input[i] == ';') - return (i + lexer_space(&input[i + 1]) + 1); - while(input[i] == input[i + 1] && op < 2) - { + if (input[i] == input[i + 1]) i++; - op++; - } - i += lexer_space(&input[i + 1]); - return (i + 1); + return (i + 1 + lexer_space(&input[i + 1])); } - if (input[i] == 39 || input[i] == '"') - return(lexer_check_between_quote(input, i)); + if (input[i] == '\'' || input[i] == '"') + return (quote_len(input, i)); if (ft_isblank(input[i])) - { - while(ft_isblank(input[++i])) - ; - return (i); - } - i = len_until_sep(&input[i]); - return i; + return (i + 1 + lexer_space(&input[i + 1])); + return (len_until_sep(&input[i])); } - -int check_input_out(char *input) -{ - int i; - int j; - - i = 0; - while(input[i] != '\0') - { - j = 0; - j += len_until_sep(&input[i]); - if (j != 0) - return(j); - i += j; - j = check_input(&input[i]); - return(j); - } - return(0); -} - -enum e_tok token_check_stick(t_tok_lst *tok) -{ - int i; - - i = ft_strlen(tok->content); - if (i > 0) - if (ft_isblank(tok->content[i - 1])) - return (tok->tag); - return (tok->tag | TAG_STICK); -} - -enum e_tok token_str_or_quote(t_tok_lst *tok) -{ - int i; - - i = 0; - while (tok->content[i] != '\0') - { - if (tok->content[i] == '\'') - { - tok->tag = TAG_STR_SINGLE; - return (token_check_stick(tok)); - } - if (tok->content[i] == '"') - { - tok->tag = TAG_STR_DOUBLE; - return (token_check_stick(tok)); - } - else - { - tok->tag = TAG_STR; - return (token_check_stick(tok)); - } - i++; - } - return(0); -} - -void push_token_enum(t_tok_lst *tok) -{ - enum e_tok tag; - - tag = ret_token(tok->content, 0); - if (tag == 0) - tok->tag = token_str_or_quote(tok); - else - tok->tag = tag; -} +/* +** \brief interate over input +** get the number of character for the current token +** create a token from a substring in input +** assign a tag to the token +*/ t_tok_lst *create_token_list(char *input, t_tok_lst **lst) { t_tok_lst *tok; size_t i; size_t j; + size_t len; + len = ft_strlen(input); i = 0; - while (i < ft_strlen(input)) + while (i < len) { - j = 0; - j += check_input(&input[i]); + j = tok_len(&input[i]); tok = tok_lst_new_until(0, input + i, j); - push_token_enum(tok); - if (ft_isblank(tok->content[0]) != 1) + tok->tag = tok_assign_tag(tok->content); + if (tok->tag == 0) + tok->tag = tok_assign_str(tok); + if (!ft_isblank(tok->content[0])) // ? tok_lst_push_back(lst, tok); i += j; } return (*lst); } +/* +** \brief Create a token list from a string +** \param input Input string +** \return The created tokens or NULL on error +*/ + t_tok_lst *lexer(char *input) { t_tok_lst *lst; @@ -190,3 +112,22 @@ t_tok_lst *lexer(char *input) lst = lexer_trim_out(lst); return (lst); } + +/* int check_input_out(char *input) */ +/* { */ +/* int i; */ +/* int j; */ +/* */ +/* i = 0; */ +/* while(input[i] != '\0') */ +/* { */ +/* j = 0; */ +/* j += len_until_sep(&input[i]); */ +/* if (j != 0) */ +/* return(j); */ +/* i += j; */ +/* j = check_input(&input[i]); */ +/* return(j); */ +/* } */ +/* return(0); */ +/* } */ -- cgit