diff options
| author | Charles Cabergs <me@cacharle.xyz> | 2020-09-13 17:49:48 +0200 |
|---|---|---|
| committer | Charles Cabergs <me@cacharle.xyz> | 2020-09-13 17:49:48 +0200 |
| commit | 10ec6292d997ac18803df92469d2ab4ee03166e7 (patch) | |
| tree | ffeb2baf5a63f63b1bcaa24f1b91d1f81c54b982 /src | |
| parent | 9ef012a8016b81fc6063c4fc9e861a22b5bd5dac (diff) | |
| download | minishell-10ec6292d997ac18803df92469d2ab4ee03166e7.tar.gz minishell-10ec6292d997ac18803df92469d2ab4ee03166e7.tar.bz2 minishell-10ec6292d997ac18803df92469d2ab4ee03166e7.zip | |
Refactoring lexer to understand it
Diffstat (limited to 'src')
| -rw-r--r-- | src/lexer/lexer.c | 181 | ||||
| -rw-r--r-- | src/lexer/lexer_utils.c | 109 | ||||
| -rw-r--r-- | src/lexer/utils.c | 107 |
3 files changed, 168 insertions, 229 deletions
diff --git a/src/lexer/lexer.c b/src/lexer/lexer.c index 9d4b9bd..907dc70 100644 --- a/src/lexer/lexer.c +++ b/src/lexer/lexer.c @@ -6,179 +6,101 @@ /* By: nahaddac <nahaddac@student.42.fr> +#+ +:+ +#+ */ /* +#+#+#+#+#+ +#+ */ /* Created: 2020/07/16 08:18:25 by nahaddac #+# #+# */ -/* Updated: 2020/09/13 10:54:43 by nahaddac ### ########.fr */ +/* Updated: 2020/09/13 17:45:30 by charles ### ########.fr */ /* */ /* ************************************************************************** */ #include "lexer.h" +// len until meaningful character for non quoted str int len_until_sep(char *input) { int i; i = -1; - while(input[++i]) + while (input[++i]) { if (input[i] == '\\') { i += 2; - if (input[i] == ' ' || input[i] == '\t') - { - while(ft_isblank(input[++i])) - ; - return i; - } - else if (input[i] != lexer_sep(input[i]) || input[i] != 39 || input[i] != '"') + if (ft_isblank(input[i])) + return (i + 1 + lexer_space(&input[i + 1])); + else if (input[i] != '\'' || input[i] != '"') i += len_until_sep(&input[i]); - return i; + return (i); } if (lexer_sep(input[i])) - return(i); + return (i); if (input[i] == '\'' || input[i] == '"') - return(i); - if (ft_isblank(input[i])) - { - while(ft_isblank(input[++i])) - ; return (i); - } + if (ft_isblank(input[i])) + return (i + 1 + lexer_space(&input[i + 1])); } - return(i); + return (i); } -int check_input(char *input) +// token content length +int tok_len(char *input) { - int i; - int op; + int i; i = 0; - op = 1; if (input[i] == '\\' && lexer_sep(input[i + 1])) { i += 2; return (i + lexer_space(&input[i])); } if (input[i] == '(' || input[i] == ')') + return (i + 1); + if (lexer_sep(input[i])) // fucked on & alone { - i +=1; - if(ft_isblank(input[i])) - while(ft_isblank(input[i++]) != 1) - ; - return (i); - } - if (lexer_sep(input[i])) - { - if (input[i] == ';') - return (i + lexer_space(&input[i + 1]) + 1); - while(input[i] == input[i + 1] && op < 2) - { + if (input[i] == input[i + 1]) i++; - op++; - } - i += lexer_space(&input[i + 1]); - return (i + 1); + return (i + 1 + lexer_space(&input[i + 1])); } - if (input[i] == 39 || input[i] == '"') - return(lexer_check_between_quote(input, i)); + if (input[i] == '\'' || input[i] == '"') + return (quote_len(input, i)); if (ft_isblank(input[i])) - { - while(ft_isblank(input[++i])) - ; - return (i); - } - i = len_until_sep(&input[i]); - return i; + return (i + 1 + lexer_space(&input[i + 1])); + return (len_until_sep(&input[i])); } - -int check_input_out(char *input) -{ - int i; - int j; - - i = 0; - while(input[i] != '\0') - { - j = 0; - j += len_until_sep(&input[i]); - if (j != 0) - return(j); - i += j; - j = check_input(&input[i]); - return(j); - } - return(0); -} - -enum e_tok token_check_stick(t_tok_lst *tok) -{ - int i; - - i = ft_strlen(tok->content); - if (i > 0) - if (ft_isblank(tok->content[i - 1])) - return (tok->tag); - return (tok->tag | TAG_STICK); -} - -enum e_tok token_str_or_quote(t_tok_lst *tok) -{ - int i; - - i = 0; - while (tok->content[i] != '\0') - { - if (tok->content[i] == '\'') - { - tok->tag = TAG_STR_SINGLE; - return (token_check_stick(tok)); - } - if (tok->content[i] == '"') - { - tok->tag = TAG_STR_DOUBLE; - return (token_check_stick(tok)); - } - else - { - tok->tag = TAG_STR; - return (token_check_stick(tok)); - } - i++; - } - return(0); -} - -void push_token_enum(t_tok_lst *tok) -{ - enum e_tok tag; - - tag = ret_token(tok->content, 0); - if (tag == 0) - tok->tag = token_str_or_quote(tok); - else - tok->tag = tag; -} +/* +** \brief interate over input +** get the number of character for the current token +** create a token from a substring in input +** assign a tag to the token +*/ t_tok_lst *create_token_list(char *input, t_tok_lst **lst) { t_tok_lst *tok; size_t i; size_t j; + size_t len; + len = ft_strlen(input); i = 0; - while (i < ft_strlen(input)) + while (i < len) { - j = 0; - j += check_input(&input[i]); + j = tok_len(&input[i]); tok = tok_lst_new_until(0, input + i, j); - push_token_enum(tok); - if (ft_isblank(tok->content[0]) != 1) + tok->tag = tok_assign_tag(tok->content); + if (tok->tag == 0) + tok->tag = tok_assign_str(tok); + if (!ft_isblank(tok->content[0])) // ? tok_lst_push_back(lst, tok); i += j; } return (*lst); } +/* +** \brief Create a token list from a string +** \param input Input string +** \return The created tokens or NULL on error +*/ + t_tok_lst *lexer(char *input) { t_tok_lst *lst; @@ -190,3 +112,22 @@ t_tok_lst *lexer(char *input) lst = lexer_trim_out(lst); return (lst); } + +/* int check_input_out(char *input) */ +/* { */ +/* int i; */ +/* int j; */ +/* */ +/* i = 0; */ +/* while(input[i] != '\0') */ +/* { */ +/* j = 0; */ +/* j += len_until_sep(&input[i]); */ +/* if (j != 0) */ +/* return(j); */ +/* i += j; */ +/* j = check_input(&input[i]); */ +/* return(j); */ +/* } */ +/* return(0); */ +/* } */ diff --git a/src/lexer/lexer_utils.c b/src/lexer/lexer_utils.c deleted file mode 100644 index d848f95..0000000 --- a/src/lexer/lexer_utils.c +++ /dev/null @@ -1,109 +0,0 @@ -/* ************************************************************************** */ -/* */ -/* ::: :::::::: */ -/* lexer_utils.c :+: :+: :+: */ -/* +:+ +:+ +:+ */ -/* By: nahaddac <nahaddac@student.42.fr> +#+ +:+ +#+ */ -/* +#+#+#+#+#+ +#+ */ -/* Created: 2020/07/16 08:18:15 by nahaddac #+# #+# */ -/* Updated: 2020/09/13 11:00:45 by nahaddac ### ########.fr */ -/* */ -/* ************************************************************************** */ - -#include "lexer.h" - -// check for append tag -enum e_tok ret_token_sep_redir_append(char *input, int i) -{ - if (input[i + 1] == '>') - return(TAG_REDIR_APPEND); - return (TAG_REDIR_OUT); - -} - -// return token tag corresponding to string id -enum e_tok ret_token(char *input, int i) -{ - if (input[i] == ';') - return(TAG_END); - if (input[i] == '&' && input[i + 1] == '&') - return(TAG_AND); - if (input[i] == '|' && input[i + 1] == '|') - return(TAG_OR); - if(input[i] == '|') - return(TAG_PIPE); - if (input[i] == '>') - return(ret_token_sep_redir_append(input,i)); - if (input[i] == '<') - return(TAG_REDIR_IN); - if (input[i] == '(') - return(TAG_PARENT_OPEN); - if (input[i] == ')') - return(TAG_PARENT_CLOSE); - return(0); - -} - -// check is char is separator -// /!\ can be replaced by ft_strchr(";&|><()", input) == NULL -int lexer_sep(char input) -{ - char *sep; - int i; - - i = 0; - sep = ";&|><()"; - while(sep[i] != '\0') - { - if(sep[i] == input) - return(1); - i++; - } - return (0); -} - -// skip spaces -// /!\ can be replaced by strspn -int lexer_space(char *input) -{ - int i; - - i=0; - while(ft_isblank(input[i])) - i++; - return(i); -} - -static int lex_check_single_quote(char *input, int i) -{ - i++; - while(input[i] != '\0') - { - if(input[i] == '\\') - i+=1; - if(input[i] == '\'') - break; - ++i; - } - if (ft_isblank(input[i + 1])) - while(ft_isblank(input[i + 1])) - i++; - return(i + 1); -} - -int lexer_check_between_quote(char *input, int i) -{ - if(input[i] == '\'') - return(lex_check_single_quote(input, i)); - i++; - while(input[i] != '"' && (input[i] != '\0')) - { - if (input[i] == '\\') - i += 1; - ++i; - } - if (ft_isblank(input[i + 1])) - while(ft_isblank(input[i + 1])) - i++; - return(i + 1); -} diff --git a/src/lexer/utils.c b/src/lexer/utils.c new file mode 100644 index 0000000..7df4955 --- /dev/null +++ b/src/lexer/utils.c @@ -0,0 +1,107 @@ +/* ************************************************************************** */ +/* */ +/* ::: :::::::: */ +/* utils.c :+: :+: :+: */ +/* +:+ +:+ +:+ */ +/* By: nahaddac <nahaddac@student.42.fr> +#+ +:+ +#+ */ +/* +#+#+#+#+#+ +#+ */ +/* Created: 2020/07/16 08:18:15 by nahaddac #+# #+# */ +/* Updated: 2020/09/13 17:23:29 by charles ### ########.fr */ +/* */ +/* ************************************************************************** */ + +#include "lexer.h" + +// return token tag corresponding to string id +enum e_tok tok_assign_tag(char *content) +{ + if (content[0] == ';') + return (TAG_END); + if (ft_strncmp(content, "&&", 2) == 0) + return (TAG_AND); + if (ft_strncmp(content, "||", 2) == 0) + return (TAG_OR); + if(content[0] == '|') + return (TAG_PIPE); + if (content[0] == '>') + return (TAG_REDIR_OUT); + if (content[0] == '<') + return (TAG_REDIR_IN); + if (ft_strncmp(content, ">>", 2) == 0) + return (TAG_REDIR_APPEND); + if (content[0] == '(') + return (TAG_PARENT_OPEN); + if (content[0] == ')') + return (TAG_PARENT_CLOSE); + return (0); +} + +enum e_tok tok_assign_stick(t_tok_lst *tok) +{ + int i; + + i = ft_strlen(tok->content); + if (i > 0) + if (ft_isblank(tok->content[i - 1])) + return (tok->tag); + return (tok->tag | TAG_STICK); +} + +enum e_tok tok_assign_str(t_tok_lst *tok) +{ + int i; + + // could use strchr to search ' or " + i = 0; + while (tok->content[i] != '\0') + { + if (tok->content[i] == '\'') + { + tok->tag = TAG_STR_SINGLE; + return (tok_assign_stick(tok)); + } + if (tok->content[i] == '"') + { + tok->tag = TAG_STR_DOUBLE; + return (tok_assign_stick(tok)); + } + else + { + tok->tag = TAG_STR; + return (tok_assign_stick(tok)); + } + i++; + } + return (0); +} + + +// check is char is separator +// & alone could be considered a separator +int lexer_sep(char c) +{ + return (ft_strchr(";&|><()", c) != NULL); +} + +// number of starting space character +int lexer_space(char *input) +{ + return (ft_strspn(input, " \t")); +} + +int quote_len(char *input, int i) +{ + char quote_type; + + quote_type = input[i]; + i++; + while (input[i] != quote_type && input[i] != '\0') + { + if (input[i] == '\\') + i++; + i++; + } + while (ft_isblank(input[i + 1])) + i++; + return (i + 1); +} |
