aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Charles Cabergs <me@cacharle.xyz> 2020-09-13 17:49:48 +0200
committer Charles Cabergs <me@cacharle.xyz> 2020-09-13 17:49:48 +0200
commit 10ec6292d997ac18803df92469d2ab4ee03166e7 (patch)
tree ffeb2baf5a63f63b1bcaa24f1b91d1f81c54b982 /src
parent 9ef012a8016b81fc6063c4fc9e861a22b5bd5dac (diff)
download minishell-10ec6292d997ac18803df92469d2ab4ee03166e7.tar.gz
minishell-10ec6292d997ac18803df92469d2ab4ee03166e7.tar.bz2
minishell-10ec6292d997ac18803df92469d2ab4ee03166e7.zip
Refactoring lexer to understand it
Diffstat (limited to 'src')
-rw-r--r-- src/lexer/lexer.c 181
-rw-r--r-- src/lexer/lexer_utils.c 109
-rw-r--r-- src/lexer/utils.c 107
3 files changed, 168 insertions, 229 deletions
diff --git a/src/lexer/lexer.c b/src/lexer/lexer.c
index 9d4b9bd..907dc70 100644
--- a/src/lexer/lexer.c
+++ b/src/lexer/lexer.c
@@ -6,179 +6,101 @@
/* By: nahaddac <nahaddac@student.42.fr> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */
/* Created: 2020/07/16 08:18:25 by nahaddac #+# #+# */
-/* Updated: 2020/09/13 10:54:43 by nahaddac ### ########.fr */
+/* Updated: 2020/09/13 17:45:30 by charles ### ########.fr */
/* */
/* ************************************************************************** */
#include "lexer.h"
+// len until meaningful character for non quoted str
int len_until_sep(char *input)
{
int i;
i = -1;
- while(input[++i])
+ while (input[++i])
{
if (input[i] == '\\')
{
i += 2;
- if (input[i] == ' ' || input[i] == '\t')
- {
- while(ft_isblank(input[++i]))
- ;
- return i;
- }
- else if (input[i] != lexer_sep(input[i]) || input[i] != 39 || input[i] != '"')
+ if (ft_isblank(input[i]))
+ return (i + 1 + lexer_space(&input[i + 1]));
+ else if (input[i] != '\'' || input[i] != '"')
i += len_until_sep(&input[i]);
- return i;
+ return (i);
}
if (lexer_sep(input[i]))
- return(i);
+ return (i);
if (input[i] == '\'' || input[i] == '"')
- return(i);
- if (ft_isblank(input[i]))
- {
- while(ft_isblank(input[++i]))
- ;
return (i);
- }
+ if (ft_isblank(input[i]))
+ return (i + 1 + lexer_space(&input[i + 1]));
}
- return(i);
+ return (i);
}
-int check_input(char *input)
+// token content length
+int tok_len(char *input)
{
- int i;
- int op;
+ int i;
i = 0;
- op = 1;
if (input[i] == '\\' && lexer_sep(input[i + 1]))
{
i += 2;
return (i + lexer_space(&input[i]));
}
if (input[i] == '(' || input[i] == ')')
+ return (i + 1);
+ if (lexer_sep(input[i])) // fucked on & alone
{
- i +=1;
- if(ft_isblank(input[i]))
- while(ft_isblank(input[i++]) != 1)
- ;
- return (i);
- }
- if (lexer_sep(input[i]))
- {
- if (input[i] == ';')
- return (i + lexer_space(&input[i + 1]) + 1);
- while(input[i] == input[i + 1] && op < 2)
- {
+ if (input[i] == input[i + 1])
i++;
- op++;
- }
- i += lexer_space(&input[i + 1]);
- return (i + 1);
+ return (i + 1 + lexer_space(&input[i + 1]));
}
- if (input[i] == 39 || input[i] == '"')
- return(lexer_check_between_quote(input, i));
+ if (input[i] == '\'' || input[i] == '"')
+ return (quote_len(input, i));
if (ft_isblank(input[i]))
- {
- while(ft_isblank(input[++i]))
- ;
- return (i);
- }
- i = len_until_sep(&input[i]);
- return i;
+ return (i + 1 + lexer_space(&input[i + 1]));
+ return (len_until_sep(&input[i]));
}
-
-int check_input_out(char *input)
-{
- int i;
- int j;
-
- i = 0;
- while(input[i] != '\0')
- {
- j = 0;
- j += len_until_sep(&input[i]);
- if (j != 0)
- return(j);
- i += j;
- j = check_input(&input[i]);
- return(j);
- }
- return(0);
-}
-
-enum e_tok token_check_stick(t_tok_lst *tok)
-{
- int i;
-
- i = ft_strlen(tok->content);
- if (i > 0)
- if (ft_isblank(tok->content[i - 1]))
- return (tok->tag);
- return (tok->tag | TAG_STICK);
-}
-
-enum e_tok token_str_or_quote(t_tok_lst *tok)
-{
- int i;
-
- i = 0;
- while (tok->content[i] != '\0')
- {
- if (tok->content[i] == '\'')
- {
- tok->tag = TAG_STR_SINGLE;
- return (token_check_stick(tok));
- }
- if (tok->content[i] == '"')
- {
- tok->tag = TAG_STR_DOUBLE;
- return (token_check_stick(tok));
- }
- else
- {
- tok->tag = TAG_STR;
- return (token_check_stick(tok));
- }
- i++;
- }
- return(0);
-}
-
-void push_token_enum(t_tok_lst *tok)
-{
- enum e_tok tag;
-
- tag = ret_token(tok->content, 0);
- if (tag == 0)
- tok->tag = token_str_or_quote(tok);
- else
- tok->tag = tag;
-}
+/*
+** \brief iterate over input
+** get the number of character for the current token
+** create a token from a substring in input
+** assign a tag to the token
+*/
t_tok_lst *create_token_list(char *input, t_tok_lst **lst)
{
t_tok_lst *tok;
size_t i;
size_t j;
+ size_t len;
+ len = ft_strlen(input);
i = 0;
- while (i < ft_strlen(input))
+ while (i < len)
{
- j = 0;
- j += check_input(&input[i]);
+ j = tok_len(&input[i]);
tok = tok_lst_new_until(0, input + i, j);
- push_token_enum(tok);
- if (ft_isblank(tok->content[0]) != 1)
+ tok->tag = tok_assign_tag(tok->content);
+ if (tok->tag == 0)
+ tok->tag = tok_assign_str(tok);
+ if (!ft_isblank(tok->content[0])) // ?
tok_lst_push_back(lst, tok);
i += j;
}
return (*lst);
}
+/*
+** \brief Create a token list from a string
+** \param input Input string
+** \return The created tokens or NULL on error
+*/
+
t_tok_lst *lexer(char *input)
{
t_tok_lst *lst;
@@ -190,3 +112,22 @@ t_tok_lst *lexer(char *input)
lst = lexer_trim_out(lst);
return (lst);
}
+
+/* int check_input_out(char *input) */
+/* { */
+/* int i; */
+/* int j; */
+/* */
+/* i = 0; */
+/* while(input[i] != '\0') */
+/* { */
+/* j = 0; */
+/* j += len_until_sep(&input[i]); */
+/* if (j != 0) */
+/* return(j); */
+/* i += j; */
+/* j = check_input(&input[i]); */
+/* return(j); */
+/* } */
+/* return(0); */
+/* } */
diff --git a/src/lexer/lexer_utils.c b/src/lexer/lexer_utils.c
deleted file mode 100644
index d848f95..0000000
--- a/src/lexer/lexer_utils.c
+++ /dev/null
@@ -1,109 +0,0 @@
-/* ************************************************************************** */
-/* */
-/* ::: :::::::: */
-/* lexer_utils.c :+: :+: :+: */
-/* +:+ +:+ +:+ */
-/* By: nahaddac <nahaddac@student.42.fr> +#+ +:+ +#+ */
-/* +#+#+#+#+#+ +#+ */
-/* Created: 2020/07/16 08:18:15 by nahaddac #+# #+# */
-/* Updated: 2020/09/13 11:00:45 by nahaddac ### ########.fr */
-/* */
-/* ************************************************************************** */
-
-#include "lexer.h"
-
-// check for append tag
-enum e_tok ret_token_sep_redir_append(char *input, int i)
-{
- if (input[i + 1] == '>')
- return(TAG_REDIR_APPEND);
- return (TAG_REDIR_OUT);
-
-}
-
-// return token tag corresponding to string id
-enum e_tok ret_token(char *input, int i)
-{
- if (input[i] == ';')
- return(TAG_END);
- if (input[i] == '&' && input[i + 1] == '&')
- return(TAG_AND);
- if (input[i] == '|' && input[i + 1] == '|')
- return(TAG_OR);
- if(input[i] == '|')
- return(TAG_PIPE);
- if (input[i] == '>')
- return(ret_token_sep_redir_append(input,i));
- if (input[i] == '<')
- return(TAG_REDIR_IN);
- if (input[i] == '(')
- return(TAG_PARENT_OPEN);
- if (input[i] == ')')
- return(TAG_PARENT_CLOSE);
- return(0);
-
-}
-
-// check is char is separator
-// /!\ can be replaced by ft_strchr(";&|><()", input) == NULL
-int lexer_sep(char input)
-{
- char *sep;
- int i;
-
- i = 0;
- sep = ";&|><()";
- while(sep[i] != '\0')
- {
- if(sep[i] == input)
- return(1);
- i++;
- }
- return (0);
-}
-
-// skip spaces
-// /!\ can be replaced by strspn
-int lexer_space(char *input)
-{
- int i;
-
- i=0;
- while(ft_isblank(input[i]))
- i++;
- return(i);
-}
-
-static int lex_check_single_quote(char *input, int i)
-{
- i++;
- while(input[i] != '\0')
- {
- if(input[i] == '\\')
- i+=1;
- if(input[i] == '\'')
- break;
- ++i;
- }
- if (ft_isblank(input[i + 1]))
- while(ft_isblank(input[i + 1]))
- i++;
- return(i + 1);
-}
-
-int lexer_check_between_quote(char *input, int i)
-{
- if(input[i] == '\'')
- return(lex_check_single_quote(input, i));
- i++;
- while(input[i] != '"' && (input[i] != '\0'))
- {
- if (input[i] == '\\')
- i += 1;
- ++i;
- }
- if (ft_isblank(input[i + 1]))
- while(ft_isblank(input[i + 1]))
- i++;
- return(i + 1);
-}
diff --git a/src/lexer/utils.c b/src/lexer/utils.c
new file mode 100644
index 0000000..7df4955
--- /dev/null
+++ b/src/lexer/utils.c
@@ -0,0 +1,107 @@
+/* ************************************************************************** */
+/* */
+/* ::: :::::::: */
+/* utils.c :+: :+: :+: */
+/* +:+ +:+ +:+ */
+/* By: nahaddac <nahaddac@student.42.fr> +#+ +:+ +#+ */
+/* +#+#+#+#+#+ +#+ */
+/* Created: 2020/07/16 08:18:15 by nahaddac #+# #+# */
+/* Updated: 2020/09/13 17:23:29 by charles ### ########.fr */
+/* */
+/* ************************************************************************** */
+
+#include "lexer.h"
+
+/*
+** \brief Map the separator at the start of a token to its tag
+** \param content Token text; only its first one or two characters are read
+** \return The matching TAG_* value, or 0 when content does not start with
+**         ';', "&&", '|', '>', '<', '(' or ')'
+*/
+
+enum e_tok tok_assign_tag(char *content)
+{
+    if (content[0] == ';')
+        return (TAG_END);
+    if (ft_strncmp(content, "&&", 2) == 0)
+        return (TAG_AND);
+    if (ft_strncmp(content, "||", 2) == 0)
+        return (TAG_OR);
+    if (content[0] == '|')
+        return (TAG_PIPE);
+    // ">>" has to be tested before '>', otherwise the single-character
+    // test wins and TAG_REDIR_APPEND can never be returned
+    if (ft_strncmp(content, ">>", 2) == 0)
+        return (TAG_REDIR_APPEND);
+    if (content[0] == '>')
+        return (TAG_REDIR_OUT);
+    if (content[0] == '<')
+        return (TAG_REDIR_IN);
+    if (content[0] == '(')
+        return (TAG_PARENT_OPEN);
+    if (content[0] == ')')
+        return (TAG_PARENT_CLOSE);
+    return (0);
+}
+
+/*
+** \brief Compute a token's tag with the stick flag applied
+** \param tok Token whose content and tag are read (tok is not modified)
+** \return tok->tag unchanged when the content ends with a blank,
+**         tok->tag | TAG_STICK otherwise (including empty content)
+*/
+
+enum e_tok tok_assign_stick(t_tok_lst *tok)
+{
+    int len;
+
+    len = ft_strlen(tok->content);
+    if (len > 0 && ft_isblank(tok->content[len - 1]))
+        return (tok->tag);
+    return (tok->tag | TAG_STICK);
+}
+
+/*
+** \brief Tag a token as a plain, single-quoted or double-quoted string
+**        The decision is made on the first character of the content only.
+** \param tok Token to tag; tok->tag is overwritten unless content is empty
+** \return The new tag as returned by tok_assign_stick,
+**         or 0 (tok->tag left untouched) when the content is empty
+*/
+
+enum e_tok tok_assign_str(t_tok_lst *tok)
+{
+    char first;
+
+    first = tok->content[0];
+    if (first == '\0')
+        return (0);
+    if (first == '\'')
+        tok->tag = TAG_STR_SINGLE;
+    else if (first == '"')
+        tok->tag = TAG_STR_DOUBLE;
+    else
+        tok->tag = TAG_STR;
+    return (tok_assign_stick(tok));
+}
+
+
+/*
+** \brief Check whether a character is a separator
+**        '&' is part of the set, so a lone '&' also counts as a separator.
+** \param c Character to test
+** \return 1 if c is one of ";&|><()", 0 otherwise (including '\0')
+*/
+
+int lexer_sep(char c)
+{
+    // assuming ft_strchr behaves like strchr, searching for '\0' finds the
+    // terminator of the set itself, so end-of-string is rejected up front
+    if (c == '\0')
+        return (0);
+    return (ft_strchr(";&|><()", c) != NULL);
+}
+
+/*
+** \brief Count the blank characters at the start of a string
+** \param input String to scan
+** \return Result of ft_strspn on input with the accept set " \t"
+**         (space and tab), converted to int
+*/
+
+int lexer_space(char *input)
+{
+    int blank_count;
+
+    blank_count = ft_strspn(input, " \t");
+    return (blank_count);
+}
+
+/*
+** \brief Length of a quoted token, trailing blanks included
+** \param input String holding the token
+** \param i     Index of the opening quote character in input
+** \return Index just past the closing quote and the blanks that follow it,
+**         or the index of the terminating '\0' when the quote is not closed
+*/
+
+int quote_len(char *input, int i)
+{
+    char quote_type;
+
+    quote_type = input[i];
+    i++;
+    while (input[i] != quote_type && input[i] != '\0')
+    {
+        // backslash escapes only inside double quotes, and it must never
+        // be allowed to step over the terminating '\0'
+        if (input[i] == '\\' && quote_type == '"' && input[i + 1] != '\0')
+            i++;
+        i++;
+    }
+    // unterminated quote: stop on the '\0' instead of reading past it
+    if (input[i] == '\0')
+        return (i);
+    while (ft_isblank(input[i + 1]))
+        i++;
+    return (i + 1);
+}