Refactoring lexer to understand it

author: Charles Cabergs <me@cacharle.xyz> 2020-09-13 17:49:48 +0200
committer: Charles Cabergs <me@cacharle.xyz> 2020-09-13 17:49:48 +0200
commit: 10ec6292d997ac18803df92469d2ab4ee03166e7 (patch)
tree: ffeb2baf5a63f63b1bcaa24f1b91d1f81c54b982 /src
parent: 9ef012a8016b81fc6063c4fc9e861a22b5bd5dac (diff)
download: minishell-10ec6292d997ac18803df92469d2ab4ee03166e7.tar.gz
minishell-10ec6292d997ac18803df92469d2ab4ee03166e7.tar.bz2
minishell-10ec6292d997ac18803df92469d2ab4ee03166e7.zip
3 files changed, 168 insertions, 229 deletions
diff --git a/src/lexer/lexer.c b/src/lexer/lexer.c
index 9d4b9bd..907dc70 100644
--- a/src/lexer/lexer.c
+++ b/src/lexer/lexer.c
@@ -6,179 +6,101 @@
 /*   By: nahaddac <nahaddac@student.42.fr>          +#+  +:+       +#+        */
 /*                                                +#+#+#+#+#+   +#+           */
 /*   Created: 2020/07/16 08:18:25 by nahaddac          #+#    #+#             */
-/*   Updated: 2020/09/13 10:54:43 by nahaddac         ###   ########.fr       */
+/*   Updated: 2020/09/13 17:45:30 by charles          ###   ########.fr       */
 /*                                                                            */
 /* ************************************************************************** */
 
 #include "lexer.h"
 
+// length of an unquoted word: stops at a separator or a quote, and includes the blanks that follow the word
 int 			len_until_sep(char *input)
 {
 	int i;
 
 	i = -1;
-	while(input[++i])
+	while (input[++i])
 	{
 		if (input[i] == '\\')
 		{
 			i += 2;
-			if (input[i] == ' ' || input[i] == '\t')
-			{
-				while(ft_isblank(input[++i]))
-					;
-				return i;
-			}
-			else if (input[i] != lexer_sep(input[i]) || input[i] != 39 || input[i] != '"')
+			if (ft_isblank(input[i]))
+				return (i + 1 + lexer_space(&input[i + 1]));
+			else if (input[i] != '\'' || input[i] != '"')
 				i += len_until_sep(&input[i]);
-			return i;
+			return (i);
 		}
 		if (lexer_sep(input[i]))
-			return(i);
+			return (i);
 		if (input[i] == '\'' || input[i] == '"')
-			return(i);
-		if (ft_isblank(input[i]))
-		{
-			while(ft_isblank(input[++i]))
-				;
 			return (i);
-		}
+		if (ft_isblank(input[i]))
+			return (i + 1 + lexer_space(&input[i + 1]));
 	}
-	return(i);
+	return (i);
 }
 
-int				check_input(char *input)
+// token content length
+int				tok_len(char *input)
 {
-	int 				i;
-	int 				op;
+	int i;
 
 	i = 0;
-	op = 1;
 	if (input[i] == '\\' && lexer_sep(input[i + 1]))
 	{
 		i += 2;
 		return (i + lexer_space(&input[i]));
 	}
 	if (input[i] == '(' || input[i] == ')')
+		return (i + 1);
+	if (lexer_sep(input[i])) // fucked on & alone
 	{
-		i +=1;
-		if(ft_isblank(input[i]))
-			while(ft_isblank(input[i++]) != 1)
-				;
-		return (i);
-	}
-	if (lexer_sep(input[i]))
-	{
-		if (input[i] == ';')
-			return (i + lexer_space(&input[i + 1]) + 1);
-		while(input[i] == input[i + 1] && op < 2)
-		{
+		if (input[i] == input[i + 1])
 			i++;
-			op++;
-		}
-		i += lexer_space(&input[i + 1]);
-		return (i + 1);
+		return (i + 1 + lexer_space(&input[i + 1]));
 	}
-	if (input[i] == 39 || input[i] == '"')
-		return(lexer_check_between_quote(input, i));
+	if (input[i] == '\'' || input[i] == '"')
+		return (quote_len(input, i));
 	if (ft_isblank(input[i]))
-	{
-		while(ft_isblank(input[++i]))
-			;
-		 return (i);
-	}
-	i = len_until_sep(&input[i]);
-	return i;
+		return (i + 1 + lexer_space(&input[i + 1]));
+	return (len_until_sep(&input[i]));
 }
 
-
-int 					check_input_out(char *input)
-{
-	int i;
-	int j;
-
-	i = 0;
-	while(input[i] != '\0')
-	{
-		j = 0;
-		j += len_until_sep(&input[i]);
-		if (j != 0)
-			return(j);
-		i += j;
-		j = check_input(&input[i]);
-		return(j);
-	}
-	return(0);
-}
-
-enum e_tok token_check_stick(t_tok_lst *tok)
-{
-	int i;
-
-	i = ft_strlen(tok->content);
-	if (i > 0)
-		if (ft_isblank(tok->content[i - 1]))
-			return (tok->tag);
-	return (tok->tag | TAG_STICK);
-}
-
-enum e_tok token_str_or_quote(t_tok_lst *tok)
-{
-	int i;
-
-	i = 0;
-	while (tok->content[i] != '\0')
-	{
-		if (tok->content[i] == '\'')
-		{
-			tok->tag = TAG_STR_SINGLE;
-			return (token_check_stick(tok));
-		}
-		if (tok->content[i] == '"')
-		{
-			tok->tag = TAG_STR_DOUBLE;
-			return (token_check_stick(tok));
-		}
-		else
-		{
-			tok->tag = TAG_STR;
-			return (token_check_stick(tok));
-		}
-		i++;
-	}
-	return(0);
-}
-
-void						push_token_enum(t_tok_lst *tok)
-{
-	enum e_tok 		tag;
-
-	tag = ret_token(tok->content, 0);
-	if (tag == 0)
-		tok->tag = token_str_or_quote(tok);
-	else
-		tok->tag = tag;
-}
+/*
+** \brief iterate over input
+**        get the number of characters for the current token
+**        create a token from a substring in input
+**        assign a tag to the token
+*/
 
 t_tok_lst				*create_token_list(char *input, t_tok_lst **lst)
 {
 	t_tok_lst	*tok;
 	size_t 		i;
 	size_t		j;
+	size_t		len;
 
+	len = ft_strlen(input);
 	i = 0;
-	while (i < ft_strlen(input))
+	while (i < len)
 	{
-		j = 0;
-		j += check_input(&input[i]);
+		j = tok_len(&input[i]);
 		tok = tok_lst_new_until(0, input + i, j);
-		push_token_enum(tok);
-		if (ft_isblank(tok->content[0]) != 1)
+		tok->tag = tok_assign_tag(tok->content);
+		if (tok->tag == 0)
+			tok->tag = tok_assign_str(tok);
+		if (!ft_isblank(tok->content[0])) // ?
 			tok_lst_push_back(lst, tok);
 		i += j;
 	}
 	return (*lst);
 }
 
+/*
+** \brief        Create a token list from a string
+** \param input  Input string
+** \return       The created tokens or NULL on error
+*/
+
 t_tok_lst        			*lexer(char *input)
 {
 	t_tok_lst	*lst;
@@ -190,3 +112,22 @@ t_tok_lst        			*lexer(char *input)
 	lst = lexer_trim_out(lst);
 	return (lst);
 }
+
+/* int 					check_input_out(char *input) */
+/* { */
+/* 	int i; */
+/* 	int j; */
+/*  */
+/* 	i = 0; */
+/* 	while(input[i] != '\0') */
+/* 	{ */
+/* 		j = 0; */
+/* 		j += len_until_sep(&input[i]); */
+/* 		if (j != 0) */
+/* 			return(j); */
+/* 		i += j; */
+/* 		j = check_input(&input[i]); */
+/* 		return(j); */
+/* 	} */
+/* 	return(0); */
+/* } */
diff --git a/src/lexer/lexer_utils.c b/src/lexer/lexer_utils.c
deleted file mode 100644
index d848f95..0000000
--- a/src/lexer/lexer_utils.c
+++ /dev/null
@@ -1,109 +0,0 @@
-/* ************************************************************************** */
-/*                                                                            */
-/*                                                        :::      ::::::::   */
-/*   lexer_utils.c                                      :+:      :+:    :+:   */
-/*                                                    +:+ +:+         +:+     */
-/*   By: nahaddac <nahaddac@student.42.fr>          +#+  +:+       +#+        */
-/*                                                +#+#+#+#+#+   +#+           */
-/*   Created: 2020/07/16 08:18:15 by nahaddac          #+#    #+#             */
-/*   Updated: 2020/09/13 11:00:45 by nahaddac         ###   ########.fr       */
-/*                                                                            */
-/* ************************************************************************** */
-
-#include "lexer.h"
-
-// check for append tag
-enum e_tok                ret_token_sep_redir_append(char *input, int i)
-{
-    if (input[i + 1] ==  '>')
-        return(TAG_REDIR_APPEND);
-    return (TAG_REDIR_OUT);
-
-}
-
-// return token tag corresponding to string id
-enum e_tok                ret_token(char *input, int  i)
-{
-    if (input[i] == ';')
-        return(TAG_END);
-    if (input[i] == '&' && input[i + 1] == '&')
-        return(TAG_AND);
-    if (input[i] == '|' && input[i + 1] == '|')
-        return(TAG_OR);
-    if(input[i]  == '|')
-        return(TAG_PIPE);
-    if (input[i] == '>')
-        return(ret_token_sep_redir_append(input,i));
-    if (input[i] == '<')
-        return(TAG_REDIR_IN);
-    if (input[i] ==  '(')
-        return(TAG_PARENT_OPEN);
-    if (input[i] == ')')
-        return(TAG_PARENT_CLOSE);
-    return(0);
-
-}
-
-// check is char is separator
-// /!\ can be replaced by ft_strchr(";&|><()", input) == NULL
-int                 	lexer_sep(char input)
-{
-    char            *sep;
-    int             i;
-
-    i = 0;
-    sep = ";&|><()";
-    while(sep[i] != '\0')
-    {
-        if(sep[i] == input)
-            return(1);
-        i++;
-    }
-    return (0);
-}
-
-// skip spaces
-// /!\ can be replaced by strspn
-int             		lexer_space(char *input)
-{
-    int i;
-
-    i=0;
-    while(ft_isblank(input[i]))
-        i++;
-    return(i);
-}
-
-static int             lex_check_single_quote(char *input, int i)
-{
-    i++;
-    while(input[i] != '\0')
-    {
-        if(input[i] == '\\')
-            i+=1;
-        if(input[i] == '\'')
-            break;
-        ++i;
-    }
-    if (ft_isblank(input[i + 1]))
-        while(ft_isblank(input[i + 1]))
-            i++;
-    return(i + 1);
-}
-
-int             		lexer_check_between_quote(char *input, int i)
-{
-    if(input[i] == '\'')
-        return(lex_check_single_quote(input, i));
-    i++;
-    while(input[i] != '"' && (input[i] != '\0'))
-    {
-        if (input[i] == '\\')
-            i += 1;
-        ++i;
-    }
-    if (ft_isblank(input[i + 1]))
-        while(ft_isblank(input[i + 1]))
-            i++;
-    return(i + 1);
-}
diff --git a/src/lexer/utils.c b/src/lexer/utils.c
new file mode 100644
index 0000000..7df4955
--- /dev/null
+++ b/src/lexer/utils.c
@@ -0,0 +1,107 @@
+/* ************************************************************************** */
+/*                                                                            */
+/*                                                        :::      ::::::::   */
+/*   utils.c                                            :+:      :+:    :+:   */
+/*                                                    +:+ +:+         +:+     */
+/*   By: nahaddac <nahaddac@student.42.fr>          +#+  +:+       +#+        */
+/*                                                +#+#+#+#+#+   +#+           */
+/*   Created: 2020/07/16 08:18:15 by nahaddac          #+#    #+#             */
+/*   Updated: 2020/09/13 17:23:29 by charles          ###   ########.fr       */
+/*                                                                            */
+/* ************************************************************************** */
+
+#include "lexer.h"
+
+// Operator tag matching the start of content, or 0 when content is not an operator.
+enum e_tok                tok_assign_tag(char *content)
+{
+    if (content[0] == ';')
+        return (TAG_END);
+    if (ft_strncmp(content, "&&", 2) == 0)
+        return (TAG_AND);
+    if (ft_strncmp(content, "||", 2) == 0)
+        return (TAG_OR);
+    if (content[0] == '|')
+        return (TAG_PIPE);
+    if (ft_strncmp(content, ">>", 2) == 0) // must be tested before the single '>'
+        return (TAG_REDIR_APPEND);
+    if (content[0] == '>')
+        return (TAG_REDIR_OUT);
+    if (content[0] == '<')
+        return (TAG_REDIR_IN);
+    if (content[0] == '(')
+        return (TAG_PARENT_OPEN);
+    if (content[0] == ')')
+        return (TAG_PARENT_CLOSE);
+    return (0);
+}
+
+// Returns tok->tag, with TAG_STICK or'ed in unless the content ends with a blank.
+enum e_tok tok_assign_stick(t_tok_lst *tok)
+{
+	int len;
+
+	len = ft_strlen(tok->content);
+	if (len > 0 && ft_isblank(tok->content[len - 1]))
+		return (tok->tag);
+	return (tok->tag | TAG_STICK);
+}
+
+// Tag a non-operator token from its first character, then return that tag
+// as adjusted by tok_assign_stick:
+//   '\''          -> TAG_STR_SINGLE
+//   '"'           -> TAG_STR_DOUBLE
+//   anything else -> TAG_STR
+//
+// Side effect: tok->tag is overwritten before tok_assign_stick is called.
+// Empty content returns 0 and leaves tok->tag untouched.
+//
+// Only content[0] is tested. The former while loop returned from every
+// branch of its first iteration, so it never looked further and its
+// trailing i++ was unreachable.
+enum e_tok tok_assign_str(t_tok_lst *tok)
+{
+	char	first;
+
+	first = tok->content[0];
+	if (first == '\0')
+		return (0);
+	if (first == '\'')
+		tok->tag = TAG_STR_SINGLE;
+	else if (first == '"')
+		tok->tag = TAG_STR_DOUBLE;
+	else
+		tok->tag = TAG_STR;
+	return (tok_assign_stick(tok));
+}
+
+
+// 1 if c is one of ";&|><()", else 0 (a lone '&' counts as a separator too).
+// '\0' is rejected up front: a strchr-style search would match the terminator.
+int                 	lexer_sep(char c)
+{
+	return (c != '\0' && ft_strchr(";&|><()", c) != NULL);
+}
+
+// number of leading ' ' / '\t' characters in input (assumes ft_strspn behaves like strspn)
+int             		lexer_space(char *input)
+{
+	return (ft_strspn(input, " \t"));
+}
+
+// Length of the quoted token opening at input[i], trailing blanks included; never reads past '\0'.
+int             		quote_len(char *input, int i)
+{
+	char	quote_type;
+
+	quote_type = input[i++];
+	while (input[i] != quote_type && input[i] != '\0')
+	{
+		if (input[i] == '\\' && input[i + 1] != '\0') // skip the escaped char
+			i++;
+		i++;
+	}
+	if (input[i] == '\0') // unterminated quote: stop at the terminator
+		return (i);
+	return (i + 1 + lexer_space(&input[i + 1])); // closing quote + following blanks
+}
author	Charles Cabergs <me@cacharle.xyz>	2020-09-13 17:49:48 +0200
committer	Charles Cabergs <me@cacharle.xyz>	2020-09-13 17:49:48 +0200
commit	10ec6292d997ac18803df92469d2ab4ee03166e7 (patch)
tree	ffeb2baf5a63f63b1bcaa24f1b91d1f81c54b982 /src
parent	9ef012a8016b81fc6063c4fc9e861a22b5bd5dac (diff)
download	minishell-10ec6292d997ac18803df92469d2ab4ee03166e7.tar.gz minishell-10ec6292d997ac18803df92469d2ab4ee03166e7.tar.bz2 minishell-10ec6292d997ac18803df92469d2ab4ee03166e7.zip