blob: 5e0600de06470fd8e0c8a032bc3dcd602967057e (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
|
/* ************************************************************************** */
/* */
/* ::: :::::::: */
/* lexer.c :+: :+: :+: */
/* +:+ +:+ +:+ */
/* By: nahaddac <nahaddac@student.42.fr> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */
/* Created: 2020/07/16 08:18:25 by nahaddac #+# #+# */
/* Updated: 2020/09/17 13:19:40 by nahaddac ### ########.fr */
/* */
/* ************************************************************************** */
#include "lexer.h"
#include <stdio.h>
/*
** \brief Length of the current unquoted word, i.e. the distance from the
** start of input to the next separator, quote or blank.
**
** A backslash escapes the character that follows it: both characters are
** counted as part of the word. When the word ends on a blank, the blank
** and any following run of blanks (via lexer_space) are included so the
** caller can skip them in one step.
**
** \param input NUL-terminated string to scan
** \return number of characters consumed from input
*/
int len_until_sep(char *input)
{
	int i;

	i = -1;
	while (input[++i])
	{
		if (input[i] == '\\')
		{
			/* A lone backslash at end of string: stop at the NUL
			** instead of stepping past it (avoids out-of-bounds
			** read of input[i] after i += 2). */
			if (input[i + 1] == '\0')
				return (i + 1);
			i += 2;
			if (ft_isblank(input[i]))
				return (i + 1 + lexer_space(&input[i + 1]));
			/* was `!= '\'' || != '"'` (always true); quotes end
			** the word, anything else continues it */
			else if (input[i] != '\'' && input[i] != '"')
				i += len_until_sep(&input[i]);
			return (i);
		}
		if (lexer_sep(input[i]))
			return (i);
		if (input[i] == '\'' || input[i] == '"')
			return (i);
		if (ft_isblank(input[i]))
			return (i + 1 + lexer_space(&input[i + 1]));
	}
	return (i);
}
/*
** \brief Number of characters making up the token that starts at input.
**
** Dispatches on the first character: escaped separator, parenthesis,
** separator (possibly doubled, e.g. "&&" / "||" — but never ";;"),
** quoted string, blank run, or a plain word (len_until_sep).
**
** \param input NUL-terminated string positioned at the token start
** \return token length in characters, trailing blanks included where
**         the original lexer consumed them
*/
int tok_len(char *input)
{
	char	c;

	c = input[0];
	if (c == '\\' && lexer_sep(input[1]))
		return (2 + lexer_space(&input[2]));
	if (c == '(' || c == ')')
		return (1);
	if (lexer_sep(c))
	{
		/* a doubled separator (other than ';') is one token */
		if (c != ';' && input[1] == c)
			return (2 + lexer_space(&input[2]));
		return (1 + lexer_space(&input[1]));
	}
	if (c == '\'' || c == '"')
		return (quote_len(input, 0));
	if (ft_isblank(c))
		return (1 + lexer_space(&input[1]));
	return (len_until_sep(input));
}
/*
** \brief Iterate over input, cutting it into tokens.
**
** For each position: measure the token (tok_len), copy it out of input
** (tok_lst_new_until), tag it (tok_assign_tag / tok_assign_str), and
** append it to *lst — except pure-blank tokens, which are skipped.
**
** \param input NUL-terminated line to tokenize
** \param lst   in/out head of the token list being built
** \return the (possibly partial, on allocation failure) token list
*/
t_tok_lst *create_token_list(char *input, t_tok_lst **lst)
{
	t_tok_lst	*tok;
	size_t		i;
	size_t		j;
	size_t		len;

	len = ft_strlen(input);
	i = 0;
	while (i < len)
	{
		j = tok_len(&input[i]);
		tok = tok_lst_new_until(0, input + i, j);
		if (tok == NULL)
			return (*lst);
		tok->tag = tok_assign_tag(tok->content);
		if (tok->tag == 0)
			tok->tag = tok_assign_str(tok);
		/* blank-only tokens are dropped, not stored */
		if (!ft_isblank(tok->content[0]))
			tok_lst_push_back(lst, tok);
		/* NOTE(review): dropped blank tokens are never freed here —
		** looks like a leak; confirm against the tok_lst_* API */
		i += j;
	}
	return (*lst);
}
/*
** \brief Create a token list from a string.
** \param input raw command line (may be NULL)
** \param out   receives the head of the created token list
** \return lexer_trim's status, or 2 when input is NULL
*/
int lexer(char *input, t_tok_lst **out)
{
	if (input == NULL)
		return (2);
	*out = NULL;
	*out = create_token_list(input, out);
	return (lexer_trim(*out));
}
|