From d69056799a399058005b4950751397a31110de4a Mon Sep 17 00:00:00 2001
From: koekeishiya
Date: Mon, 7 Aug 2017 20:23:44 +0200
Subject: v0.0.1

---
 src/tokenize.c | 180 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 180 insertions(+)
 create mode 100644 src/tokenize.c

diff --git a/src/tokenize.c b/src/tokenize.c
new file mode 100644
index 0000000..53f294a
--- /dev/null
+++ b/src/tokenize.c
@@ -0,0 +1,180 @@
+#include "tokenize.h"
+#include <ctype.h>
+#define internal static
+
+internal int
+token_equals(struct token token, const char *match)
+{
+    /* token.text is not null-terminated: compare exactly token.length
+       characters and require that match ends at the same point. */
+    const char *at = match;
+    for(unsigned index = 0; index < token.length; ++index, ++at) {
+        if(*at == '\0' || *at != token.text[index]) {
+            return 0;
+        }
+    }
+    return *at == '\0' ? 1 : 0;
+}
+
+internal void
+advance(struct tokenizer *tokenizer)
+{
+    /* step one character forward, tracking line and column */
+    if(*tokenizer->at == '\n') {
+        tokenizer->cursor = 0;
+        ++tokenizer->line;
+    }
+    ++tokenizer->cursor;
+    ++tokenizer->at;
+}
+
+internal void
+eat_whitespace(struct tokenizer *tokenizer)
+{
+    while(*tokenizer->at && isspace(*tokenizer->at)) {
+        advance(tokenizer);
+    }
+}
+
+internal void
+eat_comment(struct tokenizer *tokenizer)
+{
+    while(*tokenizer->at && *tokenizer->at != '\n') {
+        advance(tokenizer);
+    }
+}
+
+internal void
+eat_command(struct tokenizer *tokenizer)
+{
+    /* consume to end of line; a backslash skips the following
+       character, so escaped newlines continue the command */
+    while(*tokenizer->at && *tokenizer->at != '\n') {
+        if(*tokenizer->at == '\\') {
+            advance(tokenizer);
+        }
+        advance(tokenizer);
+    }
+}
+
+internal void
+eat_hex(struct tokenizer *tokenizer)
+{
+    /* isxdigit accepts both uppercase and lowercase hex digits */
+    while(*tokenizer->at && isxdigit(*tokenizer->at)) {
+        advance(tokenizer);
+    }
+}
+
+internal void
+eat_identifier(struct tokenizer *tokenizer)
+{
+    while(*tokenizer->at && isalpha(*tokenizer->at)) {
+        advance(tokenizer);
+    }
+}
+
+internal enum token_type
+resolve_identifier_type(struct token token)
+{
+    if(token.length == 1) {
+        return Token_Key;
+    }
+
+    for(int i = 0; i < array_count(token_modifier_map); ++i) {
+        if(token_equals(token, token_modifier_map[i])) {
+            return Token_Modifier;
+        }
+    }
+
+    for(int i = 0; i < array_count(token_key_map); ++i) {
+        if(token_equals(token, token_key_map[i])) {
+            return Token_Key;
+        }
+    }
+
+    return Token_Unknown;
+}
+
+struct token
+peek_token(struct tokenizer tokenizer)
+{
+    /* the tokenizer is copied by value, so lookahead does not consume input */
+    return get_token(&tokenizer);
+}
+
+struct token
+get_token(struct tokenizer *tokenizer)
+{
+    struct token token;
+    char c;
+
+    eat_whitespace(tokenizer);
+
+    token.length = 1;
+    token.text = tokenizer->at;
+    token.line = tokenizer->line;
+    token.cursor = tokenizer->cursor;
+    c = *token.text;
+    advance(tokenizer);
+
+    switch(c)
+    {
+        case '\0': { token.type = Token_EndOfStream; } break;
+        case '+':  { token.type = Token_Plus;        } break;
+        case '-':
+        {
+            if(*tokenizer->at == '>') {
+                advance(tokenizer);
+                token.length = tokenizer->at - token.text;
+                token.type = Token_Arrow;
+            } else {
+                token.type = Token_Dash;
+            }
+        } break;
+        case ':':
+        {
+            eat_whitespace(tokenizer);
+
+            token.text = tokenizer->at;
+            token.line = tokenizer->line;
+            token.cursor = tokenizer->cursor;
+
+            eat_command(tokenizer);
+            token.length = tokenizer->at - token.text;
+            token.type = Token_Command;
+        } break;
+        case '#':
+        {
+            /* a '#' comment runs to end of line; recurse for the next token */
+            eat_comment(tokenizer);
+            token = get_token(tokenizer);
+        } break;
+        default:
+        {
+            if(c == '0' && *tokenizer->at == 'x') {
+                advance(tokenizer);
+                eat_hex(tokenizer);
+                token.length = tokenizer->at - token.text;
+                token.type = Token_Key_Hex;
+            } else if(isdigit(c)) {
+                token.type = Token_Key;
+            } else if(isalpha(c)) {
+                eat_identifier(tokenizer);
+                token.length = tokenizer->at - token.text;
+                token.type = resolve_identifier_type(token);
+            } else {
+                token.type = Token_Unknown;
+            }
+        } break;
+    }
+
+    return token;
+}
+
+void tokenizer_init(struct tokenizer *tokenizer, char *buffer)
+{
+    tokenizer->buffer = buffer;
+    tokenizer->at = buffer;
+    tokenizer->line = 1;
+    tokenizer->cursor = 1;
+}
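
This commit adds only src/tokenize.c; the tokenize.h it includes, and the token_modifier_map / token_key_map tables it consults, are not part of the patch. For readers who want to exercise the tokenizer, the sketch below combines a hypothetical stand-in header with a small test driver. Everything in it that tokenize.c does not define itself is an assumption inferred from usage: the struct layouts, the token_type enumerator list, the array_count macro, and the contents of both keyword tables are placeholders, not the project's actual definitions.

/*
 * main.c -- hypothetical driver for the tokenizer above. The top half is a
 * stand-in for tokenize.h, reconstructed from how tokenize.c uses these
 * names; it may not match the real header. The keyword tables must be
 * defined (not merely declared) wherever tokenize.c can see them, so that
 * array_count can compute their size at compile time.
 */
#include <stdio.h>

enum token_type
{
    Token_Modifier,
    Token_Key,
    Token_Key_Hex,
    Token_Plus,
    Token_Dash,
    Token_Arrow,
    Token_Command,
    Token_Unknown,
    Token_EndOfStream
};

struct token
{
    char *text;
    unsigned length;
    unsigned line;
    unsigned cursor;
    enum token_type type;
};

struct tokenizer
{
    char *buffer;
    char *at;
    unsigned line;
    unsigned cursor;
};

#define array_count(a) (sizeof(a) / sizeof(*(a)))

/* placeholder keyword tables consulted by resolve_identifier_type */
static const char *token_modifier_map[] = { "cmd", "alt", "shift", "ctrl" };
static const char *token_key_map[]      = { "return", "space", "escape" };

/* entry points implemented in tokenize.c */
void tokenizer_init(struct tokenizer *tokenizer, char *buffer);
struct token get_token(struct tokenizer *tokenizer);
struct token peek_token(struct tokenizer tokenizer);

int main(void)
{
    /* a sample line in the hotkey-config style this tokenizer targets */
    char buffer[] = "# comment line\ncmd + shift - 0x24 : echo hello\n";
    struct tokenizer tokenizer;
    struct token token;

    tokenizer_init(&tokenizer, buffer);
    for(token = get_token(&tokenizer);
        token.type != Token_EndOfStream;
        token = get_token(&tokenizer)) {
        printf("%u:%u type=%d '%.*s'\n",
               token.line, token.cursor, token.type,
               (int)token.length, token.text);
    }

    return 0;
}

Splitting the top half into a stand-in tokenize.h and compiling the two files together (cc main.c src/tokenize.c) would print one token per line: the leading '#' line is skipped, "cmd" and "shift" resolve to Token_Modifier through the placeholder table, "0x24" becomes Token_Key_Hex, and everything after ':' is captured as a single Token_Command.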