author     koekeishiya <aasvi93@hotmail.com>    2017-08-07 20:23:44 +0200
committer  koekeishiya <aasvi93@hotmail.com>    2017-08-07 20:23:44 +0200
commit     d69056799a399058005b4950751397a31110de4a (patch)
tree       1dee43a2f247094c58d1263cee8c8477b893e376 /src/tokenize.c
download   skhd-d69056799a399058005b4950751397a31110de4a.tar.gz
           skhd-d69056799a399058005b4950751397a31110de4a.zip
v0.0.1
Diffstat (limited to 'src/tokenize.c')
-rw-r--r--    src/tokenize.c    171
1 files changed, 171 insertions, 0 deletions
diff --git a/src/tokenize.c b/src/tokenize.c
new file mode 100644
index 0000000..53f294a
--- /dev/null
+++ b/src/tokenize.c
@@ -0,0 +1,171 @@
+#include "tokenize.h"
+#define internal static
+
+// Compare a token (bounded by token.length, not null-terminated) against a
+// null-terminated string; both the characters and the lengths must match.
+internal int
+token_equals(struct token token, const char *match)
+{
+    const char *at = match;
+    for(unsigned index = 0; index < token.length; ++index, ++at) {
+        if((*at == '\0') || (*at != token.text[index])) {
+            return 0;
+        }
+    }
+    return (*at == '\0') ? 1 : 0;
+}
+
+// Step one character forward, keeping the line and cursor (column)
+// counters in sync.
+internal void
+advance(struct tokenizer *tokenizer)
+{
+    if(*tokenizer->at == '\n') {
+        tokenizer->cursor = 0;
+        ++tokenizer->line;
+    }
+    ++tokenizer->cursor;
+    ++tokenizer->at;
+}
+
+internal void
+eat_whitespace(struct tokenizer *tokenizer)
+{
+    while(*tokenizer->at && isspace(*tokenizer->at)) {
+        advance(tokenizer);
+    }
+}
+
+internal void
+eat_comment(struct tokenizer *tokenizer)
+{
+    while(*tokenizer->at && *tokenizer->at != '\n') {
+        advance(tokenizer);
+    }
+}
+
+// Consume the rest of the line as a command. A backslash escapes the next
+// character, so an escaped newline continues the command onto the next line;
+// a trailing backslash at the end of the buffer is not skipped past.
+internal void
+eat_command(struct tokenizer *tokenizer)
+{
+    while(*tokenizer->at && *tokenizer->at != '\n') {
+        if(*tokenizer->at == '\\' && *(tokenizer->at + 1)) {
+            advance(tokenizer);
+        }
+        advance(tokenizer);
+    }
+}
+
+// Consume hexadecimal digits; note that only uppercase A-F is accepted.
+internal void
+eat_hex(struct tokenizer *tokenizer)
+{
+    while((*tokenizer->at) &&
+          ((isdigit(*tokenizer->at)) ||
+           (*tokenizer->at >= 'A' && *tokenizer->at <= 'F'))) {
+        advance(tokenizer);
+    }
+}
+
+internal void
+eat_identifier(struct tokenizer *tokenizer)
+{
+    while(*tokenizer->at && isalpha(*tokenizer->at)) {
+        advance(tokenizer);
+    }
+}
+
+// A single-character identifier is always a literal key; longer identifiers
+// are looked up in the modifier and key-name tables.
+internal enum token_type
+resolve_identifier_type(struct token token)
+{
+    if(token.length == 1) {
+        return Token_Key;
+    }
+
+    for(int i = 0; i < array_count(token_modifier_map); ++i) {
+        if(token_equals(token, token_modifier_map[i])) {
+            return Token_Modifier;
+        }
+    }
+
+    for(int i = 0; i < array_count(token_key_map); ++i) {
+        if(token_equals(token, token_key_map[i])) {
+            return Token_Key;
+        }
+    }
+
+    return Token_Unknown;
+}
+
+// Look at the next token without consuming it: the tokenizer is passed by
+// value, so get_token advances only the local copy.
+struct token
+peek_token(struct tokenizer tokenizer)
+{
+    return get_token(&tokenizer);
+}
+
+struct token
+get_token(struct tokenizer *tokenizer)
+{
+    struct token token;
+    char c;
+
+    eat_whitespace(tokenizer);
+
+    token.length = 1;
+    token.text = tokenizer->at;
+    token.line = tokenizer->line;
+    token.cursor = tokenizer->cursor;
+    c = *token.text;
+    advance(tokenizer);
+
+    switch(c)
+    {
+        case '\0': { token.type = Token_EndOfStream; } break;
+        case '+':  { token.type = Token_Plus; } break;
+        case '-':
+        {
+            if(*tokenizer->at && *tokenizer->at == '>') {
+                advance(tokenizer);
+                token.length = tokenizer->at - token.text;
+                token.type = Token_Arrow;
+            } else {
+                token.type = Token_Dash;
+            }
+        } break;
+        case ':':
+        {
+            // everything after ':' up to the end of the line is the command
+            eat_whitespace(tokenizer);
+
+            token.text = tokenizer->at;
+            token.line = tokenizer->line;
+            token.cursor = tokenizer->cursor;
+
+            eat_command(tokenizer);
+            token.length = tokenizer->at - token.text;
+            token.type = Token_Command;
+        } break;
+        case '#':
+        {
+            // a comment runs to the end of the line; skip it and
+            // return the token that follows
+            eat_comment(tokenizer);
+            token = get_token(tokenizer);
+        } break;
+        default:
+        {
+            if(c == '0' && *tokenizer->at == 'x') {
+                advance(tokenizer);
+                eat_hex(tokenizer);
+                token.length = tokenizer->at - token.text;
+                token.type = Token_Key_Hex;
+            } else if(isdigit(c)) {
+                token.type = Token_Key;
+            } else if(isalpha(c)) {
+                eat_identifier(tokenizer);
+                token.length = tokenizer->at - token.text;
+                token.type = resolve_identifier_type(token);
+            } else {
+                token.type = Token_Unknown;
+            }
+        } break;
+    }
+
+    return token;
+}
+
+void tokenizer_init(struct tokenizer *tokenizer, char *buffer)
+{
+    tokenizer->buffer = buffer;
+    tokenizer->at = buffer;
+    tokenizer->line = 1;
+    tokenizer->cursor = 1;
+}