From fdf2097f7f81e69777c75ac5f74b5e9e69676a9a Mon Sep 17 00:00:00 2001 From: JellyApple102 Date: Thu, 23 Sep 2021 22:48:52 -0400 Subject: [PATCH] add punctuation to strings --- grammar.js | 2 +- src/grammar.json | 2 +- src/parser.c | 12 ++++++++++-- test/corpus/test.txt | 8 +++++++- 4 files changed, 19 insertions(+), 5 deletions(-) diff --git a/grammar.js b/grammar.js index 926684a..72ae457 100644 --- a/grammar.js +++ b/grammar.js @@ -36,7 +36,7 @@ module.exports = grammar({ large: $ => /[A-Z_\d]+/, - string: $ => /[A-Za-z ]+/, + string: $ => /[A-Za-z \-,.']+/, separator: $ => /:/, diff --git a/src/grammar.json b/src/grammar.json index 8ce716d..e6c80bd 100644 --- a/src/grammar.json +++ b/src/grammar.json @@ -121,7 +121,7 @@ }, "string": { "type": "PATTERN", - "value": "[A-Za-z ]+" + "value": "[A-Za-z \\-,.']+" }, "separator": { "type": "PATTERN", diff --git a/src/parser.c b/src/parser.c index 355d021..9ab41cc 100644 --- a/src/parser.c +++ b/src/parser.c @@ -181,7 +181,9 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) { lookahead == '\r') SKIP(1) if (('0' <= lookahead && lookahead <= '9')) ADVANCE(9); if (('A' <= lookahead && lookahead <= 'Z')) ADVANCE(7); - if (('a' <= lookahead && lookahead <= 'z')) ADVANCE(11); + if (lookahead == '\'' || + (',' <= lookahead && lookahead <= '.') || + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(11); END_STATE(); case 2: if (lookahead == '\t' || @@ -217,6 +219,8 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) { lookahead == '_') ADVANCE(8); if (('A' <= lookahead && lookahead <= 'Z')) ADVANCE(7); if (lookahead == ' ' || + lookahead == '\'' || + (',' <= lookahead && lookahead <= '.') || ('a' <= lookahead && lookahead <= 'z')) ADVANCE(11); END_STATE(); case 8: @@ -234,11 +238,15 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) { if (lookahead == ' ') ADVANCE(10); if (('0' <= lookahead && lookahead <= '9')) ADVANCE(9); if (('A' <= lookahead && lookahead <= 'Z')) ADVANCE(7); - if (('a' <= lookahead && lookahead <= 'z')) ADVANCE(11); + if (lookahead == '\'' || + (',' <= lookahead && lookahead <= '.') || + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(11); END_STATE(); case 11: ACCEPT_TOKEN(sym_string); if (lookahead == ' ' || + lookahead == '\'' || + (',' <= lookahead && lookahead <= '.') || ('A' <= lookahead && lookahead <= 'Z') || ('a' <= lookahead && lookahead <= 'z')) ADVANCE(11); END_STATE(); diff --git a/test/corpus/test.txt b/test/corpus/test.txt index e9d8ee2..5f1b2d1 100644 --- a/test/corpus/test.txt +++ b/test/corpus/test.txt @@ -7,6 +7,7 @@ opening_comment [TEST:BODY_1PART] comment here [MULTIPLE:string:123:LARGE] [SINGLE_TEST] + [HERE:A string, contating lot's of diffent punctuation.] --- @@ -30,4 +31,9 @@ opening_comment (large))) (bracket_statement (parameter_list - (declaration)))) + (declaration))) + (bracket_statement + (parameter_list + (declaration) + (separator) + (string))))