From a0332795e7893ec7052cbf5dc481d5b97600efa7 Mon Sep 17 00:00:00 2001
From: Restioson
Date: Fri, 21 Jul 2017 08:37:40 +0200
Subject: [PATCH 1/2] Create ProtoLexer.g4

---
 dsl/ProtoLexer.g4 | 260 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 260 insertions(+)
 create mode 100644 dsl/ProtoLexer.g4

diff --git a/dsl/ProtoLexer.g4 b/dsl/ProtoLexer.g4
new file mode 100644
index 0000000..43ea7d6
--- /dev/null
+++ b/dsl/ProtoLexer.g4
@@ -0,0 +1,260 @@
+lexer grammar ProtoLexer; // token vocabulary consumed by ProtoParser through its tokenVocab option
+
+PACKAGE // keyword rules come first; IDENT is declared after them so keywords keep their own token type
+    : 'package'
+    ;
+SYNTAX
+    : 'syntax'
+    ;
+IMPORT
+    : 'import'
+    ;
+PUBLIC
+    : 'public'
+    ;
+OPTION
+    : 'option'
+    ;
+MESSAGE
+    : 'message'
+    ;
+GROUP
+    : 'group'
+    ;
+OPTIONAL
+    : 'optional'
+    ;
+REQUIRED
+    : 'required'
+    ;
+REPEATED
+    : 'repeated'
+    ;
+ONEOF
+    : 'oneof'
+    ;
+EXTEND
+    : 'extend'
+    ;
+EXTENSIONS
+    : 'extensions'
+    ;
+TO
+    : 'to'
+    ;
+MAX
+    : 'max'
+    ;
+RESERVED
+    : 'reserved'
+    ;
+ENUM
+    : 'enum'
+    ;
+SERVICE
+    : 'service'
+    ;
+RPC
+    : 'rpc'
+    ;
+RETURNS
+    : 'returns'
+    ;
+STREAM
+    : 'stream'
+    ;
+MAP
+    : 'map'
+    ;
+BOOLEAN_VALUE
+    : 'true'
+    | 'false'
+    ;
+DOUBLE // scalar type keywords
+    : 'double'
+    ;
+FLOAT
+    : 'float'
+    ;
+INT32
+    : 'int32'
+    ;
+INT64
+    : 'int64'
+    ;
+UINT32
+    : 'uint32'
+    ;
+UINT64
+    : 'uint64'
+    ;
+SINT32
+    : 'sint32'
+    ;
+SINT64
+    : 'sint64'
+    ;
+FIXED32
+    : 'fixed32'
+    ;
+FIXED64
+    : 'fixed64'
+    ;
+SFIXED32
+    : 'sfixed32'
+    ;
+SFIXED64
+    : 'sfixed64'
+    ;
+BOOL
+    : 'bool'
+    ;
+STRING
+    : 'string'
+    ;
+BYTES
+    : 'bytes'
+    ;
+COMMENT // block comment; '.*?' is non-greedy, so it ends at the first '*/'
+    : '/*' .*? '*/' -> channel(HIDDEN)
+    ;
+LINE_COMMENT // runs up to, but not including, the line terminator
+    : '//' ~('\r' | '\n')* -> channel(HIDDEN)
+    ;
+NL
+    : '\r'? '\n' -> channel(HIDDEN)
+    ;
+WS
+    : [ \t]+ -> channel(HIDDEN)
+    ;
+LCURLY
+    : '{'
+    ;
+RCURLY
+    : '}'
+    ;
+LPAREN
+    : '('
+    ;
+RPAREN
+    : ')'
+    ;
+LSQUARE
+    : '['
+    ;
+RSQUARE
+    : ']'
+    ;
+LT
+    : '<'
+    ;
+GT
+    : '>'
+    ;
+COMMA
+    : ','
+    ;
+DOT
+    : '.'
+    ;
+COLON
+    : ':'
+    ;
+SEMICOLON
+    : ';'
+    ;
+ASSIGN
+    : '='
+    ;
+IDENT // letter or underscore first, then letters, digits or underscores
+    : (ALPHA | UNDERSCORE) (ALPHA | DIGIT | UNDERSCORE)*
+    ;
+STRING_VALUE
+    : DOUBLE_QUOTE_STRING
+    | SINGLE_QUOTE_STRING
+    ;
+INTEGER_VALUE
+    : DEC_VALUE
+    | HEX_VALUE
+    | OCT_VALUE
+    ;
+FLOAT_VALUE // NOTE(review): bare 'inf' / 'nan' also match IDENT, which is declared earlier - presumably they lex as IDENT; confirm
+    : EXPONENT
+    | FLOAT_LIT
+    | MINUS? INF
+    | NAN
+    ;
+fragment DOUBLE_QUOTE_STRING
+    : '"' ( ESC_SEQ | ~('\\'|'"') )* '"'
+    ;
+fragment SINGLE_QUOTE_STRING
+    : '\'' ( ESC_SEQ | ~('\\'|'\'') )* '\''
+    ;
+fragment EXPONENT // mantissa, exponent marker, optionally signed decimal digits: "1e5", "1e+5", "2.5E-03"
+    : (FLOAT_LIT|DEC_VALUE) EXP ('+' | MINUS)? DIGIT+
+    ;
+fragment FLOAT_LIT
+    : MINUS? DIGIT+ '.' DIGIT*    // "0.", "0.123"
+    | MINUS? '.' DIGIT+           // ".123"
+    ;
+fragment INF
+    : 'inf'
+    ;
+fragment NAN
+    : 'nan'
+    ;
+fragment EXP
+    : 'e'
+    | 'E'
+    ;
+fragment DEC_VALUE // a lone "0", or an optionally negative number without a leading zero
+    : '0' | MINUS? '1'..'9' '0'..'9'*
+    ;
+fragment HEX_VALUE
+    : MINUS? '0' [xX] HEX_DIGIT+
+    ;
+fragment OCT_VALUE
+    : MINUS? '0' OCT_DIGIT+
+    ;
+fragment MINUS
+    : '-'
+    ;
+fragment ALPHA
+    : [a-zA-Z]
+    ;
+fragment DIGIT
+    : [0-9]
+    ;
+fragment HEX_DIGIT
+    : [0-9a-fA-F]
+    ;
+fragment OCT_DIGIT
+    : [0-7]
+    ;
+fragment UNDERSCORE
+    : '_'
+    ;
+fragment ESC_SEQ // single-character, two-digit hex, unicode and octal escapes
+    : '\\' ('a'|'v'|'b'|'t'|'n'|'f'|'r'|'?'|'\"'|'\''|'\\')
+    | '\\' ('x'|'X') HEX_DIGIT HEX_DIGIT
+    | UNICODE_ESC
+    | OCTAL_ESC
+    ;
+fragment OCTAL_ESC // one to three octal digits; the three-digit form is limited to a leading 0-3
+    : '\\' [0-3] OCT_DIGIT OCT_DIGIT
+    | '\\' OCT_DIGIT OCT_DIGIT
+    | '\\' OCT_DIGIT
+    ;
+fragment UNICODE_ESC
+    : '\\' 'u' HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT
+    ;
+
+/** "catch all" rule for any char not matched in a token rule of your
+ *  grammar. Lexers in Intellij must return all tokens good and bad.
+ *  There must be a token to cover all characters, which makes sense, for
+ *  an IDE. The parser however should not see these bad tokens because
+ *  it just confuses the issue. Hence, the hidden channel.
+ */
+ERRCHAR
+    : . -> channel(HIDDEN)
+    ;

From 06dbd6127678e9f0f1146dc8b34379154edf7cc9 Mon Sep 17 00:00:00 2001
From: Restioson
Date: Fri, 21 Jul 2017 08:38:05 +0200
Subject: [PATCH 2/2] Create ProtoParser.g4

---
 dsl/ProtoParser.g4 | 279 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 279 insertions(+)
 create mode 100644 dsl/ProtoParser.g4

diff --git a/dsl/ProtoParser.g4 b/dsl/ProtoParser.g4
new file mode 100644
index 0000000..7aadabf
--- /dev/null
+++ b/dsl/ProtoParser.g4
@@ -0,0 +1,279 @@
+parser grammar ProtoParser;
+
+options {
+    tokenVocab = ProtoLexer;
+}
+
+proto // entry rule: optional syntax statement, any number of top-level statements, then EOF
+    // syntax should be first statement in the file
+    : syntax?
+        ( packageStatement
+        | importStatement
+        | optionEntry
+        | enumBlock
+        | messageBlock
+        | extendBlock
+        | serviceBlock)*
+        EOF
+    ;
+syntax
+    : SYNTAX ASSIGN STRING_VALUE SEMICOLON
+    ;
+packageStatement
+    : PACKAGE packageName SEMICOLON
+    ;
+packageName
+    : fullIdent
+    ;
+importStatement
+    : IMPORT PUBLIC? fileReference SEMICOLON
+    ;
+fileReference
+    : STRING_VALUE
+    ;
+optionEntry
+    : OPTION option SEMICOLON
+    ;
+enumBlock // the semicolon after the closing brace is optional
+    : ENUM enumName LCURLY (enumField | optionEntry)* RCURLY SEMICOLON?
+    ;
+enumName
+    : ident
+    ;
+enumField
+    : enumFieldName ASSIGN enumFieldValue fieldOptions? SEMICOLON
+    ;
+enumFieldName
+    : ident
+    ;
+enumFieldValue
+    : INTEGER_VALUE
+    ;
+extendBlock
+    : EXTEND typeReference LCURLY extendBlockEntry* RCURLY SEMICOLON?
+    ;
+extendBlockEntry
+    : field
+    | groupBlock
+    ;
+serviceBlock
+    : SERVICE serviceName LCURLY (rpcMethod | optionEntry)* RCURLY SEMICOLON?
+    ;
+serviceName
+    : ident
+    ;
+rpcMethod // the body in braces and the trailing semicolon are each optional
+    : RPC rpcName LPAREN rpcType RPAREN
+        RETURNS LPAREN rpcType RPAREN (LCURLY optionEntry* RCURLY)? SEMICOLON?
+    ;
+rpcName
+    : ident
+    ;
+rpcType
+    : STREAM? typeReference
+    ;
+messageBlock
+    : MESSAGE messageName LCURLY
+        (field
+        | optionEntry
+        | messageBlock
+        | enumBlock
+        | extensions
+        | extendBlock
+        | groupBlock
+        | oneof
+        | map
+        | reservedFieldRanges
+        | reservedFieldNames)*
+        RCURLY SEMICOLON?
+    ;
+messageName
+    : ident
+    ;
+oneof
+    : ONEOF oneofName LCURLY (oneofField | oneofGroup)* RCURLY SEMICOLON?
+    ;
+oneofName
+    : ident
+    ;
+oneofField // like 'field', but without a fieldModifier
+    : typeReference fieldName ASSIGN tag fieldOptions? SEMICOLON
+    ;
+oneofGroup // like 'groupBlock', but without a fieldModifier
+    : GROUP fieldName ASSIGN tag LCURLY
+        (field
+        | optionEntry
+        | messageBlock
+        | enumBlock
+        | extensions
+        | extendBlock
+        | groupBlock)*
+        RCURLY SEMICOLON?
+    ;
+map
+    : MAP LT mapKey COMMA mapValue GT fieldName ASSIGN tag fieldOptions? SEMICOLON
+    ;
+mapKey // scalar type keywords from typeReference, minus DOUBLE, FLOAT and BYTES
+    : INT32
+    | INT64
+    | UINT32
+    | UINT64
+    | SINT32
+    | SINT64
+    | FIXED32
+    | FIXED64
+    | SFIXED32
+    | SFIXED64
+    | BOOL
+    | STRING
+    ;
+mapValue
+    : typeReference
+    ;
+tag
+    : INTEGER_VALUE
+    ;
+groupBlock // unlike 'field', the modifier is mandatory here
+    : fieldModifier GROUP groupName ASSIGN tag LCURLY
+        (field
+        | optionEntry
+        | messageBlock
+        | enumBlock
+        | extensions
+        | extendBlock
+        | groupBlock)*
+        RCURLY SEMICOLON?
+    ;
+groupName
+    : ident
+    ;
+extensions
+    : EXTENSIONS range (COMMA range)* SEMICOLON
+    ;
+range // a single number, or "from to (number | max)"
+    : rangeFrom ( TO ( rangeTo | MAX ) )?
+    ;
+rangeFrom
+    : INTEGER_VALUE
+    ;
+rangeTo
+    : INTEGER_VALUE
+    ;
+reservedFieldRanges
+    : RESERVED range (COMMA range)* SEMICOLON
+    ;
+reservedFieldNames
+    : RESERVED reservedFieldName (COMMA reservedFieldName)* SEMICOLON
+    ;
+reservedFieldName
+    : STRING_VALUE
+    ;
+field
+    : fieldModifier? typeReference fieldName ASSIGN tag fieldOptions? SEMICOLON
+    ;
+fieldName
+    : ident
+    ;
+fieldModifier
+    : OPTIONAL
+    | REQUIRED
+    | REPEATED
+    ;
+typeReference // a scalar type keyword, or a dotted name with an optional leading dot
+    : DOUBLE
+    | FLOAT
+    | INT32
+    | INT64
+    | UINT32
+    | UINT64
+    | SINT32
+    | SINT64
+    | FIXED32
+    | FIXED64
+    | SFIXED32
+    | SFIXED64
+    | BOOL
+    | STRING
+    | BYTES
+    | DOT? ident (DOT ident)*
+    ;
+fieldOptions // bracketed, comma-separated option list; may be empty
+    : LSQUARE (option (COMMA option)* )? RSQUARE
+    ;
+option
+    : optionName ASSIGN optionValue
+    ;
+optionName // plain dotted name, or a parenthesised type reference followed by dotted option names
+    : ident (DOT ident)*
+    | LPAREN typeReference RPAREN (DOT optionName)*
+    ;
+optionValue
+    : INTEGER_VALUE
+    | FLOAT_VALUE
+    | BOOLEAN_VALUE
+    | STRING_VALUE
+    | IDENT
+    | textFormat
+    ;
+textFormat
+    : LCURLY textFormatEntry* RCURLY
+    ;
+textFormatOptionName
+    : ident
+    | LSQUARE typeReference RSQUARE
+    ;
+textFormatEntry // "name: value", or "name { ... }" for a nested block
+    : textFormatOptionName COLON textFormatOptionValue
+    | textFormatOptionName textFormat
+    ;
+textFormatOptionValue
+    : INTEGER_VALUE
+    | FLOAT_VALUE
+    | BOOLEAN_VALUE
+    | STRING_VALUE
+    | IDENT
+    ;
+fullIdent
+    : ident (DOT ident)*
+    ;
+ident // IDENT plus every keyword token, so keywords are accepted wherever a name is expected
+    : IDENT
+    | PACKAGE
+    | SYNTAX
+    | IMPORT
+    | PUBLIC
+    | OPTION
+    | MESSAGE
+    | GROUP
+    | OPTIONAL
+    | REQUIRED
+    | REPEATED
+    | ONEOF
+    | EXTEND
+    | EXTENSIONS
+    | TO
+    | MAX
+    | RESERVED
+    | ENUM
+    | SERVICE
+    | RPC
+    | RETURNS
+    | STREAM
+    | MAP
+    | BOOLEAN_VALUE
+    | DOUBLE
+    | FLOAT
+    | INT32
+    | INT64
+    | UINT32
+    | UINT64
+    | SINT32
+    | SINT64
+    | FIXED32
+    | FIXED64
+    | SFIXED32
+    | SFIXED64
+    | BOOL
+    | STRING
+    | BYTES
+    ;