From 430430e74e3a725a30e0df96062caab9090d77d7 Mon Sep 17 00:00:00 2001 From: mikhan808 Date: Thu, 16 Sep 2021 14:44:12 +0300 Subject: [PATCH 1/6] multiline token is determined by the last token in the line, not the whole line --- .../modes/antlr/AntlrTokenMaker.java | 55 +++++++++++-------- 1 file changed, 32 insertions(+), 23 deletions(-) diff --git a/src/main/java/de/tisoft/rsyntaxtextarea/modes/antlr/AntlrTokenMaker.java b/src/main/java/de/tisoft/rsyntaxtextarea/modes/antlr/AntlrTokenMaker.java index 5c2201b..f6c2fe6 100644 --- a/src/main/java/de/tisoft/rsyntaxtextarea/modes/antlr/AntlrTokenMaker.java +++ b/src/main/java/de/tisoft/rsyntaxtextarea/modes/antlr/AntlrTokenMaker.java @@ -83,10 +83,8 @@ public Token getTokenList(Segment text, int initialTokenType, int startOffset) { } // check if we have a multi line token start without an end - String multilineTokenEnd = getMultilineTokenEnd(line); - if (multilineTokenEnd != null) { - line += multilineTokenEnd; - } + String multilineTokenEnd = null; + Lexer lexer = createLexer(line); for (int mode : modeInfo.modeStack.toArray()) { @@ -102,25 +100,27 @@ public Token getTokenList(Segment text, int initialTokenType, int startOffset) { try { while (true) { - org.antlr.v4.runtime.Token at = lexer.nextToken(); - setLanguageIndex(lexer._mode); - if (at.getType() == CommonToken.EOF) { - if (multilineTokenEnd == null) { - addNullToken(); + org.antlr.v4.runtime.Token at = lexer.nextToken(); + setLanguageIndex(lexer._mode); + if (at.getType() == CommonToken.EOF) { + if(currentToken!=null) + multilineTokenEnd=getMultilineTokenEnd(currentToken); + if (multilineTokenEnd == null) { + addNullToken(); + } + break; + } else { + addToken( + text, + currentArrayOffset, + currentDocumentOffset, + multilineTokenStart, + multilineTokenEnd, + at); + // update from current token + currentArrayOffset = currentToken.textOffset + currentToken.textCount; + currentDocumentOffset = currentToken.getEndOffset(); } - break; - } else { - addToken( - text, - currentArrayOffset, - currentDocumentOffset, - multilineTokenStart, - multilineTokenEnd, - at); - // update from current token - currentArrayOffset = currentToken.textOffset + currentToken.textCount; - currentDocumentOffset = currentToken.getEndOffset(); - } } } catch (AlwaysThrowingErrorListener.AntlrException exceptionInstanceNotNeeded) { // mark the rest of the line as error @@ -200,6 +200,15 @@ private String getMultilineTokenStart(ModeInfoManager.ModeInfo modeInfo) { .map(i -> i.tokenStart) .orElse(null); } + private String getMultilineTokenEnd(Token token) { + for (MultiLineTokenInfo mti : multiLineTokenInfos) { + if (mti.token == token.getType()) { + if (token.getLexeme().equals(mti.tokenEnd) || !token.endsWith(mti.tokenEnd.toCharArray())) + return mti.tokenEnd; + } + } + return null; + } private String getMultilineTokenEnd(String line) { return multiLineTokenInfos.stream() @@ -223,4 +232,4 @@ private Optional getMultiLineTokenInfo(int languageIndex, in } protected abstract Lexer createLexer(String text); -} +} \ No newline at end of file From 6580077ff63129cc2e4a11c95f9809d25da7bf98 Mon Sep 17 00:00:00 2001 From: mikhan808 Date: Thu, 16 Sep 2021 15:01:50 +0300 Subject: [PATCH 2/6] replace the end token with the start token --- .../modes/antlr/AntlrTokenMaker.java | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/main/java/de/tisoft/rsyntaxtextarea/modes/antlr/AntlrTokenMaker.java b/src/main/java/de/tisoft/rsyntaxtextarea/modes/antlr/AntlrTokenMaker.java index f6c2fe6..0e37c23 100644 --- a/src/main/java/de/tisoft/rsyntaxtextarea/modes/antlr/AntlrTokenMaker.java +++ b/src/main/java/de/tisoft/rsyntaxtextarea/modes/antlr/AntlrTokenMaker.java @@ -33,15 +33,16 @@ package de.tisoft.rsyntaxtextarea.modes.antlr; -import java.util.Arrays; -import java.util.List; -import java.util.Optional; -import javax.swing.text.Segment; import org.antlr.v4.runtime.CommonToken; import org.antlr.v4.runtime.Lexer; import org.fife.ui.rsyntaxtextarea.Token; import org.fife.ui.rsyntaxtextarea.TokenMakerBase; +import javax.swing.text.Segment; +import java.util.Arrays; +import java.util.List; +import java.util.Optional; + public abstract class AntlrTokenMaker extends TokenMakerBase { private final ModeInfoManager modeInfoManager = new ModeInfoManager(); @@ -203,8 +204,8 @@ private String getMultilineTokenStart(ModeInfoManager.ModeInfo modeInfo) { private String getMultilineTokenEnd(Token token) { for (MultiLineTokenInfo mti : multiLineTokenInfos) { if (mti.token == token.getType()) { - if (token.getLexeme().equals(mti.tokenEnd) || !token.endsWith(mti.tokenEnd.toCharArray())) - return mti.tokenEnd; + if (token.getLexeme().equals(mti.tokenStart) || !token.endsWith(mti.tokenEnd.toCharArray())) + return mti.tokenEnd; } } return null; From d4ecc36e01c63c7751bcab80a922e49fa5840ded Mon Sep 17 00:00:00 2001 From: mikhan808 Date: Mon, 7 Feb 2022 11:54:24 +0300 Subject: [PATCH 3/6] .To activate multiline mode, at the end of a line, check if the token is at the beginning of the line and if multiline mode is already activated. If so, even if token =multi-string-token.start=multi-string-token-end --- .../modes/antlr/AntlrTokenMaker.java | 95 +++++++++++-------- 1 file changed, 53 insertions(+), 42 deletions(-) diff --git a/src/main/java/de/tisoft/rsyntaxtextarea/modes/antlr/AntlrTokenMaker.java b/src/main/java/de/tisoft/rsyntaxtextarea/modes/antlr/AntlrTokenMaker.java index 0e37c23..8dcc30d 100644 --- a/src/main/java/de/tisoft/rsyntaxtextarea/modes/antlr/AntlrTokenMaker.java +++ b/src/main/java/de/tisoft/rsyntaxtextarea/modes/antlr/AntlrTokenMaker.java @@ -101,11 +101,12 @@ public Token getTokenList(Segment text, int initialTokenType, int startOffset) { try { while (true) { + try { org.antlr.v4.runtime.Token at = lexer.nextToken(); setLanguageIndex(lexer._mode); if (at.getType() == CommonToken.EOF) { - if(currentToken!=null) - multilineTokenEnd=getMultilineTokenEnd(currentToken); + if (currentToken != null) + multilineTokenEnd = getMultilineTokenEnd(currentToken, initialTokenType); if (multilineTokenEnd == null) { addNullToken(); } @@ -122,6 +123,9 @@ public Token getTokenList(Segment text, int initialTokenType, int startOffset) { currentArrayOffset = currentToken.textOffset + currentToken.textCount; currentDocumentOffset = currentToken.getEndOffset(); } + } catch (Exception e) { + e.printStackTrace(); + } } } catch (AlwaysThrowingErrorListener.AntlrException exceptionInstanceNotNeeded) { // mark the rest of the line as error @@ -132,11 +136,11 @@ public Token getTokenList(Segment text, int initialTokenType, int startOffset) { int type = multilineTokenStart != null ? modeInfo.tokenType : Token.ERROR_IDENTIFIER; addToken( - text, - currentArrayOffset, - currentArrayOffset + remainingText.length() - 1, - type, - currentDocumentOffset); + text, + currentArrayOffset, + currentArrayOffset + remainingText.length() - 1, + type, + currentDocumentOffset); if (multilineTokenStart == null) { // we are not in a multiline token, so we assume the line ends here @@ -158,34 +162,34 @@ public Token getTokenList(Segment text, int initialTokenType, int startOffset) { if (!lexer._modeStack.isEmpty() || lexer._mode != Lexer.DEFAULT_MODE) { currentToken.setType( - modeInfoManager.storeModeInfo(currentToken.getType(), lexer._mode, lexer._modeStack)); + modeInfoManager.storeModeInfo(currentToken.getType(), lexer._mode, lexer._modeStack)); } return firstToken; } private void addToken( - Segment text, - int start, - int startOffset, - String multilineTokenStart, - String multilineTokenEnd, - org.antlr.v4.runtime.Token at) { + Segment text, + int start, + int startOffset, + String multilineTokenStart, + String multilineTokenEnd, + org.antlr.v4.runtime.Token at) { addToken( - text, - start, - calculateTokenEnd(multilineTokenStart, multilineTokenEnd, start, at), - getClosestStandardTokenTypeForInternalType(at.getType()), - startOffset); + text, + start, + calculateTokenEnd(multilineTokenStart, multilineTokenEnd, start, at), + getClosestStandardTokenTypeForInternalType(at.getType()), + startOffset); } private int calculateTokenEnd( - String multilineTokenStart, - String multilineTokenEnd, - int currentArrayOffset, - org.antlr.v4.runtime.Token at) { + String multilineTokenStart, + String multilineTokenEnd, + int currentArrayOffset, + org.antlr.v4.runtime.Token at) { int end = currentArrayOffset + at.getText().length() - 1; - if (multilineTokenStart != null && at.getText().startsWith(multilineTokenStart)) { + if (multilineTokenStart != null && at.getText().startsWith(multilineTokenStart) && at.getCharPositionInLine() == 0) { // need to subtract our inserted token start end -= multilineTokenStart.length(); } @@ -198,14 +202,21 @@ private int calculateTokenEnd( private String getMultilineTokenStart(ModeInfoManager.ModeInfo modeInfo) { return getMultiLineTokenInfo(getLanguageIndex(), modeInfo.tokenType) - .map(i -> i.tokenStart) - .orElse(null); + .map(i -> i.tokenStart) + .orElse(null); } - private String getMultilineTokenEnd(Token token) { + + private String getMultilineTokenEnd(Token token, int initialTypeToken) { for (MultiLineTokenInfo mti : multiLineTokenInfos) { if (mti.token == token.getType()) { - if (token.getLexeme().equals(mti.tokenStart) || !token.endsWith(mti.tokenEnd.toCharArray())) - return mti.tokenEnd; + if (!token.endsWith(mti.tokenEnd.toCharArray())) + return mti.tokenEnd; + if (mti.tokenStart.contentEquals(mti.tokenEnd)) { + if (token.getOffset() == 0 && initialTypeToken == mti.token) { + return null; + } else if (token.getLexeme().equals(mti.tokenEnd)) + return mti.tokenEnd; + } } } return null; @@ -213,23 +224,23 @@ private String getMultilineTokenEnd(Token token) { private String getMultilineTokenEnd(String line) { return multiLineTokenInfos.stream() - // the language index matches our current language - .filter(i -> i.languageIndex == getLanguageIndex()) - // the line contains the token start - .filter(i -> line.contains(i.tokenStart)) - // the line doesn't contain the token end after the token start - .filter( - i -> line.indexOf(i.tokenEnd, line.indexOf(i.tokenStart) + i.tokenStart.length()) == -1) - .map(i -> i.tokenEnd) - .findFirst() - .orElse(null); + // the language index matches our current language + .filter(i -> i.languageIndex == getLanguageIndex()) + // the line contains the token start + .filter(i -> line.contains(i.tokenStart)) + // the line doesn't contain the token end after the token start + .filter( + i -> line.indexOf(i.tokenEnd, line.indexOf(i.tokenStart) + i.tokenStart.length()) == -1) + .map(i -> i.tokenEnd) + .findFirst() + .orElse(null); } private Optional getMultiLineTokenInfo(int languageIndex, int token) { return multiLineTokenInfos.stream() - .filter(i -> i.languageIndex == languageIndex) - .filter(i -> i.token == token) - .findFirst(); + .filter(i -> i.languageIndex == languageIndex) + .filter(i -> i.token == token) + .findFirst(); } protected abstract Lexer createLexer(String text); From dedcbe990a60db33d14b4cd4ff0ec620e4407764 Mon Sep 17 00:00:00 2001 From: mikhan808 Date: Mon, 7 Feb 2022 12:33:52 +0300 Subject: [PATCH 4/6] eliminate remarks to the writing of the code: 1. remove unused method 2. remove the nested try block --- .../modes/antlr/AntlrTokenMaker.java | 69 +++++++------------ 1 file changed, 25 insertions(+), 44 deletions(-) diff --git a/src/main/java/de/tisoft/rsyntaxtextarea/modes/antlr/AntlrTokenMaker.java b/src/main/java/de/tisoft/rsyntaxtextarea/modes/antlr/AntlrTokenMaker.java index 8dcc30d..e8983bc 100644 --- a/src/main/java/de/tisoft/rsyntaxtextarea/modes/antlr/AntlrTokenMaker.java +++ b/src/main/java/de/tisoft/rsyntaxtextarea/modes/antlr/AntlrTokenMaker.java @@ -86,7 +86,6 @@ public Token getTokenList(Segment text, int initialTokenType, int startOffset) { // check if we have a multi line token start without an end String multilineTokenEnd = null; - Lexer lexer = createLexer(line); for (int mode : modeInfo.modeStack.toArray()) { // push the modes into the lexer, so it knows where it is @@ -101,30 +100,26 @@ public Token getTokenList(Segment text, int initialTokenType, int startOffset) { try { while (true) { - try { - org.antlr.v4.runtime.Token at = lexer.nextToken(); - setLanguageIndex(lexer._mode); - if (at.getType() == CommonToken.EOF) { - if (currentToken != null) - multilineTokenEnd = getMultilineTokenEnd(currentToken, initialTokenType); - if (multilineTokenEnd == null) { - addNullToken(); - } - break; - } else { - addToken( - text, - currentArrayOffset, - currentDocumentOffset, - multilineTokenStart, - multilineTokenEnd, - at); - // update from current token - currentArrayOffset = currentToken.textOffset + currentToken.textCount; - currentDocumentOffset = currentToken.getEndOffset(); + org.antlr.v4.runtime.Token at = lexer.nextToken(); + setLanguageIndex(lexer._mode); + if (at.getType() == CommonToken.EOF) { + if (currentToken != null) + multilineTokenEnd = getMultilineTokenEnd(currentToken, initialTokenType); + if (multilineTokenEnd == null) { + addNullToken(); } - } catch (Exception e) { - e.printStackTrace(); + break; + } else { + addToken( + text, + currentArrayOffset, + currentDocumentOffset, + multilineTokenStart, + multilineTokenEnd, + at); + // update from current token + currentArrayOffset = currentToken.textOffset + currentToken.textCount; + currentDocumentOffset = currentToken.getEndOffset(); } } } catch (AlwaysThrowingErrorListener.AntlrException exceptionInstanceNotNeeded) { @@ -189,7 +184,9 @@ private int calculateTokenEnd( int currentArrayOffset, org.antlr.v4.runtime.Token at) { int end = currentArrayOffset + at.getText().length() - 1; - if (multilineTokenStart != null && at.getText().startsWith(multilineTokenStart) && at.getCharPositionInLine() == 0) { + if (multilineTokenStart != null + && at.getText().startsWith(multilineTokenStart) + && at.getCharPositionInLine() == 0) { // need to subtract our inserted token start end -= multilineTokenStart.length(); } @@ -209,33 +206,17 @@ private String getMultilineTokenStart(ModeInfoManager.ModeInfo modeInfo) { private String getMultilineTokenEnd(Token token, int initialTypeToken) { for (MultiLineTokenInfo mti : multiLineTokenInfos) { if (mti.token == token.getType()) { - if (!token.endsWith(mti.tokenEnd.toCharArray())) - return mti.tokenEnd; + if (!token.endsWith(mti.tokenEnd.toCharArray())) return mti.tokenEnd; if (mti.tokenStart.contentEquals(mti.tokenEnd)) { if (token.getOffset() == 0 && initialTypeToken == mti.token) { return null; - } else if (token.getLexeme().equals(mti.tokenEnd)) - return mti.tokenEnd; + } else if (token.getLexeme().equals(mti.tokenEnd)) return mti.tokenEnd; } } } return null; } - private String getMultilineTokenEnd(String line) { - return multiLineTokenInfos.stream() - // the language index matches our current language - .filter(i -> i.languageIndex == getLanguageIndex()) - // the line contains the token start - .filter(i -> line.contains(i.tokenStart)) - // the line doesn't contain the token end after the token start - .filter( - i -> line.indexOf(i.tokenEnd, line.indexOf(i.tokenStart) + i.tokenStart.length()) == -1) - .map(i -> i.tokenEnd) - .findFirst() - .orElse(null); - } - private Optional getMultiLineTokenInfo(int languageIndex, int token) { return multiLineTokenInfos.stream() .filter(i -> i.languageIndex == languageIndex) @@ -244,4 +225,4 @@ private Optional getMultiLineTokenInfo(int languageIndex, in } protected abstract Lexer createLexer(String text); -} \ No newline at end of file +} From 0dc5989581dc9b2b050dafc1bc498f00f76a1d20 Mon Sep 17 00:00:00 2001 From: mikhan808 Date: Mon, 7 Feb 2022 13:09:13 +0300 Subject: [PATCH 5/6] fix grammar to recognize multiline tokens --- .../de/tisoft/rsyntaxtextarea/modes/antlr/Test.g4 | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/test/antlr4/de/tisoft/rsyntaxtextarea/modes/antlr/Test.g4 b/src/test/antlr4/de/tisoft/rsyntaxtextarea/modes/antlr/Test.g4 index 87bea49..a70f129 100644 --- a/src/test/antlr4/de/tisoft/rsyntaxtextarea/modes/antlr/Test.g4 +++ b/src/test/antlr4/de/tisoft/rsyntaxtextarea/modes/antlr/Test.g4 @@ -40,12 +40,12 @@ DIV : '/'; // note, that the start of the COMMENT_DOC token is a valid combination of 2 other token types // also a COMMENT_DOC would also be a valid COMMENT, so it needs to be before it COMMENT_DOC - : '/**' .*? '*/' -> channel(HIDDEN) + : '/**' .*? ('*/'|EOF) -> channel(HIDDEN) ; // note, that the start of the COMMENT token is a valid combination of 2 other token types COMMENT - : '/*' .*? '*/' -> channel(HIDDEN) + : '/*' .*? ('*/'|EOF) -> channel(HIDDEN) ; @@ -61,13 +61,13 @@ STRING_LITERAL /// shortstringitem ::= shortstringchar | stringescapeseq /// shortstringchar ::= fragment SHORT_STRING - : '\'' ( STRING_ESCAPE_SEQ | ~[\\\r\n\f'] )* '\'' - | '"' ( STRING_ESCAPE_SEQ | ~[\\\r\n\f"] )* '"' + : '\'' ( STRING_ESCAPE_SEQ | ~[\\\r\n\f'] )* ('\''|EOF) + | '"' ( STRING_ESCAPE_SEQ | ~[\\\r\n\f"] )* ('"'|EOF) ; /// longstring ::= "'''" longstringitem* "'''" | '"""' longstringitem* '"""' fragment LONG_STRING - : '\'\'\'' LONG_STRING_ITEM*? '\'\'\'' - | '"""' LONG_STRING_ITEM*? '"""' + : '\'\'\'' LONG_STRING_ITEM*? ('\'\'\''|EOF) + | '"""' LONG_STRING_ITEM*? ('"""'|EOF) ; /// longstringitem ::= longstringchar | stringescapeseq From be6a7881e0f6e1a5e8c8e65f7ca4898a2298aa77 Mon Sep 17 00:00:00 2001 From: mikhan808 Date: Mon, 7 Feb 2022 13:21:25 +0300 Subject: [PATCH 6/6] eliminate comments on the code reduce the cognitive complexity of the method --- .../modes/antlr/AntlrTokenMaker.java | 25 +++++++++++-------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/src/main/java/de/tisoft/rsyntaxtextarea/modes/antlr/AntlrTokenMaker.java b/src/main/java/de/tisoft/rsyntaxtextarea/modes/antlr/AntlrTokenMaker.java index e8983bc..70bc47a 100644 --- a/src/main/java/de/tisoft/rsyntaxtextarea/modes/antlr/AntlrTokenMaker.java +++ b/src/main/java/de/tisoft/rsyntaxtextarea/modes/antlr/AntlrTokenMaker.java @@ -103,9 +103,7 @@ public Token getTokenList(Segment text, int initialTokenType, int startOffset) { org.antlr.v4.runtime.Token at = lexer.nextToken(); setLanguageIndex(lexer._mode); if (at.getType() == CommonToken.EOF) { - if (currentToken != null) - multilineTokenEnd = getMultilineTokenEnd(currentToken, initialTokenType); - if (multilineTokenEnd == null) { + if (currentToken != null && isUnfinishedMultilineToken(currentToken, initialTokenType)) { addNullToken(); } break; @@ -203,18 +201,23 @@ private String getMultilineTokenStart(ModeInfoManager.ModeInfo modeInfo) { .orElse(null); } - private String getMultilineTokenEnd(Token token, int initialTypeToken) { + private boolean isUnfinishedMultilineToken(Token token, int initialTypeToken) { for (MultiLineTokenInfo mti : multiLineTokenInfos) { if (mti.token == token.getType()) { - if (!token.endsWith(mti.tokenEnd.toCharArray())) return mti.tokenEnd; - if (mti.tokenStart.contentEquals(mti.tokenEnd)) { - if (token.getOffset() == 0 && initialTypeToken == mti.token) { - return null; - } else if (token.getLexeme().equals(mti.tokenEnd)) return mti.tokenEnd; - } + return isUnfinishedMultilineToken(token, initialTypeToken, mti); } } - return null; + return false; + } + + private boolean isUnfinishedMultilineToken(Token token, int initialTypeToken, MultiLineTokenInfo mti) { + if (!token.endsWith(mti.tokenEnd.toCharArray())) return true; + if (mti.tokenStart.contentEquals(mti.tokenEnd)) { + if (token.getOffset() == 0 && initialTypeToken == mti.token) { + return false; + } else return token.getLexeme().equals(mti.tokenEnd); + } + return false; } private Optional getMultiLineTokenInfo(int languageIndex, int token) {