+ "\" but got \"" + match.token + "\" instead.");
}
- public TokenizerMatch getNextToken() {
+ // Returns the next token, skipping DROP terminators and surfacing
+ // PRESERVE terminators as tokens of their own. The pre-call position is
+ // pushed onto tokenIndexes so unreadToken() can rewind exactly one call.
+ // Throws InvalidSyntaxException when a PRESERVE terminator's end
+ // sequence is missing (raised from buildPreservedToken).
+ public TokenizerMatch getNextToken() throws InvalidSyntaxException {
tokenIndexes.push(currentIndex);
StringBuilder token = new StringBuilder();
while (true){
- if (isTokenTermination()){
- Terminator tokenTerminator = findTokenTerminator();
-
- if (tokenTerminator.termination == PRESERVE){
- if (hasAccumulatedToken(token)){
- // already assembled some token
- return new TokenizerMatch(token.toString(), "", tokenTerminator);
- } else {
- currentIndex++;
- return new TokenizerMatch(tokenTerminator.startSequence, "", tokenTerminator);
- }
- } else if (tokenTerminator.termination == DROP){
- if (hasAccumulatedToken(token)){
- currentIndex++;
- return new TokenizerMatch(token.toString(), "", tokenTerminator);
- } else {
- currentIndex++;
- }
- }
- } else {
+ if (isOngoingToken()) {
+ // Still inside an ordinary token: accumulate this character.
token.append(source.charAt(currentIndex));
currentIndex++;
+ continue;
+ }
+
+ // currentIndex sits on a terminator (guaranteed non-null here,
+ // since isOngoingToken() returned false).
+ Terminator tokenTerminator = findTokenTerminator();
+
+ if (tokenTerminator.termination == PRESERVE){
+ return buildPreservedToken(token, tokenTerminator);
+ } else if (tokenTerminator.termination == DROP){
+ if (hasAccumulatedToken(token)){
+ // DROP terminator ends the accumulated token; consume it.
+ currentIndex++;
+ return new TokenizerMatch(token.toString(), "", tokenTerminator);
+ } else {
+ // Nothing accumulated yet: just skip the dropped character.
+ currentIndex++;
+ }
+ }
}
}
}
+ // Emits a token for a PRESERVE terminator found at currentIndex.
+ // If plain-token characters were already accumulated, they are returned
+ // first WITHOUT advancing currentIndex, so the terminator itself is
+ // re-detected and returned by the following getNextToken() call.
+ private TokenizerMatch buildPreservedToken(StringBuilder token, Terminator terminator) throws InvalidSyntaxException {
+ if (hasAccumulatedToken(token))
+ return new TokenizerMatch(token.toString(), "", terminator);
+
+ if (terminator.hasEndSequence()){
+ // Paired terminator (e.g. quotes, block comment): capture the text
+ // between start and end sequences as the "reminder"
+ // (sic - existing field name kept for API compatibility).
+ int endSequenceIndex = source.indexOf(terminator.endSequence,
+ currentIndex + terminator.startSequence.length());
+
+ if (endSequenceIndex < 0)
+ throw new InvalidSyntaxException("Expected \"" + terminator.endSequence + "\" but not found.");
+
+ String reminder = source.substring(currentIndex + terminator.startSequence.length(), endSequenceIndex);
+ // Resume scanning just past the end sequence.
+ currentIndex = endSequenceIndex + terminator.endSequence.length();
+
+ return new TokenizerMatch(terminator.startSequence, reminder, terminator);
+ } else {
+ // Unpaired terminator: the start sequence itself is the token.
+ currentIndex += terminator.startSequence.length();
+ return new TokenizerMatch(terminator.startSequence, "", terminator);
+ }
+ }
+
+ // True once at least one character has been appended to the current token.
private boolean hasAccumulatedToken(StringBuilder token) {
return token.length() > 0;
}
- private boolean isTokenTermination() {
- return findTokenTerminator() != null;
+ // True while currentIndex is not at any terminator, i.e. the character
+ // there still belongs to an ordinary token. (Inverted replacement for
+ // the former isTokenTermination().)
+ private boolean isOngoingToken() {
+ return findTokenTerminator() == null;
}
+ // NOTE(review): shown here as always returning null, which would make
+ // isOngoingToken() always true and the scan run past the end of source -
+ // presumably the real terminator-matching logic is elided by this diff
+ // view; confirm against the full file.
public Terminator findTokenTerminator() {
return null;
}
- public boolean consumeIfNextToken(final String token) {
+ // Consumes the next token only when it equals the expected string.
+ // Bug fix: previously a NON-matching token was still consumed (the
+ // getNextToken() call advanced the position and the mismatch path never
+ // rewound), silently discarding a token. Now the position is restored
+ // with unreadToken() so the caller can re-read or try an alternative.
+ public boolean consumeIfNextToken(final String token) throws InvalidSyntaxException {
if (token.equals(getNextToken().token))
return true;
+
+ unreadToken();
return false;
}
- public TokenizerMatch peekNextToken(){
+ // Returns the next token without consuming it: fetch, then rewind via
+ // unreadToken().
+ public TokenizerMatch peekNextToken() throws InvalidSyntaxException {
TokenizerMatch result = getNextToken();
unreadToken();
return result;
}
- public boolean peekIsOneOf(String ... possibilities){
+ // True when the upcoming token equals any of the given alternatives;
+ // the token is not consumed (delegates to peekNextToken()).
+ public boolean peekIsOneOf(String ... possibilities) throws InvalidSyntaxException {
String nextToken = peekNextToken().token;
return Stream.of(possibilities).anyMatch(possibility -> possibility.equals(nextToken));
}
import org.junit.Test;
+import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.DROP;
import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.PRESERVE;
import static org.junit.Assert.assertEquals;
@Test
public void you_can_peek() throws Exception {
Tokenizer tokenizer = new Tokenizer("this is a N'2015-03-18 09:48:54.360' test")
- .addTerminator(" ", Terminator.TerminationStrategy.DROP)
+ .addTerminator(" ", DROP)
.addTerminator("N'", "'", PRESERVE);
tokenizer.expectAndConsumeNextToken("this");
@Test
public void complexTerminator() throws Exception {
- Tokenizer tokenizer = new Tokenizer(" this((\"hello\" /* comment */ (( is a N'2015-03-18 09:48:54.360' test")
- .addTerminator(" ", Terminator.TerminationStrategy.DROP)
- .addTerminator("(", Terminator.TerminationStrategy.PRESERVE)
- .addTerminator("\"", "\"" ,Terminator.TerminationStrategy.PRESERVE)
- .addTerminator("/*", "*/" ,Terminator.TerminationStrategy.DROP)
+ Tokenizer tokenizer = new Tokenizer("/* hello */ /** comment **/ (( is a N'2015-03-18 09:48:54.360' test")
+ .addTerminator(" ", DROP)
+ .addTerminator("(", PRESERVE)
+ .addTerminator("\"", "\"" , PRESERVE)
+ .addTerminator("/*", "*/" , PRESERVE)
;
- System.out.println(tokenizer.getNextToken().token);
+ TokenizerMatch nextToken = tokenizer.getNextToken();
+ System.out.println("T: \"" + nextToken.token + "\", R: \"" + nextToken.reminder + "\"");
System.out.println(tokenizer.getNextToken().token);
System.out.println(tokenizer.getNextToken().token);
System.out.println(tokenizer.getNextToken().token);
@Test
public void testComplexTerminator() throws Exception {
Tokenizer tokenizer = new Tokenizer("this N'2015-03-18 09:48:54.360' /* thoe unto u */ test")
- .addTerminator(" ", Terminator.TerminationStrategy.DROP)
+ .addTerminator(" ", DROP)
.addTerminator("/*", "*/", PRESERVE);
// tokenizer.expectAndConsumeNextToken("this");