X-Git-Url: http://www2.svjatoslav.eu/gitweb/?a=blobdiff_plain;f=src%2Fmain%2Fjava%2Feu%2Fsvjatoslav%2Fcommons%2Fstring%2Ftokenizer%2FTokenizer.java;h=939ede9dc5a8a7ecfe687745261254ba3b9b235b;hb=3bc3db3ceb288b82e48c349bf27dfafda2bcd444;hp=c4ca4d2e8dcd01e2b3a3218ea23224b1aa47ce47;hpb=2082c4e93c58ae61eac0bdf7955e8996f6a19768;p=svjatoslav_commons.git diff --git a/src/main/java/eu/svjatoslav/commons/string/tokenizer/Tokenizer.java b/src/main/java/eu/svjatoslav/commons/string/tokenizer/Tokenizer.java index c4ca4d2..939ede9 100755 --- a/src/main/java/eu/svjatoslav/commons/string/tokenizer/Tokenizer.java +++ b/src/main/java/eu/svjatoslav/commons/string/tokenizer/Tokenizer.java @@ -15,6 +15,7 @@ import java.util.Stack; import java.util.stream.Stream; import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.DROP; +import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.PRESERVE; public class Tokenizer { @@ -32,6 +33,7 @@ public class Tokenizer { public Tokenizer setSource(String source){ this.source = source; currentIndex = 0; + tokenIndexes.clear(); return this; } @@ -55,49 +57,71 @@ public class Tokenizer { + "\" but got \"" + match.token + "\" instead."); } - public TokenizerMatch getNextToken() { + public TokenizerMatch getNextToken() throws InvalidSyntaxException { tokenIndexes.push(currentIndex); - final StringBuilder result = new StringBuilder(); - - while (true) { - if (currentIndex >= source.length()) - return null; - - boolean accumulateCurrentChar = true; - - for (final Terminator terminator : terminators) - if (sequenceMatches(terminator.startSequence)) - - if (terminator.termination == DROP) { - currentIndex += terminator.startSequence.length(); - - if (terminator.endSequence != null) - skipUntilSequence(terminator.endSequence); - - if (result.length() > 0) - return new TokenizerMatch(result.toString(), - terminator); - else { - accumulateCurrentChar = false; - break; - } - } else if (result.length() > 0) - return new TokenizerMatch(result.toString(), terminator); - else { - currentIndex += terminator.startSequence.length(); - return new TokenizerMatch(terminator.startSequence, - terminator); - } - - if (accumulateCurrentChar) { - result.append(source.charAt(currentIndex)); + + StringBuilder token = new StringBuilder(); + + while (true){ + if (isOngoingToken()) { + token.append(source.charAt(currentIndex)); currentIndex++; + continue; + } + + Terminator tokenTerminator = findTokenTerminator(); + + if (tokenTerminator.termination == PRESERVE){ + return buildPreservedToken(token, tokenTerminator); + } else if (tokenTerminator.termination == DROP){ + if (hasAccumulatedToken(token)){ + currentIndex++; + return new TokenizerMatch(token.toString(), "", tokenTerminator); + } else { + currentIndex++; + } } } } - public boolean consumeIfNextToken(final String token) { + private TokenizerMatch buildPreservedToken(StringBuilder token, Terminator terminator) throws InvalidSyntaxException { + if (hasAccumulatedToken(token)) + return new TokenizerMatch(token.toString(), "", terminator); + + if (terminator.hasEndSequence()){ + int endSequenceIndex = source.indexOf(terminator.endSequence, + currentIndex + terminator.startSequence.length()); + + if (endSequenceIndex < 0) + throw new InvalidSyntaxException("Expected \"" + terminator.endSequence + "\" but not found."); + + String reminder = source.substring(currentIndex + terminator.startSequence.length(), endSequenceIndex); + currentIndex = endSequenceIndex + terminator.endSequence.length(); + + return new TokenizerMatch(terminator.startSequence, reminder, terminator); + } else { + currentIndex += terminator.startSequence.length(); + return new TokenizerMatch(terminator.startSequence, "", terminator); + } + } + + private boolean hasAccumulatedToken(StringBuilder token) { + return token.length() > 0; + } + + private boolean isOngoingToken() { + return findTokenTerminator() == null; + } + + public Terminator findTokenTerminator() { + for (Terminator terminator : terminators) + if (terminator.matches(source, currentIndex)) + return terminator; + return null; + } + + public boolean consumeIfNextToken(final String token) throws InvalidSyntaxException { if (token.equals(getNextToken().token)) return true; @@ -105,13 +129,13 @@ public class Tokenizer { return false; } - public TokenizerMatch peekNextToken(){ + public TokenizerMatch peekNextToken() throws InvalidSyntaxException { TokenizerMatch result = getNextToken(); unreadToken(); return result; } - public boolean peekIsOneOf(String ... possibilities){ + public boolean peekIsOneOf(String ... possibilities) throws InvalidSyntaxException { String nextToken = peekNextToken().token; return Stream.of(possibilities).anyMatch(possibility -> possibility.equals(nextToken)); }