/*
- * Svjatoslav Commons - shared library of common functionality.
- * Copyright ©2012-2017, Svjatoslav Agejenko, svjatoslav@svjatoslav.eu
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 3 of the GNU Lesser General Public License
- * or later as published by the Free Software Foundation.
+ * Svjatoslav Commons - shared library of common functionality. Author: Svjatoslav Agejenko.
+ * This project is released under Creative Commons Zero (CC0) license.
*/
-
package eu.svjatoslav.commons.string.tokenizer;
import java.util.ArrayList;
import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.DROP;
import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.PRESERVE;
+import static java.lang.System.out;
public class Tokenizer {
+ /**
+ * Stack of token indexes. This allows walking back in history and un-consuming a token.
+ */
private final Stack<Integer> tokenIndexes = new Stack<>();
+
+ /**
+ * Terminators that will be searched for by given tokenizer within given source string.
+ */
private final List<Terminator> terminators = new ArrayList<>();
- private String source;
+
+ private String source; // string to be tokenized
+
private int currentIndex = 0;
private int cachedTerminatorIndex = -1;
return this;
}
+ /**
+ * Registers the given terminator by appending it to this tokenizer's list of terminators.
+ *
+ * @param terminator terminator to add.
+ * @return this tokenizer, so that calls can be chained.
+ */
+ public Tokenizer addTerminator(Terminator terminator) {
+ terminators.add(terminator);
+ return this;
+ }
+
public Tokenizer addTerminator(final String startSequence,
final String endSequence, final Terminator.TerminationStrategy terminationStrategy) {
terminators.add(new Terminator(startSequence, endSequence, terminationStrategy));
+ "\" but got \"" + match.token + "\" instead.");
}
-
+ /**
+ * @return next {@link TokenizerMatch} or <code>null</code> if end of input is reached.
+ * @throws InvalidSyntaxException
+ */
public TokenizerMatch getNextToken() throws InvalidSyntaxException {
tokenIndexes.push(currentIndex);
currentIndex += terminator.startSequence.length();
}
- private TokenizerMatch buildPreservedToken(StringBuilder token, Terminator terminator) throws InvalidSyntaxException {
+ /**
+ * Builds the {@link TokenizerMatch} to return for the given terminator.
+ * <p>
+ * If text has already been accumulated in <code>token</code>, that text is returned as the match
+ * and the current index is not advanced by this method. Otherwise the match is built from the
+ * terminator itself: with its enclosed content when the terminator has an end sequence, or from
+ * the start sequence alone when it has none.
+ *
+ * @param token text accumulated so far.
+ * @param terminator terminator that was encountered.
+ * @throws InvalidSyntaxException if end sequence is not found as is expected by given token.
+ */
+ private TokenizerMatch buildPreservedToken(StringBuilder token, Terminator terminator)
+ throws InvalidSyntaxException {
if (hasAccumulatedToken(token))
return new TokenizerMatch(token.toString(), null, terminator);
if (terminator.hasEndSequence())
- return buildComplexPreservedToken(terminator);
+ return buildTokenWithExpectedENdSequence(terminator);
else
- return buildSimplePreservedToken(terminator);
+ return buildTokenWithoutEndSequence(terminator);
}
- private TokenizerMatch buildSimplePreservedToken(Terminator terminator) {
+ /**
+ * Advances the current index by the length of the terminator's start sequence.
+ *
+ * @return match whose token is the terminator's start sequence; <code>null</code> is passed as
+ * the second constructor argument.
+ */
+ private TokenizerMatch buildTokenWithoutEndSequence(Terminator terminator) {
currentIndex += terminator.startSequence.length();
return new TokenizerMatch(terminator.startSequence, null, terminator);
}
- private TokenizerMatch buildComplexPreservedToken(Terminator terminator) throws InvalidSyntaxException {
+ /**
+ * Builds a match for a terminator that has an end sequence. The source text starting at the
+ * current index plus the start sequence length, and ending where the end sequence begins, is
+ * passed to the match as its second constructor argument. The current index is then moved just
+ * past the end sequence.
+ *
+ * @throws InvalidSyntaxException propagated from getEndSequenceIndex when the expected end
+ * sequence is not found.
+ */
+ private TokenizerMatch buildTokenWithExpectedENdSequence(Terminator terminator) throws InvalidSyntaxException {
int endSequenceIndex = getEndSequenceIndex(terminator);
String reminder = source.substring(currentIndex + terminator.startSequence.length(), endSequenceIndex);
currentIndex = endSequenceIndex + terminator.endSequence.length();
return new TokenizerMatch(terminator.startSequence, reminder, terminator);
}
+ /**
+ * @throws InvalidSyntaxException if end of input is reached without finding expected end sequence.
+ */
private int getEndSequenceIndex(Terminator terminator) throws InvalidSyntaxException {
int endSequenceIndex = source.indexOf(terminator.endSequence,
currentIndex + terminator.startSequence.length());
return getOrFindTokenTerminator() == null;
}
- public boolean hasMoreTokens() {
+ /**
+ * @return <code>true</code> if the current index is still before the end of the source string.
+ */
+ public boolean hasMoreContent() {
return currentIndex < source.length();
}
currentIndex = tokenIndexes.pop();
}
+ /**
+  * For debugging: prints every remaining token to standard output, one per line, and then
+  * rewinds the tokenizer to the position it had before this call.
+  * <p>
+  * If a syntax error is encountered, a notice is printed and enumeration stops; the tokenizer
+  * position is still restored.
+  */
+ public void enlistRemainingTokens() {
+     // Remember the history depth instead of counting printed tokens: getNextToken() may
+     // record a history entry and then throw or return null, which a counter would miss.
+     final int initialHistoryDepth = tokenIndexes.size();
+
+     try {
+         while (hasMoreContent()) {
+             final TokenizerMatch match = getNextToken();
+             if (match == null) {
+                 break; // end of input reached, nothing more to print
+             }
+             out.println(match.toString());
+         }
+     } catch (InvalidSyntaxException e) {
+         out.println("There is syntax exception");
+     } finally {
+         // restore pointer to original location
+         while (tokenIndexes.size() > initialHistoryDepth) {
+             unreadToken();
+         }
+     }
+ }
+
+
public void skipUntilDataEnd() {
tokenIndexes.push(currentIndex);
currentIndex = source.length();