private String source;
private int currentIndex = 0;
+ int cachedTerminatorIndex = -1;
+ Terminator cachedTerminator;
+
/**
 * Creates a tokenizer over the given input string.
 *
 * @param source the string to tokenize.
 */
public Tokenizer(final String source) {
    this.source = source;
}
this.source = source;
currentIndex = 0;
tokenIndexes.clear();
+
+ cachedTerminatorIndex = -1;
+ cachedTerminator = null;
return this;
}
+ "\" but got \"" + match.token + "\" instead.");
}
+
+
public TokenizerMatch getNextToken() throws InvalidSyntaxException {
tokenIndexes.push(currentIndex);
- StringBuilder token = new StringBuilder();
+ StringBuilder tokenAccumulator = new StringBuilder();
while (true){
+
+ if (currentIndex >= source.length()){ // reached end of input
+ if (hasAccumulatedToken(tokenAccumulator))
+ return new TokenizerMatch(tokenAccumulator.toString(), null, null);
+ else
+ return null;
+ }
+
if (isOngoingToken()) {
- token.append(source.charAt(currentIndex));
+ tokenAccumulator.append(source.charAt(currentIndex));
currentIndex++;
continue;
}
- Terminator tokenTerminator = findTokenTerminator();
-
- if (tokenTerminator.termination == PRESERVE){
- return buildPreservedToken(token, tokenTerminator);
- } else if (tokenTerminator.termination == DROP){
- if (hasAccumulatedToken(token)){
- currentIndex++;
- return new TokenizerMatch(token.toString(), "", tokenTerminator);
- } else {
- currentIndex++;
- }
+ Terminator terminator = getOrFindTokenTerminator();
+
+ if (terminator.termination == PRESERVE)
+ return buildPreservedToken(tokenAccumulator, terminator);
+ else if (terminator.termination == DROP){
+ skipUntilTerminatorEnd(terminator);
+
+ if (hasAccumulatedToken(tokenAccumulator))
+ return new TokenizerMatch(tokenAccumulator.toString(), null, terminator);
}
}
}
+ private void skipUntilTerminatorEnd(Terminator terminator) throws InvalidSyntaxException {
+ if (terminator.hasEndSequence())
+ currentIndex = getEndSequenceIndex(terminator) + terminator.endSequence.length();
+ else
+ currentIndex += terminator.startSequence.length();
+ }
+
private TokenizerMatch buildPreservedToken(StringBuilder token, Terminator terminator) throws InvalidSyntaxException {
if (hasAccumulatedToken(token))
- return new TokenizerMatch(token.toString(), "", terminator);
+ return new TokenizerMatch(token.toString(), null, terminator);
- if (terminator.hasEndSequence()){
- int endSequenceIndex = source.indexOf(terminator.endSequence,
- currentIndex + terminator.startSequence.length());
+ if (terminator.hasEndSequence())
+ return buildComplexPreservedToken(terminator);
+ else
+ return buildSimplePreservedToken(terminator);
+ }
- if (endSequenceIndex < 0)
- throw new InvalidSyntaxException("Expected \"" + terminator.endSequence + "\" but not found.");
+ private TokenizerMatch buildSimplePreservedToken(Terminator terminator) {
+ currentIndex += terminator.startSequence.length();
+ return new TokenizerMatch(terminator.startSequence, null, terminator);
+ }
- String reminder = source.substring(currentIndex + terminator.startSequence.length(), endSequenceIndex);
- currentIndex = endSequenceIndex + terminator.endSequence.length();
+ private TokenizerMatch buildComplexPreservedToken(Terminator terminator) throws InvalidSyntaxException {
+ int endSequenceIndex = getEndSequenceIndex(terminator);
+ String reminder = source.substring(currentIndex + terminator.startSequence.length(), endSequenceIndex);
+ currentIndex = endSequenceIndex + terminator.endSequence.length();
- return new TokenizerMatch(terminator.startSequence, reminder, terminator);
- } else {
- currentIndex += terminator.startSequence.length();
- return new TokenizerMatch(terminator.startSequence, "", terminator);
- }
+ return new TokenizerMatch(terminator.startSequence, reminder, terminator);
+ }
+
+ private int getEndSequenceIndex(Terminator terminator) throws InvalidSyntaxException {
+ int endSequenceIndex = source.indexOf(terminator.endSequence,
+ currentIndex + terminator.startSequence.length());
+
+ if (endSequenceIndex < 0)
+ throw new InvalidSyntaxException("Expected \"" + terminator.endSequence + "\" but not found.");
+
+ return endSequenceIndex;
}
private boolean hasAccumulatedToken(StringBuilder token) {
}
private boolean isOngoingToken() {
- return findTokenTerminator() == null;
+ return getOrFindTokenTerminator() == null;
}
- public Terminator findTokenTerminator() {
+ public boolean hasMoreTokens(){
+ return currentIndex < source.length();
+ }
+
+ /**
+ * Attempts to cache terminator search result.
+ */
+ public Terminator getOrFindTokenTerminator() {
+ if (currentIndex == cachedTerminatorIndex)
+ return cachedTerminator;
+
+ cachedTerminatorIndex = currentIndex;
+ cachedTerminator = findTokenTerminator();
+ return cachedTerminator;
+ }
+
+ private Terminator findTokenTerminator() {
for (Terminator terminator : terminators)
if (terminator.matches(source, currentIndex))
return terminator;
if (peekIsOneOf(possibilities))
throw new InvalidSyntaxException("Not expected \"" + peekNextToken().token + "\" here.");
}
-
-
- public boolean sequenceMatches(final String sequence) {
- if ((currentIndex + sequence.length()) > source.length())
- return false;
-
- for (int i = 0; i < sequence.length(); i++)
- if (sequence.charAt(i) != source.charAt(i + currentIndex))
- return false;
-
- return true;
- }
-
- public void skipUntilDataEnd() {
- tokenIndexes.push(currentIndex);
- currentIndex = source.length();
- }
-
- public void skipUntilSequence(final String sequence) {
- while (currentIndex < source.length()) {
- if (sequenceMatches(sequence)) {
- currentIndex += sequence.length();
- return;
- }
-
- currentIndex++;
- }
- }
-
+
/**
 * Rewinds the tokenizer to the position saved before the most recently
 * read token, so the next call returns that token again.
 */
public void unreadToken() {
    currentIndex = tokenIndexes.pop();
}
import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.DROP;
import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.PRESERVE;
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNull;
public class TokenizerTest {
- @Test
- public void findTokenTerminator() throws Exception {
-
- Tokenizer tokenizer = new Tokenizer("this /* comment */ a test")
- .addTerminator("/*", "*/", PRESERVE);
-
-
-
- }
-
@Test
- public void you_can_peek() throws Exception {
+ public void testPeeking() throws Exception {
Tokenizer tokenizer = new Tokenizer("this is a N'2015-03-18 09:48:54.360' test")
.addTerminator(" ", DROP)
.addTerminator("N'", "'", PRESERVE);
}
@Test
- public void complexTerminator() throws Exception {
- Tokenizer tokenizer = new Tokenizer("/* hello */ /** comment **/ (( is a N'2015-03-18 09:48:54.360' test")
+ public void testTokenization() throws Exception {
+ Tokenizer tokenizer = new Tokenizer("\"hello\" /** comment **/ (( is a N'2015-03-18 09:48:54.360' test")
.addTerminator(" ", DROP)
.addTerminator("(", PRESERVE)
.addTerminator("\"", "\"" , PRESERVE)
- .addTerminator("/*", "*/" , PRESERVE)
+ .addTerminator("N'", "'" , PRESERVE)
+ .addTerminator("/*", "*/" , DROP)
;
- TokenizerMatch nextToken = tokenizer.getNextToken();
- System.out.println("T: \"" + nextToken.token + "\", R: \"" + nextToken.reminder + "\"");
- System.out.println(tokenizer.getNextToken().token);
- System.out.println(tokenizer.getNextToken().token);
- System.out.println(tokenizer.getNextToken().token);
- System.out.println(tokenizer.getNextToken().token);
- System.out.println(tokenizer.getNextToken().token);
- System.out.println(tokenizer.getNextToken().token);
- System.out.println(tokenizer.getNextToken().token);
- System.out.println(tokenizer.getNextToken().token);
-
+ assertTokenEquals("\"", "hello", tokenizer);
+ assertTokenEquals("(", null, tokenizer);
+ assertTokenEquals("(", null, tokenizer);
+ assertTokenEquals("is", null, tokenizer);
+ assertTokenEquals("a", null, tokenizer);
+ assertTokenEquals("N'", "2015-03-18 09:48:54.360", tokenizer);
+ assertTokenEquals("test", null, tokenizer);
-// tokenizer.expectAndConsumeNextToken("this");
-//
-// assertEquals("is", tokenizer.peekNextToken().token);
-//
-// assertEquals("is", tokenizer.peekNextToken().token);
-//
-// assertEquals(true, tokenizer.peekIsOneOf("maybe", "is", "that"));
+ assertNull(tokenizer.getNextToken());
+ assertFalse(tokenizer.hasMoreTokens());
}
+ private void assertTokenEquals(String token, String reminder, Tokenizer tokenizer) throws InvalidSyntaxException {
+ TokenizerMatch nextToken = tokenizer.getNextToken();
- @Test
- public void testComplexTerminator() throws Exception {
- Tokenizer tokenizer = new Tokenizer("this N'2015-03-18 09:48:54.360' /* thoe unto u */ test")
- .addTerminator(" ", DROP)
- .addTerminator("/*", "*/", PRESERVE);
-
-// tokenizer.expectAndConsumeNextToken("this");
-
-// assertEquals("2015-03-18 09:48:54.360", tokenizer.getNextToken().token);
-
- System.out.println("1st: " + tokenizer.getNextToken().token);
-
- System.out.println("2nd: " + tokenizer.getNextToken().token);
-
- System.out.println("2nd: " + tokenizer.getNextToken().token);
-
- System.out.println("2nd: " + tokenizer.getNextToken().token);
-
- System.out.println("2nd: " + tokenizer.getNextToken().token);
+ assertEquals(token, nextToken.token);
+ if (reminder == null)
+ assertNull(nextToken.reminder);
+ else
+ assertEquals(reminder, nextToken.reminder);
}
+ private void debugNextToken(Tokenizer tokenizer) throws InvalidSyntaxException {
+ TokenizerMatch nextToken = tokenizer.getNextToken();
+ if (nextToken == null)
+ System.out.println("null");
+ else
+ System.out.println("T: \"" + nextToken.token + "\", R: \"" + nextToken.reminder + "\"");
+ }
+
}
\ No newline at end of file