this.termination = termination;
}
+ public boolean matches(String source, int index) {
+ // boundary check
+ if (source.length() < (index + startSequence.length()))
+ return false;
+
+ // match check
+ for (int i = 0; i < startSequence.length(); i++)
+ if (startSequence.charAt(i) != source.charAt(index + i))
+ return false;
+
+ return true;
+ }
+
public enum TerminationStrategy {
PRESERVE,
DROP
import java.util.stream.Stream;
import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.DROP;
+import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.PRESERVE;
public class Tokenizer {
public Tokenizer setSource(String source){
this.source = source;
currentIndex = 0;
+ tokenIndexes.clear();
return this;
}
public TokenizerMatch getNextToken() {
tokenIndexes.push(currentIndex);
- final StringBuilder result = new StringBuilder();
-
- while (true) {
- if (currentIndex >= source.length())
- return null;
-
- boolean accumulateCurrentChar = true;
-
- for (final Terminator terminator : terminators)
- if (sequenceMatches(terminator.startSequence))
-
- if (terminator.termination == DROP) {
- currentIndex += terminator.startSequence.length();
-
- if (terminator.endSequence != null)
- skipUntilSequence(terminator.endSequence);
-
- if (result.length() > 0)
- return new TokenizerMatch(result.toString(),
- terminator);
- else {
- accumulateCurrentChar = false;
- break;
- }
- } else if (result.length() > 0)
- return new TokenizerMatch(result.toString(), terminator);
- else {
- currentIndex += terminator.startSequence.length();
- return new TokenizerMatch(terminator.startSequence,
- terminator);
- }
- if (accumulateCurrentChar) {
- result.append(source.charAt(currentIndex));
+ StringBuilder token = new StringBuilder();
+
+ while (true){
+ if (isTokenTermination()){
+ Terminator tokenTerminator = findTokenTerminator();
+
+ if (tokenTerminator.termination == PRESERVE){
+ if (hasAccumulatedToken(token)){
+ // already assembled some token
+ return new TokenizerMatch(token.toString(), "", tokenTerminator);
+ } else {
+ currentIndex++;
+ return new TokenizerMatch(tokenTerminator.startSequence, "", tokenTerminator);
+ }
+ } else if (tokenTerminator.termination == DROP){
+ if (hasAccumulatedToken(token)){
+ currentIndex++;
+ return new TokenizerMatch(token.toString(), "", tokenTerminator);
+ } else {
+ currentIndex++;
+ }
+ }
+ } else {
+ token.append(source.charAt(currentIndex));
currentIndex++;
}
}
}
+ private boolean hasAccumulatedToken(StringBuilder token) {
+ return token.length() > 0;
+ }
+
+ private boolean isTokenTermination() {
+ return findTokenTerminator() != null;
+ }
+
+ public Terminator findTokenTerminator() {
+ for (Terminator terminator : terminators)
+ if (terminator.matches(source, currentIndex))
+ return terminator;
+ return null;
+ }
+
public boolean consumeIfNextToken(final String token) {
if (token.equals(getNextToken().token))
return true;
public class TokenizerMatch {
public final String token;
+ public final String reminder;
public final Terminator terminator;
    /**
     * Creates an immutable match result.
     *
     * @param token      the token text that was produced
     * @param reminder   additional matched text; NOTE(review): "reminder" is
     *                   presumably a misspelling of "remainder" — renaming
     *                   would break the public field, so it is kept as-is
     * @param terminator the terminator that ended this token (may be null)
     */
    public TokenizerMatch(final String token, final String reminder, final Terminator terminator) {
        this.token = token;
        this.reminder = reminder;
        this.terminator = terminator;
    }
public String toString() {
return "TokenizerMatch{" +
"token='" + token + '\'' +
+ ", reminder='" + reminder + '\'' +
", terminator=" + terminator +
'}';
}
--- /dev/null
+package eu.svjatoslav.commons.string.tokenizer;
+
+import org.junit.Test;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+public class TerminatorTest {
+
+ @Test
+ public void testMatches(){
+ Terminator terminator = new Terminator(
+ "/*", "*/", Terminator.TerminationStrategy.PRESERVE);
+
+ // must find
+ assertTrue(terminator.matches("/* bla bla bla */", 0));
+
+ // must not find
+ assertFalse(terminator.matches("/* bla bla bla */", 1));
+
+ // must not overflow
+ assertFalse(terminator.matches("/", 0));
+ }
+
+}
\ No newline at end of file
import org.junit.Test;
+import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.PRESERVE;
import static org.junit.Assert.assertEquals;
public class TokenizerTest {

    @Test
    public void findTokenTerminator() throws Exception {
        // TODO(review): this test has no assertions — it only constructs the
        // tokenizer. It should call findTokenTerminator() and assert on the
        // result (e.g. null at a plain character, non-null at "/*").

        Tokenizer tokenizer = new Tokenizer("this /* comment */ a test")
                .addTerminator("/*", "*/", PRESERVE);



    }

    @Test
    public void you_can_peek() throws Exception {
        // Peeking must not consume input: after consuming "this", the next
        // token ("is") is expected to be visible to peekIsOneOf.
        Tokenizer tokenizer = new Tokenizer("this is a N'2015-03-18 09:48:54.360' test")
                .addTerminator(" ", Terminator.TerminationStrategy.DROP)
                .addTerminator("N'", "'", PRESERVE);

        tokenizer.expectAndConsumeNextToken("this");

        assertEquals(true, tokenizer.peekIsOneOf("maybe", "is", "that"));
    }

    @Test
    public void complexTerminator() throws Exception {
        // TODO(review): this "test" asserts nothing — it only prints tokens
        // for manual inspection. Replace the println calls with assertEquals
        // on the expected token sequence once tokenizer behavior is settled.
        Tokenizer tokenizer = new Tokenizer(" this((\"hello\" /* comment */ (( is a N'2015-03-18 09:48:54.360' test")
                .addTerminator(" ", Terminator.TerminationStrategy.DROP)
                .addTerminator("(", Terminator.TerminationStrategy.PRESERVE)
                .addTerminator("\"", "\"" ,Terminator.TerminationStrategy.PRESERVE)
                .addTerminator("/*", "*/" ,Terminator.TerminationStrategy.DROP)
                ;

        System.out.println(tokenizer.getNextToken().token);
        System.out.println(tokenizer.getNextToken().token);
        System.out.println(tokenizer.getNextToken().token);
        System.out.println(tokenizer.getNextToken().token);
        System.out.println(tokenizer.getNextToken().token);
        System.out.println(tokenizer.getNextToken().token);
        System.out.println(tokenizer.getNextToken().token);
        System.out.println(tokenizer.getNextToken().token);
        System.out.println(tokenizer.getNextToken().token);


//        tokenizer.expectAndConsumeNextToken("this");
//
//        assertEquals("is", tokenizer.peekNextToken().token);
//
//        assertEquals("is", tokenizer.peekNextToken().token);
//
//        assertEquals(true, tokenizer.peekIsOneOf("maybe", "is", "that"));
    }


    @Test
    public void testComplexTerminator() throws Exception {
        // TODO(review): same issue as complexTerminator() above — println
        // output with no assertions, and duplicated "2nd:" labels. The
        // commented-out assertEquals lines show the intended expectations.
        Tokenizer tokenizer = new Tokenizer("this N'2015-03-18 09:48:54.360' /* thoe unto u */ test")
                .addTerminator(" ", Terminator.TerminationStrategy.DROP)
                .addTerminator("/*", "*/", PRESERVE);

//        tokenizer.expectAndConsumeNextToken("this");

//        assertEquals("2015-03-18 09:48:54.360", tokenizer.getNextToken().token);

        System.out.println("1st: " + tokenizer.getNextToken().token);

        System.out.println("2nd: " + tokenizer.getNextToken().token);

        System.out.println("2nd: " + tokenizer.getNextToken().token);

        System.out.println("2nd: " + tokenizer.getNextToken().token);

        System.out.println("2nd: " + tokenizer.getNextToken().token);

    }

}
\ No newline at end of file