Convenience method for splitting a string into groups based on a regular expression
[svjatoslav_commons.git] / src / test / java / eu / svjatoslav / commons / string / tokenizer / TokenizerTest.java
index e72b936..19fbd4d 100644 (file)
@@ -2,86 +2,67 @@ package eu.svjatoslav.commons.string.tokenizer;
 
 import org.junit.Test;
 
+import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.DROP;
 import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.PRESERVE;
-import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.*;
 
 public class TokenizerTest {
-    @Test
-    public void findTokenTerminator() throws Exception {
-
-        Tokenizer tokenizer = new Tokenizer("this /* comment */ a test")
-                .addTerminator("/*", "*/", PRESERVE);
-
-
-
-    }
-
 
     @Test
-    public void you_can_peek() throws Exception {
-        Tokenizer tokenizer = new Tokenizer("this is a N'2015-03-18 09:48:54.360' test")
-                .addTerminator(" ", Terminator.TerminationStrategy.DROP)
-                .addTerminator("N'", "'", PRESERVE);
+    public void testPeeking() throws Exception {
+        Tokenizer tokenizer = new Tokenizer("this is a N'2015-03-18 09:48:54.360' test");
+        tokenizer.addTerminator(DROP, "\\s");
+        tokenizer.addTerminator(PRESERVE, "N'.*'");
 
-        tokenizer.expectAndConsumeNextToken("this");
+        tokenizer.expectAndConsumeNextStringToken("this");
 
         assertEquals("is", tokenizer.peekNextToken().token);
 
         assertEquals("is", tokenizer.peekNextToken().token);
 
-        assertEquals(true, tokenizer.peekIsOneOf("maybe", "is", "that"));
+        assertTrue(tokenizer.peekIsOneOf("maybe", "is", "that"));
     }
 
     @Test
-    public void complexTerminator() throws Exception {
-        Tokenizer tokenizer = new Tokenizer("   this((\"hello\"  /* comment */   ((  is a N'2015-03-18 09:48:54.360' test")
-                .addTerminator(" ", Terminator.TerminationStrategy.DROP)
-                .addTerminator("(", Terminator.TerminationStrategy.PRESERVE)
-                .addTerminator("\"", "\"" ,Terminator.TerminationStrategy.PRESERVE)
-                .addTerminator("/*", "*/" ,Terminator.TerminationStrategy.DROP)
-                ;
-
-        System.out.println(tokenizer.getNextToken().token);
-        System.out.println(tokenizer.getNextToken().token);
-        System.out.println(tokenizer.getNextToken().token);
-        System.out.println(tokenizer.getNextToken().token);
-        System.out.println(tokenizer.getNextToken().token);
-        System.out.println(tokenizer.getNextToken().token);
-        System.out.println(tokenizer.getNextToken().token);
-        System.out.println(tokenizer.getNextToken().token);
-        System.out.println(tokenizer.getNextToken().token);
-
-
-//        tokenizer.expectAndConsumeNextToken("this");
-//
-//        assertEquals("is", tokenizer.peekNextToken().token);
-//
-//        assertEquals("is", tokenizer.peekNextToken().token);
-//
-//        assertEquals(true, tokenizer.peekIsOneOf("maybe", "is", "that"));
+    public void testTokenization() throws Exception {
+        Tokenizer tokenizer = new Tokenizer("\"hello world\"   /** comment **/   ((  is a N'2015-03-18 09:48:54.360' test");
+        tokenizer.addTerminator(DROP,"\\s");
+        tokenizer.addTerminator(PRESERVE,"\\(");
+        tokenizer.addTerminator(PRESERVE, "\\\".*\\\"");
+        tokenizer.addTerminator(PRESERVE, "N'.*'");
+        tokenizer.addTerminator(DROP,"/\\*.*\\*/");
+
+        assertTokenEquals("\"hello world\"", tokenizer);
+        assertTokenEquals("(", tokenizer);
+        assertTokenEquals("(", tokenizer);
+        assertTokenEquals("is", tokenizer);
+        assertTokenEquals("a", tokenizer);
+        assertTokenEquals("N'2015-03-18 09:48:54.360'", tokenizer);
+        assertTokenEquals("test", tokenizer);
+
+        assertNull(tokenizer.getNextToken());
+        assertFalse(tokenizer.hasMoreContent());
     }
 
 
     @Test
-    public void testComplexTerminator() throws Exception {
-        Tokenizer tokenizer = new Tokenizer("this N'2015-03-18 09:48:54.360'  /* thoe unto u */ test")
-                .addTerminator(" ", Terminator.TerminationStrategy.DROP)
-                .addTerminator("/*", "*/", PRESERVE);
-
-//        tokenizer.expectAndConsumeNextToken("this");
-
-//        assertEquals("2015-03-18 09:48:54.360", tokenizer.getNextToken().token);
-
-        System.out.println("1st: " + tokenizer.getNextToken().token);
-
-        System.out.println("2nd: " + tokenizer.getNextToken().token);
-
-        System.out.println("2nd: " + tokenizer.getNextToken().token);
-
-        System.out.println("2nd: " + tokenizer.getNextToken().token);
+    public void testMultilineTokenization() throws Exception {
+        Tokenizer tokenizer = new Tokenizer(
+                "* heading\r\nnormal text\r\nnormal text 2");
+        tokenizer.addTerminator(PRESERVE,"\\*[ \\t]+.*\\r?\\n");
+        tokenizer.addTerminator(DROP,"\\r?\\n", "normalText");
+
+        assertTokenEquals("* heading\r\n", tokenizer);
+        assertTokenEquals("normal text", tokenizer);
+        assertTokenEquals("normal text 2", tokenizer);
+
+        assertNull(tokenizer.getNextToken());
+        assertFalse(tokenizer.hasMoreContent());
+    }
 
-        System.out.println("2nd: " + tokenizer.getNextToken().token);
 
+    private void assertTokenEquals(String expectedValue, Tokenizer tokenizer){
+        assertEquals(expectedValue, tokenizer.getNextToken().token);
     }
 
 }
\ No newline at end of file