Convenience method for splitting string into groups based on regular expression
[svjatoslav_commons.git] / src / test / java / eu / svjatoslav / commons / string / tokenizer / TokenizerTest.java
1 package eu.svjatoslav.commons.string.tokenizer;
2
3 import org.junit.Test;
4
5 import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.DROP;
6 import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.PRESERVE;
7 import static org.junit.Assert.*;
8
9 public class TokenizerTest {
10
11     @Test
12     public void testPeeking() throws Exception {
13         Tokenizer tokenizer = new Tokenizer("this is a N'2015-03-18 09:48:54.360' test");
14         tokenizer.addTerminator(DROP, "\\s");
15         tokenizer.addTerminator(PRESERVE, "N'.*'");
16
17         tokenizer.expectAndConsumeNextStringToken("this");
18
19         assertEquals("is", tokenizer.peekNextToken().token);
20
21         assertEquals("is", tokenizer.peekNextToken().token);
22
23         assertTrue(tokenizer.peekIsOneOf("maybe", "is", "that"));
24     }
25
26     @Test
27     public void testTokenization() throws Exception {
28         Tokenizer tokenizer = new Tokenizer("\"hello world\"   /** comment **/   ((  is a N'2015-03-18 09:48:54.360' test");
29         tokenizer.addTerminator(DROP,"\\s");
30         tokenizer.addTerminator(PRESERVE,"\\(");
31         tokenizer.addTerminator(PRESERVE, "\\\".*\\\"");
32         tokenizer.addTerminator(PRESERVE, "N'.*'");
33         tokenizer.addTerminator(DROP,"/\\*.*\\*/");
34
35         assertTokenEquals("\"hello world\"", tokenizer);
36         assertTokenEquals("(", tokenizer);
37         assertTokenEquals("(", tokenizer);
38         assertTokenEquals("is", tokenizer);
39         assertTokenEquals("a", tokenizer);
40         assertTokenEquals("N'2015-03-18 09:48:54.360'", tokenizer);
41         assertTokenEquals("test", tokenizer);
42
43         assertNull(tokenizer.getNextToken());
44         assertFalse(tokenizer.hasMoreContent());
45     }
46
47
48     @Test
49     public void testMultilineTokenization() throws Exception {
50         Tokenizer tokenizer = new Tokenizer(
51                 "* heading\r\nnormal text\r\nnormal text 2");
52         tokenizer.addTerminator(PRESERVE,"\\*[ \\t]+.*\\r?\\n");
53         tokenizer.addTerminator(DROP,"\\r?\\n", "normalText");
54
55         assertTokenEquals("* heading\r\n", tokenizer);
56         assertTokenEquals("normal text", tokenizer);
57         assertTokenEquals("normal text 2", tokenizer);
58
59         assertNull(tokenizer.getNextToken());
60         assertFalse(tokenizer.hasMoreContent());
61     }
62
63
64     private void assertTokenEquals(String expectedValue, Tokenizer tokenizer){
65         assertEquals(expectedValue, tokenizer.getNextToken().token);
66     }
67
68 }