Convenience method for splitting string into groups based on regular expression
author Svjatoslav Agejenko <svjatoslav@svjatoslav.eu>
Mon, 10 Aug 2020 18:28:29 +0000 (21:28 +0300)
committer Svjatoslav Agejenko <svjatoslav@svjatoslav.eu>
Mon, 10 Aug 2020 18:28:29 +0000 (21:28 +0300)
src/main/java/eu/svjatoslav/commons/string/String2.java
src/test/java/eu/svjatoslav/commons/string/tokenizer/TokenizerTest.java

index 6b36b5b..af7ac27 100755 (executable)
@@ -6,6 +6,8 @@ package eu.svjatoslav.commons.string;
 
 import java.util.ArrayList;
 import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 public class String2 {
 
@@ -156,4 +158,19 @@ public class String2 {
     public String toString() {
         return getSubString(0, chars.size());
     }
+
+
+    /**
+     * Extracts the capturing groups of the first match of a regular expression.
+     * <p>
+     * Only the first occurrence found by {@link Matcher#find()} is considered;
+     * the match does not have to cover the whole input.
+     *
+     * @param s      text to search
+     * @param regexp regular expression containing the capturing groups of interest
+     * @return values of groups 1..N in declaration order; an element is
+     *         {@code null} when its group did not take part in the match.
+     *         An empty array is returned when the expression has no capturing
+     *         groups or when nothing in {@code s} matches.
+     * @throws java.util.regex.PatternSyntaxException if {@code regexp} is not a valid expression
+     */
+    public static String[] getGroups(String s, String regexp){
+        Matcher matcher = Pattern.compile(regexp).matcher(s);
+
+        // group() throws IllegalStateException unless a match was found first,
+        // so the outcome of find() has to be checked rather than ignored.
+        if (!matcher.find()) {
+            return new String[0];
+        }
+
+        String[] result = new String[matcher.groupCount()];
+        for (int i = 0; i < result.length; i++){
+            result[i] = matcher.group(i + 1);
+        }
+
+        return result;
+    }
 }
index 519b6d1..19fbd4d 100644 (file)
@@ -44,6 +44,23 @@ public class TokenizerTest {
         assertFalse(tokenizer.hasMoreContent());
     }
 
+
+    /**
+     * Tokenizes a three-line, CRLF-separated input using two terminators and
+     * expects the heading token to still carry its line break while the two
+     * following tokens come back without one; afterwards the tokenizer must
+     * report that it is exhausted.
+     */
+    @Test
+    public void testMultilineTokenization() throws Exception {
+        final String input = "* heading\r\nnormal text\r\nnormal text 2";
+        final Tokenizer multiline = new Tokenizer(input);
+
+        multiline.addTerminator(PRESERVE,"\\*[ \\t]+.*\\r?\\n");
+        multiline.addTerminator(DROP,"\\r?\\n", "normalText");
+
+        final String[] expectedTokens = {"* heading\r\n", "normal text", "normal text 2"};
+        for (String expected : expectedTokens) {
+            assertTokenEquals(expected, multiline);
+        }
+
+        // Nothing may be left once all three tokens have been consumed.
+        assertNull(multiline.getNextToken());
+        assertFalse(multiline.hasMoreContent());
+    }
+
+
     private void assertTokenEquals(String expectedValue, Tokenizer tokenizer){
         assertEquals(expectedValue, tokenizer.getNextToken().token);
     }