From 18e9d2f537f5bb900172558b6e7af9485392f32e Mon Sep 17 00:00:00 2001
From: Svjatoslav Agejenko
Date: Mon, 10 Aug 2020 21:28:29 +0300
Subject: [PATCH] Convenience method for splitting string into groups based on regular expression

---
Review notes (this free-text area is ignored by git am):
 * getGroups() ignored the result of Matcher.find(); for a pattern with
   capturing groups and an input that does not match, Matcher.group()
   threw IllegalStateException. The method now returns an empty array in
   that case and carries Javadoc. Hunk line counts and the diffstat were
   updated to match the new number of added lines.
 * Line breaks, indentation and blank context lines of this patch were
   reconstructed from a whitespace-collapsed copy. TODO: confirm against
   the target files, or apply with --ignore-whitespace. The post-image
   blob id af7ac27 on the index line predates these changes.

 .../eu/svjatoslav/commons/string/String2.java   | 31 +++++++++++++++++++++++++++++++
 .../commons/string/tokenizer/TokenizerTest.java | 17 +++++++++++++++++
 2 files changed, 48 insertions(+)

diff --git a/src/main/java/eu/svjatoslav/commons/string/String2.java b/src/main/java/eu/svjatoslav/commons/string/String2.java
index 6b36b5b..af7ac27 100755
--- a/src/main/java/eu/svjatoslav/commons/string/String2.java
+++ b/src/main/java/eu/svjatoslav/commons/string/String2.java
@@ -6,6 +6,8 @@ package eu.svjatoslav.commons.string;
 
 import java.util.ArrayList;
 import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 public class String2 {
 
@@ -156,4 +158,33 @@ public class String2 {
     public String toString() {
         return getSubString(0, chars.size());
     }
+
+    /**
+     * Finds the first occurrence of a regular expression in a string and
+     * returns the text captured by each of its capturing groups.
+     *
+     * @param s      string to search
+     * @param regexp regular expression with zero or more capturing groups
+     * @return captured text of groups 1..N in declaration order; an element
+     *         is <code>null</code> when its group did not participate in
+     *         the match. An empty array is returned when the expression
+     *         has no capturing groups or does not match at all.
+     * @throws java.util.regex.PatternSyntaxException if regexp is not a valid regular expression
+     */
+    public static String[] getGroups(String s, String regexp) {
+        Matcher matcher = Pattern.compile(regexp).matcher(s);
+
+        // Matcher.group() throws IllegalStateException unless a match
+        // attempt has succeeded, so the result of find() must be checked.
+        if (!matcher.find()) {
+            return new String[0];
+        }
+
+        String[] result = new String[matcher.groupCount()];
+        for (int i = 0; i < result.length; i++) {
+            result[i] = matcher.group(i + 1); // group 0 is the whole match
+        }
+
+        return result;
+    }
 }
diff --git a/src/test/java/eu/svjatoslav/commons/string/tokenizer/TokenizerTest.java b/src/test/java/eu/svjatoslav/commons/string/tokenizer/TokenizerTest.java
index 519b6d1..19fbd4d 100644
--- a/src/test/java/eu/svjatoslav/commons/string/tokenizer/TokenizerTest.java
+++ b/src/test/java/eu/svjatoslav/commons/string/tokenizer/TokenizerTest.java
@@ -44,6 +44,23 @@ public class TokenizerTest {
         assertFalse(tokenizer.hasMoreContent());
     }
 
+
+    @Test
+    public void testMultilineTokenization() throws Exception {
+        Tokenizer tokenizer = new Tokenizer(
+                "* heading\r\nnormal text\r\nnormal text 2");
+        tokenizer.addTerminator(PRESERVE,"\\*[ \\t]+.*\\r?\\n");
+        tokenizer.addTerminator(DROP,"\\r?\\n", "normalText");
+
+        assertTokenEquals("* heading\r\n", tokenizer);
+        assertTokenEquals("normal text", tokenizer);
+        assertTokenEquals("normal text 2", tokenizer);
+
+        assertNull(tokenizer.getNextToken());
+        assertFalse(tokenizer.hasMoreContent());
+    }
+
+
     private void assertTokenEquals(String expectedValue, Tokenizer tokenizer){
         assertEquals(expectedValue, tokenizer.getNextToken().token);
     }
-- 
2.20.1