X-Git-Url: http://www2.svjatoslav.eu/gitweb/?a=blobdiff_plain;f=src%2Fmain%2Fjava%2Feu%2Fsvjatoslav%2Fcommons%2Fstring%2Ftokenizer%2FTerminator.java;h=8946b32a2474ae6d4c2e91758d5f6b5c3fe10296;hb=9bb7c6bf73ebbcbc66f4abd0fabf2f698c42d42c;hp=8bf6a8dc99db8d1e9f90447cf35162a5590fdf0c;hpb=965adace2b47642e2ca84f499cc32683c12059c6;p=svjatoslav_commons.git diff --git a/src/main/java/eu/svjatoslav/commons/string/tokenizer/Terminator.java b/src/main/java/eu/svjatoslav/commons/string/tokenizer/Terminator.java index 8bf6a8d..8946b32 100755 --- a/src/main/java/eu/svjatoslav/commons/string/tokenizer/Terminator.java +++ b/src/main/java/eu/svjatoslav/commons/string/tokenizer/Terminator.java @@ -1,60 +1,56 @@ /* - * Svjatoslav Commons - shared library of common functionality. - * Copyright ©2012-2019, Svjatoslav Agejenko, svjatoslav@svjatoslav.eu - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 3 of the GNU Lesser General Public License - * or later as published by the Free Software Foundation. + * Svjatoslav Commons - shared library of common functionality. Author: Svjatoslav Agejenko. + * This project is released under Creative Commons Zero (CC0) license. */ - package eu.svjatoslav.commons.string.tokenizer; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + public class Terminator { - public final String startSequence; - public final String endSequence; + String regexp; public final TerminationStrategy termination; + public final String group; + public boolean active = true; + public final Pattern pattern; - public Terminator(final String startSequence, TerminationStrategy termination) { - this.startSequence = startSequence; - this.endSequence = null; - this.termination = termination; - } - - public Terminator(final String startSequence, final String endSequence, TerminationStrategy termination) { - this.startSequence = startSequence; - this.endSequence = endSequence; + public Terminator(TerminationStrategy termination, String regexp, String group) { this.termination = termination; + this.group = group; + this.regexp = regexp; + this.pattern = Pattern.compile("^"+regexp); } - public boolean matches(String source, int index) { - // boundary check - if (source.length() < (index + startSequence.length())) - return false; - - // match check - for (int i = 0; i < startSequence.length(); i++) - if (startSequence.charAt(i) != source.charAt(index + i)) - return false; - - return true; - } - - public boolean hasEndSequence() { - return endSequence != null; + public Matcher match(String source, int index) { + Matcher matcher = pattern.matcher(source); + matcher.region(index, source.length()); + return matcher; } @Override public String toString() { return "Terminator{" + - "startSequence='" + startSequence + '\'' + - ", endSequence='" + endSequence + '\'' + + "regexp='" + regexp + '\'' + ", termination=" + termination + + ", group='" + group + '\'' + + ", active=" + active + '}'; } public enum TerminationStrategy { + /** + * Preserve token that is identified within Terminator and return it for processing. For example when + * building language parser, it could be used for statements that you want to capture. + */ PRESERVE, + + /** + * While tokens that are marked by Terminator are identified, they are dropped and not returned for consumption. + * For example, when building language parser, you might use such strategy for whitespace and comments. + * That is, those tokens act as separators between actually useful tokens, but you don't want to consume such + * separators or comments in your code. + */ DROP } }