Parse multiline list headings
author Svjatoslav Agejenko <svjatoslav@svjatoslav.eu>
Sat, 15 Aug 2020 15:18:45 +0000 (18:18 +0300)
committer Svjatoslav Agejenko <svjatoslav@svjatoslav.eu>
Sat, 15 Aug 2020 15:18:45 +0000 (18:18 +0300)
src/main/java/eu/svjatoslav/sixth/core/document/Document.java
src/main/java/eu/svjatoslav/sixth/core/document/Heading.java
src/main/java/eu/svjatoslav/sixth/core/document/ListElement.java [deleted file]
src/main/java/eu/svjatoslav/sixth/core/document/content/Content.java
src/main/java/eu/svjatoslav/sixth/core/document/content/ListElement.java [new file with mode: 0644]
src/main/java/eu/svjatoslav/sixth/core/document/content/PropertyCollection.java
src/main/java/eu/svjatoslav/sixth/core/document/content/TextBlock.java
src/main/java/eu/svjatoslav/sixth/core/document/text/FormattedText.java
src/main/java/eu/svjatoslav/sixth/core/document/text/FormattedTextElement.java
src/main/java/eu/svjatoslav/sixth/core/document/text/Hyperlink.java
src/main/java/eu/svjatoslav/sixth/core/document/text/PlainText.java

index 262bee3..c4e18cd 100644 (file)
@@ -1,6 +1,5 @@
 package eu.svjatoslav.sixth.core.document;
 
-import eu.svjatoslav.commons.string.String2;
 import eu.svjatoslav.commons.string.tokenizer.InvalidSyntaxException;
 import eu.svjatoslav.commons.string.tokenizer.Tokenizer;
 import eu.svjatoslav.commons.string.tokenizer.TokenizerMatch;
@@ -28,14 +27,10 @@ public class Document {
             return createHeading(name, targetLevel);
         }
 
-        try {
-            Heading missingIntermediate = new Heading(fromOrg("<noname>"), currentHeading.level + 1, currentHeading);
-            currentHeading.addChild(missingIntermediate);
-            currentHeading = missingIntermediate;
-            return createHeading(name, targetLevel);
-        } catch (InvalidSyntaxException e) {
-            throw new IllegalStateException("impossible situation");
-        }
+        Heading missingIntermediate = new Heading(fromOrg("<noname>"), currentHeading.level + 1, currentHeading);
+        currentHeading.addChild(missingIntermediate);
+        currentHeading = missingIntermediate;
+        return createHeading(name, targetLevel);
     }
 
     public Heading getCurrentHeading(){
@@ -43,10 +38,7 @@ public class Document {
     }
 
     private void parseHeading(TokenizerMatch token) throws InvalidSyntaxException {
-        // expected sample heading:
-        // ***** test heading
-
-        String[] headingSections = String2.getGroups(token.token, "(\\*+)[ \\t](.*)\\r?\\n");
+        String[] headingSections = token.getRegExpGroups();
         int level = headingSections[0].length();
         String title = headingSections[1];
         createHeading(fromOrg(title), level);
@@ -57,15 +49,15 @@ public class Document {
 
         // Org heading:
         // "*** Example Heading 1234"
-        tokenizer.addTerminator(PRESERVE, "\\*+[ \\t].*\\r?\\n", TG_HEADING);
+        tokenizer.addTerminator(PRESERVE, "(\\*+)[ \\t](.*)\\r?\\n", TG_HEADING);
 
         // Org list. Examples:
         // "   + my list title"
         // "   - my list title"
-        tokenizer.addTerminator(PRESERVE, "[ \\t]*(\\+|-)([ \\t].*)?\\r?\\n", TG_LIST);
+        tokenizer.addTerminator(PRESERVE, "([ \\t]*)(\\+|-)[ \\t]?(.*)?\\r?\\n", TG_LIST);
 
         // "   * my list title"
-        tokenizer.addTerminator(PRESERVE, "[ \\t]+\\*([ \\t].*)?\\r?\\n", TG_LIST);
+        tokenizer.addTerminator(PRESERVE, "([ \\t]+)(\\*)[ \\t]?(.*)?\\r?\\n", TG_LIST);
 
         // DocumentProperty:
         // "#+OPTIONS: H:20 num:20"
@@ -83,8 +75,7 @@ public class Document {
                 continue;
             }
 
-            tokenizer.unreadToken();
-            currentHeading.parse(tokenizer);
+            currentHeading.parse(tm);
         }
 
     }
index 23cc92f..0ae9ad5 100644 (file)
@@ -1,16 +1,14 @@
 package eu.svjatoslav.sixth.core.document;
 
-import eu.svjatoslav.commons.string.tokenizer.InvalidSyntaxException;
 import eu.svjatoslav.commons.string.tokenizer.Tokenizer;
 import eu.svjatoslav.commons.string.tokenizer.TokenizerMatch;
+import eu.svjatoslav.sixth.core.document.content.ListElement;
 import eu.svjatoslav.sixth.core.document.text.FormattedText;
 
 import java.util.ArrayList;
 import java.util.List;
 
-import static eu.svjatoslav.sixth.core.document.Helper.TG_DOCUMENT_PROPERTY;
 import static eu.svjatoslav.sixth.core.document.Helper.TG_LIST;
-import static eu.svjatoslav.sixth.core.document.text.FormattedText.fromOrg;
 
 public class Heading {
     public final FormattedText name;
@@ -18,7 +16,7 @@ public class Heading {
     public final Heading parent;
     private final List<Heading> children = new ArrayList<>();
 
-    public final ListElement rootListElement = new ListElement(null, 0, null);
+    public final ListElement rootListElement = new ListElement(null, -1, null, "");
     private ListElement currentListElement = rootListElement;
 
     public Heading(FormattedText name, int level, Heading parent){
@@ -40,8 +38,7 @@ public class Heading {
 
         if (level > 0) sb.append(enlistTitleInMD());
 
-       // sb.append(unparsedContent);
-
+        rootListElement.toMD(sb, -2);
 
         children.stream().map(Heading::toMD).forEach(sb::append);
 
@@ -53,59 +50,70 @@ public class Heading {
         for (int i = 0; i < level; i++)
             sb.append("#");
 
-        sb.append(" ").append(name.compileMd()).append("\n");
+        sb.append(" ").append(name.toMD()).append("\n");
         return sb.toString();
     }
 
-    public ListElement createListElement(FormattedText name, int targetLevel){
-        if (currentListElement.level == (targetLevel - 1)){
-            ListElement newListElement = new ListElement(name, targetLevel, currentListElement);
-            currentListElement.addChild(newListElement);
-            currentListElement = newListElement;
-            return newListElement;
-        }
+    public ListElement getCurrentHeading(){
+        return currentListElement;
+    }
 
-        if (currentListElement.level > (targetLevel - 1)){
-            currentListElement = currentListElement.parent;
-            return createListElement(name, targetLevel);
-        }
+    public void parse(TokenizerMatch tm){
 
-        try {
-            ListElement missingIntermediate = new ListElement(
-                    fromOrg("<noname>"), currentListElement.level + 1, currentListElement);
-            currentListElement.addChild(missingIntermediate);
-            currentListElement = missingIntermediate;
-            return createListElement(name, targetLevel);
-        } catch (InvalidSyntaxException e) {
-            throw new IllegalStateException("impossible situation");
+        if (tm.isGroup(TG_LIST)){
+            parseList(tm);
+            return;
         }
+
+        currentListElement.parse(tm);
     }
 
-    public ListElement getCurrentHeading(){
-        return currentListElement;
+    private void parseList(TokenizerMatch tm) {
+        String[] listSections = tm.getRegExpGroups();
+        int indent = listSections[0].length();
+        String type = listSections[1];
+
+        FormattedText title = FormattedText.fromOrg(parseFullListTitle(listSections.length > 2 ? listSections[2] : "", tm.getTokenizer(), indent));
+
+//        System.out.println("  indent: " + indent);
+//        System.out.println("  type: " + type);
+//        System.out.println("  title: " + title);
+
+        ListElement parent = null;
+
+        if (indent > currentListElement.indent){
+            ListElement newElement = new ListElement(title, indent, parent, type);
+            currentListElement.addContent(newElement);
+            currentListElement = newElement;
+        }
+
     }
 
-    public void parse(Tokenizer tokenizer){
-        while (tokenizer.hasMoreContent()) {
-            final TokenizerMatch tm = tokenizer.getNextToken();
 
-            if (tm.isGroup(TG_LIST)){
-                System.out.println("LIST!: " + tm.token);
-                continue;
-            }
+    private String parseFullListTitle(String partialTitle, Tokenizer tokenizer, int listIndent){
+        StringBuilder sb = new StringBuilder();
+        sb.append(partialTitle);
 
-            if (tm.isGroup(TG_DOCUMENT_PROPERTY)){
-//                System.out.println("DOCUMENT PROPERTY!!!: " + tm.token);
-                continue;
-            }
+        while (tokenizer.hasMoreContent()){
+            final TokenizerMatch tm = tokenizer.getNextToken();
 
-            if (tm.isGroup(null)){
-                System.out.println("unhandled \"" + tm.token + "\"");
+            if (isContentContinuation(tm, listIndent, null)){
+                String titleContinuation = tm.token.substring(listIndent).trim();
+                sb.append("\n").append(titleContinuation);
                 continue;
             }
 
             tokenizer.unreadToken();
             break;
         }
+
+        return sb.toString();
     }
+
+    public static boolean isContentContinuation(TokenizerMatch tm, int requiredIndent, String requiredGroup) {
+        if (tm.token.length() <= requiredIndent) return false;
+
+        return tm.isGroup(requiredGroup) && tm.token.substring(0, requiredIndent +1).trim().length() == 0;
+    }
+
 }
diff --git a/src/main/java/eu/svjatoslav/sixth/core/document/ListElement.java b/src/main/java/eu/svjatoslav/sixth/core/document/ListElement.java
deleted file mode 100644 (file)
index 908b190..0000000
+++ /dev/null
@@ -1,49 +0,0 @@
-package eu.svjatoslav.sixth.core.document;
-
-import eu.svjatoslav.commons.string.tokenizer.Tokenizer;
-import eu.svjatoslav.commons.string.tokenizer.TokenizerMatch;
-import eu.svjatoslav.sixth.core.document.text.FormattedText;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import static eu.svjatoslav.sixth.core.document.Helper.TG_DOCUMENT_PROPERTY;
-
-public class ListElement {
-    public final FormattedText name;
-    public final int level;
-    public final ListElement parent;
-    private final List<ListElement> children = new ArrayList<>();
-
-    public ListElement(FormattedText name, int level, ListElement parent){
-        this.level = level;
-        this.name = name;
-        this.parent = parent;
-    }
-
-    public void addChild(ListElement listElement){
-        children.add(listElement);
-    }
-
-    public void parse(Tokenizer tokenizer){
-        while (tokenizer.hasMoreContent()) {
-            final TokenizerMatch tm = tokenizer.getNextToken();
-
-            if (tm.isGroup(TG_DOCUMENT_PROPERTY)){
-                System.out.println("DOCUMENT PROPERT!!!: " + tm.token);
-                continue;
-            }
-
-            if (tm.isGroup(null)){
-                System.out.println("    HC: " + tm.token);
-                continue;
-            }
-
-            tokenizer.unreadToken();
-            break;
-        }
-    }
-
-
-
-}
index d57b01b..1dad36c 100644 (file)
@@ -1,4 +1,5 @@
 package eu.svjatoslav.sixth.core.document.content;
 
 public interface Content {
+    void toMD(StringBuilder sb, int indent);
 }
diff --git a/src/main/java/eu/svjatoslav/sixth/core/document/content/ListElement.java b/src/main/java/eu/svjatoslav/sixth/core/document/content/ListElement.java
new file mode 100644 (file)
index 0000000..c4e4425
--- /dev/null
@@ -0,0 +1,58 @@
+package eu.svjatoslav.sixth.core.document.content;
+
+import eu.svjatoslav.commons.string.String2;
+import eu.svjatoslav.commons.string.tokenizer.TokenizerMatch;
+import eu.svjatoslav.sixth.core.document.text.FormattedText;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import static eu.svjatoslav.sixth.core.document.Helper.TG_DOCUMENT_PROPERTY;
+
+public class ListElement implements Content {
+    public final FormattedText name;
+    public final int indent;
+    public final ListElement parent;
+    private String type;
+    private final List<Content> content = new ArrayList<>();
+
+    public ListElement(FormattedText name, int indent, ListElement parent, String type) {
+        this.indent = indent;
+        this.name = name;
+        this.parent = parent;
+        this.type = type;
+    }
+
+    public void addContent(ListElement content) {
+        this.content.add(content);
+    }
+
+    public void parse(TokenizerMatch tm) {
+
+        if (tm.isGroup(TG_DOCUMENT_PROPERTY)) {
+//            System.out.println("DOCUMENT PROPERT!!!: " + tm.token);
+            return;
+        }
+
+        if (tm.isGroup(null)) {
+//            System.out.println("  Plain text content: " + tm.token);
+            return;
+        }
+
+        System.out.println("ERROR!!!! Unable to handle: " + tm);
+
+    }
+
+
+    public void toMD(StringBuilder sb, int i) {
+        if (indent >= 0){
+            String2 s = new String2();
+            s.addSuffix(" ", indent).addSuffix(type).addSuffix(" ").addSuffix(name.toMD()).addSuffix("\n");
+            sb.append(s.toString());
+        }
+
+        for (Content c : content) {
+            c.toMD(sb, indent + 2);
+        }
+    }
+}
index f3840b4..4034670 100644 (file)
@@ -7,4 +7,8 @@ public class PropertyCollection implements Content {
 
     private Map<String, String> propertyToValue = new HashMap<>();
 
+    @Override
+    public void toMD(StringBuilder sb, int indent) {
+        // TODO
+    }
 }
index e5bd4ff..104fb21 100644 (file)
@@ -4,4 +4,10 @@ import eu.svjatoslav.sixth.core.document.text.FormattedText;
 
 public class TextBlock implements Content {
     private FormattedText text;
+
+    @Override
+    public void toMD(StringBuilder sb, int indent) {
+        // TODO
+        //        sb.append(text.toMD());
+    }
 }
index ae6a421..1b51320 100644 (file)
@@ -1,22 +1,23 @@
 package eu.svjatoslav.sixth.core.document.text;
 
-import eu.svjatoslav.commons.string.tokenizer.InvalidSyntaxException;
 import eu.svjatoslav.commons.string.tokenizer.Tokenizer;
 import eu.svjatoslav.commons.string.tokenizer.TokenizerMatch;
 
 import java.util.ArrayList;
 import java.util.List;
 
+import static eu.svjatoslav.sixth.core.document.Helper.TG_HYPERLINK;
+
 public class FormattedText {
     List<FormattedTextElement> elements = new ArrayList<>();
 
-    public void parseOrgSyntax(String orgText) throws InvalidSyntaxException {
+    public void parseOrgSyntax(String orgText) {
 
         Tokenizer tokenizer = getTokenizer(orgText);
         while (tokenizer.hasMoreContent()) {
             final TokenizerMatch token = tokenizer.getNextToken();
 
-            if (token.terminator == Hyperlink.orgTerminator){
+            if (token.isGroup(TG_HYPERLINK)){
                 elements.add(Hyperlink.fromOrg(token));
                 continue;
             }
@@ -28,17 +29,17 @@ public class FormattedText {
     }
 
 
-    public static FormattedText fromOrg(String orgText) throws InvalidSyntaxException {
+    public static FormattedText fromOrg(String orgText){
         FormattedText formattedText = new FormattedText();
         formattedText.parseOrgSyntax(orgText);
         return formattedText;
     }
 
-    public String compileMd(){
+    public String toMD(){
         StringBuilder sb = new StringBuilder();
 
         for (FormattedTextElement element : elements)
-            sb.append(element.compileMd());
+            sb.append(element.toMD());
 
         return sb.toString();
     }
@@ -46,6 +47,7 @@ public class FormattedText {
     private Tokenizer getTokenizer(String contents) {
         final Tokenizer tokenizer = new Tokenizer(contents);
         tokenizer.addTerminator(Hyperlink.orgTerminator);
+        tokenizer.addTerminator(Hyperlink.orgTerminator2);
         return tokenizer;
     }
 
index 5e6884c..41d7ab5 100644 (file)
@@ -1,6 +1,5 @@
 package eu.svjatoslav.sixth.core.document.text;
 
-import eu.svjatoslav.commons.string.tokenizer.InvalidSyntaxException;
 import eu.svjatoslav.commons.string.tokenizer.Terminator;
 import eu.svjatoslav.commons.string.tokenizer.TokenizerMatch;
 
@@ -10,32 +9,32 @@ import static eu.svjatoslav.sixth.core.document.Helper.TG_HYPERLINK;
 public class Hyperlink implements FormattedTextElement {
 
     public static final Terminator orgTerminator =
-            new Terminator(PRESERVE, "\\[\\[.*\\]\\]", TG_HYPERLINK);
+            new Terminator(PRESERVE, "\\[\\[(.+)\\][ \\t]*\\[(.+)\\]\\]", TG_HYPERLINK);
+
+        public static final Terminator orgTerminator2 =
+            new Terminator(PRESERVE, "\\[\\[(.*)\\]\\]", TG_HYPERLINK);
 
     private String label;
     private String URL;
 
     @Override
-    public String compileMd() {
+    public String toMD() {
         return "<URL: " + URL + ", LABEL: " + label + ">";
     }
 
-    public static Hyperlink fromOrg(TokenizerMatch tokenizerMatch) throws InvalidSyntaxException {
+    public static Hyperlink fromOrg(TokenizerMatch tokenizerMatch) {
         Hyperlink hyperlink = new Hyperlink();
-        hyperlink.parseOrgSyntax(tokenizerMatch);
+        hyperlink.parseOrg(tokenizerMatch);
         return hyperlink;
     }
 
-    private void parseOrgSyntax(TokenizerMatch tokenizerMatch) throws InvalidSyntaxException {
-//        Tokenizer tokenizer = new Tokenizer(tokenizerMatch.reminder);
-//        Terminator linkSeparator = tokenizer.addTerminator("][", PRESERVE);
-//
-//        URL = tokenizer.expectAndConsumeNextTerminatorToken(null).token;
-//
-//        if (tokenizer.hasMoreContent()){ // link label is optional
-//            tokenizer.expectAndConsumeNextTerminatorToken(linkSeparator);
-//            label = tokenizer.expectAndConsumeNextTerminatorToken(null).token;
-//        }
+    private void parseOrg(TokenizerMatch tokenizerMatch) {
+        String[] regExpGroups = tokenizerMatch.getRegExpGroups();
+        URL = regExpGroups[0];
+        if (tokenizerMatch.terminator == orgTerminator){
+            label = regExpGroups[1];
+        }
+
     }
 
 }
index 6f46732..b025248 100644 (file)
@@ -9,7 +9,7 @@ public class PlainText implements FormattedTextElement {
     }
 
     @Override
-    public String compileMd() {
+    public String toMD() {
         return content;
     }
 }