Added support for: Verse, Multiline code block, drawer property. Fixes.
author Svjatoslav Agejenko <svjatoslav@svjatoslav.eu>
Wed, 2 Sep 2020 20:40:00 +0000 (23:40 +0300)
committer Svjatoslav Agejenko <svjatoslav@svjatoslav.eu>
Wed, 2 Sep 2020 20:40:11 +0000 (23:40 +0300)
Verse and text block support. Support for code block without language.
Multiline code blocks support. Better handling of broken list indents.
Detect and ignore drawer property. Generate proper links for most URLs.
Document property name shall not have whitespace.
Documented missing features.

src/main/java/eu/svjatoslav/sixth/core/document/Document.java
src/main/java/eu/svjatoslav/sixth/core/document/Heading.java
src/main/java/eu/svjatoslav/sixth/core/document/Helper.java
src/main/java/eu/svjatoslav/sixth/core/document/Utils.java [new file with mode: 0644]
src/main/java/eu/svjatoslav/sixth/core/document/content/ListElement.java
src/main/java/eu/svjatoslav/sixth/core/document/content/MultilineCode.java [new file with mode: 0644]
src/main/java/eu/svjatoslav/sixth/core/document/content/TextBlock.java
src/main/java/eu/svjatoslav/sixth/core/document/content/Verse.java [new file with mode: 0644]
src/main/java/eu/svjatoslav/sixth/core/document/text/FormattedText.java
src/main/java/eu/svjatoslav/sixth/core/document/text/Hyperlink.java

index 1f7ef26..e4c45bd 100644 (file)
@@ -58,14 +58,43 @@ public class Document {
         // "   * my list title"
         tokenizer.addTerminator(PRESERVE, "([ \\t]+)(\\*)[ \\t]+(.*)?\\r?\\n", TG_LIST);
 
+        // TODO: add numbered list
+
         // DocumentProperty:
         // "#+OPTIONS: H:20 num:20"
-        tokenizer.addTerminator(PRESERVE, "#\\+.+:.*\\r?\\n", TG_DOCUMENT_PROPERTY);
+        tokenizer.addTerminator(PRESERVE, "#\\+[^\\s]+:.*\\r?\\n", TG_DOCUMENT_PROPERTY);
 
-        // newline
-        tokenizer.addTerminator(PRESERVE,".*\\r?\\n", TG_NORMAL_TEXT);
+        // Drawer property:
+        //  " :ID:       533734b9-0456-4448-9830-a43646345615"
+        tokenizer.addTerminator(PRESERVE, "([ \\t]*):([^\\s]+):(.*)\\r?\\n", TG_DRAWER_PROPERTY);
+
+
+        // multiline code block
+        tokenizer.addTerminator(PRESERVE,
+                "([ \\t]*)#\\+BEGIN_SRC" +  // source begin identifier
+                        "(([ \\t]+)(.*))?(\\r?\\n)" + // source block parameters
+                        "((?:.|\\n|\\r)*?)" + // source content
+                        "(\\r?\\n)([ \\t]*)#\\+END_SRC(.*)\\r?\\n" // source end identifier
+                , TG_MULTILINE_CODE);
+
+        // verse
+        tokenizer.addTerminator(PRESERVE,
+                "([ \\t]*)#\\+BEGIN_VERSE" + // verse begin identifier
+                        "(([ \\t]+)(.*))?(\\r?\\n)" + // verse block parameters
+                        "((?:.|\\n|\\r)*?)" + // verse
+                        "(\\r?\\n)([ \\t]*)#\\+END_VERSE(.*)\\r?\\n" // verse end identifier
+                , TG_VERSE);
 
 
+        // TODO: add support for export blocks:
+        //   #+begin_export latex
+        //   \clearpage
+        //   #+end_export
+
+
+        // normal text
+        tokenizer.addTerminator(PRESERVE,".*\\r?\\n", TG_NORMAL_TEXT);
+
         while (tokenizer.hasMoreContent()) {
             final TokenizerMatch tm = tokenizer.getNextToken();
 
index f4a9513..ca72321 100644 (file)
@@ -80,7 +80,7 @@ public class Heading {
             return;
         }
 
-        if (indent == currentListElement.indent){
+        if (indent > currentListElement.parent.indent){
             // list depth is the same
             ListElement newElement = new ListElement(title, indent, currentListElement.parent, type);
             currentListElement.parent.addContent(newElement);
@@ -91,16 +91,6 @@ public class Heading {
         // list dept decreases
         while (true){
             if (currentListElement.parent.indent <= indent){
-                if (currentListElement.parent.indent < 0){
-                    // reached first depth level, cannot go any deeper.
-                    // This special situation arisesbb only when lint indents are not properly aligned.
-                    // That is, document structure is incorrect.
-                    ListElement newElement = new ListElement(title, indent, currentListElement.parent, type);
-                    currentListElement.parent.addContent(newElement);
-                    currentListElement = newElement;
-                    return;
-                }
-
                 ListElement newElement = new ListElement(title, indent, currentListElement.parent.parent, type);
                 currentListElement.parent.parent.addContent(newElement);
                 currentListElement = newElement;
index 6834e60..7ae3274 100644 (file)
@@ -3,10 +3,13 @@ package eu.svjatoslav.sixth.core.document;
 public class Helper {
 
     public static final String TG_NORMAL_TEXT = "normaltext";
+    public static final String TG_MULTILINE_CODE = "multiline code";
+    public static final String TG_VERSE = "verse";
     public static final String TG_HYPERLINK = "hyperlink";
     public static final String TG_HEADING = "heading";
     public static final String TG_LIST = "list";
-    public static final String TG_DOCUMENT_PROPERTY = "document property";
 
+    public static final String TG_DOCUMENT_PROPERTY = "document property";
+    public static final String TG_DRAWER_PROPERTY = "drawer property";
 
 }
diff --git a/src/main/java/eu/svjatoslav/sixth/core/document/Utils.java b/src/main/java/eu/svjatoslav/sixth/core/document/Utils.java
new file mode 100644 (file)
index 0000000..406e7ef
--- /dev/null
@@ -0,0 +1,23 @@
+package eu.svjatoslav.sixth.core.document;
+
+import eu.svjatoslav.commons.string.String2;
+
+public class Utils {
+
+    public static String addIndentExceptFirstLine(String input, int indent) {
+        String[] lines = input.split("\\r?\\n");
+
+        StringBuilder sb = new StringBuilder();
+
+        if (lines.length >0 ) sb.append(lines[0]);
+
+        for (int i = 1; i< lines.length; i++) {
+            sb.append("\n");
+            sb.append(new String2(" ").repeat(indent).toString());
+            sb.append(lines[i]);
+        }
+
+        return sb.toString();
+    }
+
+}
index a82fd53..e1cf33d 100644 (file)
@@ -7,8 +7,7 @@ import eu.svjatoslav.sixth.core.document.text.FormattedText;
 import java.util.ArrayList;
 import java.util.List;
 
-import static eu.svjatoslav.sixth.core.document.Helper.TG_DOCUMENT_PROPERTY;
-import static eu.svjatoslav.sixth.core.document.Helper.TG_NORMAL_TEXT;
+import static eu.svjatoslav.sixth.core.document.Helper.*;
 
 public class ListElement implements Content {
     public final FormattedText name;
@@ -16,6 +15,7 @@ public class ListElement implements Content {
     public final ListElement parent;
     private final String type;
     private final List<Content> content = new ArrayList<>();
+    StringBuilder normalTextAccumulator = new StringBuilder();
 
     public ListElement(FormattedText name, int indent, ListElement parent, String type) {
         this.indent = indent;
@@ -24,19 +24,52 @@ public class ListElement implements Content {
         this.parent = parent;
     }
 
-    public void addContent(ListElement content) {
+    public void addContent(Content content) {
+        applyTextAccumulator();
         this.content.add(content);
     }
 
+    private void applyTextAccumulator(){
+        if (normalTextAccumulator.length() == 0)
+            return;
+
+        content.add(new TextBlock(normalTextAccumulator.toString()));
+
+        normalTextAccumulator.setLength(0);
+    }
+
     public void parse(TokenizerMatch tm) {
 
         if (tm.isGroup(TG_DOCUMENT_PROPERTY)) {
-//            System.out.println("DOCUMENT PROPERT!!!: " + tm.token);
+            // TODO
+            // System.out.println("DOCUMENT PROPERTY!!!: " + tm.token);
+            return;
+        }
+
+        if (tm.isGroup(TG_DRAWER_PROPERTY)) {
+            // TODO
+            // System.out.println("DRAWER PROPERTY!!!: " + tm.token);
             return;
         }
 
         if (tm.isGroup(TG_NORMAL_TEXT)) {
-//            System.out.println("  Plain text content: " + tm.token);
+            normalTextAccumulator.append(tm.token);
+            return;
+        }
+
+        if (tm.isGroup(TG_MULTILINE_CODE)){
+           // System.out.println(tm.toString());
+            String[] groups = tm.getRegExpGroups();
+            addContent(new MultilineCode(
+                    groups[3], // language
+                    groups[5]  // code
+            ));
+            return;
+        }
+
+        if (tm.isGroup(TG_VERSE)){
+            String[] groups = tm.getRegExpGroups();
+            addContent(new Verse(groups[5]));
             return;
         }
 
@@ -45,6 +78,8 @@ public class ListElement implements Content {
 
 
     public void toMD(StringBuilder sb, int indent) {
+        applyTextAccumulator();
+
         if (this.indent >= 0) {
             String2 s = new String2();
             s.append(" ", indent).append(type).append(" ").append(name.toMD(indent + 2)).append("\n");
diff --git a/src/main/java/eu/svjatoslav/sixth/core/document/content/MultilineCode.java b/src/main/java/eu/svjatoslav/sixth/core/document/content/MultilineCode.java
new file mode 100644 (file)
index 0000000..db64e3e
--- /dev/null
@@ -0,0 +1,29 @@
+package eu.svjatoslav.sixth.core.document.content;
+
+import eu.svjatoslav.commons.string.String2;
+
+public class MultilineCode implements Content {
+    public final String language;
+    public final String code;
+
+    public MultilineCode(String language, String code) {
+        this.language = language;
+        this.code = code;
+    }
+
+    @Override
+    public void toMD(StringBuilder sb, int indent) {
+        String2 s = new String2();
+        s.append(" ", indent).append("```" + getMDlanguage() + "\n");
+        // TODO: ensure that required indent is present
+        s.append(code + "\n");
+        s.append(" ", indent).append("```\n");
+        sb.append(s.toString());
+    }
+
+    public String getMDlanguage(){
+        // TODO: do not append ORG parameters to language, like: file: ....
+        if (language == null) return "";
+        return language;
+    }
+}
index 104fb21..a3a4611 100644 (file)
@@ -3,11 +3,14 @@ package eu.svjatoslav.sixth.core.document.content;
 import eu.svjatoslav.sixth.core.document.text.FormattedText;
 
 public class TextBlock implements Content {
-    private FormattedText text;
+    private final FormattedText text;
+
+    public TextBlock (String contentInOrgMarkup){
+        text = FormattedText.fromOrg(contentInOrgMarkup);
+    }
 
     @Override
     public void toMD(StringBuilder sb, int indent) {
-        // TODO
-        //        sb.append(text.toMD());
+        sb.append(text.toMD(indent) + "\n");
     }
 }
diff --git a/src/main/java/eu/svjatoslav/sixth/core/document/content/Verse.java b/src/main/java/eu/svjatoslav/sixth/core/document/content/Verse.java
new file mode 100644 (file)
index 0000000..9c62af7
--- /dev/null
@@ -0,0 +1,21 @@
+package eu.svjatoslav.sixth.core.document.content;
+
+import eu.svjatoslav.commons.string.String2;
+
+public class Verse implements Content {
+    public final String verse;
+
+    public Verse(String verse) {
+        this.verse = verse;
+    }
+
+    @Override
+    public void toMD(StringBuilder sb, int indent) {
+        String2 s = new String2();
+        s.append(" ", indent).append("```\n");
+        // TODO: ensure that required indent is present
+        s.append(verse + "\n");
+        s.append(" ", indent).append("```\n");
+        sb.append(s.toString());
+    }
+}
index 44af05c..350abf3 100644 (file)
@@ -1,6 +1,5 @@
 package eu.svjatoslav.sixth.core.document.text;
 
-import eu.svjatoslav.commons.string.String2;
 import eu.svjatoslav.commons.string.tokenizer.Tokenizer;
 import eu.svjatoslav.commons.string.tokenizer.TokenizerMatch;
 
@@ -8,6 +7,7 @@ import java.util.ArrayList;
 import java.util.List;
 
 import static eu.svjatoslav.sixth.core.document.Helper.TG_HYPERLINK;
+import static eu.svjatoslav.sixth.core.document.Utils.addIndentExceptFirstLine;
 
 public class FormattedText {
     List<FormattedTextElement> elements = new ArrayList<>();
@@ -41,23 +41,7 @@ public class FormattedText {
         for (FormattedTextElement element : elements)
             sb.append(element.toMD());
 
-        return ensureIndent(sb.toString(), indent);
-    }
-
-    public static String ensureIndent(String input, int indent) {
-        String[] lines = input.split("\\r?\\n");
-
-        StringBuilder sb = new StringBuilder();
-
-        sb.append(lines[0]);
-
-        for (int i = 1; i< lines.length; i++) {
-            sb.append("\n");
-            sb.append(new String2(" ").repeat(indent).toString());
-            sb.append(lines[i]);
-        }
-
-        return sb.toString();
+        return addIndentExceptFirstLine(sb.toString(), indent);
     }
 
     private Tokenizer getTokenizer(String contents) {
index c35752f..508003c 100644 (file)
@@ -19,7 +19,11 @@ public class Hyperlink implements FormattedTextElement {
 
     @Override
     public String toMD() {
-        return "<URL: " + URL + ", LABEL: " + label + ">";
+
+        if (URL.startsWith("id:"))
+            return label; // TODO
+
+        return  "[" + label + "]("+ URL + ")";
     }
 
     public static Hyperlink fromOrg(TokenizerMatch tokenizerMatch) {