From 06527b2a72ee8f1c8434288d522fe2fe8aea5e8b Mon Sep 17 00:00:00 2001 From: Svjatoslav Agejenko Date: Sun, 23 Aug 2020 06:42:42 +0300 Subject: [PATCH] Proper handling fo multiline links. Better list handling. --- .../sixth/core/document/Document.java | 7 ++- .../sixth/core/document/Heading.java | 46 ++++++++++++------- .../sixth/core/document/Helper.java | 2 +- .../core/document/content/ListElement.java | 16 +++---- .../core/document/text/FormattedText.java | 20 +++++++- .../sixth/core/document/text/Hyperlink.java | 4 +- 6 files changed, 61 insertions(+), 34 deletions(-) diff --git a/src/main/java/eu/svjatoslav/sixth/core/document/Document.java b/src/main/java/eu/svjatoslav/sixth/core/document/Document.java index c4e18cd..1f7ef26 100644 --- a/src/main/java/eu/svjatoslav/sixth/core/document/Document.java +++ b/src/main/java/eu/svjatoslav/sixth/core/document/Document.java @@ -5,7 +5,6 @@ import eu.svjatoslav.commons.string.tokenizer.Tokenizer; import eu.svjatoslav.commons.string.tokenizer.TokenizerMatch; import eu.svjatoslav.sixth.core.document.text.FormattedText; -import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.DROP; import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.PRESERVE; import static eu.svjatoslav.sixth.core.document.Helper.*; import static eu.svjatoslav.sixth.core.document.text.FormattedText.fromOrg; @@ -54,17 +53,17 @@ public class Document { // Org list. Examples: // " + my list title" // " - my list title" - tokenizer.addTerminator(PRESERVE, "([ \\t]*)(\\+|-)[ \\t]?(.*)?\\r?\\n", TG_LIST); + tokenizer.addTerminator(PRESERVE, "([ \\t]*)(\\+|-)[ \\t]+(.*)?\\r?\\n", TG_LIST); // " * my list title" - tokenizer.addTerminator(PRESERVE, "([ \\t]+)(\\*)[ \\t]?(.*)?\\r?\\n", TG_LIST); + tokenizer.addTerminator(PRESERVE, "([ \\t]+)(\\*)[ \\t]+(.*)?\\r?\\n", TG_LIST); // DocumentProperty: // "#+OPTIONS: H:20 num:20" tokenizer.addTerminator(PRESERVE, "#\\+.+:.*\\r?\\n", TG_DOCUMENT_PROPERTY); // newline - tokenizer.addTerminator(DROP,"\\r?\\n", TG_NEWLINE); + tokenizer.addTerminator(PRESERVE,".*\\r?\\n", TG_NORMAL_TEXT); while (tokenizer.hasMoreContent()) { diff --git a/src/main/java/eu/svjatoslav/sixth/core/document/Heading.java b/src/main/java/eu/svjatoslav/sixth/core/document/Heading.java index 0ae9ad5..db9f9df 100644 --- a/src/main/java/eu/svjatoslav/sixth/core/document/Heading.java +++ b/src/main/java/eu/svjatoslav/sixth/core/document/Heading.java @@ -1,5 +1,6 @@ package eu.svjatoslav.sixth.core.document; +import eu.svjatoslav.commons.string.String2; import eu.svjatoslav.commons.string.tokenizer.Tokenizer; import eu.svjatoslav.commons.string.tokenizer.TokenizerMatch; import eu.svjatoslav.sixth.core.document.content.ListElement; @@ -16,7 +17,7 @@ public class Heading { public final Heading parent; private final List children = new ArrayList<>(); - public final ListElement rootListElement = new ListElement(null, -1, null, ""); + public final ListElement rootListElement = new ListElement(null, -2, null, ""); private ListElement currentListElement = rootListElement; public Heading(FormattedText name, int level, Heading parent){ @@ -46,12 +47,9 @@ public class Heading { } private String enlistTitleInMD() { - StringBuilder sb = new StringBuilder(); - for (int i = 0; i < level; i++) - sb.append("#"); - - sb.append(" ").append(name.toMD()).append("\n"); - return sb.toString(); + String2 s = new String2(); + s.append("#", level).append(" ").append(name.toMD(0)).append("\n"); + return s.toString(); } public ListElement getCurrentHeading(){ @@ -72,23 +70,37 @@ public class Heading { String[] listSections = tm.getRegExpGroups(); int indent = listSections[0].length(); String type = listSections[1]; - - FormattedText title = FormattedText.fromOrg(parseFullListTitle(listSections.length > 2 ? listSections[2] : "", tm.getTokenizer(), indent)); - -// System.out.println(" indent: " + indent); -// System.out.println(" type: " + type); -// System.out.println(" title: " + title); - - ListElement parent = null; + FormattedText title = FormattedText.fromOrg(parseFullListTitle(getPartialTitle(listSections), tm.getTokenizer(), indent)); if (indent > currentListElement.indent){ - ListElement newElement = new ListElement(title, indent, parent, type); + ListElement newElement = new ListElement(title, indent, currentListElement, type); currentListElement.addContent(newElement); currentListElement = newElement; + return; + } + + if (indent == currentListElement.indent){ + ListElement newElement = new ListElement(title, indent, currentListElement.parent, type); + currentListElement.parent.addContent(newElement); + currentListElement = newElement; + return; + } + + while (true){ + if (currentListElement.parent.indent <= indent){ + ListElement newElement = new ListElement(title, indent, currentListElement.parent, type); + currentListElement.parent.addContent(newElement); + currentListElement = newElement; + return; + } + currentListElement = currentListElement.parent; } } + private String getPartialTitle(String[] listSections) { + return listSections.length > 2 ? listSections[2] : ""; + } private String parseFullListTitle(String partialTitle, Tokenizer tokenizer, int listIndent){ StringBuilder sb = new StringBuilder(); @@ -97,7 +109,7 @@ public class Heading { while (tokenizer.hasMoreContent()){ final TokenizerMatch tm = tokenizer.getNextToken(); - if (isContentContinuation(tm, listIndent, null)){ + if (isContentContinuation(tm, listIndent, Helper.TG_NORMAL_TEXT)){ String titleContinuation = tm.token.substring(listIndent).trim(); sb.append("\n").append(titleContinuation); continue; diff --git a/src/main/java/eu/svjatoslav/sixth/core/document/Helper.java b/src/main/java/eu/svjatoslav/sixth/core/document/Helper.java index 2d9c228..6834e60 100644 --- a/src/main/java/eu/svjatoslav/sixth/core/document/Helper.java +++ b/src/main/java/eu/svjatoslav/sixth/core/document/Helper.java @@ -2,7 +2,7 @@ package eu.svjatoslav.sixth.core.document; public class Helper { - public static final String TG_NEWLINE = "newline"; + public static final String TG_NORMAL_TEXT = "normaltext"; public static final String TG_HYPERLINK = "hyperlink"; public static final String TG_HEADING = "heading"; public static final String TG_LIST = "list"; diff --git a/src/main/java/eu/svjatoslav/sixth/core/document/content/ListElement.java b/src/main/java/eu/svjatoslav/sixth/core/document/content/ListElement.java index c4e4425..a82fd53 100644 --- a/src/main/java/eu/svjatoslav/sixth/core/document/content/ListElement.java +++ b/src/main/java/eu/svjatoslav/sixth/core/document/content/ListElement.java @@ -8,19 +8,20 @@ import java.util.ArrayList; import java.util.List; import static eu.svjatoslav.sixth.core.document.Helper.TG_DOCUMENT_PROPERTY; +import static eu.svjatoslav.sixth.core.document.Helper.TG_NORMAL_TEXT; public class ListElement implements Content { public final FormattedText name; public final int indent; public final ListElement parent; - private String type; + private final String type; private final List content = new ArrayList<>(); public ListElement(FormattedText name, int indent, ListElement parent, String type) { this.indent = indent; this.name = name; - this.parent = parent; this.type = type; + this.parent = parent; } public void addContent(ListElement content) { @@ -34,25 +35,24 @@ public class ListElement implements Content { return; } - if (tm.isGroup(null)) { + if (tm.isGroup(TG_NORMAL_TEXT)) { // System.out.println(" Plain text content: " + tm.token); return; } System.out.println("ERROR!!!! Unable to handle: " + tm); - } - public void toMD(StringBuilder sb, int i) { - if (indent >= 0){ + public void toMD(StringBuilder sb, int indent) { + if (this.indent >= 0) { String2 s = new String2(); - s.addSuffix(" ", indent).addSuffix(type).addSuffix(" ").addSuffix(name.toMD()).addSuffix("\n"); + s.append(" ", indent).append(type).append(" ").append(name.toMD(indent + 2)).append("\n"); sb.append(s.toString()); } for (Content c : content) { - c.toMD(sb, indent + 2); + c.toMD(sb, this.indent + 2); } } } diff --git a/src/main/java/eu/svjatoslav/sixth/core/document/text/FormattedText.java b/src/main/java/eu/svjatoslav/sixth/core/document/text/FormattedText.java index 1b51320..ad7d8ca 100644 --- a/src/main/java/eu/svjatoslav/sixth/core/document/text/FormattedText.java +++ b/src/main/java/eu/svjatoslav/sixth/core/document/text/FormattedText.java @@ -1,5 +1,6 @@ package eu.svjatoslav.sixth.core.document.text; +import eu.svjatoslav.commons.string.String2; import eu.svjatoslav.commons.string.tokenizer.Tokenizer; import eu.svjatoslav.commons.string.tokenizer.TokenizerMatch; @@ -28,19 +29,34 @@ public class FormattedText { } - public static FormattedText fromOrg(String orgText){ FormattedText formattedText = new FormattedText(); formattedText.parseOrgSyntax(orgText); return formattedText; } - public String toMD(){ + public String toMD(int indent){ StringBuilder sb = new StringBuilder(); for (FormattedTextElement element : elements) sb.append(element.toMD()); + return ensureIndent(sb.toString(), indent); + } + + public static String ensureIndent(String input, int indent) { + String[] lines = input.split("\\r?\\n"); + + StringBuilder sb = new StringBuilder(); + + sb.append(lines[0]); + + for (int i = 1; i< lines.length; i++) { + sb.append("\n"); + sb.append(new String2(" ").repeat(indent).toString()); + sb.append(lines[i]); + } + return sb.toString(); } diff --git a/src/main/java/eu/svjatoslav/sixth/core/document/text/Hyperlink.java b/src/main/java/eu/svjatoslav/sixth/core/document/text/Hyperlink.java index 41d7ab5..c35752f 100644 --- a/src/main/java/eu/svjatoslav/sixth/core/document/text/Hyperlink.java +++ b/src/main/java/eu/svjatoslav/sixth/core/document/text/Hyperlink.java @@ -9,10 +9,10 @@ import static eu.svjatoslav.sixth.core.document.Helper.TG_HYPERLINK; public class Hyperlink implements FormattedTextElement { public static final Terminator orgTerminator = - new Terminator(PRESERVE, "\\[\\[(.+)\\][ \\t]*\\[(.+)\\]\\]", TG_HYPERLINK); + new Terminator(PRESERVE, "\\[\\[([\\s\\S]+)\\][ \\t\\r\\n]*\\[([\\s\\S]+)\\]\\]", TG_HYPERLINK); public static final Terminator orgTerminator2 = - new Terminator(PRESERVE, "\\[\\[(.*)\\]\\]", TG_HYPERLINK); + new Terminator(PRESERVE, "\\[\\[([\\s\\S]+)\\]\\]", TG_HYPERLINK); private String label; private String URL; -- 2.20.1