From e8052e0b1948260c3701bfe80dfaa8fdf5c38053 Mon Sep 17 00:00:00 2001 From: Svjatoslav Agejenko Date: Sat, 15 Aug 2020 18:18:45 +0300 Subject: [PATCH] Parse multiline list headings --- .../sixth/core/document/Document.java | 27 ++---- .../sixth/core/document/Heading.java | 90 ++++++++++--------- .../sixth/core/document/ListElement.java | 49 ---------- .../sixth/core/document/content/Content.java | 1 + .../core/document/content/ListElement.java | 58 ++++++++++++ .../document/content/PropertyCollection.java | 4 + .../core/document/content/TextBlock.java | 6 ++ .../core/document/text/FormattedText.java | 14 +-- .../document/text/FormattedTextElement.java | 2 +- .../sixth/core/document/text/Hyperlink.java | 29 +++--- .../sixth/core/document/text/PlainText.java | 2 +- 11 files changed, 151 insertions(+), 131 deletions(-) delete mode 100644 src/main/java/eu/svjatoslav/sixth/core/document/ListElement.java create mode 100644 src/main/java/eu/svjatoslav/sixth/core/document/content/ListElement.java diff --git a/src/main/java/eu/svjatoslav/sixth/core/document/Document.java b/src/main/java/eu/svjatoslav/sixth/core/document/Document.java index 262bee3..c4e18cd 100644 --- a/src/main/java/eu/svjatoslav/sixth/core/document/Document.java +++ b/src/main/java/eu/svjatoslav/sixth/core/document/Document.java @@ -1,6 +1,5 @@ package eu.svjatoslav.sixth.core.document; -import eu.svjatoslav.commons.string.String2; import eu.svjatoslav.commons.string.tokenizer.InvalidSyntaxException; import eu.svjatoslav.commons.string.tokenizer.Tokenizer; import eu.svjatoslav.commons.string.tokenizer.TokenizerMatch; @@ -28,14 +27,10 @@ public class Document { return createHeading(name, targetLevel); } - try { - Heading missingIntermediate = new Heading(fromOrg(""), currentHeading.level + 1, currentHeading); - currentHeading.addChild(missingIntermediate); - currentHeading = missingIntermediate; - return createHeading(name, targetLevel); - } catch (InvalidSyntaxException e) { - throw new IllegalStateException("impossible situation"); - } + Heading missingIntermediate = new Heading(fromOrg(""), currentHeading.level + 1, currentHeading); + currentHeading.addChild(missingIntermediate); + currentHeading = missingIntermediate; + return createHeading(name, targetLevel); } public Heading getCurrentHeading(){ @@ -43,10 +38,7 @@ public class Document { } private void parseHeading(TokenizerMatch token) throws InvalidSyntaxException { - // expected sample heading: - // ***** test heading - - String[] headingSections = String2.getGroups(token.token, "(\\*+)[ \\t](.*)\\r?\\n"); + String[] headingSections = token.getRegExpGroups(); int level = headingSections[0].length(); String title = headingSections[1]; createHeading(fromOrg(title), level); @@ -57,15 +49,15 @@ public class Document { // Org heading: // "*** Example Heading 1234" - tokenizer.addTerminator(PRESERVE, "\\*+[ \\t].*\\r?\\n", TG_HEADING); + tokenizer.addTerminator(PRESERVE, "(\\*+)[ \\t](.*)\\r?\\n", TG_HEADING); // Org list. Examples: // " + my list title" // " - my list title" - tokenizer.addTerminator(PRESERVE, "[ \\t]*(\\+|-)([ \\t].*)?\\r?\\n", TG_LIST); + tokenizer.addTerminator(PRESERVE, "([ \\t]*)(\\+|-)[ \\t]?(.*)?\\r?\\n", TG_LIST); // " * my list title" - tokenizer.addTerminator(PRESERVE, "[ \\t]+\\*([ \\t].*)?\\r?\\n", TG_LIST); + tokenizer.addTerminator(PRESERVE, "([ \\t]+)(\\*)[ \\t]?(.*)?\\r?\\n", TG_LIST); // DocumentProperty: // "#+OPTIONS: H:20 num:20" @@ -83,8 +75,7 @@ public class Document { continue; } - tokenizer.unreadToken(); - currentHeading.parse(tokenizer); + currentHeading.parse(tm); } } diff --git a/src/main/java/eu/svjatoslav/sixth/core/document/Heading.java b/src/main/java/eu/svjatoslav/sixth/core/document/Heading.java index 23cc92f..0ae9ad5 100644 --- a/src/main/java/eu/svjatoslav/sixth/core/document/Heading.java +++ b/src/main/java/eu/svjatoslav/sixth/core/document/Heading.java @@ -1,16 +1,14 @@ package eu.svjatoslav.sixth.core.document; -import eu.svjatoslav.commons.string.tokenizer.InvalidSyntaxException; import eu.svjatoslav.commons.string.tokenizer.Tokenizer; import eu.svjatoslav.commons.string.tokenizer.TokenizerMatch; +import eu.svjatoslav.sixth.core.document.content.ListElement; import eu.svjatoslav.sixth.core.document.text.FormattedText; import java.util.ArrayList; import java.util.List; -import static eu.svjatoslav.sixth.core.document.Helper.TG_DOCUMENT_PROPERTY; import static eu.svjatoslav.sixth.core.document.Helper.TG_LIST; -import static eu.svjatoslav.sixth.core.document.text.FormattedText.fromOrg; public class Heading { public final FormattedText name; @@ -18,7 +16,7 @@ public class Heading { public final Heading parent; private final List children = new ArrayList<>(); - public final ListElement rootListElement = new ListElement(null, 0, null); + public final ListElement rootListElement = new ListElement(null, -1, null, ""); private ListElement currentListElement = rootListElement; public Heading(FormattedText name, int level, Heading parent){ @@ -40,8 +38,7 @@ public class Heading { if (level > 0) sb.append(enlistTitleInMD()); - // sb.append(unparsedContent); - + rootListElement.toMD(sb, -2); children.stream().map(Heading::toMD).forEach(sb::append); @@ -53,59 +50,70 @@ public class Heading { for (int i = 0; i < level; i++) sb.append("#"); - sb.append(" ").append(name.compileMd()).append("\n"); + sb.append(" ").append(name.toMD()).append("\n"); return sb.toString(); } - public ListElement createListElement(FormattedText name, int targetLevel){ - if (currentListElement.level == (targetLevel - 1)){ - ListElement newListElement = new ListElement(name, targetLevel, currentListElement); - currentListElement.addChild(newListElement); - currentListElement = newListElement; - return newListElement; - } + public ListElement getCurrentHeading(){ + return currentListElement; + } - if (currentListElement.level > (targetLevel - 1)){ - currentListElement = currentListElement.parent; - return createListElement(name, targetLevel); - } + public void parse(TokenizerMatch tm){ - try { - ListElement missingIntermediate = new ListElement( - fromOrg(""), currentListElement.level + 1, currentListElement); - currentListElement.addChild(missingIntermediate); - currentListElement = missingIntermediate; - return createListElement(name, targetLevel); - } catch (InvalidSyntaxException e) { - throw new IllegalStateException("impossible situation"); + if (tm.isGroup(TG_LIST)){ + parseList(tm); + return; } + + currentListElement.parse(tm); } - public ListElement getCurrentHeading(){ - return currentListElement; + private void parseList(TokenizerMatch tm) { + String[] listSections = tm.getRegExpGroups(); + int indent = listSections[0].length(); + String type = listSections[1]; + + FormattedText title = FormattedText.fromOrg(parseFullListTitle(listSections.length > 2 ? listSections[2] : "", tm.getTokenizer(), indent)); + +// System.out.println(" indent: " + indent); +// System.out.println(" type: " + type); +// System.out.println(" title: " + title); + + ListElement parent = null; + + if (indent > currentListElement.indent){ + ListElement newElement = new ListElement(title, indent, parent, type); + currentListElement.addContent(newElement); + currentListElement = newElement; + } + } - public void parse(Tokenizer tokenizer){ - while (tokenizer.hasMoreContent()) { - final TokenizerMatch tm = tokenizer.getNextToken(); - if (tm.isGroup(TG_LIST)){ - System.out.println("LIST!: " + tm.token); - continue; - } + private String parseFullListTitle(String partialTitle, Tokenizer tokenizer, int listIndent){ + StringBuilder sb = new StringBuilder(); + sb.append(partialTitle); - if (tm.isGroup(TG_DOCUMENT_PROPERTY)){ -// System.out.println("DOCUMENT PROPERTY!!!: " + tm.token); - continue; - } + while (tokenizer.hasMoreContent()){ + final TokenizerMatch tm = tokenizer.getNextToken(); - if (tm.isGroup(null)){ - System.out.println("unhandled \"" + tm.token + "\""); + if (isContentContinuation(tm, listIndent, null)){ + String titleContinuation = tm.token.substring(listIndent).trim(); + sb.append("\n").append(titleContinuation); continue; } tokenizer.unreadToken(); break; } + + return sb.toString(); } + + public static boolean isContentContinuation(TokenizerMatch tm, int requiredIndent, String requiredGroup) { + if (tm.token.length() <= requiredIndent) return false; + + return tm.isGroup(requiredGroup) && tm.token.substring(0, requiredIndent +1).trim().length() == 0; + } + } diff --git a/src/main/java/eu/svjatoslav/sixth/core/document/ListElement.java b/src/main/java/eu/svjatoslav/sixth/core/document/ListElement.java deleted file mode 100644 index 908b190..0000000 --- a/src/main/java/eu/svjatoslav/sixth/core/document/ListElement.java +++ /dev/null @@ -1,49 +0,0 @@ -package eu.svjatoslav.sixth.core.document; - -import eu.svjatoslav.commons.string.tokenizer.Tokenizer; -import eu.svjatoslav.commons.string.tokenizer.TokenizerMatch; -import eu.svjatoslav.sixth.core.document.text.FormattedText; - -import java.util.ArrayList; -import java.util.List; - -import static eu.svjatoslav.sixth.core.document.Helper.TG_DOCUMENT_PROPERTY; - -public class ListElement { - public final FormattedText name; - public final int level; - public final ListElement parent; - private final List children = new ArrayList<>(); - - public ListElement(FormattedText name, int level, ListElement parent){ - this.level = level; - this.name = name; - this.parent = parent; - } - - public void addChild(ListElement listElement){ - children.add(listElement); - } - - public void parse(Tokenizer tokenizer){ - while (tokenizer.hasMoreContent()) { - final TokenizerMatch tm = tokenizer.getNextToken(); - - if (tm.isGroup(TG_DOCUMENT_PROPERTY)){ - System.out.println("DOCUMENT PROPERT!!!: " + tm.token); - continue; - } - - if (tm.isGroup(null)){ - System.out.println(" HC: " + tm.token); - continue; - } - - tokenizer.unreadToken(); - break; - } - } - - - -} diff --git a/src/main/java/eu/svjatoslav/sixth/core/document/content/Content.java b/src/main/java/eu/svjatoslav/sixth/core/document/content/Content.java index d57b01b..1dad36c 100644 --- a/src/main/java/eu/svjatoslav/sixth/core/document/content/Content.java +++ b/src/main/java/eu/svjatoslav/sixth/core/document/content/Content.java @@ -1,4 +1,5 @@ package eu.svjatoslav.sixth.core.document.content; public interface Content { + void toMD(StringBuilder sb, int indent); } diff --git a/src/main/java/eu/svjatoslav/sixth/core/document/content/ListElement.java b/src/main/java/eu/svjatoslav/sixth/core/document/content/ListElement.java new file mode 100644 index 0000000..c4e4425 --- /dev/null +++ b/src/main/java/eu/svjatoslav/sixth/core/document/content/ListElement.java @@ -0,0 +1,58 @@ +package eu.svjatoslav.sixth.core.document.content; + +import eu.svjatoslav.commons.string.String2; +import eu.svjatoslav.commons.string.tokenizer.TokenizerMatch; +import eu.svjatoslav.sixth.core.document.text.FormattedText; + +import java.util.ArrayList; +import java.util.List; + +import static eu.svjatoslav.sixth.core.document.Helper.TG_DOCUMENT_PROPERTY; + +public class ListElement implements Content { + public final FormattedText name; + public final int indent; + public final ListElement parent; + private String type; + private final List content = new ArrayList<>(); + + public ListElement(FormattedText name, int indent, ListElement parent, String type) { + this.indent = indent; + this.name = name; + this.parent = parent; + this.type = type; + } + + public void addContent(ListElement content) { + this.content.add(content); + } + + public void parse(TokenizerMatch tm) { + + if (tm.isGroup(TG_DOCUMENT_PROPERTY)) { +// System.out.println("DOCUMENT PROPERT!!!: " + tm.token); + return; + } + + if (tm.isGroup(null)) { +// System.out.println(" Plain text content: " + tm.token); + return; + } + + System.out.println("ERROR!!!! Unable to handle: " + tm); + + } + + + public void toMD(StringBuilder sb, int i) { + if (indent >= 0){ + String2 s = new String2(); + s.addSuffix(" ", indent).addSuffix(type).addSuffix(" ").addSuffix(name.toMD()).addSuffix("\n"); + sb.append(s.toString()); + } + + for (Content c : content) { + c.toMD(sb, indent + 2); + } + } +} diff --git a/src/main/java/eu/svjatoslav/sixth/core/document/content/PropertyCollection.java b/src/main/java/eu/svjatoslav/sixth/core/document/content/PropertyCollection.java index f3840b4..4034670 100644 --- a/src/main/java/eu/svjatoslav/sixth/core/document/content/PropertyCollection.java +++ b/src/main/java/eu/svjatoslav/sixth/core/document/content/PropertyCollection.java @@ -7,4 +7,8 @@ public class PropertyCollection implements Content { private Map propertyToValue = new HashMap<>(); + @Override + public void toMD(StringBuilder sb, int indent) { + // TODO + } } diff --git a/src/main/java/eu/svjatoslav/sixth/core/document/content/TextBlock.java b/src/main/java/eu/svjatoslav/sixth/core/document/content/TextBlock.java index e5bd4ff..104fb21 100644 --- a/src/main/java/eu/svjatoslav/sixth/core/document/content/TextBlock.java +++ b/src/main/java/eu/svjatoslav/sixth/core/document/content/TextBlock.java @@ -4,4 +4,10 @@ import eu.svjatoslav.sixth.core.document.text.FormattedText; public class TextBlock implements Content { private FormattedText text; + + @Override + public void toMD(StringBuilder sb, int indent) { + // TODO + // sb.append(text.toMD()); + } } diff --git a/src/main/java/eu/svjatoslav/sixth/core/document/text/FormattedText.java b/src/main/java/eu/svjatoslav/sixth/core/document/text/FormattedText.java index ae6a421..1b51320 100644 --- a/src/main/java/eu/svjatoslav/sixth/core/document/text/FormattedText.java +++ b/src/main/java/eu/svjatoslav/sixth/core/document/text/FormattedText.java @@ -1,22 +1,23 @@ package eu.svjatoslav.sixth.core.document.text; -import eu.svjatoslav.commons.string.tokenizer.InvalidSyntaxException; import eu.svjatoslav.commons.string.tokenizer.Tokenizer; import eu.svjatoslav.commons.string.tokenizer.TokenizerMatch; import java.util.ArrayList; import java.util.List; +import static eu.svjatoslav.sixth.core.document.Helper.TG_HYPERLINK; + public class FormattedText { List elements = new ArrayList<>(); - public void parseOrgSyntax(String orgText) throws InvalidSyntaxException { + public void parseOrgSyntax(String orgText) { Tokenizer tokenizer = getTokenizer(orgText); while (tokenizer.hasMoreContent()) { final TokenizerMatch token = tokenizer.getNextToken(); - if (token.terminator == Hyperlink.orgTerminator){ + if (token.isGroup(TG_HYPERLINK)){ elements.add(Hyperlink.fromOrg(token)); continue; } @@ -28,17 +29,17 @@ public class FormattedText { } - public static FormattedText fromOrg(String orgText) throws InvalidSyntaxException { + public static FormattedText fromOrg(String orgText){ FormattedText formattedText = new FormattedText(); formattedText.parseOrgSyntax(orgText); return formattedText; } - public String compileMd(){ + public String toMD(){ StringBuilder sb = new StringBuilder(); for (FormattedTextElement element : elements) - sb.append(element.compileMd()); + sb.append(element.toMD()); return sb.toString(); } @@ -46,6 +47,7 @@ public class FormattedText { private Tokenizer getTokenizer(String contents) { final Tokenizer tokenizer = new Tokenizer(contents); tokenizer.addTerminator(Hyperlink.orgTerminator); + tokenizer.addTerminator(Hyperlink.orgTerminator2); return tokenizer; } diff --git a/src/main/java/eu/svjatoslav/sixth/core/document/text/FormattedTextElement.java b/src/main/java/eu/svjatoslav/sixth/core/document/text/FormattedTextElement.java index d769ce4..9a17ce0 100644 --- a/src/main/java/eu/svjatoslav/sixth/core/document/text/FormattedTextElement.java +++ b/src/main/java/eu/svjatoslav/sixth/core/document/text/FormattedTextElement.java @@ -1,6 +1,6 @@ package eu.svjatoslav.sixth.core.document.text; public interface FormattedTextElement { - String compileMd(); + String toMD(); } diff --git a/src/main/java/eu/svjatoslav/sixth/core/document/text/Hyperlink.java b/src/main/java/eu/svjatoslav/sixth/core/document/text/Hyperlink.java index 5e6884c..41d7ab5 100644 --- a/src/main/java/eu/svjatoslav/sixth/core/document/text/Hyperlink.java +++ b/src/main/java/eu/svjatoslav/sixth/core/document/text/Hyperlink.java @@ -1,6 +1,5 @@ package eu.svjatoslav.sixth.core.document.text; -import eu.svjatoslav.commons.string.tokenizer.InvalidSyntaxException; import eu.svjatoslav.commons.string.tokenizer.Terminator; import eu.svjatoslav.commons.string.tokenizer.TokenizerMatch; @@ -10,32 +9,32 @@ import static eu.svjatoslav.sixth.core.document.Helper.TG_HYPERLINK; public class Hyperlink implements FormattedTextElement { public static final Terminator orgTerminator = - new Terminator(PRESERVE, "\\[\\[.*\\]\\]", TG_HYPERLINK); + new Terminator(PRESERVE, "\\[\\[(.+)\\][ \\t]*\\[(.+)\\]\\]", TG_HYPERLINK); + + public static final Terminator orgTerminator2 = + new Terminator(PRESERVE, "\\[\\[(.*)\\]\\]", TG_HYPERLINK); private String label; private String URL; @Override - public String compileMd() { + public String toMD() { return ""; } - public static Hyperlink fromOrg(TokenizerMatch tokenizerMatch) throws InvalidSyntaxException { + public static Hyperlink fromOrg(TokenizerMatch tokenizerMatch) { Hyperlink hyperlink = new Hyperlink(); - hyperlink.parseOrgSyntax(tokenizerMatch); + hyperlink.parseOrg(tokenizerMatch); return hyperlink; } - private void parseOrgSyntax(TokenizerMatch tokenizerMatch) throws InvalidSyntaxException { -// Tokenizer tokenizer = new Tokenizer(tokenizerMatch.reminder); -// Terminator linkSeparator = tokenizer.addTerminator("][", PRESERVE); -// -// URL = tokenizer.expectAndConsumeNextTerminatorToken(null).token; -// -// if (tokenizer.hasMoreContent()){ // link label is optional -// tokenizer.expectAndConsumeNextTerminatorToken(linkSeparator); -// label = tokenizer.expectAndConsumeNextTerminatorToken(null).token; -// } + private void parseOrg(TokenizerMatch tokenizerMatch) { + String[] regExpGroups = tokenizerMatch.getRegExpGroups(); + URL = regExpGroups[0]; + if (tokenizerMatch.terminator == orgTerminator){ + label = regExpGroups[1]; + } + } } diff --git a/src/main/java/eu/svjatoslav/sixth/core/document/text/PlainText.java b/src/main/java/eu/svjatoslav/sixth/core/document/text/PlainText.java index 6f46732..b025248 100644 --- a/src/main/java/eu/svjatoslav/sixth/core/document/text/PlainText.java +++ b/src/main/java/eu/svjatoslav/sixth/core/document/text/PlainText.java @@ -9,7 +9,7 @@ public class PlainText implements FormattedTextElement { } @Override - public String compileMd() { + public String toMD() { return content; } } -- 2.20.1