From 7f9e6d11eb956ba3e1df724a21313c6a6bd1eb49 Mon Sep 17 00:00:00 2001 From: Svjatoslav Agejenko Date: Mon, 8 Jun 2020 21:08:26 +0300 Subject: [PATCH] Parse Emacs org mode file to object graph. WIP. --- pom.xml | 6 + .../sixth/core/document/Document.java | 4 + .../sixth/core/document/OrgParser.java | 138 ++++++++++++++++++ 3 files changed, 148 insertions(+) create mode 100644 src/main/java/eu/svjatoslav/sixth/core/document/Document.java create mode 100644 src/main/java/eu/svjatoslav/sixth/core/document/OrgParser.java diff --git a/pom.xml b/pom.xml index 65f2da6..e8e9fea 100644 --- a/pom.xml +++ b/pom.xml @@ -26,6 +26,12 @@ 1.2-SNAPSHOT + + eu.svjatoslav + svjatoslavcommons + 1.8-SNAPSHOT + + eu.svjatoslav javainspect diff --git a/src/main/java/eu/svjatoslav/sixth/core/document/Document.java b/src/main/java/eu/svjatoslav/sixth/core/document/Document.java new file mode 100644 index 0000000..2c3d2f6 --- /dev/null +++ b/src/main/java/eu/svjatoslav/sixth/core/document/Document.java @@ -0,0 +1,4 @@ +package eu.svjatoslav.sixth.core.document; + +public class Document { +} diff --git a/src/main/java/eu/svjatoslav/sixth/core/document/OrgParser.java b/src/main/java/eu/svjatoslav/sixth/core/document/OrgParser.java new file mode 100644 index 0000000..f420c48 --- /dev/null +++ b/src/main/java/eu/svjatoslav/sixth/core/document/OrgParser.java @@ -0,0 +1,138 @@ +package eu.svjatoslav.sixth.core.document; + +import eu.svjatoslav.commons.string.String2; +import eu.svjatoslav.commons.string.tokenizer.InvalidSyntaxException; +import eu.svjatoslav.commons.string.tokenizer.Tokenizer; +import eu.svjatoslav.commons.string.tokenizer.TokenizerMatch; + +import java.io.File; +import java.io.IOException; + +import static eu.svjatoslav.commons.file.IOHelper.getFileContentsAsString; +import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.DROP; +import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.PRESERVE; + +public class OrgParser { + 
+    /**
+     * Loads the contents of the given file as a string, splits them into
+     * lines and passes every line to {@link #parseLine(String)}.
+     * <p>
+     * Work in progress: the {@link Document} returned here is created empty
+     * and nothing in this method adds content to it.
+     *
+     * @param file file to read
+     * @return the newly created (currently empty) document
+     * @throws IOException            declared by this method; presumably raised when the file cannot be read
+     * @throws InvalidSyntaxException declared by this method; presumably raised by the tokenizer
+     */
+    public Document parse(File file) throws IOException, InvalidSyntaxException {
+        final Document result = new Document();
+        final Tokenizer lines = getFileToLineTokenizer(getFileContentsAsString(file));
+
+        // A null match marks the end of the input.
+        for (TokenizerMatch current = lines.getNextToken(); current != null; current = lines.getNextToken()) {
+            parseLine(current.token);
+        }
+
+        return result;
+    }
+
+    /**
+     * Tokenizes one line and, for now, only prints it when the first token was
+     * produced by a terminator and starts with {@code '*'}. Every other line
+     * is silently ignored.
+     *
+     * @param line line text without its trailing newline
+     * @throws InvalidSyntaxException declared by this method; presumably raised by the tokenizer
+     */
+    private void parseLine(String line) throws InvalidSyntaxException {
+        // The newline is re-appended because the terminators registered in
+        // getLineTokenizer end at "\n".
+        final TokenizerMatch first = getLineTokenizer(line + "\n").getNextToken();
+
+        final boolean starred = first != null
+                && first.terminator != null
+                && first.token.startsWith("*");
+        if (!starred) {
+            return;
+        }
+
+        System.out.println("LINE: " + line);
+        System.out.println(first);
+        System.out.println();
+    }
+
+
+    /**
+     * Creates the tokenizer applied to a single line. It registers 49
+     * terminators, one per value of the counter from 1 to 49, each starting
+     * with a star prefix followed by a space and ending at the newline.
+     *
+     * @param contents text of one line, including the trailing newline
+     * @return the configured tokenizer
+     */
+    private Tokenizer getLineTokenizer(String contents) {
+        final Tokenizer result = new Tokenizer(contents);
+
+        int stars = 1;
+        while (stars < 50) {
+            // presumably String2.repeat(n) yields the star repeated n times -- TODO confirm
+            final String marker = new String2("*").repeat(stars).toString() + " ";
+            result.addTerminator(marker, "\n", PRESERVE);
+            stars++;
+        }
+
+        return result;
+    }
+
+    /**
+     * Creates the tokenizer used to cut whole-file contents into lines.
+     * Newline is the only terminator that is actually registered; all other
+     * registrations in this method are commented out.
+     *
+     * @param contents complete file contents
+     * @return the configured tokenizer
+     */
+    private Tokenizer getFileToLineTokenizer(String contents) {
+        final Tokenizer tokenizer = new Tokenizer(contents);
+
+        // newline -- the only active terminator
+        tokenizer.addTerminator("\n", DROP);
+
+        // empty space
+//        tokenizer.addTerminator(" ", DROP);
+//        tokenizer.addTerminator("\t", DROP);
+//        tokenizer.addTerminator("\n", DROP);
+
+//        tokenizer.addTerminator(";", PRESERVE);
+//        tokenizer.addTerminator("{", PRESERVE);
+//        tokenizer.addTerminator("}", PRESERVE);
+//        tokenizer.addTerminator("(", PRESERVE);
+//        tokenizer.addTerminator(")", PRESERVE);
+//        tokenizer.addTerminator("[", PRESERVE);
+//        tokenizer.addTerminator("]", PRESERVE);
+//        tokenizer.addTerminator("<", PRESERVE);
+//        tokenizer.addTerminator(">", PRESERVE);
+//        tokenizer.addTerminator(",", PRESERVE);
+//        tokenizer.addTerminator("@", PRESERVE);
+
+        // comments
+//        tokenizer.addTerminator("//", "\n", DROP);
+//        tokenizer.addTerminator("/*", "*/", DROP);
+        return tokenizer;
+    }
+//
+//
+//    private void parseImport(final Tokenizer tokenizer)
+//            throws InvalidSyntaxException {
+//
+//        final Import imp = new Import();
+//
+//        final TokenizerMatch match = tokenizer.getNextToken();
+//
+//        if (match.token.equals("static")) {
+//            imp.isStatic = true;
+//            imp.path = tokenizer.getNextToken().token;
+//        } else
+//            imp.path = match.token;
+//
+//        imports.add(imp);
+//
+//        tokenizer.expectAndConsumeNextToken(";");
+//    }
+//
+//    private void parseInterface(final Tokenizer tokenizer)
+//            throws InvalidSyntaxException {
+//
+//        final TokenizerMatch match = tokenizer.getNextToken();
+//        final Clazz clazz = new Clazz(packageName, match.token, tokenizer, true);
+//        // System.out.println(clazz.toString());
+//        classes.add(clazz);
+//    }
+//
+//    private void parsePackage(final Tokenizer tokenizer)
+//            throws InvalidSyntaxException {
+//
+//        final TokenizerMatch match = tokenizer.getNextToken();
+//
+//        packageName = match.token;
+//
+//        tokenizer.expectAndConsumeNextToken(";");
+//    }
+
+    /**
+     * Pulls tokens from the given tokenizer until one whose text equals
+     * {@code ";"} has been read, or until the tokenizer returns {@code null}.
+     * The tokens read along the way are discarded.
+     *
+     * @param tokenizer tokenizer to read from
+     * @throws InvalidSyntaxException declared by this method; presumably raised by the tokenizer
+     */
+    public void skipUntilSemicolon(final Tokenizer tokenizer) throws InvalidSyntaxException {
+        TokenizerMatch current = tokenizer.getNextToken();
+        while (current != null && !current.token.equals(";")) {
+            current = tokenizer.getNextToken();
+        }
+    }
+
+
+}
-- 
2.20.1