Misc fixes:
[sixth.git] / src / main / java / eu / svjatoslav / sixth / core / document / Document.java
1 package eu.svjatoslav.sixth.core.document;
2
3 import eu.svjatoslav.commons.string.tokenizer.InvalidSyntaxException;
4 import eu.svjatoslav.commons.string.tokenizer.Tokenizer;
5 import eu.svjatoslav.commons.string.tokenizer.TokenizerMatch;
6 import eu.svjatoslav.sixth.core.document.text.FormattedText;
7
8 import static eu.svjatoslav.commons.string.tokenizer.Terminator.TerminationStrategy.PRESERVE;
9 import static eu.svjatoslav.sixth.core.document.Helper.*;
10 import static eu.svjatoslav.sixth.core.document.text.FormattedText.fromOrg;
11
12 public class Document {
13     public final Heading rootHeading = new Heading( null , 0, null);
14     private Heading currentHeading = rootHeading;
15
16     public Heading createHeading(FormattedText name, int targetLevel){
17         if (currentHeading.level == (targetLevel - 1)){
18             Heading newHeading = new Heading(name, targetLevel, currentHeading);
19             currentHeading.addChild(newHeading);
20             currentHeading = newHeading;
21             return newHeading;
22         }
23
24         if (currentHeading.level > (targetLevel - 1)){
25             currentHeading = currentHeading.parent;
26             return createHeading(name, targetLevel);
27         }
28
29         Heading missingIntermediate = new Heading(fromOrg("<noname>"), currentHeading.level + 1, currentHeading);
30         currentHeading.addChild(missingIntermediate);
31         currentHeading = missingIntermediate;
32         return createHeading(name, targetLevel);
33     }
34
35     public Heading getCurrentHeading(){
36         return currentHeading;
37     }
38
39     private void parseHeading(TokenizerMatch token) throws InvalidSyntaxException {
40         String[] headingSections = token.getRegExpGroups();
41         int level = headingSections[0].length();
42         String title = headingSections[1];
43         createHeading(fromOrg(title), level);
44     }
45
46     public void parse(String fileContentsAsString) throws InvalidSyntaxException {
47         final Tokenizer tokenizer = new Tokenizer(fileContentsAsString);
48
49         // Org heading:
50         // "*** Example Heading 1234"
51         tokenizer.addTerminator(PRESERVE, "(\\*+)[ \\t](.*)\\r?\\n", TG_HEADING);
52
53         // Org list. Examples:
54         // "   + my list title"
55         // "   - my list title"
56         tokenizer.addTerminator(PRESERVE, "([ \\t]*)(\\+|-)[ \\t]+(.*)?\\r?\\n", TG_LIST);
57
58         // "   * my list title"
59         tokenizer.addTerminator(PRESERVE, "([ \\t]+)(\\*)[ \\t]+(.*)?\\r?\\n", TG_LIST);
60
61         // TODO: add numbered list
62
63         // DocumentProperty:
64         // "#+OPTIONS: H:20 num:20"
65         tokenizer.addTerminator(PRESERVE, "#\\+([^\\s]+):(.*)\\r?\\n", TG_DOCUMENT_PROPERTY);
66
67         // Drawer property:
68         //  " :ID:       533734b9-0456-4448-9830-a43646345615"
69         tokenizer.addTerminator(PRESERVE, "([ \\t]*):([^\\s]+):(.*)\\r?\\n", TG_DRAWER_PROPERTY);
70
71
72         // multiline code block
73         tokenizer.addTerminator(PRESERVE,
74                 "([ \\t]*)#\\+BEGIN_SRC" +  // source begin identifier
75                         "(([ \\t]+)(.*))?(\\r?\\n)" + // source block parameters
76                         "((?:.|\\n|\\r)*?)" + // source content
77                         "(\\r?\\n)([ \\t]*)#\\+END_SRC(.*)\\r?\\n" // source end identifier
78                 , TG_MULTILINE_CODE);
79
80         // verse
81         tokenizer.addTerminator(PRESERVE,
82                 "([ \\t]*)#\\+BEGIN_VERSE" + // verse begin identifier
83                         "(([ \\t]+)(.*))?(\\r?\\n)" + // verse block parameters
84                         "((?:.|\\n|\\r)*?)" + // verse
85                         "(\\r?\\n)([ \\t]*)#\\+END_VERSE(.*)\\r?\\n" // verse end identifier
86                 , TG_VERSE);
87
88
89         // TODO: add support for export blocks:
90         //        #+begin_export latex
91         //  \clearpage
92         //#+end_export
93
94
95         // normal text
96         tokenizer.addTerminator(PRESERVE,".*\\r?\\n", TG_NORMAL_TEXT);
97
98         while (tokenizer.hasMoreContent()) {
99             final TokenizerMatch tm = tokenizer.getNextToken();
100
101             if (tm.isGroup(TG_HEADING)){
102                 parseHeading(tm);
103                 continue;
104             }
105
106             currentHeading.parse(tm);
107         }
108
109     }
110 }