Overview
Comment: | all kindsa shit |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA3-256: |
52b9bce7dd9317730dfccf2eefd17494 |
User & Date: | lexi on 2021-12-26 04:08:02 |
Other Links: | manifest | tags |
Context
2021-12-26
| ||
17:49 | get math parser working check-in: d1b7d2fd5f user: lexi tags: trunk | |
04:08 | all kindsa shit check-in: 52b9bce7dd user: lexi tags: trunk | |
2021-12-22
| ||
10:23 | fix bugged makefile check-in: 36024a43c5 user: lexi tags: trunk | |
Changes
Modified cli.lua from [a9857f9cb6] to [ad6ab18d31].
3 3 4 4 local default_mode = { 5 5 ['render:format'] = 'html'; 6 6 ['html:gen-styles'] = true; 7 7 } 8 8 9 9 local function 10 -main(input, output, log, mode, suggestions, vars) 11 - local doc = ct.parse(input.stream, input.src, mode) 10 +main(input, output, log, mode, suggestions, vars, extrule) 11 + local doc = ct.parse(input.stream, input.src, mode, function(c) 12 + c.doc.ext = extrule 13 + end) 12 14 input.stream:close() 13 15 if mode['parse:show-tree'] then 14 16 log:write(ss.dump(doc)) 15 17 end 16 18 17 19 -- the document has now had a chance to give its say; if it hasn't specified 18 20 -- any modes of its own, we now merge in the 'weak modes' (suggestions) ................................................................................ 70 72 ['mode-set'] = 1; 71 73 ['mode-clear'] = 1; 72 74 mode = 2; 73 75 74 76 ['mode-set-weak'] = 1; 75 77 ['mode-clear-weak'] = 1; 76 78 ['mode-weak'] = 2; 79 + ['use'] = 1; 80 + ['inhibit'] = 1; 81 + ['need'] = 1; 82 + ['load'] = 1; 83 + ['enc'] = 1; 77 84 } 78 85 return param_opts[o] or 0 79 86 end 80 87 81 88 local optmap = { 82 89 o = 'out'; 83 90 l = 'log'; 84 91 d = 'define'; 85 92 V = 'version'; 86 93 h = 'help'; 87 94 y = 'mode-set', Y = 'mode-set-weak'; 88 95 n = 'mode-clear', N = 'mode-clear-weak'; 89 96 m = 'mode', M = 'mode-weak'; 97 + L = 'load', 98 + u = 'use', i = 'inhibit', r = 'require'; 99 + e = 'enc'; 90 100 } 101 + 102 + local extrule = {use={},inhibit={},need={}} 91 103 92 104 local checkmodekey = function(key) 93 105 if not key:match '[^:]+:.+' then 94 106 ct.exns.cli('invalid mode key %s', key):throw() 95 107 end 96 108 return key 97 109 end ................................................................................ 
117 129 mode = function(key,value) mode[checkmodekey(key)] = value end; 118 130 ['mode-set'] = function(key) mode[checkmodekey(key)] = true end; 119 131 ['mode-clear'] = function(key) mode[checkmodekey(key)] = false end; 120 132 121 133 ['mode-weak'] = function(key,value) suggestions[checkmodekey(key)] = value end; 122 134 ['mode-set-weak'] = function(key) suggestions[checkmodekey(key)] = true end; 123 135 ['mode-clear-weak'] = function(key) suggestions[checkmodekey(key)] = false end; 124 - 136 + ['use' ] = function(ext) extrule.use [ext] = true end; 137 + ['inhibit'] = function(ext) extrule.inhibit[ext] = true end; 138 + ['require'] = function(ext) extrule.need [ext] = true end; 139 + ['load'] = function(extpath) end; 140 + ['enc'] = function(enc) end; 125 141 ['version'] = function() 126 142 outp:write(ct.info:about()) 127 143 if next(ct.ext.loaded) then 128 144 outp:write('\nactive extensions:\n') 129 145 for k,v in pairs(ct.ext.loaded) do 130 146 outp:write(string.format(' * %s', v.id .. 131 147 (v.version and (' ' .. v.version:string()) or ''))) ................................................................................ 175 191 keepParsing = false 176 192 else 177 193 local longopt = v:match '^%-%-(.+)$' 178 194 if keepParsing and longopt then 179 195 execLongOpt(longopt) 180 196 else 181 197 if keepParsing and v:sub(1,1) == '-' then 182 - for c,p in ss.str.enc.utf8.each(v:sub(2)) do 198 + for c,p in ss.str.each(ss.str.enc.utf8, v:sub(2)) do 183 199 if optmap[c] then 184 200 execLongOpt(optmap[c]) 185 201 else 186 202 ct.exns.cli('switch -%s unrecognized', c):throw() 187 203 end 188 204 end 189 205 else ................................................................................ 197 213 if args[1] and args[1] ~= '' then 198 214 local file = io.open(args[1], "rb") 199 215 if not file then error('unable to load file ' .. 
args[1]) end 200 216 input.stream = file 201 217 input.src.file = args[1] 202 218 end 203 219 204 - return main(input, outp, log, mode, suggestions, vars) 220 + return main(input, outp, log, mode, suggestions, vars, extrule) 205 221 end 206 222 207 -local ok, e = pcall(entry_cli) 208 --- local ok, e = true, entry_cli() 223 +-- local ok, e = pcall(entry_cli) 224 +local ok, e = true, entry_cli() 209 225 if not ok then 210 226 local str = 'translation failure' 211 227 if ss.exn.is(e) then 212 228 str = e.kind.desc 213 229 end 214 230 local color = false 215 231 if log:seek() == nil then
Modified cortav.ct from [c71fe3a9e8] to [5df14cacc3].
4 4 dict: http://ʞ.cc/fic/spirals/glossary 5 5 6 6 the cortav [!format] can be called [!cortavgil], or [!gil cortavi], to differentiate it from the reference implementation [!cortavsir] or [!sir cortavi]. 7 7 8 8 %toc 9 9 10 10 ## cortav vs. markdown 11 -the most important difference between cortav and markdown is that cortav is strictly line-oriented. this choice was made to ensure that cortav was relatively easy to parse. so while a simple [$.ct] file may look a bit like a [$.md] file, in reality it's a lot closer to gemtext than any flavor of markdown. 11 +the most important difference between cortav and markdown is that cortav is strictly line-oriented. this choice was made to ensure that cortav was relatively easy to parse. so while a simple [`.ct] file may look a bit like a [`.md] file, in reality it's a lot closer to gemtext than any flavor of markdown. 12 12 13 13 ## encoding 14 14 a cortav document is made up of a sequence of codepoints. UTF-8 must be supported, but other encodings (such as UTF-32 or C6B) may be supported as well. lines will be derived by splitting the codepoints at the linefeed character or equivalent. note that unearthly encodings like C6B or EBCDIC will need to select their own control sequences. 15 15 16 16 ## file type 17 17 a cortav source file is identified using a file extension, file type, and/or magic byte sequence. 18 18 19 19 three file extensions are defined as identifying a cortav source file. where relevant, all must be recognized as indicating a cortav source file. 20 -* [$ct] is the shorthand extension 21 -* [$cortav] is the canonical disambiguation extension, for use in circumstances where [$*.ct] is already defined to mean a different file format. 22 -* [$] is the canonical Corran extension, a byte sequence comprising the unicode codepoints [$U+E3CE U+E3BD]. where the filesystem in question does not specify a filename encoding, the bytes should be expressed in UTF-8. 
20 +* [`ct] is the shorthand extension 21 +* [`cortav] is the canonical disambiguation extension, for use in circumstances where [`*.ct] is already defined to mean a different file format. 22 +* [`] is the canonical Corran extension, a byte sequence comprising the unicode codepoints [`U+E3CE U+E3BD]. where the filesystem in question does not specify a filename encoding, the bytes should be expressed in UTF-8. 23 23 24 24 three more extensions are reserved for identifying a cortav intent file. 25 -* [$ctc] is the shorthand extension 26 -* [$cortavcun] is the canonical disambiguation extension 27 -* [$] is the canonical Corran extension, a byte sequence comprising the unicode codepoints [$U+E3CE U+E3BD U+E3CE]. where the filesystem in question does not specify a filename encoding, the bytes should be expressed in UTF-8. 25 +* [`ctc] is the shorthand extension 26 +* [`cortavcun] is the canonical disambiguation extension 27 +* [`] is the canonical Corran extension, a byte sequence comprising the unicode codepoints [`U+E3CE U+E3BD U+E3CE]. where the filesystem in question does not specify a filename encoding, the bytes should be expressed in UTF-8. 28 28 29 29 on systems which use metadata to encode filetype, two values are defined to identify cortav source files 30 -* [$text/x-cortav] should be used when strings or arbitrary byte sequences are supported 31 -* [$CTAV] (that is, the byte sequence [$0x43 0x54 0x41 0x56]) should be used on systems that support only 32-bit file types/4-character type codes like Classic Mac OS. 30 +* [`text/x-cortav] should be used when strings or arbitrary byte sequences are supported 31 +* [`CTAV] (that is, the byte sequence [`0x43 54 41 56]) should be used on systems that support only 32-bit file types/4-character type codes like Classic Mac OS. 32 32 33 33 two more values are defined to identify cortav intent files. 
34 -* [$text/x-cortav-intent] 35 -* [$CTVC] (the byte sequence [$0x43 0x54 0x56 0x43]) 34 +* [`text/x-cortav-intent] 35 +* [`CTVC] (the byte sequence [`0x43 54 56 43]) 36 36 37 -on systems which do not define a canonical way of encoding the filetype but support extended attributes of some kind, such as linux, an attribute named [$mime] may be created and given the value [$text/x-cortav] or [$text/x-cortav-intent]; alternatively, extensions may be used. 37 +on systems which do not define a canonical way of encoding the filetype but support extended attributes of some kind, such as linux, an attribute named [$mime] may be created and given the value [`text/x-cortav] or [`text/x-cortav-intent]; alternatively, extensions may be used. 38 38 39 39 it is also possible to indicate the nature of a cortav file without using filesystem metadata. this is done by prefixing the file with a magic byte sequence. the sequence used depends on the encoding. 40 -* for UTF-8 and ASCII, [$%ct[!\\n]] (that is, the byte sequence [$0x25 0x63 0x74 0x0A]) should be used 41 -* for C6B, the file should begin with the word [$] (that is, the byte sequence [$0x03 0x07 0x3E 0x2D]). 40 +* for UTF-8 and ASCII plain text files, [`%ct[!\\n]] (that is, the byte sequence [`0x25 63 74 0A]) should be used 41 +* for C6B+PS files (parastream), the file should begin with the paragraph [`], which equates to the byte sequence [` 0x3E 2E 14 0C 01 04 00 00 00 03 07 3E 2D], including the parastream header). 42 42 consequently, this sequence should be ignored by a cortav parser at the start of a file (except as an indication of file format). 43 43 44 -for FreeDesktop-based systems, the [$velartrill-cortav.xml] file included in the repository supplies mappings for the extensions and magic byte sequences. a script is also included which can be registered with xdg-open so that double-clicking on a cortav file will render it out and open it in your default web browser. 
44 +for FreeDesktop-based systems, the [`build/velartrill-cortav.xml] file included in the repository supplies mappings for the extensions and magic byte sequences. a script is also included which can be registered with xdg-open so that double-clicking on a cortav file will render it out and open it in your default web browser. [`$ make install] will generate the necessary FreeDesktop XML files and register them, as well as install the script and the [`cortav] executable itself. for more information see [>refimpl-build building the reference implementation]. 45 45 46 46 ## structure 47 47 cortav is based on an HTML-like block model, where a document consists of sections, which are made up of blocks, which may contain a sequence of spans. flows of text are automatically conjoined into spans, and blocks are separated by one or more newlines. this means that, unlike in markdown, a single logical paragraph [*cannot] span multiple ASCII lines. the primary purpose of this was to ensure ease of parsing, but also, both markdown and cortav are supposed to be readable from within a plain text editor. this is the 21st century. every reasonable text editor supports soft word wrap, and if yours doesn't, that's entirely your own damn fault. 48 48 49 -the first character(s) of every line (the "control sequence") indicates the role of that line. if no control sequence is recognized, the sequence [$.] is implied instead. the standard line classes and their associated control sequences are listed below. some control sequences have alternate forms, in order to support modern, readable unicode characters as well as plain ascii text. 50 - 51 -* paragraphs (. ¶ ❡): a paragraph is a simple block of text. the period control sequence is only necessary if the paragraph text begins with something that would otherwise be interpreted as a control sequence. 52 -* newlines (\\): inserts a line break into previous paragraph and attaches the following text. mostly useful for poetry or lyrics. 
53 -* section starts (# §): starts a new section. all sections have an associated depth, determined by the number of sequence repetitions (e.g. "###" indicates depth-three"). sections may have headers and IDs; both are optional. IDs, if present, are a sequence of raw-text immediately following the hash marks. if the line has one or more space character followed by styled-text, a header will be attached. the character immediately following the hashes can specify a particular type of section. e.g.: 54 -** [$#] is a simple section break. 55 -** [$#anchor] opens a new section with the ID [$anchor]. 56 -** [$# header] opens a new section with the title "header". 57 -** [$#anchor header] opens a new section with both the ID [$anchor] and the title "header". 58 -** [$#>conversation] opens a blockquote section named [$conversation] without a header. 59 -** [$#^id] opens a footnote section for the multiline footnote [$id]. the ID must be specified. 60 -** [$#$id] opens the multiline macro [$id]. the ID must be specified. 61 -** [$#&id mime] opens a new inline object [$id] of type [$mime]. useful for embedding SVGs. the ID and mime type must be specified. 62 -* lists (* :): these are like paragraph nodes, but list nodes that occur next to each other will be arranged so as to show they compose a sequence. depth is determined by the number of stars/colons. like headers, a list entry may have an ID that can be used to refer back to it; it is indicated in the same way. if colons are used, this indicates that the order of the items is signifiant. :-lists and *-lists may be intermixed; however, note than only the last character in the sequence actually controls the depth type. 63 -* directives (%): a directive issues a hint to the renderer in the form of an arbitrary string. directives are normally ignored if they are not supported, but you may cause a warning to be emitted where the directive is not supported with [$%!] or mark a directive critical with [$%!!] 
so that rendering will entirely fail if it cannot be parsed. 64 -* comments (%%): a comment is a line of text that is simply ignored by the renderer. 65 -* asides (!): indicates text that diverges from the narrative, and can be skipped without interrupting it. think of it like block-level parentheses. asides which follow one another are merged as paragraphs of the same aside, usually represented as a sort of box. if the first line of an aside contains a colon, the stretch of styled-text from the beginning to the aside to the colon will be treated as a "type heading," e.g. "Warning:" 66 -* code (~~~): a line beginning with ~~~ begins or terminates a block of code. the opening line should look like one of the below 67 -** [$~~~] 68 -** [$~~~ language] (markdown-style shorthand syntax) 69 -** [$~~~ \[language\] ~~~] (cortav syntax) 70 -** [$~~~ \[language\] #id ~~~] 71 -** [$~~~ title ~~~] 72 -** [$~~~ title \[language\] ~~~] 73 -** [$~~~ \[language\] title ~~~] 74 -** [$~~~ title \[language\] #id ~~~] 75 -* reference (tab): a line beginning with a tab is treated as a "reference." references hold out-of-line metadata for preceding text like links and footnotes. a reference consists of an identifier followed by a colon and an arbitrary number of spaces or tabs, followed by text. whether this text is interpreted as raw-text or styled-text depends on the context in which the reference is used. 76 -* quotation (<): a line of the form [$<[!name]> [!quote]] denotes an utterance by [$name]. 77 -* blockquote (>): alternate blockquote syntax. can be nested by repeating the 78 -* subtitle (--): attaches a subtitle to the previous header 79 -* embed (&): embeds a referenced object. can be used to show images or repeat previously defined objects like lists or tables, optionally with a caption. 
80 -** &myimg All that remained of the unfortunate blood magic pageant contestants and audience (police photo) 81 -** &$mymacro arg 1|arg 2|arg 3 82 -* break (---): inserts a horizontal rule or other context break; does not end the section. must be followed by newline. 83 -* table cells (+ |): see [>ex.tab table examples]. 49 +the first character(s) of every line (the "control sequence") indicates the role of that line. if no control sequence is recognized, the line is treated as a paragraph. the currently supported control sequences are listed below. some control sequences have alternate forms, in order to support modern, readable unicode characters as well as plain ascii text. 50 + 51 +* [*paragraphs] ([`.] [` ¶] [`❡]): a paragraph is a simple block of text. the period control sequence is only necessary if the paragraph text starts with text that would be interpreted as a control sequence otherwise 52 +* newlines [` \\]: inserts a line break into previous paragraph and attaches the following text. mostly useful for poetry or lyrics 53 +* [*section starts] [`#] [`§]: starts a new section. all sections have an associated depth, determined by the number of sequence repetitions (e.g. "###" indicates depth-three"). sections may have headers and IDs; both are optional. IDs, if present, are a sequence of raw-text immediately following the hash marks. if the line has one or more space character followed by styled-text, a header will be attached. the character immediately following the hashes can specify a particular type of section. e.g.: 54 +** [`#] is a simple section break. 55 +** [`#anchor] opens a new section with the ID [`anchor]. 56 +** [`# header] opens a new section with the title "header". 57 +** [`#anchor header] opens a new section with both the ID [`anchor] and the title "header". 58 +** [`#>conversation] opens a blockquote section named [`conversation] without a header. 
59 +* [*nonprinting sections] ([`^]): sometimes, you'll want to create a namespace without actually adding a visible new section to the document. you can achieve this by creating a [!nonprinting section] and defining resources within it. nonprinting sections can also be used to store comments, notes, or other information that is useful to have in the source file without it becoming a part of the output 60 +** [`#&id mime] opens a new inline object [`id] of type [`mime]. useful for embedding SVGs. the ID and mime type must be specified. 61 +* [*resource] ([`@]): defines a [!resource]. a resource is a file or object that exists outside of the document but which will be included in the document somehow. common examples of resources include images, videos, iframes, or headers/footers. see [>rsrc resources] for more information. 62 +* [*lists] ([`*] [`:]): these are like paragraph nodes, but list nodes that occur next to each other will be arranged so as to show they compose a sequence. depth is determined by the number of stars/colons. like headers, a list entry may have an ID that can be used to refer back to it; it is indicated in the same way. if colons are used, this indicates that the order of the items is significant. :-lists and *-lists may be intermixed; however, note that only the last character in the sequence actually controls the depth type. 63 +* [*directives] ([`%]): a directive issues a hint to the renderer in the form of an arbitrary string. directives are normally ignored if they are not supported, but you may cause a warning to be emitted where the directive is not supported with [`%!] or mark a directive critical with [`%!!] so that rendering will entirely fail if it cannot be parsed. 64 +* [*comments] ([`%%]): a comment is a line of text that is simply ignored by the renderer. 65 +* [*asides] ([`!]): indicates text that diverges from the narrative, and can be skipped without interrupting it. think of it like block-level parentheses. 
asides which follow one another are merged as paragraphs of the same aside, usually represented as a sort of box. if the first line of an aside contains a colon, the stretch of styled-text from the beginning to the aside to the colon will be treated as a "type heading," e.g. "Warning:" 66 +* [*code] ([`~~~]): a line beginning with ~~~ begins or terminates a block of code. code blocks are by default not parsed, but parsing can be activated by preceding the code block with an [`%[*expand]] directive. the opening line should look like one of the below 67 +** [`~~~] 68 +** [`~~~ language] (markdown-style shorthand syntax) 69 +** [`~~~ \[language\] ~~~] (cortav syntax) 70 +** [`~~~ \[language\] #id ~~~] 71 +** [`~~~ title ~~~] 72 +** [`~~~ title \[language\] ~~~] 73 +** [`~~~ \[language\] title ~~~] 74 +** [`~~~ title \[language\] #id ~~~] 75 +*[*reference] (tab): a line beginning with a tab is treated as a "reference." references hold out-of-line metadata for preceding text like links and footnotes. a reference consists of an identifier followed by a colon and an arbitrary number of spaces or tabs, followed by text. whether this text is interpreted as raw-text or styled-text depends on the context in which the reference is used. in encodings without tab characters, two preceding blanks can be used instead. 76 +* [*quotation] ([`<]): a line of the form [`<[$name]> [$quote]] denotes an utterance by [$name]. 77 +* [*blockquote] ([`>]): alternate blockquote syntax. can be nested by repeating the [`>] character. 78 +* [*subtitle] ([`--]): attaches a subtitle to the previous header 79 +* [*embed] ([`&]): embeds a referenced object. can be used to show images or repeat previously defined objects like lists or tables, optionally with a caption. 80 +** [`&$[$macro] [$arg1]|[$arg2]|[$argn]…] invokes a block-level macro with the supplied arguments 81 +*** [`&$mymacro arg 1|arg 2|arg 3] 82 +** [`&[$image]] embeds an image or other block-level object. 
[!image] can be a reference with a url or file path, or it can be an embed section (e.g. for SVG files) 83 +***[`&myimg All that remained of the unfortunate blood magic pageant contestants and audience (police photo)] 84 +** [`&-[$section]] embeds a closed disclosure element. in interactive outputs, this will display as a block [!section] which can be clicked on to view the full contents of the referenced section; in static outputs, it will display as an enclosed box with [$section] as the title text 85 +*** [`&-ex-a Prosecution Exhibit A (GRAPHIC CONTENT)] 86 +** [`&+[$section]] is like the above, but the disclosure element is open by default 87 +* [*horizontal rule] ([`\---]): inserts a horizontal rule or other context break; does not end the section. must be followed by newline. underlines can also be used in place of dashes. 88 +* [*page break] ([`\^^]): for formats that support pagination, like HTML (when printed), indicates that the rest of the current page should be blank. for formats that do not, extra margins will be inserted. does not create a new section 89 +* [*page rule] ([`\^^-]): inserts a page break for formats that support them, and a horizontal rule for formats that do not. does not create a new section 90 +* [*table cells] ([`+ |]): see [>ex.tab table examples]. 91 +* [*equations] ([`=]) block-level equations can be inserted with the [`=] 92 +* [*empty lines] (that is, lines consisting of nothing but whitespace) constitute a [!break], which terminates multiline objects that do not have a dedicated termination sequence, for example lists and asides. 84 93 85 94 ## styled text 86 -most blocks contain a sequence of spans. these spans are produced by interpreting a stream of [*styled-text] following the control sequence. styled-text is a sequence of codepoints potentially interspersed with escapes. an escape is formed by an open square bracket [$\[] followed by a [*span control sequence], and arguments for that sequence like more styled-text. 
escapes can be nested. 95 +most blocks contain a sequence of spans. these spans are produced by interpreting a stream of [*styled-text] following the control sequence. styled-text is a sequence of codepoints potentially interspersed with escapes. an escape is formed by an open square bracket [`\[] followed by a [*span control sequence], and arguments for that sequence like more styled-text. escapes can be nested. 87 96 88 -* strong \[*[!styled-text]\]: causes its text to stand out from the narrative, generally rendered as bold or a brighter color. 89 -* emphatic \[![!styled-text]\]: indicates that its text should be spoken with emphasis, generally rendered as italics 90 -* literal \[$[!styled-text]\]: indicates that its text is a reference to a literal sequence of characters, variable name, or other discrete token. generally rendered in monospace 91 -* strikeout \[~[!styled-text]\]: indicates that its text should be struck through or otherwise indicated for deletion 92 -* insertion \[+[!styled-text]\]: indicates that its text should be indicated as a new addition to the text body. 93 -** consider using a macro definition [$\edit: [~[#1]][+[#2]]] to save typing if you are doing editing work 94 -* link \[>[!ref] [!styled-text]\]: produces a hyperlink or cross-reference denoted by [$ref], which may be either a URL specified with a reference or the name of an object like an image or section elsewhere in the document. the unicode characters [$→] and [$🔗] can also be used instead of [$>] to denote a link. 95 -* footnote \[^[!ref] [!styled-text]\]: annotates the text with a defined footnote 96 -* raw \[\\[!raw-text]\]: causes all characters within to be interpreted literally, without expansion. the only special characters are square brackets, which must have a matching closing bracket 97 +* strong {obj *|styled-text}: causes its text to stand out from the narrative, generally rendered as bold or a brighter color. 
98 +* emphatic {obj !|styled-text}: indicates that its text should be spoken with emphasis, generally rendered as italics 99 +* literal {obj `|styled-text}: indicates that its text is a reference to a literal sequence of characters or other discrete token. generally rendered in monospace 100 +* variable {obj $|styled-text}: indicates that its text is a stand-in that will be replaced with what it names. generally rendered in italic monospace, ideally of a different color 101 +* underline {obj _|styled-text}: underlines the text. use sparingly on text intended for webpages -- underlined text [!is] distinct from links, but underlining non-links is still a violation of convention. 102 +* strikeout {obj ~|styled-text}: indicates that its text should be struck through or otherwise indicated for deletion 103 +* insertion {obj +|styled-text}: indicates that its text should be indicated as a new addition to the text body. 104 +** consider using a macro definition [`\edit: [~[#1]][+[#2]]] to save typing if you are doing editing work 105 +* link \[>[!ref] [!styled-text]\]: produces a hyperlink or cross-reference denoted by [$ref], which may be either a URL specified with a reference or the name of an object like an image or section elsewhere in the document. the unicode characters [`→] and [`🔗] can also be used instead of [`>] to denote a link. 106 +* footnote {span ^|ref|[$styled-text]}: annotates the text with a defined footnote. in interactive output media [`\[^citations.qtheo Quantum Theosophy: A Neophyte's Catechism]] will insert a link with the text [`Quantum Theosophy: A Neophyte's Catechism] that, when clicked, causes a footnote to pop up on the screen. for static output media, the text will simply have a superscript integer after it denoting where the footnote is to be found. 107 +* superscript {obj '|[$styled-text]}: 108 +* subscript {obj ,|[$styled-text]}: 109 +* raw \[\\[`raw-text]\]: causes all characters within to be interpreted literally, without expansion. 
the only special characters are square brackets, which must have a matching closing bracket 97 110 * raw literal \[$\\[!raw-text]\]: shorthand for [\[$[\…]]] 98 -* macro \{[!name] [!arguments]\}: invokes a [>ex.mac macro], specified with a reference 99 -* argument \[#[!var]\]: in macros only, inserts the [$var]-th argument. otherwise, inserts a context variable provided by the renderer. 100 -* raw argument \[##[!var]\]: like above, but does not evaluate [$var]. 101 -* term \[&[!name] ([!label])\]: quotes a defined term with a link to its definition 102 -* inline image \[&@[!name]\]: shows a small image or other object inline. the unicode character [$🖼] can also be used instead of [$&@]. 103 - 104 -## identifiers 105 -any identifier (including a reference) that is defined within a named section must be referred to from outside that section as [$[!sec].[!obj]], where [$sec] is the ID of the containing section and [$obj] is the ID of the object one wishes to reference. 106 - 107 -## context variables 108 -context variables are provided so that cortav renderers can process templates. certain context variables are provided for by the standard. you can test for the presence of a context variable with the directive [$%[*when] ctx [!var]]. 111 +* macro [`\{[!name] [!arguments]\}]: invokes a [>ex.mac macro], specified with a reference 112 +* argument {obj #|var}: in macros only, inserts the [$var]-th argument. otherwise, inserts a context variable provided by the renderer. 113 +* raw argument {obj ##|var}: like above, but does not evaluate [$var]. 114 +* term {obj &|name}, {span &|name|[$expansion]}: quotes a defined term with a link to its definition, optionally with a custom expansion of the term (for instance, to expand the first use of an acronym) 115 +* inline image {obj &@|name}: shows a small image or other object inline. the unicode character [`🖼] can also be used instead of [`&@]. 
116 +* unicode codepoint {obj U+|hex-integer}: inserts an arbitrary UCS codepoint in the output, specified by [$hex-integer]. lowercase [`u] is also legal. 117 +* math mode {obj =|equation}: activates additional transformations on the span to format it as a mathematical equation; e.g. [`*] becomes [`×] and [`/] --> [`÷]. 118 +* extension {span %|ext|…}: invokes extension named in [$ext]. [$ext] will usually be an extension name followed by a symbol (often a period) and then an extension-specific directive, although for some simple extensions it may just be the plain extension name. further syntax and semantics depend on the extension. this syntax can also be used to apply formatting specific to certain renderers, such as assigning a CSS class in the [`html] renderer ([`\[%html.myclass my [!styled] text]]). 119 +* critical extension {span %!|ext|…}: like [!extension], but will trigger an error if the requested extension is not available 120 +* extension text {span %:|ext|styled-text}: like [!extension], but when the requested extension is not present, [$styled-text] will be emitted as-is. this is a better way to apply CSS classes, as the text will still be visible when rendered to formats other than HTML. 121 +* inline comment {obj %%|...}: ignored. useful for editorial annotations not intended to be part of the rendered product. 122 + 123 + span: [` \[[*[#1]][$[#2]] [#3]\]] 124 + obj: [` \[[*[#1]][$[#2]]\]] 125 + 126 +##ident identifiers 127 +any identifier (including a reference) that is defined within a named section must be referred to from outside that section as [`[!sec].[!obj]], where [$sec] is the ID of the containing section and [$obj] is the ID of the object one wishes to reference. 128 + 129 +##rsrc resources 130 +a [!resource] represents content that is not encoded directly into the source file, but which is embedded by some means in the output. 
resources can either be [!embedded], in which case they are compiled into the final document itself, or they can be [!linked], in which case the final document only contains a URI or similar tag referencing the resource. not all render backends support both linking and embedding, nor do all backends support all object types (for instance, [`groff] does not support video embedding.) 131 + 132 +a resource definition is begun by a line consisting of an [`@] sign and an [>ident identifier]. this line is followed by any number of parameters. a parameter is a line beginning with a single tab, a keyword, a colon, and then a value. additional lines can be added to a parameter by following it with a line that consists of two tabs followed by the text you wish to add. (this is the same syntax used by references.) a resource definition is terminated by a break, or any line that does not begin with a tab. 133 + 134 +a resource definition in use looks like this: 135 + 136 +~~~ 137 +this is a demonstration of resources 138 +@smiley 139 + src: link image/webp http://cdn.example.net/img/smile.webp 140 + link image/png file:img/smile.png 141 + embed image/gif file:img/smile.gif 142 + desc: the Smiling Man would like to see you in his office 143 +here is the resource in span context [&smiley] 144 +and here it is in block context: 145 +&smiley 146 +~~~ 147 + 148 +rendered as HTML, this might produce the following: 149 + 150 +~~~ 151 +<style> 152 + .res-smiley { 153 + content: image-set( 154 + url(http://cdn.example.net/img/smile.webp) type(image/webp), 155 + url(img/smile.png) type(image/png), 156 + url(/* … */) type(image/gif) 157 + ); /* this will actually be repeated with a -webkit- prefix */ 158 + } 159 +</style> 160 +<p>this is a demonstration of resources</p> 161 +<p>here is the resource in span context: <span class="res-smiley"></span></p> 162 +<p>and here it is in block context:</p> 163 +<div class="res-smiley"></div> 164 +~~~ 165 + 166 +note that empty elements with
CSS classes are used in the output, to avoid repeating long image definitions (especially base64 inline encoded ones!) 167 + 168 +### supported parameters 169 +* [`src] (all): specifies where to find the file, what it is, and how to embed it. each line of [`src] should consist of three whitespace-separated words: embed method, MIME type, and URI. 170 +** embed methods 171 +*** [`local]: loads the resource at build time and embeds it into the output file. not all implementations may allow loading remote network resources at build time. 172 +*** [`remote]: only embeds a reference to the location of the resource. use this for e.g. live iframes, dynamic images, or images hosted by a CDN. 173 +*** [`auto]: embeds a reference in file formats where that's practical, and use a remote reference otherwise. 174 +** MIME types: which file types are supported depends on the individual implementation and renderer backend; additionally, extensions can add support for extra types. MIME-types that have no available handler will, where possible, result in an attachment that can be extracted by the user, usually by clicking on a link. however, the following should be usable with all compliant implementations 175 +*** [`image/*] (graphical outputs only) 176 +*** [`video/*] (interactive outputs only) 177 +*** [`image/svg+xml] is handled specially for HTML files, and may or may not be compatible with other renderer backends. 
178 +*** [`font/*] can be used with the HTML backend to reference a web font 179 +*** [`font/woff2] can be used with the HTML backend to reference a web font 180 +*** [`text/plain] (will be inserted as a preformatted text block) 181 +*** [`text/css] (can be used when producing HTML files to link in an extra stylesheet, either by embedding it or referencing it from the header) 182 +*** [`text/x-cortav] (will be parsed and inserted as a formatted text block; context variables can be passed to the file with [`ctx.[$var]] parameters) 183 +*** any MIME-type that matches the type of file being generated by the renderer can be used to include a block of data that will be passed directly to the renderer. 184 +** URI types: additional URI types can be added by extensions or different implementations, but every compliant implementation must support these URIs. 185 +*** [`http], [`https]: accesses resources over HTTP. add a [`file] fallback if possible for the benefit of renderers/viewers that do not have internet access abilities. 186 +*** [`file]: references local files. absolute paths should begin [`file:/]; the slash should be omitted for relative paths. note that this doesn't have quite the same meaning as in HTML -- [`file] can (and usually should be) used with HTML outputs to refer to resources that reside on the same server. a cortav URI of [`file:/etc/passwd] will actually result in the link [`/etc/passwd], not [`file:///etc/passwd] when converted to HTML. generally, you only should use [`http] when you're referring to a resource that exists on a different domain. 187 +*** [`name]: a special URI used generally for referencing resources that are already installed on a target system and do not need to be embedded or linked, the name and type are enough for a renderer on another machine to locate the correct resource. 
this is useful mostly for [>fonts fonts], where it's more typical to refer to fonts that are installed on your system rather than providing paths to font files. 188 +*** [`gemini]: accesses resources over the gemini protocol. currently you should really only use this for [`local] resources unless you're using the gemtext renderer backend, since nothing but gemini browsers are liable to support this protocol. 189 +* [`desc]: supplies a narrative description of the resource, for use as an "alt-text" when the image cannot be loaded and for screenreaders. 190 +* [`detail]: supplies extra narrative commentary that is displayed contextually, e.g. when the user hovers her mouse cursor over the embedded object. 191 + 192 +note that in certain cases, full MIME types do not need to be used. say you're defining a font with the [`name] URI -- you can't necessarily know what file type the system fonts on another computer are going to be. in this case, you can just write [`font] instead of [`font/ttf] or [`font/woff2] or similar. all cortav needs to know in this case is what abstract kind of object you're referencing. 193 + 194 + 195 +##ctxvar context variables 196 +context variables are provided so that cortav renderers can process templates. certain context variables are provided for by the standard. you can test for the presence of a context variable with the directive [`%[*when] ctx [$var]]. context variables are accessed with the [` \[#[$name]\]] span. 109 197 110 198 * {def cortav.file} the name of the file currently being rendered 111 199 * {def cortav.path} the absolute path of the file currently being rendered 112 -* {def cortav.time} the current system time in the form [$[#cortav.time]] 113 -* {def cortav.date} the current system date in the form [$[#cortav.date]] 114 -* {def cortav.datetime} the current system date and time represented in the locale or system-standard manner (e.g.
[$[#cortav.datetime]]) 200 +* {def cortav.time} the current system time in the form [`[#cortav.time]] 201 +* {def cortav.date} the current system date in the form [`[#cortav.date]] 202 +* {def cortav.datetime} the current system date and time represented in the locale or system-standard manner (e.g. [`[#cortav.datetime]]) 115 203 * {def cortav.page} the number of the page currently being rendered 116 204 * {def cortav.id} the identifier of the renderer 117 205 * {def cortav.hash} the SHA3 hash of the source file being rendered 118 206 def: [*[#1]]: 119 207 120 -on systems with environment variables, these may be accessed as context variables by prefixing their name with [$env.]. 208 +on systems with environment variables, these may be accessed as context variables by prefixing their name with [`env.]. 121 209 122 210 different renderers may provide context in different ways, such as from command line options or a context file. any predefined variables should carry an appropriate prefix to prevent conflation. 123 211 124 -## directives 125 - d: [$%[*[##1]]] 126 -* {d author} encodes document authorship 212 +##fonts fonts 213 +for output backends that support font specification, cortav provides a sophisticated font management system by means of the [!font stack]. 214 + 215 +when a document parse begins, the font stack is empty (unless a default font has already been loaded by an intent file). 216 +when the font stack is empty, cortav does not include font specifications in its output, and thus will use whatever the default of the various rendering programs is. 217 + 218 +to use fonts, we first have to define the fonts as [>rsrc resources]. 
219 + 220 +~~~cortav 221 +%% first, we create a new section to namespace the fonts 222 +#^fonts 223 +%% we then define each font as a resource 224 +@serif 225 + src: auto font name:Alegreya 226 + embed font/ttf file:project-fonts/alegreya.ttf 227 + link font/woff2 file:/assets/font/alegreya.woff2 228 + auto font name:Times New Roman 229 +@sans 230 + src: link font name:Alegreya Sans 231 + link font name:Open Sans 232 + link font name:sans-serif 233 +~~~ 234 + 235 +here we have defined two font families, [`fonts.serif] and [`fonts.sans]. each contains a list of references to fonts which will be tried in order. for example, this could be translated into the following CSS: 236 + 237 +~~~css 238 +@font-face { 239 + font-family: "fontdef-serif"; 240 + src: local("Alegreya"), 241 + url("data:font/ttf;base64,…") format("font/ttf"), 242 + url("/assets/font/alegreya.woff2") format("font/woff2"), 243 + local("Times New Roman"); 244 +} 245 +@font-face { 246 + font-family: "fontdef-sans"; 247 + src: local("Alegreya Sans"), 248 + local("Open Sans"), 249 + local("sans-serif"); 250 +} 251 +~~~ 252 + 253 +there are two things that aren't super clear from the CSS, however. notice how we used [`auto] on a couple of those specs? this means it's up to the renderer to decide whether to link or embed the font. for html, a font specified by name can't really be embedded, but for some file formats, it can be. [`auto] lets us produce valid HTML while still taking advantage of font embedding in other formats. 254 + 255 +now that we have our font families defined, we can use their identifiers with the [`%[*font]] directive to control the font stack. the first thing we need to do is push a new font context. there's two ways we can do this: 256 + fnd: [`%[*font] [#1]] 257 +* {fnd dup} will create a copy of the current font context, allowing us to make some changes and then revert later with the {fnd pop} command. 
this isn't useful in our case, however, because right now the stack is empty; there's nothing to duplicate. 258 +* {fnd new} will create a brand new empty context for us to work with and push it to the stack. this can also be used to temporarily revert to the system default fonts, and then switch back with {fnd pop}. 259 +* {fnd set} changes one or more entries in the current font context. it can take a space-separated list of arguments in the form [`[$entry]=[$font-id]]. the supported entries are: 260 +** [`body]: the fallback font. if only this is set in a given font context, it will be used for everything 261 +** [`paragraph]: the font used for normal paragraphs 262 +** [`header]: the font used in headers 263 +** [`subtitle]: the font used in subtitles 264 +** [`list]: the font used in lists 265 +** [`table]: the font used in tables 266 +** [`caption]: the font used for captions 267 +* {fnd pop} removes the top context from the font stack. 268 + 269 +note that extensions may consult the font context for entries specific to them. for instance, [>toc toc] checks for [`toc] before falling back to [`body] and then the default font. 270 + 271 +these commands are enough to give us a very flexible setup. consider the following: 272 + 273 +~~~cortav 274 +%% let's pretend we've also defined the fonts 'title', 'cursive', and 'thin' 275 + 276 +%font new 277 +%font set body=sans header=serif 278 +%font dup 279 +%font set header=title 280 +# lorem ipsum dolor 281 +%font pop 282 + 283 +%% we've now set up a default font context, created a new context for the title of the 284 +%% document, and then popped it back off after the title was inserted so that our 285 +%% first font context is active again.
everything after that last '%font pop' will 286 +%% be printed in sans, except for headers, which will be printed in 'serif' 287 + 288 +lorem ipsum dolor sit amet, sed consectetur apiscing elit… 289 + 290 +%font dup 291 +%font set body=cursive 292 +> sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. 293 +> Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut 294 +%font pop 295 + 296 +%% above we created a blockquote whose text is printed in a cursive font; afterwards, 297 +%% we simply remove this new context— 298 + 299 +and everything is back the way it was at "lorem ipsum" 300 + 301 +%% the font mechanism is at its most powerful when used with multiline macros: 302 + 303 + cursive-quote: %font dup 304 + %font set body=cursive 305 + > [#1] 306 + %font pop 307 + 308 +%% now, whenever we want a block with a cursive body, we can simply invoke 309 + 310 +&$cursive-quote Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident 311 + 312 +%% without affecting the overall font context. in fact, since 'cursive-quote' creates 313 +%% its context using 'dup', it would import all font specifications besides 'body' 314 +%% from the environment it is invoked in 315 +~~~ 316 + 317 +##dir directives 318 + d: [`%[*[##1]]] 319 +* {d author} encodes document authorship. multiple author directives can be issued to add additional coauthors 127 320 * {d cols} specifies the number of columns the next object should be rendered with 128 321 * {d include} transcludes another file 322 +* {d import} reads in the contents of another file as an embeddable section 129 323 * {d quote} transcludes another file, without expanding the text except for paragraphs 130 324 * {d embed}, where possible, embeds another file as an object within the current one. in HTML this could be accomplished with e.g. an iframe. 
131 325 * {d expand} causes the next object (usually a code block) to be fully expanded when it would otherwise not be 132 -* {d pragma} supplies semantic data about author intent, the kind of information document contains and hints about how it should be displayed to the user. think of them like offhand remarks to the renderer -- there's no guarantee that it'll pay any attention, but if it does, your document will look better. pragmas have no scope; they affect the entire document. the pragma function exists primarily as a means to allow parameters that would normally need to be specified on e.g. the command line to be encoded in the document instead in a way that multiple implementations can understand. a few standard pragmas are defined. 326 +* {d font} controls the font stack, for outputs that support changing fonts. see [>fonts fonts] for more information. 327 +* {d lang} changes the current language, which is used by extensions to e.g. control typographical conventions, and may be encoded into the output by certain renderers (e.g. HTML). note that quotes and blockquotes can be set to a separate language with a simpler syntax. the language should be notated using IETF language tags 328 +** {d lang is x-ranuir-CR8} sets the current language to Ranuir as spoken in the Central Worlds, written in Corran and encoded using UTF-8. this might be used at the top of a document to set its primary language. 329 +** {d lang push gsw-u-sd-chzh} temporarily switches to Zürich German, e.g. to quote a German passage in an otherwise Ranuir document 330 +** {d lang sec en-US} switches to American English for the duration of a section. does not affect the language stack. 331 +** {d lang pop} drops the current language off the language stack, returning to whatever was pushed or set before it. 
this would be used, for instance, at the end of a passage 332 +* {d pragma} supplies semantic data about author intent, the kind of information the document contains and hints about how it should be displayed to the user. think of them like offhand remarks to the renderer -- there's no guarantee that it'll pay any attention, but if it does, your document will look better. pragmas have no scope; they affect the entire document. the pragma function exists primarily as a means to allow parameters that would normally need to be specified on e.g. the command line to be encoded in the document instead in a way that multiple implementations can understand. a few standard pragmas are defined. 133 333 ** {d pragma layout} gives a hint on how the document should be laid out. the first hint that is understood will be applied; all others will be discarded. standard hints include: 134 -*** essay 135 -*** narrative 136 -*** screenplay: uses asides to denote actions, quotes for dialogue 137 -*** stageplay: uses asides to denote actions, quotes for dialogue 138 -*** manual 139 -*** glossary 140 -*** news 334 +*** [`essay] 335 +*** [`narrative] 336 +*** [`screenplay]: uses asides to denote actions, quotes for dialogue 337 +*** [`stageplay]: uses asides to denote actions, quotes for dialogue 338 +*** [`manual] 339 +*** [`glossary] 340 +*** [`news] 341 +*** [`book]: section depths 1-3 gain additional semantics 342 +***: [*part]: the section gets a page to itself to announce the beginning of a new part or appendix 343 +***: [*chapter]: the section is preceded by a page break 344 +***: [*heading]: the section can occur on the same page as text and headings from other sections 141 345 ** {d pragma accent} specifies an accent hue (in degrees around the color wheel) for renderers which support colorized output 142 346 ** {d pragma accent-spread} is a factor that controls the "spread" of hues used in the document.
if 0, only the accent color will be used; if larger, other hues will be used in addition to the primary accent color. 143 -** {d pragma dark-on-light on|off} controls whether the color scheme used should be light-on-dark or dark-on-light 347 +** {d pragma dark-on-light on\|off} controls whether the color scheme used should be light-on-dark or dark-on-light 144 348 ** {d pragma page-width} indicates how wide the pages should be 349 +** {d pragma title-page} specifies a section to use as a title page, for renderer backends that support pagination 145 350 146 -! note on pragmas: particularly when working with collections of documents, you should not keep formatting metadata in the documents themselves! the best thing to do is to have a makefile for compiling the documents using whatever tools you want to support, and encoding the rendering options in this file (for the reference implementation this currently means as command line arguments, but eventually it will support intent files as well) so they can all be changed in one place; pragmas should instead be used for per-document [*overrides] of default settings. 351 +! note on pragmata: particularly when working with collections of documents, you should not keep formatting metadata in the documents themselves! the best thing to do is to have a makefile for compiling the documents using whatever tools you want to support, and encoding the rendering options in this file (for the reference implementation this currently means as command line arguments, but eventually it will support intent files as well) so they can all be changed in one place; pragmas should instead be used for per-document [*overrides] of default settings. 147 352 ! a workaround for the lack of intent files in the reference implementation is to have a single pseudo-stylesheet that contains only {d pragma} statements, and then import this file from each individual source file using the {d include} directive. 
this is suboptimal and recommended only when you need to ensure compatibility between different implementations. 148 353 ! when creating HTML files, an even better alternative may be to turn off style generation entirely and link in an external, hand-written CSS stylesheet. this is generally the way you should compile sources for existing websites if you aren't going to write your own extension. 149 354 150 355 ##ex examples 151 356 152 357 ~~~ blockquotes #bq [cortav] ~~~ 153 358 the following excerpts of text were recovered from a partially erased hard drive found in the Hawthorne manor in the weeks after the Incident. context is unknown. ................................................................................ 185 390 186 391 +:english :| honor | 187 392 +:ranuir :| tef | 188 393 +:zia ţai :| pang | 189 394 +:thalishte:| mbecheve | 190 395 ~~~ 191 396 192 -## extensions 397 +##extns extensions 193 398 the cortav specification also specifies a number of extensions that do not have to be supported for a renderer to be compliant. the extension mechanism supports the following directives. 194 399 195 400 * inhibits: prevents an extension from being used even where available 196 401 * uses: turns on an extension that is not specified by the user operating the renderer (e.g. on the command line) 197 402 * needs: causes rendering to fail with an error if the extensions are not available 198 403 199 -where possible, instead of [$needs x y z], the directive [$when has-ext x y z] should be used instead. this causes the next section to be rendered only if the named extensions are available. [$unless has-ext x y z] can be used to provide an alternative format. 404 +where possible, instead of [`needs [$x y z]], the directive [`when has-ext [$x y z]] should be used instead. this causes the next section to be rendered only if the named extensions are available. [`unless has-ext [$x y z]] can be used to provide an alternative format. 
200 405 201 406 extensions are mainly interacted with through directives. all extension directives must be prefixed with the name of the extension. 202 407 203 -### toc 204 -sections that have a title will be included in the table of contents. the table of contents is by default inserted at the break between the first level-1 section and the section immediately following it. you may instead place the directive [$toc] where you wish the TOC to be inserted, or suppress it entirely with [$inhibits toc]. note that some renderers may not display the TOC as part of the document itself. 408 +the reference implementation seeks to support all standardized extensions. it's not quite there yet, however. 409 + 410 +###toc toc 411 +sections that have a title will be included in the table of contents. the table of contents is by default inserted at the break between the first level-1 section and the section immediately following it. you may instead place the directive [`toc] where you wish the TOC to be inserted, or suppress it entirely with [`inhibits toc]. note that some renderers may not display the TOC as part of the document itself. 205 412 206 413 toc provides the directives: 207 414 208 -* [$%[*toc]]: insert a table of contents in the specified position. this can be used more than once, but doing so may have confusing, incorrect, or nonsensical results under some renderers, and some may just ignore the directive entirely 209 -* [$%[*toc] mark [!styled-text]]: inserts a TOC entry with the label [!styled-text] pointing to the current location. this can be used to e.g. mark noteworthy images, instances of long quotes or literal blocks, or functions inside an expanded code block. 210 -* [$%[*toc] name [!id styled-text]]: like [$%[*toc] mark] but allows an additional [!id] parameter which specifies the ID the renderer will assign to an anchor element. this is not meaningful for all renderers and when it is, it is up to the renderer to decide what it means. 
211 -** the [*html] render backend interprets [!id] as the [$id] element for the anchor tag 212 -** the [*groff] render backend ignores [!id] 213 - 214 -### smart-quotes 215 -a cortav renderer may automatically translate punctuation marks to other punctuation marks depending on their context. 216 - 217 -### hilite 218 -code can be highlighted according to the formal language it is written in. 219 - 220 -### lua 415 +* [`%[*toc]]: insert a table of contents in the specified position. this can be used more than once, but doing so may have confusing, incorrect, or nonsensical results under some renderers, and some may just ignore the directive entirely 416 +* [`%[*toc] mark [$styled-text]]: inserts a TOC entry with the label [$styled-text] pointing to the current location. this can be used to e.g. mark noteworthy images, instances of long quotes or literal blocks, or functions inside an expanded code block. 417 +* [`%[*toc] name [$id styled-text]]: like [`%[*toc] mark] but allows an additional [$id] parameter which specifies the ID the renderer will assign to an anchor element. this is not meaningful for all renderers and when it is, it is up to the renderer to decide what it means. 418 +** the [*html] render backend interprets [$id] as the [`id] attribute for the anchor tag 419 +** the [*groff] render backend ignores [$id] 420 + 421 +###tsmog transmogrify 422 +a cortav renderer may automatically translate punctuation marks or symbol sequences to superior representations depending on their context. to be compliant this extension should implement, at minimum: 423 +* smart quotes (with consideration for the typographical conventions of languages like German or Spanish) 424 +** {dir.d transmogrify|language [$lang]} can be used to explicitly set the language; otherwise, it must be determined from the value of {dir.d pragma|lang}. if this is not present, implementations may fall back on their own methods for determining the language in use, such as command-line flags.
425 +* multigraph to glyph conversion, including at least: 426 +** [`\--] --> "—" 427 +** [`\-->] --> "→" 428 +** [`\<--] --> "←" 429 + 430 +an escape character before any of the sequence characters should prevent the sequence from being rendered. raw nodes (that is, [`\[\…\]] and [`\[`\…\]]) should not be scanned for transmogrification, nor should the contents of code blocks unless marked with the [`%[*expand]] directive 431 + 432 +transmogrification shall only take place after all other parsing steps are completed. 433 + 434 +###hilite hilite 435 +code can be highlighted according to the formal language it is written in. a compliant hilite implementation must implement basic keyword, symbol, comment, pragma, and literal highlighting for the following formal languages. 436 +* C 437 +* [>lua Lua] 438 +* [>html HTML] 439 +* [>scheme Scheme] 440 +* [>terra Terra] 441 +* [>libconfig libconfig] 442 + 443 + lua: https://lua.org 444 + scheme: https://call-cc.org 445 + terra: https://terralang.org 446 + html: https://dev.w3.org/html5/spec-LC/ 447 + libconfig: http://hyperrealm.github.io/libconfig/ 448 + 449 +the highlighter should make use of semantic HTML tags like [`<var>] where possible. 450 + 451 +###lua lua 221 452 renderers with a lua interpreter available can evaluate lua code: 222 -* [$%lua use [!file]]: evaluates [$file] and makes its definitions available 223 -* [$\[%lua raw [!script]\]]: evaluates [$script] and emits the string it returns (if any) in raw span context. 224 -* [$\[%lua exp [!script]\]]: evaluates [$script] and emits the string it returns (if any) in expanded span context. 225 -* [$%lua raw [!script]]: evaluates [$script] and emits the string array it returns (if any) in raw block context. 226 -* [$%lua exp [!script]]: evaluates [$script] and emits the string array it returns (if any) in expanded block context.
453 +* [`%lua use [!file]]: evaluates [$file] and makes its definitions available 454 +* [`\[%lua raw [!script]\]]: evaluates [$script] and emits the string it returns (if any) in raw span context. 455 +* [` \[%lua exp [!script]\]]: evaluates [$script] and emits the string it returns (if any) in expanded span context. 456 +* [`%lua raw [!script]]: evaluates [$script] and emits the string array it returns (if any) in raw block context. 457 +* [`%lua exp [!script]]: evaluates [$script] and emits the string array it returns (if any) in expanded block context. 227 458 228 -the interpreter should provide a [$cortav] table with the objects: 459 +the interpreter should provide a [`cortav] table with the objects: 229 460 * ctx: contains context variables 230 461 231 462 used files should return a table with the following members 232 463 * macros: an array of functions that return strings or arrays of strings when invoked. these will be injected into the global macro namespace. 233 464 234 -### ts 465 +###ts ts 235 466 the [*ts] extension allows documents to be marked up for basic classification constraints and automatically redacted. if you are seriously relying on ts for confidentiality, make damn sure you start the file with [$%[*requires] ts], so that rendering will fail with an error if the extension isn't supported. 236 467 237 468 ts enables the directives: 238 -* [$ts class [!scope] [!level] (styled-text)]: indicates a classification level for either the while document (scope [!doc]) or the next section (scope [!sec]). if the ts level is below [$level], the section will be redacted or rendering will fail with an error, as appropriate. if styled-text is included, this will be treated as the name of the classification level. 239 -* [$ts word [!scope] [!word] (styled-text)]: indicates a codeword clearance that must be present for the text to render. if styled-text is present, this will be used to render the name of the codeword instead of [$word]. 
240 -* [$when ts level [!level]] 241 -* [$when ts word [!word]] 469 +* [`%[*ts] class [$scope level] ([$styled-text])]: indicates a classification level for either the whole document (scope [$doc]) or the next section (scope [$sec]). if the ts level is below [$level], the section will be redacted or rendering will fail with an error, as appropriate. if styled-text is included, this will be treated as the name of the classification level. 470 +* [`%[*ts] word [$scope word] ([$styled-text])]: indicates a codeword clearance that must be present for the text to render. if styled-text is present, this will be used to render the name of the codeword instead of [$word]. 471 +* [`%[*when] ts level [$level]] 472 +* [`%[*when] ts word [$word]] 242 473 243 474 ts enables the spans: 244 -* [$\[🔒#[!level] [!styled-text]\]]: redacts the span if the security level is below that specified. 245 -* [$\[🔒.[!word] [!styled-text]\]]: redacts the span if the specified codeword clearance is not enabled. 246 -(the padlock emoji is shorthand for [$%ts].) 475 +* [`\[🔒#[!level] [$styled-text]\]]: redacts the span if the security level is below that specified. 476 +* [`\[🔒.[!word] [$styled-text]\]]: redacts the span if the specified codeword clearance is not enabled. 477 +(the padlock emoji is shorthand for [`%[*ts]].) 247 478 248 479 ts redacts spans securely; that is, they are simply replaced with an indicator that they have been redacted, without visually leaking the length of the redacted text. 249 480 250 481 ~~~#ts-example example [cortav] ~~~ 251 482 %ts word doc sorrowful-pines SORROWFUL PINES 252 483 253 484 # intercept R1440 TCT S3 ................................................................................ 266 497 <B> Hyacinth, I told you not to contact me without— 267 498 <A, shouting> god DAMMIT woman I am trying to SAVE your worthless skin 268 499 <B> Hyacinth! your Godforsaken scrambler! 269 500 <A> …oh, [!fuck].
270 501 (signal lost) 271 502 ~~~ 272 503 273 -# reference implementation 274 -the cortav standard is implemented in [$cortav.lua], found in this repository. only the way [$cortav.lua] interprets the cortav language is defined as a reference implementation; other behaviors are simply how [$cortav.lua] implements the specification and may be copied, ignored, tweaked, violently assaulted, or used as inspiration by a compliant parser. 504 +#refimpl reference implementation 505 +the cortav standard is implemented in [`cortav.lua], found in this repository. only the way [`cortav.lua] interprets the cortav language is defined as a reference implementation; other behaviors are simply how [`cortav.lua] implements the specification and may be copied, ignored, tweaked, violently assaulted, or used as inspiration by a compliant parser. 275 506 276 -the reference implementation can be used both as a lua library and from the command line. [$cortav.lua] contains the parser and renderers, [$ext/*] contain various extensions, [$sirsem.lua] contains utility functions, and [$cli.lua] contains the CLI driver. 507 +the reference implementation can be used both as a lua library and from the command line. [`cortav.lua] contains the parser and renderers, [`ext/*] contain various extensions, [`sirsem.lua] contains utility functions, and [`cli.lua] contains the CLI driver. 277 508 278 -## lua library 509 +##refimpl-lib lua library 279 510 there are various ways to use cortav from a lua script; the simplest however is probably to precompile your script with luac and link in the necessary components of the implementation. for instance, say we have the following program 280 511 281 512 ~~~ stdin2html.lua [lua] ~~~ 282 513 local ct = require 'cortav' 283 514 local mode = {} 284 515 local doc = ct.parse(io.stdin, {file = '(stdin)'}, mode) 285 516 doc.stage = { ................................................................................ 
292 523 293 524 and the only extension we need is the table-of-contents extension. our script can be translated into a self-contained lua bytecode blob with the following command 294 525 295 526 ~~~ 296 527 $ luac -s -o stdin2html.lc $cortav_repo/{sirsem,cortav,ext/toc}.lua stdin2html.lua 297 528 ~~~ 298 529 299 -and can then be operated with the command [$lua stdin2html.lc], with no further need for the cortav repository files. note that the order of the [$luac] command is important! [$sirsem.lua] must come first, followed by [$cortav.lua], followed by any extensions. your driver script (i.e. the script with the entry point into the application) should always come last. 530 +and can then be operated with the command [`lua stdin2html.lc], with no further need for the cortav repository files. note that the order of the [`luac] command is important! [`sirsem.lua] must come first, followed by [`cortav.lua], followed by any extensions. your driver script (i.e. the script with the entry point into the application) should always come last. 300 531 301 -### building custom tools 532 +###refimpl-tools building custom tools 302 533 generally, most existing file-format conversion tools (cmark, pandoc, and so on) have a crucial limitation: they hardcode specific assumptions like document structure. this means that the files they output are generally not suitable as-is for the users' purposes, and require further munging, usually by hateful shell or perl scripts. some tools do provide libraries end users to use as a basis for designing their own tools, but these are often limited, and in any case the user ends up having to write their own (non-standard) driver. it's no surprise that very few people end up doing this. 303 534 304 -[$cortav.lua]'s design lends itself to a more elegant solution. 
one can of course write their own driver using [$cortav] as a library, but most of the time when you're compiling document sources, you just want a binary you can run from the command line or a makefile. with [$cortav.lua], you can extend its capabilities easily while keeping the same driver. 535 +[`cortav.lua]'s design lends itself to a more elegant solution. one can of course write their own driver using [`cortav] as a library, but most of the time when you're compiling document sources, you just want a binary you can run from the command line or a makefile. with [`cortav.lua], you can extend its capabilities easily while keeping the same driver. 305 536 306 -in the [$cortav] spec, extensions are mostly intended to give different implementations the ability to offer extra capabilities, but the reference implementation uses an extension architecture that makes it easy to write and add your own. for each type of new behavior you want to implement, just create a new extension and list it on the make command line: 537 +in the [`cortav] spec, extensions are mostly intended to give different implementations the ability to offer extra capabilities, but the reference implementation uses an extension architecture that makes it easy to write and add your own. for each type of new behavior you want to implement, just create a new extension and list it on the make command line: 307 538 308 539 ~~~ 309 540 $ nvim ~/dev/my-cortav-exts/imperial-edict.lua 310 541 $ make cortav extens+=$HOME/dev/my-cortav-exts/*.lua 311 542 ~~~ 312 543 313 -the cortav binary this produces will have all the extra capabilities you personally need, without any need to fork [$cortav.lua] itself or even touch the repository. 544 +the cortav binary this produces will have all the extra capabilities you personally need, without any need to fork [`cortav.lua] itself or even touch the repository. 
314 545 315 -there's no reason [$cortav.lua] shouldn't be able to load extensions at runtime as well; i just haven't implemented this behavior yet. it probably would only take a few extra lines of code tho. 546 +there's no reason [`cortav.lua] shouldn't be able to load extensions at runtime as well; i just haven't implemented this behavior yet. it probably would only take a few extra lines of code tho. 316 547 317 -i will eventually document the extension API, but for now, look at [$ext/toc.lua] for a simple example of how to register an extension. 548 +i will eventually document the extension API, but for now, look at [`ext/toc.lua] for a simple example of how to register an extension. 318 549 319 -## command line driver 320 -the [$cortav.lua] command line driver can be run from the repository directory with the command [$lua ./cli.lua], or by first compiling it into a bytecode form that links in all its dependencies. this is the preferred method for installation, as it produces a self-contained executable which loads more quickly, but running the driver in script form may be desirable for development or debugging. 550 +##refimpl-cli command line driver 551 +the [$cortav.lua] command line driver can be run from the repository directory with the command [`lua ./cli.lua], or by first compiling it into a bytecode form that links in all its dependencies. this is the preferred method for installation, as it produces a self-contained executable which loads more quickly, but running the driver in script form may be desirable for development or debugging. 321 552 322 -the repository contains a GNU makefile to automate compilation of the reference implementation on unix-like OSes. simply run [$$ make cortav] or [$$ gmake cortav] from the repository root to produce a self-contained bytecode executable that can be installed anywhere on your filesystem, with no dependencies other than the lua interpreter. 
553 +the repository contains a GNU makefile to automate compilation of the reference implementation on unix-like OSes. simply run [`$ make cortav] or [`$ gmake cortav] from the repository root to produce a self-contained bytecode executable that can be installed anywhere on your filesystem, with no dependencies other than the lua interpreter. 323 554 324 -! note that the makefile strips debugging symbols to save space, so running [$cli.lua] directly as a script may be helpful if you encounter errors and need stacktraces or other debugging information. 555 +! note that the makefile strips debugging symbols to save space, so running [`cli.lua] directly as a script may be helpful if you encounter errors and need stacktraces or other debugging information. 325 556 326 -henceforth it will be assumed that you have produced the [$cortav] executable and placed it somewhere in your [$$PATH]; if you are instead running [$cortav.lua] directly as an interpreted script, you'll need to replace [$$ cortav] with [$$ lua ./cli.lua] in incantations. 557 +henceforth it will be assumed that you have produced the [`cortav] executable and placed it somewhere in your [$$PATH]; if you are instead running [`cortav.lua] directly as an interpreted script, you'll need to replace [`$ cortav] with [`$ lua ./cli.lua] in incantations. 327 558 328 -when run without commands, [$cortav.lua] will read input from standard input and write to standard output. alternately, a source file can be given as an argument. to write to a specific file instead of the standard output stream, use the [$-o [!file]] flag. 559 +when run without commands, [`cortav.lua] will read input from standard input and write to standard output. alternately, a source file can be given as an argument. to write to a specific file instead of the standard output stream, use the [`-o [!file]] flag. 
329 560 330 561 ~~~ 331 562 $ cortav readme.ct -o readme.html 332 563 # reads from readme.ct, writes to readme.html 333 564 $ cortav -o readme.html 334 565 # reads from standard input, writes to readme.html 335 566 $ cortav readme.ct 336 567 # reads from readme.ct, writes to standard output 337 568 ~~~ 338 569 339 -### switches 340 -[$cortav.lua] offers various switches to control its behavior. 570 +###refimpl-build building 571 +the command line driver is built and installed with a GNU [$make] script. this script accepts the variables shown below with their default values: 572 ++ prefix | [`[$$HOME]/.local] | the path under which the package will be installed 573 ++ build | [`build] | the directory where generated objects will be placed; useful for out-of-tree builds 574 ++ bin-prefix | [`[$$prefix]/bin] | directory to install the executables to 575 ++ default-format-flags | [`-m html:width 35em] | a list of flags that will be passed by the viewer script to [`cortav] when generating a html file 576 + 577 +the following targets are supplied to automate the build: 578 +* [`install] builds everything, installs the executable and the viewer script to [$$bin-prefix], and registers the viewer script with XDG 579 +* [`excise] deletes everything installed and deregisters the file handlers (note that the same variables must be passed to [`excise] as were passed to [`install]!) 580 +* [`clean] deletes build artifacts from the [$$build] directory like it was never there 581 +* [`wipe] is equivalent to [`$ make excise && make clean] 582 + 583 +###refimpl-switches switches 584 +[`cortav.lua] offers various switches to control its behavior. 
341 585 + long + short + function + 342 -| [$--out [!file]] :|:[$-o]:| sets the output file (default stdout) | 343 -| [$--log [!file]] :|:[$-l]:| sets the log file (default stderr) | 344 -| [$--define [!var] [!val]] :|:[$-d]:| sets the context variable [$var] to [$val] | 345 -| [$--mode-set [!mode]] :|:[$-y]:| activates the [>refimpl-mode mode] with ID [!mode] 346 -| [$--mode-clear [!mode]] :|:[$-n]:| disables the mode with ID [!mode] | 347 -| [$--mode [!id] [!val]] :|:[$-m]:| configures mode [!id] with the value [!val] | 348 -| [$--mode-set-weak [!mode]] :|:[$-Y]:| activates the [>refimpl-mode mode] with ID [!mode] if the source file does not specify otherwise 349 -| [$--mode-clear-weak [!mode]] :|:[$-N]:| disables the mode with ID [!mode] if the source file does not specify otherwise 350 -| [$--mode-weak [!id] [!val]] :|:[$-M]:| configures mode [!id] with the value [!val] if the source file does not specify otherwise 351 -| [$--help] :|:[$-h]:| display online help | 352 -| [$--version] :|:[$-V]:| display the interpreter version | 586 +| [`--out [!file]] :|:[`-o]:| sets the output file (default stdout) | 587 +| [`--log [!file]] :|:[`-l]:| sets the log file (default stderr) | 588 +| [`--define [!var] [!val]] :|:[`-d]:| sets the context variable [$var] to [$val] | 589 +| [`--mode-set [!mode]] :|:[`-y]:| activates the [>refimpl-mode mode] with ID [!mode] 590 +| [`--mode-clear [!mode]] :|:[`-n]:| disables the mode with ID [!mode] | 591 +| [`--mode [!id] [!val]] :|:[`-m]:| configures mode [!id] with the value [!val] | 592 +| [`--mode-set-weak [!mode]] :|:[`-Y]:| activates the [>refimpl-mode mode] with ID [!mode] if the source file does not specify otherwise 593 +| [`--mode-clear-weak [!mode]] :|:[`-N]:| disables the mode with ID [$mode] if the source file does not specify otherwise 594 +| [`--mode-weak [!id] [!val]] :|:[`-M]:| configures mode [$id] with the value [$val] if the source file does not specify otherwise 595 +| [`--help] :|:[`-h]:| display online help | 596 
+| [`--version] :|:[`-V]:| display the interpreter version | 353 597 354 598 ###refimpl-mode modes 355 -most of [$cortav.lua]'s implementation-specific behavior is controlled by use of [!modes]. these are namespaced options which may have a boolean, string, or numeric value. boolean modes are set with the [$-y] [$-n] flags; other modes use the [$-m] flags. 599 +most of [`cortav.lua]'s implementation-specific behavior is controlled by use of [!modes]. these are namespaced options which may have a boolean, string, or numeric value. boolean modes are set with the [`-y] [`-n] flags; other modes use the [`-m] flags. 356 600 357 601 most modes are defined by the renderer backend. the following modes affect the behavior of the frontend: 358 602 359 603 + ID + type + effect 360 -| [$render:format]:| string | selects the [>refimpl-rend renderer] (default [$html]) 361 -| [$parse:show-tree]:| flag | dumps the parse tree to the log after parsing completes 604 +| [`render:format]:| string | selects the [>refimpl-rend renderer] (default [`html]) 605 +| [`parse:show-tree]:| flag | dumps the parse tree to the log after parsing completes 362 606 363 607 ##refimpl-rend renderers 364 -[$cortav.lua] implements a frontend-backend architecture, separating the parsing stage from the rendering stage. this means new renderers can be added to [$cortav.lua] relatively easily. currently, only an [>refimpl-rend-html HTML renderer] is included; however, a [$groff] backend is planned at some point in the future, so that PDFs and manpages can be generated from cortav files. 608 +[`cortav.lua] implements a frontend-backend architecture, separating the parsing stage from the rendering stage. this means new renderers can be added to [`cortav.lua] relatively easily. currently, only an [>refimpl-rend-html HTML renderer] is included; however, a [`groff] backend is planned at some point in the future, so that PDFs and manpages can be generated from cortav files. 
365 609 366 610 ###refimpl-rend-html html 367 -the HTML renderer is activated with the incantation [$-m render:format html]. it is currently the default backend. it produces a single HTML file, optionally with CSS styling data, from a [$.ct] input file. 611 +the HTML renderer is activated with the incantation [`-m render:format html]. it is currently the default backend. it produces a single HTML file, optionally with CSS styling data, from a [`.ct] input file. 368 612 369 -it supports the following modes: 613 +####refimpl-rend-html-modes modes 614 +[`html] supports the following modes: 370 615 371 -* string (css length) [$html:width] sets a maximum width for the body content in order to make the page more readable on large displays 372 -* number [$html:accent] applies an accent hue to the generated webpage. the hue is specified in degrees, e.g. [$-m html:accent 0] applies a red accent. 373 -* flag [$html:dark-on-light] uses dark-on-light styling, instead of the default light-on-dark 374 -* flag [$html:fossil-uv] outputs an HTML snippet suitable for use with the Fossil VCS webserver. this is intended to be used with the unversioned content mechanism to host rendered versions of documentation written in cortav that's stored in a Fossil repository. 375 -* number [$html:hue-spread] generates a color palette based on the supplied accent hue. the larger the value, the more the other colors diverge from the accent hue. 376 -* string [$html:link-css] generates a document linking to the named stylesheet 377 -* flag [$html:gen-styles] embeds appropriate CSS styles in the document (default on) 378 -* flag [$html:snippet] produces a snippet of html instead of an entire web page. 
note that proper CSS scoping is not yet implemented (and can't be implemented hygienically since [$scoped] was removed 😢) 379 -* string [$html:title] specifies the webpage titlebar contents (normally autodetected from the document based on headings or directives) 616 +* string (css length) [`html:width] sets a maximum width for the body content in order to make the page more readable on large displays 617 +* number [`html:accent] applies an accent hue to the generated webpage. the hue is specified in degrees, e.g. [$-m html:accent 0] applies a red accent. 618 +* flag [`html:dark-on-light] uses dark-on-light styling, instead of the default light-on-dark 619 +* flag [`html:fossil-uv] outputs an HTML snippet suitable for use with the Fossil VCS webserver. this is intended to be used with the unversioned content mechanism to host rendered versions of documentation written in cortav that's stored in a Fossil repository. 620 +* number [`html:hue-spread] generates a color palette based on the supplied accent hue. the larger the value, the more the other colors diverge from the accent hue. 621 +* string [`html:link-css] generates a document linking to the named stylesheet 622 +* flag [`html:gen-styles] embeds appropriate CSS styles in the document (default on) 623 +* flag [`html:snippet] produces a snippet of html instead of an entire web page. note that proper CSS scoping is not yet implemented (and can't be implemented hygienically since [$scoped] was removed 😢) 624 +* string [`html:title] specifies the webpage titlebar contents (normally autodetected from the document based on headings or directives) 625 +* string [`html:font] specifies the default font to use when rendering as a CSS font specification (e.g. 
[`-m html:font 'Alegreya, Junicode, Georgia, "Times New Roman"']) 380 626 381 627 ~~~ 382 628 $ cortav readme.ct --out readme.html \ 383 629 -m render:format html \ 384 630 -m html:width 40em \ 385 631 -m html:accent 80 \ 386 632 -m html:hue-spread 35 \ 387 633 -y html:dark-on-light # could also be written as: 388 634 $ cortav readme.ct -ommmmy readme.html render:format html html:width 40em html:accent 80 html:hue-spread 35 html:dark-on-light 389 635 ~~~ 390 636 637 +#### directives 638 +[`html] supplies the following render directives. 639 + 640 +* [`%[*html] link [$rel] [$mime] [$href]]: inserts a [`<link>] tag in the header, for example, to link in an alternate stylesheet, or help feed readers find your atom or rss feed. 641 +** [`%[*html] link alternate\\ stylesheet text/css /res/style2.css] 642 +** [`%[*html] link alternate application/atom+xml /feed.atom] 643 +* [`%[*html] style [$id]]: adds the stylesheet referenced by [$id] into the document stylesheet. the stylesheet is specified using a [>rsrc resource]. 644 + 645 +#### stylesheets 646 +the [`html] backend offers some additional directives for external CSS files that are embedded into the document, in order to simplify integration with the accent mechanism. these are: 647 + 648 +* [`@[*fg]]: resolves to a color expression denoting the selected foreground color. equivalent to [`[*tone](1)] 649 +* [`@[*bg]]: resolves to a color expression denoting the selected background color. equivalent to [`[*tone](0)] 650 +* [`@[*tone]\[/[$alpha]\]([$fac] \[[$shift] \[[$saturate]\]\] )]: resolves to a color expression. [$fac] is a floating-point value scaling from the background color to the foreground color. [$shift] is a value in degrees controlling how far the hue will shift relative to the accent. [$saturate] is a floating-point value controlling how saturated the color is. 651 + 652 +###refimpl-rend-groff groff 653 +the [`groff] backend produces a text file suitable for supplying to a [`groff] compiler. 
[`groff] is the GNU implementation of a venerable typesetting system from the early days of UNIX 654 + 655 +as a convenience, the groff backend supports two modes of operation: it can write a [`groff] file directly to disk, or it can automatically launch a [`groff] process with the appropriate command line options and environment variables. this second mode is recommended unless you're rendering very large files to multiple formats, as [`groff] invocation is nontrivial and it's best to let the renderer handle that for you. 656 + 657 +####refimpl-rend-groff-modes modes 658 +[`groff] supports the following modes: 659 + 660 +* string [`groff:annotate] controls how footnotes will be handled. 661 +** [`footnote] places footnotes at the end of the page they are referenced on. if the same footnote is used on multiple pages, it will be duplicated on each. 662 +** [`secnote] places footnotes at the end of each section. footnotes used in multiple sections will be duplicated for each 663 +** [`endnote] places all footnotes at the end of the rendered document. 664 +* string [`groff:dev] names an output device (such as [`dvi] or [`pdf]). if this mode is present, [`groff] will be automatically invoked 665 +* string [`groff:title-page] takes an identifier that names a section. this section will be treated as the title page for the document. 666 + 667 +### directives 668 +* [`%[*pragma] title-page [$id]] sets the title page to section [$id]. this causes it to be specially formatted, with a large, centered title and subtitle. 669 + 670 +### quirks 671 +if the [`toc] extension is active but no [`%[*toc]] directive is provided, the table of contents will be given its own section at the start of the document (after the title page, if any). 672 + 391 673 ## further directions 392 674 393 675 ### additional backends 394 676 it is eventually intended to support the following backends, if reasonably practicable. 395 677 * [*html]: emit HTML and CSS code to typeset the document. 
[!in progress] 396 678 * [*svg]: emit SVG, taking advantage of its precise layout features to produce a nicely formatted and paginated document. pagination can be accomplished through emitting multiple files or by assigning one layer to each page. [!long term] 397 679 * [*groff]: the most important output backend, rivalling [*html]. will allow the document to be typeset in a wide variety of formats, including PDF and manpage. [!short term] 398 680 * [*gemtext]: essentially a downrezzing of cortav to make it readable to Gemini clients 681 +* [*ast]: produces a human- and/or machine-readable dump of the document's syntax tree, to aid in debugging or for interoperation with systems that do not support [`cortav] directly. mode [`ast:repr] will allow selecting formats for the dump. [`ast:rel] can be [`tree] (the default) to emit a hierarchical representation, or [`flat] to emit an array of nodes that convey hierarchy [^flatdoc by naming one another], rather than being placed inside one another. [`tree] is easier for humans to parse; [`flat] is easier for computers. origin information can be included for each node with the flag [`ast:debug-syms], but be aware this will greatly increase file size. 682 +** [`tabtree] [!(default)]: a hierarchical tree view, with the number of tabs preceding an item showing its depth in the tree 683 +** [`sexp] 684 +** [`binary]: emit a raw binary format that is easier for programs to read. maybe an lmdb or cdb file? 685 +** [`json] 686 + 687 + flatdoc: ~~~flat sexp example output [scheme]~~~ 688 + (nodes 689 + (section (id . "section1") 690 + (anchor "introduction") 691 + (kind . "ordinary") 692 + (label . "section1-heading") 693 + (nodes 694 + "section1-heading" 695 + "para1" 696 + "para2" 697 + "hzrule" 698 + "para3")) 699 + (section (id . "section2") 700 + (kind . "ordinary") 701 + (label . "section2-heading") 702 + (nodes 703 + "para4" 704 + "hzrule" 705 + "para5" 706 + "list1")) 707 + (block list (id . "list1") 708 + (kind . 
"ordered") 709 + (nodes 710 + "para6" 711 + "list2" 712 + "para7")) 713 + (block list (id . "list2") 714 + (kind . "unordered") 715 + (nodes 716 + "para8" 717 + "para9" 718 + "para10")) 719 + (block para (id . "para1") 720 + (nodes "text1" "format1" "text3" "footnote1" "text4")) 721 + (block label (id . "section1-heading") (nodes "section1-heading-text")) 722 + (text (id . "section1-heading-text") "Contemplating the Anathema") 723 + (text (id . "text1") 724 + "Disquieting information has recently been disclosed to virtual journalists of the Giedi Prime infomatrix by sources close to the Hyperion Entity regarding the catastrophic Year of Schisms and the unidentified agents believed to be responsible for memetically engineering the near-collapse of the Church Galactic.") 725 + (span format (id . "format1") 726 + (style . "emph") 727 + (nodes . "text2")) 728 + (text (id . "text2") "Curiously,") 729 + (text (id . "text3") "his Cyber-Holiness") 730 + (text (id . "footnote1-caption-text") "Pope Chewbacca III") 731 + (span footnote (id . "footnote1") 732 + (note . "footnote1-text") 733 + (ref . "papal-disclaimer") 734 + (nodes 735 + "footnote1-caption-text")) 736 + (text (id . "text4") "has thus far had little to say on the matter, provoking rampant speculation among the faithful.") 737 + (footnote-def (id . "footnote1-def") 738 + (nodes "footnote1-text") 739 + (text (id . 
"footnote1-text") "Currently recognized as legitimate successor to Peter of Terra by 2,756 sects, rejected by 678 of mostly Neo-Lutheran origin, and decried as an antipope by 73, most notably Pope Peter II of Centaurus Secundus, leader of the ongoing relativistic crusade against star systems owned by Microsoft.") 740 + ;;; snip ;;; 741 + (document 742 + (nodes 743 + "section1" "section2"))) 744 + ~~~ 399 745 400 746 some formats may eventually warrant their own renderer, but are not a priority: 401 747 * [*text]: cortav source files are already plain text, but a certain amount of layout could be done using ascii art. 402 748 * [*ansi]: emit sequences of ANSI escape codes to lay out a document in a terminal-friendly way 403 749 * [*tex]: TeX is an unholy abomination and i neither like nor use it, but lots of people do and if cortav ever catches on, a TeX backend should probably be written eventually. 404 750 405 751 PDF is not on either list because it's a nightmarish mess of a format and groff, which is installed on most linux systems already, can easily generate PDFs 406 752 407 753 ### LCH support 408 754 right now, the use of color in the HTML renderer is very unsatisfactory. the accent mechanism operates on the basis of the CSS HSL function, which is not perceptually uniform; different hues will present different mixes of brightness and some (yellows?) may be ugly or unreadable. 409 755 410 756 the ideal solution would be to simply switch to using LCH based colors. unfortunately, only Safari actually supports the LCH color function right now, and it's unlikely (unless Lea Verou and her husband manage to work a miracle) that Colors Level 4 is going to be implemented very widely any time soon. 411 757 412 -this leaves us in an awkward position. we can of course do the math ourselves, working in LCH to implement the internal [$@tone] macro, and then "converting" these colors to HSL. 
unfortunately, you can't actually convert from LCH to HSL; it's like converting from pounds to kilograms. LCH can represent any color the human visual system can perceive; sRGB can't, and CSS HSL is implemented in sRGB. however, we could at least approximate something that would allow for perceptually uniform brightness, which would be an improvement, and this is probably the direction to go in, unless a miracle occurs and [$lch()] or [$color()] pop up in Blink. 758 +this leaves us in an awkward position. we can of course do the math ourselves, working in LCH to implement the internal [`@tone] macro, and then "converting" these colors to HSL. unfortunately, you can't actually convert from LCH to HSL; it's like converting from pounds to kilograms. LCH can represent any color the human visual system can perceive; sRGB can't, and CSS HSL is implemented in sRGB. however, we could at least approximate something that would allow for perceptually uniform brightness, which would be an improvement, and this is probably the direction to go in, unless a miracle occurs and [`lch()] or [`color()] pop up in Blink. 413 759 414 760 it may be possible to do a more reasonable job of handling colors in the postscript and TeX outputs. unsure about SVG but i assume it suffers the same problems HTML/CSS do. does groff even support color?? 415 761 416 762 ### intent files 417 -there's currently no standard way to describe the intent and desired formatting of a document besides placing pragmas in the source file itself. this is extremely suboptimal, as when generating collections of documents, it's ideal to be able to keep all formatting information in one place. users should also be able to specify their own styling overrides that describe the way they prefer to read [$cortav] files, especially for uses like gemini or gopher integration. 763 +there's currently no standard way to describe the intent and desired formatting of a document besides placing pragmata in the source file itself. 
this is extremely suboptimal, as when generating collections of documents, it's ideal to be able to keep all formatting information in one place. users should also be able to specify their own styling overrides that describe the way they prefer to read [`cortav] files, especially for uses like gemini or gopher integration. 764 + 765 +at some point soon [`cortav] needs to address this by adding intent files that can be activated from outside the source file, such as with a command line flag or a configuration file setting. these will probably consist of lines that are interpreted as pragmata. in addition to the standard intent format however, individual implementations should feel free to provide their own ways to provide intent metadata; e.g. the reference implementation, which has a lua interpreter available, should be able to take a lua script that runs after the parse stage and generates . this will be particularly useful for the end-user who wishes to specify a particular format she likes reading her files in without forcing that format on everyone she sends the compiled document to, as it will be able to interrogate the document and make intelligent decisions about what pragmata to apply. 418 766 419 -at some point soon [$cortav] needs to address this by adding intent files that can be activated from outside the source file, such as with a command line flag or a configuration file setting. these will probably consist of lines that are interpreted as pragmata. in addition to the standard intent format however, individual implementations should feel free to provide their own ways to provide intent metadata; e.g. the reference implementation, which has a lua interpreter available, should be able to take a lua script that runs after the parse stage and generates . 
this will be particularly useful for the end-user who wishes to specify a particular format she likes reading her files in without forcing that format on everyone she sends the compiled document to, as it will be able to interrogate the document and make intelligent decisions about what pragmata to apply. 767 +intent files should also be able to define [>rsrc resources], [>ctxvar context variables], and macros.
Modified cortav.lua from [eb3cc08f95] to [028f351fed].
2 2 -- ~ lexi hale <lexi@hale.su> 3 3 -- © AGPLv3 4 4 -- ? reference implementation of the cortav document language 5 5 6 6 local ss = require 'sirsem' 7 7 -- aliases for commonly used sirsem funcs 8 8 local startswith = ss.str.begins 9 -local eachcode = ss.str.enc.utf8.each 10 9 local dump = ss.dump 11 10 local declare = ss.declare 12 11 13 12 -- make this module available to require() when linked into a lua bytecode program with luac 14 13 local ct = ss.namespace 'cortav' 15 14 ct.info = { 16 15 version = ss.version {0,1; 'devel'}; ................................................................................ 81 80 end); 82 81 cli = ss.exnkind 'command line parse error'; 83 82 mode = ss.exnkind('bad mode', function(msg, ...) 84 83 return string.format("mode “%s” "..msg, ...) 85 84 end); 86 85 unimpl = ss.exnkind 'feature not implemented'; 87 86 ext = ss.exnkind 'extension error'; 87 + enc = ss.exnkind('encoding error', function(msg, ...) 88 + return string.format('[%s]' .. msg, ...) 89 + end); 88 90 } 89 91 90 92 ct.ctx = declare { 91 93 mk = function(src) return {src = src} end; 92 94 ident = 'context'; 93 95 cast = { 94 96 string = function(me) ................................................................................ 112 114 table.insert(self.sec.blocks,block) 113 115 return block 114 116 end; 115 117 ref = function(self,id) 116 118 if not id:find'%.' 
then 117 119 local rid = self.sec.refs[id] 118 120 if self.sec.refs[id] then 119 - return self.sec.refs[id] 121 + return self.sec.refs[id], id, self.sec 120 122 else self:fail("no such ref %s in current section", id or '') end 121 123 else 122 124 local sec, ref = string.match(id, "(.-)%.(.+)") 123 125 local s = self.doc.sections[sec] 124 126 if s then 125 127 if s.refs[ref] then 126 - return s.refs[ref] 128 + return s.refs[ref], ref, sec 127 129 else self:fail("no such ref %s in section %s", ref, sec) end 128 130 else self:fail("no such section %s", sec) end 129 131 end 130 132 end 131 133 }; 132 134 } 133 135 ................................................................................ 217 219 meta = {}; 218 220 vars = {}; 219 221 ext = { 220 222 inhibit = {}; 221 223 need = {}; 222 224 use = {}; 223 225 }; 226 + enc = ss.str.enc.utf8; 224 227 } end; 225 228 construct = function(me) 226 229 me.docjob = ct.ext.job('doc', me, nil) 227 230 end; 228 231 } 229 232 230 233 -- FP helper functions ................................................................................ 397 400 398 401 -- renderer engines 399 402 function ct.render.html(doc, opts) 400 403 local doctitle = opts['title'] 401 404 local f = string.format 402 405 local ids = {} 403 406 local canonicalID = {} 404 - local function getSafeID(obj) 407 + local function getSafeID(obj,pfx) 408 + pfx = pfx or '' 405 409 if canonicalID[obj] then 406 410 return canonicalID[obj] 407 - elseif obj.id and ids[obj.id] then 411 + elseif obj.id and ids[pfx .. obj.id] then 412 + local objid = pfx .. obj.id 408 413 local newid 409 414 local i = 1 410 - repeat newid = obj.id .. string.format('-%x', i) 415 + repeat newid = objid .. 
string.format('-%x', i) 411 416 i = i + 1 until not ids[newid] 412 417 ids[newid] = obj 413 418 canonicalID[obj] = newid 414 419 return newid 415 420 else 416 421 local cid = obj.id 417 422 if not cid then 418 423 local i = 1 419 - repeat cid = string.format('x-%x', i) 424 + repeat cid = string.format('%sx-%x', pfx, i) 420 425 i = i + 1 until not ids[cid] 421 426 end 422 427 ids[cid] = obj 423 428 canonicalID[obj] = cid 424 429 return cid 425 430 end 426 431 end 427 432 433 + local footnotes = {} 434 + local footnotecount = 0 435 + 428 436 local langsused = {} 429 437 local langpairs = { 430 438 lua = { color = 0x9377ff }; 431 439 terra = { color = 0xff77c8 }; 432 440 c = { name = 'C', color = 0x77ffe8 }; 433 441 html = { color = 0xfff877 }; 434 442 scheme = { color = 0x77ff88 }; 435 443 lisp = { color = 0x77ff88 }; 436 444 fortran = { color = 0xff779a }; 437 445 python = { color = 0xffd277 }; 438 - python = { color = 0xcdd6ff }; 446 + ruby = { color = 0xcdd6ff }; 439 447 } 440 448 441 449 local stylesets = { 450 + footnote = [[ 451 + div.footnote { 452 + font-family: 90%; 453 + display: none; 454 + grid-template-columns: 1em 1fr min-content; 455 + grid-template-rows: 1fr min-content; 456 + position: fixed; 457 + padding: 1em; 458 + background: @tone(0.05); 459 + border: black; 460 + margin:auto; 461 + } 462 + div.footnote:target { display:grid; } 463 + @media screen { 464 + div.footnote { 465 + left: 10em; 466 + right: 10em; 467 + max-width: calc(@width + 2em); 468 + max-height: 30vw; 469 + bottom: 1em; 470 + } 471 + } 472 + @media print { 473 + div.footnote { 474 + position: relative; 475 + } 476 + div.footnote:first-of-type { 477 + border-top: 1px solid black; 478 + } 479 + } 480 + 481 + div.footnote > a[href="#0"]{ 482 + grid-row: 2/3; 483 + grid-column: 3/4; 484 + display: block; 485 + padding: 0.2em 0.7em; 486 + text-align: center; 487 + text-decoration: none; 488 + background: @tone(0.2); 489 + color: @tone(1); 490 + border: 1px solid black; 491 + 
margin-top: 0.6em; 492 + -webkit-user-select: none; 493 + -ms-user-select: none; 494 + user-select: none; 495 + -webkit-user-drag: none; 496 + user-drag: none; 497 + } 498 + div.footnote > a[href="#0"]:hover { 499 + background: @tone(0.3); 500 + color: @tone(2); 501 + } 502 + div.footnote > a[href="#0"]:active { 503 + background: @tone(0.05); 504 + color: @tone(0.4); 505 + } 506 + @media print { 507 + div.footnote > a[href="#0"]{ 508 + display:none; 509 + } 510 + } 511 + div.footnote > div.number { 512 + text-align:right; 513 + grid-row: 1/2; 514 + grid-column: 1/2; 515 + } 516 + div.footnote > div.text { 517 + grid-row: 1/2; 518 + grid-column: 2/4; 519 + padding-left: 1em; 520 + overflow-y: scroll; 521 + } 522 + ]]; 442 523 header = [[ 524 + body { padding: 0 2.5em !important } 443 525 h1,h2,h3,h4,h5,h6 { border-bottom: 1px solid @tone(0.7); } 444 526 h1 { font-size: 200%; border-bottom-style: double !important; border-bottom-width: 3px !important; margin: 0em -1em; } 445 527 h2 { font-size: 130%; margin: 0em -0.7em; } 446 528 h3 { font-size: 110%; margin: 0em -0.5em; } 447 529 h4 { font-size: 100%; font-weight: normal; margin: 0em -0.2em; } 448 530 h5 { font-size: 90%; font-weight: normal; } 449 531 h6 { font-size: 80%; font-weight: normal; } ................................................................................ 
490 572 section:target > :is(h1,h2,h3,h4,h5,h6) { 491 573 492 574 } 493 575 ]]; 494 576 paragraph = [[ 495 577 p { 496 578 margin: 0.7em 0; 579 + text-align: justify; 497 580 } 498 581 section { 499 582 margin: 1.2em 0; 500 583 } 501 584 section:first-child { margin-top: 0; } 502 585 ]]; 503 586 accent = [[ 504 - body { background: @bg; color: @fg } 505 - a[href] { 506 - color: @tone(0.7 30); 507 - text-decoration-color: @tone/0.4(0.7 30); 587 + @media screen { 588 + body { background: @bg; color: @fg } 589 + a[href] { 590 + color: @tone(0.7 30); 591 + text-decoration-color: @tone/0.4(0.7 30); 592 + } 593 + a[href]:hover { 594 + color: @tone(0.9 30); 595 + text-decoration-color: @tone/0.7(0.7 30); 596 + } 597 + h1 { color: @tone(2); } 598 + h2 { color: @tone(1.5); } 599 + h3 { color: @tone(1.2); } 600 + h4 { color: @tone(1); } 601 + h5,h6 { color: @tone(0.8); } 508 602 } 509 - a[href]:hover { 510 - color: @tone(0.9 30); 511 - text-decoration-color: @tone/0.7(0.7 30); 603 + @media print { 604 + a[href] { 605 + text-decoration: none; 606 + color: black; 607 + font-weight: bold; 608 + } 609 + h1,h2,h3,h4,h5,h6 { 610 + border-bottom: 1px black; 611 + } 512 612 } 513 - h1 { color: @tone(2); } 514 - h2 { color: @tone(1.5); } 515 - h3 { color: @tone(1.2); } 516 - h4 { color: @tone(1); } 517 - h5,h6 { color: @tone(0.8); } 518 613 ]]; 614 + aside = [[ 615 + section > aside { 616 + text-align: justify; 617 + margin: 0 1.5em; 618 + padding: 0.5em 0.8em; 619 + background: @tone(0.05); 620 + font-size: 90%; 621 + border-left: 5px solid @tone(0.2 15); 622 + border-right: 5px solid @tone(0.2 15); 623 + } 624 + section > aside p { 625 + margin: 0; 626 + margin-top: 0.6em; 627 + } 628 + section > aside p:first-child { 629 + margin: 0; 630 + } 631 + ]]; 519 632 code = [[ 520 633 code { 521 - background: @fg; 634 + display: inline-block; 635 + background: @tone(0.9); 522 636 color: @bg; 523 637 font-family: monospace; 524 638 font-size: 90%; 525 639 padding: 3px 5px; 526 640 } 641 + 
]]; 642 + var = [[ 643 + var { 644 + font-style: italic; 645 + font-family: monospace; 646 + color: @tone(0.7); 647 + } 648 + code var { 649 + color: @tone(0.25); 650 + } 651 + ]]; 652 + math = [[ 653 + span.equation { 654 + display: inline-block; 655 + background: @tone(0.08); 656 + color: @tone(2); 657 + padding: 0.1em 0.3em; 658 + border: 1px solid @tone(0.5); 659 + } 527 660 ]]; 528 661 abbr = [[ 529 662 abbr[title] { cursor: help; } 530 663 ]]; 531 664 editors_markup = [[]]; 532 665 block_code_listing = [[ 533 - section > figure.listing { 666 + figure.listing { 534 667 font-family: monospace; 535 668 background: @tone(0.05); 536 669 color: @fg; 537 670 padding: 0; 538 671 margin: 0.3em 0; 539 672 counter-reset: line-number; 540 673 position: relative; 541 674 border: 1px solid @fg; 542 675 } 543 - section > figure.listing>div { 676 + figure.listing>div { 544 677 white-space: pre-wrap; 678 + tab-size: 3; 679 + -moz-tab-size: 3; 545 680 counter-increment: line-number; 546 681 text-indent: -2.3em; 547 682 margin-left: 2.3em; 548 683 } 549 - section > figure.listing>:is(div,hr)::before { 684 + figure.listing>:is(div,hr)::before { 550 685 width: 1.0em; 551 686 padding: 0.2em 0.4em; 552 687 text-align: right; 553 688 display: inline-block; 554 689 background-color: @tone(0.2); 555 690 border-right: 1px solid @fg; 556 691 content: counter(line-number); 557 692 margin-right: 0.3em; 558 693 } 559 - section > figure.listing>hr::before { 694 + figure.listing>hr::before { 560 695 color: transparent; 561 696 padding-top: 0; 562 697 padding-bottom: 0; 563 698 } 564 - section > figure.listing>div::before { 699 + figure.listing>div::before { 565 700 color: @fg; 566 701 } 567 - section > figure.listing>div:last-child::before { 702 + figure.listing>div:last-child::before { 568 703 padding-bottom: 0.5em; 569 704 } 570 - section > figure.listing>figcaption:first-child { 705 + figure.listing>figcaption:first-child { 571 706 border: none; 572 707 border-bottom: 1px solid @fg; 573 
708 } 574 - section > figure.listing>figcaption::after { 709 + figure.listing>figcaption::after { 575 710 display: block; 576 711 float: right; 577 712 font-weight: normal; 578 713 font-style: italic; 579 714 font-size: 70%; 580 715 padding-top: 0.3em; 581 716 } 582 - section > figure.listing>figcaption { 717 + figure.listing>figcaption { 583 718 font-family: sans-serif; 584 719 font-size: 120%; 585 720 padding: 0.2em 0.4em; 586 721 border: none; 587 722 color: @tone(2); 588 723 } 589 - section > figure.listing > hr { 724 + figure.listing > hr { 590 725 border: none; 591 726 margin: 0; 592 727 height: 0.7em; 593 728 counter-increment: line-number; 594 729 } 595 730 ]]; 596 731 } ................................................................................ 604 739 stylesets = stylesets; 605 740 stylesets_active = stylesNeeded; 606 741 obj_htmlid = getSafeID; 607 742 -- remaining fields added later 608 743 } 609 744 610 745 local renderJob = doc:job('render_html', nil, render_state_handle) 746 + doc.stage.job = renderJob; 611 747 612 748 local runhook = function(h, ...) 613 749 return renderJob:hook(h, render_state_handle, ...) 614 750 end 615 751 616 - local function getSpanRenderers(procs) 752 + local tagproc do 753 + local elt = function(t,attrs) 754 + return f('<%s%s>', t, 755 + attrs and ss.reduce(function(a,b) return a..b end, '', 756 + ss.map(function(v,k) 757 + if v == true 758 + then return ' '..k 759 + elseif v then return f(' %s="%s"', k, v) 760 + end 761 + end, attrs)) or '') 762 + end 763 + 764 + tagproc = { 765 + toTXT = { 766 + tag = function(t,a,v) return v end; 767 + elt = function(t,a) return '' end; 768 + catenate = table.concat; 769 + }; 770 + toIR = { 771 + tag = function(t,a,v,o) return { 772 + tag = t, attrs = a; 773 + nodes = type(v) == 'string' and {v} or v, src = o 774 + } end; 775 + 776 + elt = function(t,a,o) return { 777 + tag = t, attrs = a, src = o 778 + } end; 779 + 780 + catenate = function(...) return ... 
end; 781 + }; 782 + toHTML = { 783 + elt = elt; 784 + tag = function(t,attrs,body) 785 + return f('%s%s</%s>', elt(t,attrs), body, t) 786 + end; 787 + catenate = table.concat; 788 + }; 789 + } 790 + end 791 + 792 + local function getBaseRenderers(procs, span_renderers) 617 793 local tag, elt, catenate = procs.tag, procs.elt, procs.catenate 618 794 local htmlDoc = function(title, head, body) 619 795 return [[<!doctype html>]] .. tag('html',nil, 620 796 tag('head', nil, 621 797 elt('meta',{charset = 'utf-8'}) .. 622 798 (title and tag('title', nil, title) or '') .. 623 799 (head or '')) .. 624 800 tag('body', nil, body or '')) 625 801 end 626 802 627 - local span_renderers = {} 628 803 local function htmlSpan(spans, block, sec) 629 804 local text = {} 630 805 for k,v in pairs(spans) do 631 806 if type(v) == 'string' then 632 - table.insert(text,(v:gsub('[<>&"]', 633 - function(x) 807 + v=v:gsub('[<>&"]', function(x) 634 808 return string.format('&#%02u;', string.byte(x)) 635 - end))) 809 + end) 810 + for fn, ext in renderJob:each('hook','render_html_sanitize') do 811 + v = fn(renderJob:delegate(ext), v) 812 + end 813 + table.insert(text,v) 636 814 else 637 - table.insert(text, span_renderers[v.kind](v, block, sec)) 815 + table.insert(text, (span_renderers[v.kind](v, block, sec))) 638 816 end 639 817 end 640 818 return table.concat(text) 641 819 end 820 + return {htmlDoc=htmlDoc, htmlSpan=htmlSpan} 821 + end 822 + 823 + local spanparse = function(...) 824 + local s = ct.parse_span(...) 825 + doc.docjob:hook('meddle_span', s) 826 + return s 827 + end 828 + 829 + local cssRulesFor = {} 830 + local function getSpanRenderers(procs) 831 + local tag, elt, catenate = procs.tag, procs.elt, procs.catenate 832 + local span_renderers = {} 833 + local plainrdr = getBaseRenderers(tagproc.toTXT, span_renderers) 834 + local htmlSpan = getBaseRenderers(procs, span_renderers).htmlSpan 642 835 643 836 function span_renderers.format(sp,...) 
644 - local tags = { strong = 'strong', emph = 'em', strike = 'del', insert = 'ins', literal = 'code' } 837 + local tags = { strong = 'strong', emph = 'em', strike = 'del', insert = 'ins', literal = 'code', variable = 'var'} 645 838 if sp.style == 'literal' and not opts['fossil-uv'] then 646 839 stylesNeeded.code = true 647 - end 648 - if sp.style == 'del' or sp.style == 'ins' then 840 + elseif sp.style == 'strike' or sp.style == 'insert' then 649 841 stylesNeeded.editors_markup = true 842 + elseif sp.style == 'variable' then 843 + stylesNeeded.var = true 650 844 end 651 845 return tag(tags[sp.style],nil,htmlSpan(sp.spans,...)) 652 846 end 653 847 654 - function span_renderers.term(t,b,s) 848 + function span_renderers.deref(t,b,s) 655 849 local r = b.origin:ref(t.ref) 656 850 local name = t.ref 657 851 if name:find'%.' then name = name:match '^[^.]*%.(.+)$' end 658 - if type(r) ~= 'string' then 659 - b.origin:fail('%s is an object, not a reference', t.ref) 852 + if type(r) == 'string' then 853 + stylesNeeded.abbr = true 854 + return tag('abbr',{title=r},next(t.spans) and htmlSpan(t.spans,b,s) or name) 660 855 end 661 - stylesNeeded.abbr = true 662 - return tag('abbr',{title=r},next(t.spans) and htmlSpan(t.spans,b,s) or name) 663 - end 664 - 665 - function span_renderers.macro(m,b,s) 666 - local r = b.origin:ref(m.macro) 667 - if type(r) ~= 'string' then 668 - b.origin:fail('%s is an object, not a reference', t.ref) 856 + if r.kind == 'resource' then 857 + local rid = getSafeID(r, 'res-') 858 + if r.class == 'image' then 859 + if not cssRulesFor[r] then 860 + local css = prepcss(string.format([[ 861 + section p > .%s { 862 + } 863 + ]], rid)) 864 + stylesets[r] = css 865 + cssRulesFor[r] = css 866 + stylesNeeded[r] = true 867 + end 868 + return tag('div',{class=rid},catenate{'blaah'}) 869 + elseif r.class == 'video' then 870 + local vid = {} 871 + return tag('video',nil,vid) 872 + elseif r.class == 'font' then 873 + b.origin:fail('fonts cannot be instantiated, use 
%font directive instead') 874 + end 875 + else 876 + b.origin:fail('%s is not an object that can be embedded', t.ref) 669 877 end 670 - local mctx = b.origin:clone() 671 - mctx.invocation = m 672 - return htmlSpan(ct.parse_span(r, mctx),b,s) 673 878 end 674 879 675 880 function span_renderers.var(v,b,s) 676 881 local val 677 882 if v.pos then 678 883 if not v.origin.invocation then 679 884 v.origin:fail 'positional arguments can only be used in a macro invocation' ................................................................................ 686 891 end 687 892 if v.raw then 688 893 return val 689 894 else 690 895 return htmlSpan(ct.parse_span(val, v.origin), b, s) 691 896 end 692 897 end 898 + 899 + function span_renderers.raw(v,b,s) 900 + return htmlSpan(v.spans, b, s) 901 + end 693 902 694 903 function span_renderers.link(sp,b,s) 695 904 local href 696 905 if b.origin.doc.sections[sp.ref] then 697 906 href = '#' .. sp.ref 698 907 else 699 908 if sp.addr then href = sp.addr else ................................................................................ 701 910 if type(r) == 'table' then 702 911 href = '#' .. 
getSafeID(r) 703 912 else href = r end 704 913 end 705 914 end 706 915 return tag('a',{href=href},next(sp.spans) and htmlSpan(sp.spans,b,s) or href) 707 916 end 708 - return { 709 - span_renderers = span_renderers; 710 - htmlSpan = htmlSpan; 711 - htmlDoc = htmlDoc; 712 - } 917 + 918 + span_renderers['line-break'] = function(sp,b,s) 919 + return elt('br') 920 + end 921 + 922 + function span_renderers.macro(m,b,s) 923 + local macroname = plainrdr.htmlSpan( 924 + ct.parse_span(m.macro, b.origin), b,s) 925 + local r = b.origin:ref(macroname) 926 + if type(r) ~= 'string' then 927 + b.origin:fail('%s is an object, not a reference', t.ref) 928 + end 929 + local mctx = b.origin:clone() 930 + mctx.invocation = m 931 + return htmlSpan(ct.parse_span(r, mctx),b,s) 932 + end 933 + function span_renderers.math(m,b,s) 934 + stylesNeeded.math = true 935 + return tag('span',{class='equation'},htmlSpan(m.spans, b, s)) 936 + end; 937 + function span_renderers.directive(d,b,s) 938 + if d.ext == 'html' then 939 + elseif b.origin.doc:allow_ext(d.ext) then 940 + elseif d.crit then 941 + b.origin:fail('critical extension %s unavailable', d.ext) 942 + elseif d.failthru then 943 + return htmlSpan(d.spans, b, s) 944 + end 945 + end 946 + function span_renderers.footnote(f,b,s) 947 + stylesNeeded.footnote = true 948 + local source, sid, ssec = b.origin:ref(f.ref) 949 + local cnc = getSafeID(ssec) .. ' ' .. sid 950 + local fn 951 + if footnotes[cnc] then 952 + fn = footnotes[cnc] 953 + else 954 + footnotecount = footnotecount + 1 955 + fn = {num = footnotecount, origin = b.origin, fnid=cnc, source = source} 956 + fn.id = getSafeID(fn) 957 + footnotes[cnc] = fn 958 + end 959 + return tag('a', {href='#'..fn.id}, htmlSpan(f.spans) .. 
960 + tag('sup',nil, fn.num)) 961 + end 962 + 963 + return span_renderers 713 964 end 714 - 715 965 716 966 local function getBlockRenderers(procs, sr) 717 967 local tag, elt, catenate = procs.tag, procs.elt, procs.catenate 718 968 local null = function() return catenate{} end 719 969 720 970 local block_renderers = { 721 971 anchor = function(b,s) ................................................................................ 766 1016 if #l > 0 then 767 1017 return tag('div',nil,sr.htmlSpan(l, b, s)) 768 1018 else 769 1019 return elt('hr') 770 1020 end 771 1021 end, b.lines) 772 1022 if b.title then 773 - table.insert(nodes,1,tag('figcaption',nil,sr.htmlSpan(b.title))) 1023 + table.insert(nodes,1, tag('figcaption',nil,sr.htmlSpan(b.title))) 774 1024 end 775 1025 if b.lang then langsused[b.lang] = true end 776 1026 return tag('figure', {class='listing', lang=b.lang, id=b.id and getSafeID(b)}, catenate(nodes)) 777 1027 end; 778 1028 aside = function(b,s) 779 1029 local bn = {} 780 - for _,v in pairs(b.lines) do 781 - table.insert(bn, tag('p', {}, sr.htmlSpan(v, b, s))) 1030 + stylesNeeded.aside = true 1031 + if #b.lines == 1 then 1032 + bn[1] = sr.htmlSpan(b.lines[1], b, s) 1033 + else 1034 + for _,v in pairs(b.lines) do 1035 + table.insert(bn, tag('p', {}, sr.htmlSpan(v, b, s))) 1036 + end 782 1037 end 783 1038 return tag('aside', {}, bn) 784 1039 end; 785 - ['break'] = function() --[[nop]] end; 1040 + ['break'] = function() -- HACK 1041 + -- lists need to be rewritten to work like asides 1042 + return ''; 1043 + end; 786 1044 } 787 1045 return block_renderers; 788 1046 end 789 1047 790 1048 local function getRenderers(procs) 791 - local r = getSpanRenderers(procs) 1049 + local span_renderers = getSpanRenderers(procs) 1050 + local r = getBaseRenderers(procs,span_renderers) 792 1051 r.block_renderers = getBlockRenderers(procs, r) 793 1052 return r 794 - end 795 - 796 - local tagproc do 797 - local elt = function(t,attrs) 798 - return f('<%s%s>', t, 799 - attrs and 
ss.reduce(function(a,b) return a..b end, '', 800 - ss.map(function(v,k) 801 - if v == true 802 - then return ' '..k 803 - elseif v then return f(' %s="%s"', k, v) 804 - end 805 - end, attrs)) or '') 806 - end 807 - 808 - tagproc = { 809 - toTXT = { 810 - tag = function(t,a,v) return v end; 811 - elt = function(t,a) return '' end; 812 - catenate = table.concat; 813 - }; 814 - toIR = { 815 - tag = function(t,a,v,o) return { 816 - tag = t, attrs = a; 817 - nodes = type(v) == 'string' and {v} or v, src = o 818 - } end; 819 - 820 - elt = function(t,a,o) return { 821 - tag = t, attrs = a, src = o 822 - } end; 823 - 824 - catenate = function(...) return ... end; 825 - }; 826 - toHTML = { 827 - elt = elt; 828 - tag = function(t,attrs,body) 829 - return f('%s%s</%s>', elt(t,attrs), body, t) 830 - end; 831 - catenate = table.concat; 832 - }; 833 - } 834 1053 end 835 1054 836 1055 local astproc = { 837 1056 toHTML = getRenderers(tagproc.toHTML); 838 1057 toTXT = getRenderers(tagproc.toTXT); 839 1058 toIR = { }; 840 1059 } ................................................................................ 
853 1072 local ir = {} 854 1073 local dr = astproc.toHTML -- default renderers 855 1074 local plainr = astproc.toTXT 856 1075 local irBlockRdrs = astproc.toIR.block_renderers; 857 1076 858 1077 render_state_handle.ir = ir; 859 1078 1079 + local function renderBlocks(blocks, irs) 1080 + for i, block in ipairs(blocks) do 1081 + local rd 1082 + if irBlockRdrs[block.kind] then 1083 + rd = irBlockRdrs[block.kind](block,sec) 1084 + else 1085 + local rdr = renderJob:proc('render',block.kind,'html') 1086 + if rdr then 1087 + rd = rdr({ 1088 + state = render_state_handle; 1089 + tagproc = tagproc.toIR; 1090 + astproc = astproc.toIR; 1091 + }, block, sec) 1092 + end 1093 + end 1094 + if rd then 1095 + if opts['heading-anchors'] and block == sec.heading_node then 1096 + stylesNeeded.headingAnchors = true 1097 + table.insert(rd.nodes, ' ') 1098 + table.insert(rd.nodes, { 1099 + tag = 'a'; 1100 + attrs = {href = '#' .. irs.attrs.id, class='anchor'}; 1101 + nodes = {type(opts['heading-anchors'])=='string' and opts['heading-anchors'] or '§'}; 1102 + }) 1103 + end 1104 + if rd.src and rd.src.origin.lang then 1105 + if not rd.attrs then rd.attrs = {} end 1106 + rd.attrs.lang = rd.src.origin.lang 1107 + end 1108 + table.insert(irs.nodes, rd) 1109 + runhook('ir_section_node_insert', rd, irs, sec) 1110 + end 1111 + end 1112 + end 860 1113 runhook('ir_assemble', ir) 861 1114 for i, sec in ipairs(doc.secorder) do 862 1115 if doctitle == nil and sec.depth == 1 and sec.heading_node then 863 1116 doctitle = astproc.toTXT.htmlSpan(sec.heading_node.spans, sec.heading_node, sec) 864 1117 end 865 1118 local irs 866 1119 if sec.kind == 'ordinary' then 867 1120 if #(sec.blocks) > 0 then 868 1121 irs = {tag='section',attrs={id = getSafeID(sec)},nodes={}} 869 - 870 1122 runhook('ir_section_build', irs, sec) 871 - 872 - for i, block in ipairs(sec.blocks) do 873 - local rd 874 - if irBlockRdrs[block.kind] then 875 - rd = irBlockRdrs[block.kind](block,sec) 876 - else 877 - local rdr = 
renderJob:proc('render',block.kind,'html') 878 - if rdr then 879 - rd = rdr({ 880 - state = render_state_handle; 881 - tagproc = tagproc.toIR; 882 - astproc = astproc.toIR; 883 - }, block, sec) 884 - end 885 - end 886 - if rd then 887 - if opts['heading-anchors'] and block == sec.heading_node then 888 - stylesNeeded.headingAnchors = true 889 - table.insert(rd.nodes, ' ') 890 - table.insert(rd.nodes, { 891 - tag = 'a'; 892 - attrs = {href = '#' .. irs.attrs.id, class='anchor'}; 893 - nodes = {type(opts['heading-anchors'])=='string' and opts['heading-anchors'] or '§'}; 894 - }) 895 - end 896 - table.insert(irs.nodes, rd) 897 - runhook('ir_section_node_insert', rd, irs, sec) 898 - end 899 - end 1123 + renderBlocks(sec.blocks, irs) 900 1124 end 901 1125 elseif sec.kind == 'blockquote' then 902 1126 elseif sec.kind == 'listing' then 903 1127 elseif sec.kind == 'embed' then 904 1128 end 905 1129 if irs then table.insert(ir, irs) end 906 1130 end 1131 + 1132 + for _, fn in pairs(footnotes) do 1133 + local tag = tagproc.toIR.tag 1134 + local body = {nodes={}} 1135 + local ftir = {} 1136 + for l in fn.source:gmatch('([^\n]*)') do 1137 + ct.parse_line(l, fn.origin, ftir) 1138 + end 1139 + renderBlocks(ftir,body) 1140 + local note = tag('div',{class='footnote',id=fn.id}, { 1141 + tag('div',{class='number'}, tostring(fn.num)), 1142 + tag('div',{class='text'}, body.nodes), 1143 + tag('a',{href='#0'},'close') 1144 + }) 1145 + table.insert(ir, note) 1146 + end 907 1147 908 1148 -- restructure passes 909 1149 runhook('ir_restructure_pre', ir) 910 1150 911 1151 ---- list insertion pass 912 1152 local lists = {} 913 1153 for _, sec in pairs(ir) do ................................................................................ 
1033 1273 local tonespan = opts.accent and .1 or 0 1034 1274 local tbg = opts['dark-on-light'] and 1.0 - tonespan or tonespan 1035 1275 local tfg = opts['dark-on-light'] and tonespan or 1.0 - tonespan 1036 1276 if var == 'bg' then 1037 1277 return tone(tbg,nil,nil,tonumber(alpha)) 1038 1278 elseif var == 'fg' then 1039 1279 return tone(tfg,nil,nil,tonumber(alpha)) 1280 + elseif var == 'width' then 1281 + return opts['width'] or '100vw' 1040 1282 elseif var == 'tone' then 1041 1283 local l, sep, sat 1042 1284 for i=1,3 do -- 🙄 1043 1285 l,sep,sat = param:match('^%('..string.rep('([^%s]*)%s*',i)..'%)$') 1044 1286 if l then break end 1045 1287 end 1046 1288 l = ss.math.lerp(tonumber(l), tbg, tfg) ................................................................................ 1124 1366 kind = 'var'; 1125 1367 pos = pos; 1126 1368 raw = raw; 1127 1369 var = not pos and s or nil; 1128 1370 origin = c:clone(); 1129 1371 } 1130 1372 end 1373 + end 1374 + local function insert_span_directive(crit, failthru) 1375 + return function(s,c) 1376 + local args = ss.str.breakwords(d.doc.enc, s, 1) 1377 + local brksyms = map(enc.encodeUCS, { 1378 + '.', ',', ':', ';', '!', '$', '&', '^', 1379 + '/', '?', '@', '=' 1380 + }) 1381 + local brkhash = {} for _,s in pairs(brksyms) do 1382 + brkhash[s] = true 1383 + end 1384 + 1385 + local extname = '' 1386 + local sym 1387 + local cmd = '' 1388 + for ch,p in ss.str.each(c.doc.enc, args[1]) do 1389 + if sym == nil then 1390 + if brkhash[ch] then 1391 + sym = ch 1392 + else 1393 + extname = extname .. 
ch 1394 + end 1395 + elseif brkhash[ch] then 1396 + sym = sym + ch 1397 + else 1398 + cmd = cmd + ch 1399 + end 1400 + end 1401 + if cmd == '' then cmd = nil end 1402 + local spans if failthru then 1403 + spans = ct.parse_span(args[2], c) 1404 + end 1405 + return { 1406 + kind = 'directive'; 1407 + ext = extname; 1408 + cmd = cmd; 1409 + args = args; 1410 + crit = crit; 1411 + failthru = failthru; 1412 + spans = spans; 1413 + } 1414 + end 1131 1415 end 1132 1416 ct.spanctls = { 1133 1417 {seq = '!', parse = formatter 'emph'}; 1134 1418 {seq = '*', parse = formatter 'strong'}; 1135 1419 {seq = '~', parse = formatter 'strike'}; 1136 - {seq = '+', parse = formatter 'inser'}; 1420 + {seq = '+', parse = formatter 'insert'}; 1137 1421 {seq = '\\', parse = function(s, c) -- raw 1138 - return s 1139 - end}; 1140 - {seq = '$\\', parse = function(s, c) -- raw 1141 1422 return { 1142 - kind = 'format'; 1143 - style = 'literal'; 1423 + kind = 'raw'; 1144 1424 spans = {s}; 1145 1425 origin = c:clone(); 1146 1426 } 1147 1427 end}; 1148 - {seq = '$', parse = formatter 'literal'}; 1428 + {seq = '`\\', parse = function(s, c) -- raw 1429 + local o = c:clone(); 1430 + local str = '' 1431 + for c, p in ss.str.each(c.doc.enc, s) do 1432 + local q = p:esc() 1433 + if q then 1434 + str = str .. q 1435 + p.next.byte = p.next.byte + #q 1436 + else 1437 + str = str .. 
c 1438 + end 1439 + end 1440 + return { 1441 + kind = 'format'; 1442 + style = 'literal'; 1443 + spans = {{ 1444 + kind = 'raw'; 1445 + spans = {str}; 1446 + origin = o; 1447 + }}; 1448 + origin = o; 1449 + } 1450 + end}; 1451 + {seq = '`', parse = formatter 'literal'}; 1452 + {seq = '$', parse = formatter 'variable'}; 1453 + {seq = '^', parse = function(s,c) --footnotes 1454 + local r, t = s:match '^([^%s]+)%s*(.-)$' 1455 + return { 1456 + kind = 'footnote'; 1457 + ref = r; 1458 + spans = ct.parse_span(t, c); 1459 + origin = c:clone(); 1460 + } 1461 + -- TODO support for footnote sections 1462 + end}; 1463 + {seq = '=', parse = function(s,c) --math mode 1464 + local tx = { 1465 + ['%*'] = '×'; 1466 + ['/'] = '÷'; 1467 + } 1468 + for k,v in pairs(tx) do s = s:gsub(k,v) end 1469 + s=s:gsub('%^([0-9]+)', function(num) 1470 + local sup = {'⁰','¹','²','³','⁴','⁵','⁶','⁷','⁸','⁹'}; 1471 + local r = '' 1472 + for i=1,#num do 1473 + r = r .. sup[1 + (num:byte(i) - 0x30)] 1474 + end 1475 + return r 1476 + end) 1477 + local m = {s} --TODO 1478 + return { 1479 + kind = 'math'; 1480 + original = s; 1481 + spans = m; 1482 + origin = c:clone(); 1483 + }; 1484 + end}; 1149 1485 {seq = '&', parse = function(s, c) 1150 1486 local r, t = s:match '^([^%s]+)%s*(.-)$' 1151 1487 return { 1152 - kind = 'term'; 1488 + kind = 'deref'; 1153 1489 spans = (t and t ~= "") and ct.parse_span(t, c) or {}; 1154 1490 ref = r; 1155 1491 origin = c:clone(); 1156 1492 } 1157 1493 end}; 1158 1494 {seq = '^', parse = function(s, c) 1159 1495 local fn, t = s:match '^([^%s]+)%s*(.-)$' ................................................................................ 
1165 1501 } 1166 1502 end}; 1167 1503 {seq = '>', parse = insert_link}; 1168 1504 {seq = '→', parse = insert_link}; 1169 1505 {seq = '🔗', parse = insert_link}; 1170 1506 {seq = '##', parse = insert_var_ref(true)}; 1171 1507 {seq = '#', parse = insert_var_ref(false)}; 1508 + {seq = '%%', parse = function() --[[NOP]] end}; 1509 + {seq = '%!', parse = insert_span_directive(true,false)}; 1510 + {seq = '%:', parse = insert_span_directive(false,true)}; 1511 + {seq = '%', parse = insert_span_directive(false,false)}; 1172 1512 } 1173 1513 end 1174 1514 1175 1515 function ct.parse_span(str,ctx) 1176 1516 local function delimited(start, stop, s) 1177 1517 local r = { pcall(ss.str.delimit, nil, start, stop, s) } 1178 1518 if r[1] then return table.unpack(r, 2) end 1179 1519 ctx:fail(tostring(r[2])) 1180 1520 end 1181 1521 local buf = "" 1182 1522 local spans = {} 1183 1523 local function flush() 1184 1524 if buf ~= "" then 1525 + -- for fn, ext in ctx.doc.docjob:each('hook','doc_meddle_string') do 1526 + -- buf = fn(ctx.doc.docjob:delegate(ext), ctx, buf) 1527 + -- end 1185 1528 table.insert(spans, buf) 1186 1529 buf = "" 1187 1530 end 1188 1531 end 1189 1532 local skip = false 1190 - for c,p in eachcode(str) do 1191 - if skip == true then 1192 - skip = false 1193 - buf = buf .. 
c 1194 - elseif c == '\\' then 1195 - skip = true 1533 + for c,p in ss.str.each(ctx.doc.enc,str) do 1534 + local ba, ca, es = ctx.doc.enc.parse_escape(str:sub(p.byte)) 1535 + if es then 1536 + flush() 1537 + table.insert(spans, { 1538 + kind = 'raw'; 1539 + spans = {es}; 1540 + origin = ctx:clone() 1541 + }) 1542 + p.next.byte = p.next.byte + ba; 1543 + p.next.code = p.next.code + ca; 1196 1544 elseif c == '{' then 1197 1545 flush() 1198 1546 local substr, following = delimited('{','}',str:sub(p.byte)) 1199 1547 local splitstart, splitstop = substr:find'%s+' 1200 1548 local id, argstr 1201 1549 if splitstart then 1202 1550 id, argstr = substr:sub(1,splitstart-1), substr:sub(splitstop+1) ................................................................................ 1214 1562 local i = 1 1215 1563 while i <= #argstr do 1216 1564 while i<=#argstr and (argstr:sub(i,i) ~= '|' or argstr:sub(i-1,i) == '\\|') do 1217 1565 i = i + 1 1218 1566 end 1219 1567 local arg = argstr:sub(start, i == #argstr and i or i-1) 1220 1568 start = i+1 1569 + arg=arg:gsub('\\|','|') 1221 1570 table.insert(o.args, arg) 1222 1571 i = i + 1 1223 1572 end 1224 1573 end 1225 1574 1226 1575 p.next.byte = p.next.byte + following - 1 1227 1576 table.insert(spans,o) ................................................................................ 1236 1585 table.insert(spans, i.parse(substr:sub(1+#i.seq), ctx)) 1237 1586 break 1238 1587 end 1239 1588 end 1240 1589 if not found then 1241 1590 ctx:fail('no recognized control sequence in [%s]', substr) 1242 1591 end 1592 + elseif c == '\n' then 1593 + flush() 1594 + table.insert(spans,{kind='line-break',origin=ctx:clone()}) 1243 1595 else 1244 1596 buf = buf .. 
c 1245 1597 end 1246 1598 end 1247 1599 flush() 1248 1600 return spans 1249 1601 end 1250 1602 1251 1603 local function 1252 1604 blockwrap(fn) 1253 - return function(l,c,j) 1254 - local block = fn(l,c,j) 1605 + return function(l,c,j,d) 1606 + local block = fn(l,c,j,d) 1255 1607 block.origin = c:clone(); 1256 - table.insert(c.sec.blocks, block); 1608 + table.insert(d, block); 1257 1609 j:hook('block_insert', c, block, l) 1610 + if block.spans then 1611 + c.doc.docjob:hook('meddle_span', block.spans, block) 1612 + end 1258 1613 end 1259 1614 end 1260 1615 1261 1616 local insert_paragraph = blockwrap(function(l,c) 1262 1617 if l:sub(1,1) == '.' then l = l:sub(2) end 1263 1618 return { 1264 1619 kind = "paragraph"; ................................................................................ 1282 1637 if t and t ~= "" then 1283 1638 local heading = { 1284 1639 kind = "label"; 1285 1640 spans = ct.parse_span(t,c); 1286 1641 origin = s.origin; 1287 1642 captions = s; 1288 1643 } 1644 + c.doc.docjob:hook('meddle_span', heading.spans, heading) 1289 1645 table.insert(s.blocks, heading) 1290 1646 s.heading_node = heading 1291 1647 end 1292 1648 c.sec = s 1293 1649 1294 1650 j:hook('section_attach', c, s) 1295 1651 end ................................................................................ 1299 1655 c.doc.meta[key] = val 1300 1656 j:hook('metadata_set', key, val) 1301 1657 end 1302 1658 local dextctl = function(w,c) 1303 1659 local mode, exts = w(1) 1304 1660 for e in exts:gmatch '([^%s]+)' do 1305 1661 if mode == 'uses' then 1662 + c.doc.ext.use[e] = true 1306 1663 elseif mode == 'needs' then 1664 + c.doc.ext.need[e] = true 1307 1665 elseif mode == 'inhibits' then 1666 + c.doc.ext.inhibit[e] = true 1308 1667 end 1309 1668 end 1310 1669 end 1311 1670 local dcond = function(w,c) 1312 1671 local mode, cond, exp = w(2) 1313 1672 c.hide_next = mode == 'unless' 1314 1673 end; ................................................................................ 
1315 1674 ct.directives = { 1316 1675 author = dsetmeta; 1317 1676 license = dsetmeta; 1318 1677 keywords = dsetmeta; 1319 1678 desc = dsetmeta; 1320 1679 when = dcond; 1321 1680 unless = dcond; 1681 + pragma = function(w,c) 1682 + end; 1683 + lang = function(w,c) 1684 + local _, op, l = w(2) 1685 + local langstack = c.doc.stage.langstack 1686 + if op == 'is' then 1687 + langstack[math.max(1, #langstack)] = l 1688 + elseif op == 'push' then 1689 + table.insert(langstack, l) 1690 + elseif op == 'pop' then 1691 + if next(langstack) then 1692 + langstack[#langstack] = nil 1693 + end 1694 + elseif op == 'sec' then 1695 + c.sec.lang = l 1696 + else c:fail('bad language directive “%s”', op) end 1697 + c.lang = langstack[#langstack] 1698 + end; 1322 1699 expand = function(w,c) 1323 1700 local _, m = w(1) 1324 1701 if m ~= 'off' then 1325 - c.expand_next = 1 1702 + c.doc.stage.expand_next = 1 1326 1703 else 1327 - c.expand_next = 0 1704 + c.doc.stage.expand_next = 0 1328 1705 end 1329 1706 end; 1330 1707 } 1331 1708 1332 1709 local function insert_table_row(l,c,j) 1333 1710 local row = {} 1334 1711 local buf ................................................................................ 1335 1712 local flush = function() 1336 1713 if buf then 1337 1714 buf.str = buf.str:gsub('%s+$','') 1338 1715 table.insert(row, buf) 1339 1716 end 1340 1717 buf = { str = '' } 1341 1718 end 1342 - for c,p in eachcode(l) do 1719 + for c,p in ss.str.each(c.doc.enc,l) do 1343 1720 if c == '|' or c == '+' and (p.code == 1 or l:sub(p.byte-1,p.byte-1)~='\\') then 1344 1721 flush() 1345 1722 buf.header = c == '+' 1346 1723 elseif c == ':' then 1347 1724 local lst = l:sub(p.byte-#c,p.byte-#c) 1348 1725 local nxt = l:sub(p.next.byte,p.next.byte) 1349 1726 if lst == '|' or lst == '+' and l:sub(p.byte-2,p.byte-2) ~= '\\' then ................................................................................ 1371 1748 else 1372 1749 buf.str = buf.str .. 
c 1373 1750 end 1374 1751 end 1375 1752 if buf.str ~= '' then flush() end 1376 1753 for _,v in pairs(row) do 1377 1754 v.spans = ct.parse_span(v.str, c) 1755 + c.doc.docjob:hook('meddle_span', v.spans, v) 1378 1756 end 1379 1757 if #c.sec.blocks > 1 and c.sec.blocks[#c.sec.blocks].kind == 'table' then 1380 1758 local tbl = c.sec.blocks[#c.sec.blocks] 1381 1759 table.insert(tbl.rows, row) 1382 1760 j:hook('block_table_attach', c, tbl, row, l) 1383 1761 j:hook('block_table_row_insert', c, tbl, row, l) 1384 1762 else ................................................................................ 1398 1776 {seq = '¶', fn = insert_paragraph}; 1399 1777 {seq = '❡', fn = insert_paragraph}; 1400 1778 {seq = '#', fn = insert_section}; 1401 1779 {seq = '§', fn = insert_section}; 1402 1780 {seq = '+', fn = insert_table_row}; 1403 1781 {seq = '|', fn = insert_table_row}; 1404 1782 {seq = '│', fn = insert_table_row}; 1405 - {seq = '!', fn = function(l,c,j) 1406 - local last = c.sec.blocks[#c.sec.blocks] 1783 + {seq = '!', fn = function(l,c,j,d) 1784 + local last = d[#d] 1407 1785 local txt = l:match '^%s*!%s*(.-)$' 1408 1786 if (not last) or last.kind ~= 'aside' then 1409 1787 local aside = { 1410 1788 kind = 'aside'; 1411 - lines = { ct.parse_span(txt, c) } 1789 + lines = { ct.parse_span(txt, c) }; 1790 + origin = c:clone(); 1412 1791 } 1413 - c:insert(aside) 1792 + c.doc.docjob:hook('meddle_span', aside.lines[1], aside) 1793 + table.insert(d,aside) 1414 1794 j:hook('block_aside_insert', c, aside, l) 1415 1795 j:hook('block_aside_line_insert', c, aside, aside.lines[1], l) 1416 1796 j:hook('block_insert', c, aside, l) 1417 1797 else 1418 1798 local sp = ct.parse_span(txt, c) 1799 + c.doc.docjob:hook('meddle_span', sp, last) 1419 1800 table.insert(last.lines, sp) 1420 1801 j:hook('block_aside_attach', c, last, sp, l) 1421 1802 j:hook('block_aside_line_insert', c, last, sp, l) 1422 1803 end 1423 1804 end}; 1424 1805 {pred = function(s,c) return s:match'^[*:]' end, fn = 
blockwrap(function(l,c) -- list 1425 1806 local stars = l:match '^([*:]+)' ................................................................................ 1430 1811 return { 1431 1812 kind = 'list-item'; 1432 1813 depth = depth; 1433 1814 ordered = ordered; 1434 1815 spans = ct.parse_span(txt, c); 1435 1816 } 1436 1817 end)}; 1437 - {seq = '\t', fn = function(l,c,j) 1438 - local ref, val = l:match '\t+([^:]+):%s*(.*)$' 1439 - c.sec.refs[ref] = val 1440 - j:hook('section_ref_attach', c, ref, val, l) 1818 + {seq = '\t\t', fn = function(l,c,j,d) 1819 + local last = d[#d] 1820 + if (not last) or (last.kind ~= 'reference') then 1821 + c:fail('reference continuations must immediately follow a reference') 1822 + end 1823 + local str = l:match '^\t\t(.-)%s*$' 1824 + last.val = last.val .. '\n' .. str 1825 + c.sec.refs[last.key] = last.val 1441 1826 end}; 1442 - {seq = '%', fn = function(l,c,j) -- directive 1827 + {seq = '\t', fn = blockwrap(function(l,c,j,d) 1828 + local ref, val = l:match '\t+([^:]+):%s*(.*)$' 1829 + local last = d[#d] 1830 + local rsrc 1831 + if last and last.kind == 'resource' then 1832 + last.props[ref] = val 1833 + rsrc = last 1834 + elseif last and last.kind == 'reference' and last.rsrc then 1835 + last.rsrc.props[ref] = val 1836 + rsrc = last.rsrc 1837 + else 1838 + c.sec.refs[ref] = val 1839 + end 1840 + j:hook('section_ref_attach', c, ref, val, l) 1841 + return { 1842 + kind = 'reference'; 1843 + rsrc = rsrc; 1844 + key = ref; 1845 + val = val; 1846 + } 1847 + end)}; 1848 + {seq = '%', fn = function(l,c,j,d) -- directive 1443 1849 local crit, cmdline = l:match '^%%([!%%]?)%s*(.*)$' 1444 1850 local words = function(i) 1445 1851 local wds = {} 1446 1852 if i == 0 then return cmdline end 1447 1853 for w,pos in cmdline:gmatch '([^%s]+)()' do 1448 1854 table.insert(wds, w) 1449 1855 i = i - 1 1450 1856 if i == 0 then 1451 - table.insert(wds,cmdline:sub(pos)) 1857 + table.insert(wds,(cmdline:sub(pos):match('^%s*(.-)%s*$'))) 1452 1858 return 
table.unpack(wds) 1453 1859 end 1454 1860 end 1455 1861 end 1456 1862 1457 1863 local cmd, rest = words(1) 1458 1864 if ct.directives[cmd] then ................................................................................ 1459 1865 ct.directives[cmd](words,c,j) 1460 1866 elseif cmd == c.doc.stage.mode['render:format'] then 1461 1867 -- this is a directive for the renderer; insert it into the tree as is 1462 1868 local dir = { 1463 1869 kind = 'directive'; 1464 1870 critical = crit == '!'; 1465 1871 words = words; 1872 + origin = c; 1466 1873 } 1467 - c:insert(dir) 1874 + table.insert(d, dir) 1468 1875 j:hook('block_directive_render', j, c, dir) 1469 1876 elseif c.doc:allow_ext(cmd) then -- extension directives begin with their id 1470 1877 local ext = ct.ext.loaded[cmd] 1471 1878 if ext.directives then 1472 1879 local _, topcmd = words(2) 1473 1880 if ext.directives[topcmd] then 1474 1881 ext.directives[topcmd](j:delegate(ext), c, words) ................................................................................ 
1505 1912 kind = 'code'; 1506 1913 listing = { 1507 1914 kind = 'listing'; 1508 1915 lang = lang, id = id, title = title and ct.parse_span(title,c); 1509 1916 lines = {}; 1510 1917 } 1511 1918 } 1919 + if c.doc.stage.expand_next and c.doc.stage.expand_next > 0 then 1920 + c.doc.stage.expand_next = c.doc.stage.expand_next - 1 1921 + mode.expand = true 1922 + end 1512 1923 j:hook('mode_switch', c, mode) 1513 1924 c.mode = mode 1514 1925 if id then 1515 1926 if c.sec.refs[id] then c:fail('duplicate ID %s', id) end 1516 1927 c.sec.refs[id] = c.mode.listing 1517 1928 end 1518 1929 j:hook('block_insert', c, mode.listing, l) 1519 1930 return c.mode.listing; 1520 1931 end)}; 1521 1932 {pred = function(s,c) 1522 1933 if s:match '^[%-_][*_%-%s]+' then return true end 1523 1934 if startswith(s, '—') then 1524 - for c, p in eachcode(s) do 1935 + for c, p in ss.str.each(c.doc.enc,s) do 1525 1936 if ({ 1526 1937 ['—'] = true, ['-'] = true, [' '] = true; 1527 1938 ['*'] = true, ['_'] = true, ['\t'] = true; 1528 1939 })[c] ~= true then return false end 1529 1940 end 1530 1941 return true 1531 1942 end 1532 1943 end; fn = blockwrap(function() 1533 1944 return { kind = 'horiz-rule' } 1945 + end)}; 1946 + {seq='@', fn=blockwrap(function(s,c) 1947 + local id = s:match '^@%s*(.-)%s*$' 1948 + local rsrc = { 1949 + kind = 'resource'; 1950 + props = {}; 1951 + id = id; 1952 + } 1953 + if c.sec.refs[id] then 1954 + c:fail('an object with id “%s” already exists in that section',id) 1955 + else 1956 + c.sec.refs[id] = rsrc 1957 + end 1958 + return rsrc 1534 1959 end)}; 1535 1960 {fn = insert_paragraph}; 1536 1961 } 1537 1962 1538 -function ct.parse(file, src, mode) 1539 - local function 1540 - is_whitespace(cp) 1541 - return cp == 0x20 or cp == 0xe390 1963 +function ct.parse_line(l, ctx, dest) 1964 + local newspan 1965 + local job = ctx.doc.stage.job 1966 + job:hook('line_read',ctx,l) 1967 + if ctx.mode then 1968 + if ctx.mode.kind == 'code' then 1969 + if l and l:match '^~~~%s*$' then 1970 
+ job:hook('block_listing_end',ctx,ctx.mode.listing) 1971 + job:hook('mode_switch', c, nil) 1972 + ctx.mode = nil 1973 + else 1974 + -- TODO handle formatted code 1975 + local newline 1976 + if ctx.mode.expand 1977 + then newline = ct.parse_span(l, ctx) 1978 + else newline = {l} 1979 + end 1980 + table.insert(ctx.mode.listing.lines, newline) 1981 + job:hook('block_listing_newline',ctx,ctx.mode.listing,newline) 1982 + end 1983 + else 1984 + local mf = job:proc('modes', ctx.mode.kind) 1985 + if not mf then 1986 + ctx:fail('unimplemented syntax mode %s', ctx.mode.kind) 1987 + end 1988 + mf(job, ctx, l, dest) --NOTE: you are responsible for triggering the appropriate hooks if you insert anything! 1989 + end 1990 + else 1991 + if l then 1992 + local function tryseqs(seqs, ...) 1993 + for _, i in pairs(seqs) do 1994 + if ((not i.seq ) or startswith(l, i.seq)) and 1995 + ((not i.pred) or i.pred (l, ctx )) then 1996 + i.fn(l, ctx, job, dest, ...) 1997 + return true 1998 + end 1999 + end 2000 + return false 2001 + end 2002 + 2003 + if not tryseqs(ct.ctlseqs) then 2004 + local found = false 2005 + 2006 + for eb, ext, state in job:each('blocks') do 2007 + if tryseqs(eb, state) then found = true break end 2008 + end 2009 + 2010 + if not found then 2011 + ctx:fail 'incomprehensible input line' 2012 + end 2013 + end 2014 + else 2015 + if next(dest) and dest[#dest].kind ~= 'break' then 2016 + local brk = {kind='break', origin = ctx:clone()} 2017 + job:hook('block_break', ctx, brk, l) 2018 + table.insert(dest, brk) 2019 + end 2020 + end 1542 2021 end 2022 + job:hook('line_end',ctx,l) 2023 +end 2024 + 2025 +function ct.parse(file, src, mode, setup) 1543 2026 1544 2027 local ctx = ct.ctx.mk(src) 1545 2028 ctx.line = 0 1546 2029 ctx.doc = ct.doc.mk() 1547 2030 ctx.doc.src = src 1548 - ctx.doc.stage = { 1549 - kind = 'parse'; 1550 - mode = mode; 1551 - } 1552 2031 ctx.sec = ctx.doc:mksec() -- toplevel section 1553 2032 ctx.sec.origin = ctx:clone() 2033 + ctx.lang = mode['meta:lang'] 
2034 + if mode['parse:enc'] then 2035 + local e = ss.str.enc[mode['parse:enc']] 2036 + if not e then 2037 + ct.exns.enc('requested encoding not supported',mode['parse:enc']):throw() 2038 + end 2039 + ctx.doc.enc = e 2040 + end 1554 2041 1555 2042 -- create states for extension hooks 1556 2043 local job = ctx.doc:job('parse',nil,ctx) 2044 + ctx.doc.stage = { 2045 + kind = 'parse'; 2046 + mode = mode; 2047 + job = job; 2048 + langstack = {ctx.lang}; 2049 + fontstack = {}; 2050 + } 2051 + 2052 + local function 2053 + is_whitespace(cp) 2054 + return ctx.doc.enc.iswhitespace(cp) 2055 + end 2056 + 2057 + if setup then setup(ctx) end 2058 + 1557 2059 1558 2060 for full_line in file:lines() do ctx.line = ctx.line + 1 1559 2061 local l 1560 2062 for p, c in utf8.codes(full_line) do 1561 2063 if not is_whitespace(c) then 1562 2064 l = full_line:sub(p) 1563 2065 break 1564 2066 end 1565 2067 end 1566 - job:hook('line_read',ctx,l) 2068 + ct.parse_line(l, ctx, ctx.sec.blocks) 2069 + end 1567 2070 1568 - if ctx.mode then 1569 - if ctx.mode.kind == 'code' then 1570 - if l and l:match '^~~~%s*$' then 1571 - job:hook('block_listing_end',ctx,ctx.mode.listing) 1572 - job:hook('mode_switch', c, nil) 1573 - ctx.mode = nil 1574 - else 1575 - -- TODO handle formatted code 1576 - local newline = {l} 1577 - table.insert(ctx.mode.listing.lines, newline) 1578 - job:hook('block_listing_newline',ctx,ctx.mode.listing,newline) 1579 - end 1580 - else 1581 - ctx:fail('unimplemented syntax mode %s', ctx.mode.kind) 1582 - end 1583 - else 1584 - if l then 1585 - local function tryseqs(seqs, ...) 1586 - for _, i in pairs(seqs) do 1587 - if ((not i.seq ) or startswith(l, i.seq)) and 1588 - ((not i.pred) or i.pred (l, ctx )) then 1589 - i.fn(l, ctx, job, ...) 
1590 - return true 1591 - end 2071 + for i, sec in ipairs(ctx.doc.secorder) do 2072 + for refid, r in ipairs(sec.refs) do 2073 + if type(r) == 'table' and r.kind == 'resource' and r.props.src then 2074 + local lines = ss.str.breaklines(ctx.doc.enc, r.props.src) 2075 + local srcs = {} 2076 + for i,l in ipairs(lines) do 2077 + local args = ss.str.breakwords(ctx.doc.enc, l, 2, {escape=true}) 2078 + if #args < 3 then 2079 + r.origin:fail('invalid syntax for resource %s', t.ref) 2080 + end 2081 + local mimebreak = function(s) 2082 + local wds = ss.str.split(ctx.doc.enc, s, '/', 1, {escape=true}) 2083 + return wds 1592 2084 end 1593 - return false 2085 + local mime = mimebreak(args[2]); 2086 + local mimeclasses = { 2087 + ['application/svg+xml'] = 'image'; 2088 + } 2089 + local class = mimeclasses[mime] 2090 + table.insert(srcs, { 2091 + mode = args[1]; 2092 + mime = mime; 2093 + uri = args[3]; 2094 + class = class or mime[1]; 2095 + }) 1594 2096 end 1595 - 1596 - if not tryseqs(ct.ctlseqs) then 1597 - local found = false 1598 - 1599 - for eb, ext, state in job:each('blocks') do 1600 - if tryseqs(eb, state) then found = true break end 1601 - end 1602 - 1603 - if not found then 1604 - ctx:fail 'incomprehensible input line' 1605 - end 1606 - end 1607 - else 1608 - if next(ctx.sec.blocks) and ctx.sec.blocks[#ctx.sec.blocks].kind ~= 'break' then 1609 - local brk = {kind='break'} 1610 - job:hook('block_break', ctx, brk, l) 1611 - table.insert(ctx.sec.blocks, brk) 1612 - end 2097 + --ideally move this into its own mimetype lib 2098 + local kind = r.props.as or srcs[1].class 2099 + r.class = kind 2100 + r.srcs = srcs 1613 2101 end 1614 2102 end 1615 - job:hook('line_end',ctx,l) 1616 2103 end 1617 - 2104 + ctx.doc.stage = nil 2105 + ctx.doc.docjob:hook('meddle_ast') 1618 2106 return ctx.doc 1619 2107 end
Modified desk/cortav.xml from [8189edad17] to [b82e1b14f3].
8 8 --> 9 9 <language name='Cortav' version='1' kateversion='2.4' section='Markup' extensions='*.ct'> 10 10 <highlighting> 11 11 <list name='extension-directives'> 12 12 <item>uses</item> 13 13 <item>needs</item> 14 14 <item>inhibits</item> 15 + </list> 16 + <list name='meta-directives'> 17 + <item>author</item> 18 + <item>lang</item> 19 + <item>pragma</item> 20 + </list> 21 + <list name='ctl-directives'> 22 + <item>when</item> 23 + <item>unless</item> 24 + <item>cols</item> 25 + <item>quote</item> 26 + <item>include</item> 27 + <item>embed</item> 15 28 </list> 16 29 <list name='renderer-directives'> 17 30 <item>html</item> 18 31 <item>groff</item> 19 32 <item>ps</item> 20 33 <item>tex</item> 21 34 <item>plaintext</item> ................................................................................ 24 37 </list> 25 38 <contexts> 26 39 <context name='init' attribute='Normal Text' lineEndContext='#pop' fallthroughContext='text'> 27 40 <RegExpr String='\\.' attribute='Escaped Char'/> 28 41 <RegExpr attribute='Section Cue' context='sec-ident' String='(#|§)+' firstNonSpace='true' /> 29 42 <StringDetect String='~~~' attribute='Literal Block Cue' firstNonSpace='true' context='literal-block-cue'/> 30 43 <RegExpr attribute='List' String='[\*:]+' firstNonSpace='true' context='text' /> 44 + <Detect2Chars char='%' char1='%' attribute='Comment' context='comment'/> 31 45 <Detect2Chars char='%' char1='!' 
attribute='Critical Directive Cue' context='directive'/> 32 46 <DetectChar char='%' attribute='Directive Cue' context='directive'/> 47 + <DetectChar char='@' attribute='Resource Cue' context='resource'/> 33 48 <DetectChar char='	' attribute='Normal Text' context='refdef-id'/> 34 49 </context> 50 + 51 + <context name='comment' attribute='Comment' lineEndContext='#pop'> 52 + </context> 53 + <context name='error' attribute='Error' lineEndContext='#pop'> 54 + </context> 55 + 56 + <context name='resource' attribute='Resource Identifier' lineEndContext='#pop'> 57 + <DetectSpaces context='#pop!error' attribute='Error'/> 58 + </context> 35 59 36 60 <context name='sec-ident' attribute='Identifier' lineEndContext='#pop'> 37 61 <DetectSpaces context='#pop!sec' attribute='Normal Text'/> 38 62 </context> 39 63 40 64 <context name='sec' attribute='Header' lineEndContext='#pop'> 41 65 <IncludeRules context='text'/> ................................................................................ 83 107 <IncludeRules context='span'/> 84 108 </context> 85 109 86 110 <context name='span-del' attribute='Deleted Text' lineEndContext='#pop'> 87 111 <IncludeRules context='span'/> 88 112 </context> 89 113 90 - <context name='span-cue' attribute='Span Cue' lineEndContext='#pop'> 91 - <StringDetect attribute='Span Cue' String='$\' context='#pop!flat-span' /> 114 + <context name='span-cue' attribute='Span Cue' lineEndContext='#pop' fallthroughContext="error"> 115 + <StringDetect attribute='Span Cue' String='`\' context='#pop!flat-span' /> 92 116 93 117 <DetectChar attribute='Span Cue' char='!' 
context='#pop!span-emph' /> 94 118 <DetectChar attribute='Span Cue' char='*' context='#pop!span-strong' /> 95 119 <DetectChar attribute='Span Cue' char='~' context='#pop!span-del' /> 96 120 97 - <AnyChar attribute='Span Cue' String='$+🔒' context='#pop!span' /> 121 + <AnyChar attribute='Span Cue' String='`$+🔒' context='#pop!span' /> 98 122 <StringDetect attribute='Span Cue' String='→' context='#pop!ref' /> 99 123 <StringDetect attribute='Span Cue' String='🔗' context='#pop!ref' /> 100 124 <DetectChar attribute='Span Cue' char='>' context='#pop!ref' /> 125 + <DetectChar attribute='Span Cue' char='^' context='#pop!ref' /> 101 126 <DetectChar attribute='Span Cue' char='&' context='#pop!ref' /> 102 127 <DetectChar attribute='Span Cue' char='#' context='#pop!var-ref' /> 103 128 <DetectChar attribute='Span Cue' char='\' context='#pop!flat-span' /> 129 + <Detect2Chars attribute='Comment' char='%' char1='%' context='#pop!inline-comment' /> 130 + <Detect2Chars attribute='Critical Directive Cue' char='%' char1='!' 
context='#pop!inline-directive' /> 131 + <DetectChar attribute='Directive Cue' char='%' context='#pop!inline-directive' /> 104 132 </context> 105 133 106 134 <context name='flat-span' attribute='Unstyled Text' lineEndContext='#pop'> 107 135 <Detect2Chars attribute='Escaped Char' context='#stay' char='\' char1=']'/> 108 136 <DetectChar attribute='Span Delimiter' context='#pop' char=']'/> 109 137 </context> 138 + 139 + <context name='inline-comment' attribute='Comment' lineEndContext='#pop'> 140 + <IncludeRules context='flat-span'/> 141 + </context> 142 + 143 + <context name='inline-directive' attribute='Directive' lineEndContext='#pop'> 144 + <IncludeRules context='flat-span'/> 145 + <AnyChar String=".:!#$%@~'"" attribute='Directive Cue'/> 146 + <DetectSpaces context='#pop!span'/> 147 + </context> 110 148 111 149 <context name='ref' attribute='Reference' lineEndContext='#pop'> 150 + <IncludeRules context='flat-span'/> 112 151 <DetectSpaces context='#pop!span'/> 113 152 </context> 114 153 115 154 <context name='var-ref' attribute='Reference' lineEndContext='#pop'> 116 155 <WordDetect String="cortav" attribute='Standard Namespace'/> 117 156 <WordDetect String="env" attribute='Standard Namespace'/> 118 157 <DetectChar attribute='Span Delimiter' context='#pop' char=']'/> ................................................................................ 
129 168 </context> 130 169 </contexts> 131 170 <itemDatas> 132 171 <itemData name='Normal Text' defStyleNum='dsNormal'/> 133 172 <itemData name='Styled Text' defStyleNum='dsNormal'/> 134 173 <itemData name='Emphatic Text' defStyleNum='dsNormal' italic='true'/> 135 174 <itemData name='Strong Text' defStyleNum='dsNormal' bold='true'/> 136 - <itemData name='Deleted Text' defStyleNum='dsNormal' strikeout='true'/> 175 + <itemData name='Deleted Text' defStyleNum='dsNormal' strikeOut='true'/> 137 176 138 177 <itemData name='Section Cue' defStyleNum='dsKeyword' bold='true'/> 139 178 <itemData name='Header' defStyleNum='dsControlFlow' underline='true'/> 140 179 <itemData name='Identifier' defStyleNum='dsVariable'/> 141 180 142 181 <itemData name='Unstyled Text' defStyleNum='dsVerbatimString'/> 143 182 <itemData name='Escaped Char' defStyleNum='dsSpecialChar'/> 144 183 <itemData name='Reference' defStyleNum='dsControlFlow' underline='true'/> 145 184 <itemData name='Span Cue' defStyleNum='dsKeyword' bold='true'/> 185 + <itemData name='Resource Cue' defStyleNum='dsKeyword' bold='true'/> 186 + <itemData name='Resource Identifier' defStyleNum='dsVariable' bold='true'/> 146 187 <itemData name='Span Delimiter' defStyleNum='dsKeyword'/> 147 188 <itemData name='Directive' defStyleNum='dsAttribute' bold='true'/> 148 189 <itemData name='Directive Cue' defStyleNum='dsAttribute'/> 149 190 <itemData name='Critical Directive Cue' defStyleNum='dsImport' bold='true'/> 150 191 <itemData name='Extension Directive' defStyleNum='dsImport' bold='true'/> 151 192 <itemData name='Renderer Directive' defStyleNum='dsExtension' bold='true'/> 152 193 <itemData name='Standard Namespace' defStyleNum='dsBuiltIn' bold='true'/> 153 194 <itemData name='Comment' defStyleNum='dsComment'/> 195 + <itemData name='Error' defStyleNum='dsError'/> 154 196 <itemData name='Macro' defStyleNum='dsPreprocessor' bold='true'/> 155 197 <itemData name='Macro Delimiter' defStyleNum='dsPreprocessor'/> 156 198 <itemData 
name='Field Delimiter' defStyleNum='dsPreprocessor' bold='true'/> 157 199 <itemData name='List' defStyleNum='dsOperator'/> 158 200 159 201 <itemData name='Literal Block' defStyleNum='dsSpecialString'/> 160 202 <itemData name='Literal Block Cue' defStyleNum='dsPreprocessor' bold='true'/>
Modified desk/velartrill-cortav.xml from [356c2a8842] to [51a69a6dad].
17 17 <expanded-acronym>Cortav</expanded-acronym> 18 18 19 19 <generic-icon>x-office-document</generic-icon> 20 20 <glob pattern="*.ct"/> <glob pattern="*."/> 21 21 <glob pattern="*.cortav"/> 22 22 <magic> 23 23 <match value="%ct\n" offset="0" type="string"/> 24 - <match value="\x03\x07\x3E\x2D" offset="0" type="string"/> 24 + <match value="\x3E\x2E\x14\x0C\x01\x04\x00\x00\x00\x03\x07\x3E\x2D" offset="0" type="string"/> 25 25 </magic> 26 26 </mime-type> 27 27 <mime-type type="text/x-cortav-intent"> 28 28 <comment xml:lang="en">Cortav rendering intent file</comment> 29 29 <comment xml:lang="x-ranuir-Latn">tav cunloci Cortavi</comment> 30 30 <comment xml:lang="x-ranuir-CR8"> </comment> 31 31
Added ext/transmogrify.lua version [ffa0ca0a64].
-- transmogrify: a cortav extension that rewrites ASCII shorthand found in
-- narrative text (arrows, dashes, "(C)", straight quotes, ...) into the
-- corresponding typographic characters. It runs from the doc_meddle_ast hook
-- and edits string spans in place.
local ct = require 'cortav'
local ss = require 'sirsem'

-- Replacement tables, keyed by encoding object. Each encoding maps to an
-- ordered list of tiers; tiers are tried first-to-last, so the sequences in
-- an earlier tier win over the shorter sequences of a later tier (e.g.
-- '--->' is matched before '-->' and '--'). Inside one tier the keys are
-- visited with pairs(), whose order is unspecified, so no key in a tier may
-- be a prefix of another key in the same tier.
local patterns = {
	[ss.str.enc.utf8] = {
		{
			['<-->'] = '⟷';
			['--->'] = '⟶';
			['<---'] = '⟵';
			['----'] = '⸻';
		};

		{
			['<==>'] = '⟺';
			['===>'] = '⟹'; -- long RIGHTWARDS double arrow (was mistakenly '⇐')
			['<==='] = '⟸';
		};

		{
			['<->'] = '↔';
			['-->'] = '→';
			['<--'] = '←';
			['==>'] = '⇒';
			['<=>'] = '⇔';
			['<=='] = '⇐';
			['=/='] = '≠';
			['---'] = '⸺';
		};

		{
			['-:-'] = '÷';
			['--'] = '—';
			['(C)'] = '©';
			['(>)'] = '🄯';
			['(R)'] = '®';
			['(TM)'] = '™';
			['(SM)'] = '℠';
		};
	};
}

-- Quotation marks per encoding and language. Every entry is a flat list:
--   [1] opening double, [2] closing double,
--   [3] opening single, [4] closing single.
-- The [true] entry is the fallback used when the context has no language or
-- when no entry matches it.
-- NOTE(review): 'sp' is not the ISO 639-1 code for Spanish ('es'); confirm
-- against ss.str.langmatch whether this key can ever match.
local quotes = {
	[ss.str.enc.utf8] = {
		['en'] = {'“', '”'; '‘', '’'};
		['de'] = {'„', '“'; '‚', '‘'};
		['sp'] = {'«', '»'; '‹', '›'};
		['ja'] = {'「', '」'; '『', '』'};
		['fr'] = {'« ', ' »'; '‹ ', ' ›'};
		[true] = {'“', '”'; '‘', '’'};
	};
}

-- Rewrite one string of narrative text.
--   ctx: parse context; ctx.doc.enc selects the tables above and ctx.lang
--        (optional) selects the quote style
--   t:   the text to transform
-- Returns the transformed string, or t itself when the document encoding has
-- no pattern table.
local function meddle(ctx, t)
	local enc = ctx.doc.enc
	local ptns = patterns[enc]
	if not ptns then return t end

	local str = ''
	local lastchar
	local dquo = enc.encodeUCS'"'
	local squo = enc.encodeUCS"'"
	local forceRight = enc.encodeUCS'`'

	-- classify a character: 1 for a double quote, 2 for a single quote,
	-- nil for anything else
	local function quo(c)
		if c == dquo then
			return 1
		elseif c == squo then
			return 2
		end
	end

	local qtbl
	if quotes[enc] then
		if ctx.lang then
			qtbl = ss.str.langmatch(quotes[enc], ctx.lang, enc) or quotes[enc][true]
		else
			qtbl = quotes[enc][true]
		end
	end

	for c, p in ss.str.each(enc, t) do
		local n = t:sub(p.byte) -- remainder of the text, starting at c
		local ba, ca, nt = enc.parse_escape(n)
		if ba then
			-- escape sequence: emit the escaped text untouched and advance
			-- the iterator past it
			p.next.byte = p.next.byte + ba
			p.next.code = p.next.code + ca
			str = str .. nt
			lastchar = nt
		else
			local found = false
			local quote = quo(c)
			local force = false
			if not quote and c == forceRight and #t >= p.next.byte then
				-- a backtick directly before a quote forces the closing form
				quote = quo(enc.char(enc.codepoint(t, p.next.byte)))
				if quote then
					force = true
					p.next.byte = p.next.byte + #forceRight
					p.next.code = p.next.code + enc.len(forceRight)
				end
			end
			if qtbl and quote then
				found = true
				-- a quote opens at the start of the text or after
				-- whitespace, and closes everywhere else
				local opening = not force
					and (lastchar == nil or enc.iswhitespace(lastchar))
				if opening then
					-- 2*quote-1 selects slot 1 or 3; indexing with `quote`
					-- alone returned the closing double quote for an
					-- opening single quote
					str = str .. qtbl[quote*2 - 1]
				else
					str = str .. qtbl[quote*2]
				end
			else
				for _, order in ipairs(ptns) do
					for k,v in pairs(order) do
						if ss.str.begins(n, k) then
							found = true
							str = str .. v
							-- skip the rest of the matched sequence; its
							-- first character is the one being visited
							p.next.byte = p.next.byte + string.len(k) - 1
							p.next.code = p.next.code + utf8.len(k) - 1
							goto stopsearch
						end
					end
				end::stopsearch::
			end
			if not found then
				str = str .. c
			end
			lastchar = c
		end
	end
	return str
end

-- Walk a span list, transforming every string span in place and recursing
-- into nested spans. Spans of kind 'raw' are left untouched.
--   origin: context handed to meddle for the strings at this level
--   spans:  the span list to rewrite
local function enterspan(origin, spans)
	for i,v in pairs(spans) do
		if type(v) == 'string' then
			spans[i] = meddle(origin, v)
		elseif v.kind ~= 'raw' and v.spans then
			-- fall back to the enclosing origin when a nested span does not
			-- carry one, rather than failing inside meddle
			enterspan(v.origin or origin, v.spans)
		end
	end
end

ct.ext.install {
	id = 'transmogrify';
	version = ss.version {0,1; 'devel'};
	contributors = {{name='lexi hale', handle='velartrill', mail='lexi@hale.su', homepage='https://hale.su'}};
	default = true; -- on unless inhibited
	slow = true;
	hook = {
		-- visits the blocks of every ordinary, blockquote and footnote
		-- section and rewrites their spans
		doc_meddle_ast = function(job)
			for n, sec in pairs(job.doc.secorder) do
				if sec.kind=='ordinary' or sec.kind=='blockquote'
				or sec.kind=='footnote' then
					for i, block in pairs(sec.blocks) do
						if type(block.spans) == 'table' then
							enterspan(block.origin, block.spans)
						elseif type(block.spans) == 'string' then
							block.spans = meddle(block.origin, block.spans)
						end
					end
				end
			end
		end;
	};
}
Modified makefile from [42776f3212] to [4482353657].
1 1 lua != which lua 2 2 luac != which luac 3 3 sh != which sh 4 4 5 5 extens = $(wildcard ext/*.lua) 6 -extens_names ?= $(basename $(notdir $(extens))) 6 +extens-names ?= $(basename $(notdir $(extens))) 7 7 build = build 8 8 executable = cortav 9 9 default-format-flags = -m html:width 40em 10 10 11 11 prefix = $(HOME)/.local 12 -bin_prefix = $(prefix)/bin 13 -share_prefix = $(prefix)/share/$(executable) 12 +bin-prefix = $(prefix)/bin 13 +share-prefix = $(prefix)/share/$(executable) 14 14 15 -$(build)/$(executable): sirsem.lua cortav.lua $(extens) cli.lua | $(build)/ 16 - @echo ' » building with extensions $(extens_names)' 15 +# by default, we fetch and parse information about encodings we 16 +# support so that cortav can do fancy things like format math 17 +# equations by character class (e.g. italicizing variables) 18 +# this is not necessary for parsing the format, and can be 19 +# disabled by blanking the encoding-data list when building 20 +# ($ make encoding-data=) 21 +encoding-data = ucstbls 22 +encoding-files = $(patsubst %,$(build)/%.lc,$(encoding-data)) 23 +encoding-data-ucs = https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt 24 + 25 +$(build)/$(executable): sirsem.lua $(encoding-files) cortav.lua $(extens) cli.lua | $(build)/ 26 + @echo ' » building with extensions $(extens-names)' 17 27 echo '#!$(lua)' > $@ 18 28 luac -o - $^ >> $@ 19 29 chmod +x $@ 20 30 21 31 $(build)/cortav.html: cortav.ct $(build)/$(executable) | $(build)/ 22 32 $(build)/$(executable) $< -o $@ -m render:format html -y html:fossil-uv 23 33 ................................................................................ 
28 38 29 39 .PHONY: clean 30 40 clean: 31 41 rm -f $(build)/cortav $(build)/cortav.html $(build)/velartrill-cortav-view.desktop $(build)/cortav-view.sh 32 42 33 43 $(build)/%.sh: desk/%.sh 34 44 echo >$@ "#!$(sh)" 35 - echo >>$@ 'cortav_exec="$(bin_prefix)/$(executable)"' 45 + echo >>$@ 'cortav_exec="$(bin-prefix)/$(executable)"' 36 46 echo >>$@ 'cortav_flags="$${ct_format_flags-$(default-format-flags)}"' 37 47 cat $< >> $@ 38 48 chmod +x $@ 39 49 40 50 $(build)/velartrill-cortav-view.desktop: desk/cortav-view.desktop 41 51 cp $< $@ 42 - echo "Exec=$(bin_prefix)/cortav-view.sh" >>$@ 52 + echo "Exec=$(bin-prefix)/cortav-view.sh" >>$@ 43 53 44 54 %/: 45 55 mkdir -p $@ 46 56 57 +$(build)/unicode.txt: | $(build)/ 58 + curl $(encoding-data-ucs) > $@ 59 +$(build)/ucstbls.lc: $(build)/unicode.txt | $(build)/ 60 + $(lua) tools/ucs.lua $< | $(luac) -o $@ - 61 + 47 62 .PHONY: install 48 -install: $(build)/cortav $(build)/cortav-view.sh $(build)/velartrill-cortav-view.desktop | $(bin_prefix)/ 49 - install $(build)/$(executable) $(bin_prefix) 50 - install $(build)/cortav-view.sh $(bin_prefix) 63 +install: $(build)/cortav $(build)/cortav-view.sh $(build)/velartrill-cortav-view.desktop | $(bin-prefix)/ 64 + install $(build)/$(executable) $(bin-prefix) 65 + install $(build)/cortav-view.sh $(bin-prefix) 51 66 xdg-mime install desk/velartrill-cortav.xml 52 67 xdg-desktop-menu install $(build)/velartrill-cortav-view.desktop 53 68 xdg-mime default velartrill-cortav-view.desktop text/x-cortav 54 69 55 70 .PHONY: excise 56 71 excise: $(build)/velartrill-cortav-view.desktop 57 72 xdg-mime uninstall desk/velartrill-cortav.xml 58 73 xdg-desktop-menu uninstall $(build)/velartrill-cortav-view.desktop 59 - rm $(bin_prefix)/$(executable) 60 - rm $(bin_prefix)/cortav-view.sh 74 + rm $(bin-prefix)/$(executable) 75 + rm $(bin-prefix)/cortav-view.sh 61 76 62 77 .PHONY: wipe 63 78 wipe: excise clean
Modified sirsem.lua from [1f16b393f5] to [581e1b0127].
86 86 end 87 87 else 88 88 new[k] = v 89 89 end 90 90 end 91 91 return new 92 92 end 93 + 94 +function ss.push(tbl, ...) 95 + local idx = #tbl + 1 96 + local function rec(v, ...) 97 + tbl[idx] = v 98 + idx = idx + 1 99 + if ss.tuple.any(...) then rec(...) end 100 + end 101 + rec(...) 102 + return tbl 103 +end 93 104 94 105 function ss.delegate(tbl,tpl) -- returns a table that looks up keys it lacks from 95 106 -- tbl (lightweight alternative to shallow copies) 96 107 tpl = tpl or {} 97 108 return setmetatable({}, {__index=tbl}) 98 109 end 99 110 100 111 ss.str = {} 101 112 102 113 function ss.str.begins(str, pfx) 103 - return string.sub(str, 1, #pfx) == pfx 114 + -- appallingly, this is actually ~2/5ths faster than either 115 + -- of the below. i hate scripting languages so much 116 + return string.find(str, pfx, 1, true) == 1 117 + -- to my shock, disgust, and horror, even writing my own 118 + -- string scanning library for lua IN C only sped this up by 119 + -- a tiny fraction. i am just speechless. 
120 +-- return string.sub(str, 1, #pfx) == pfx 121 + 122 +-- local pl = string.len(pfx) 123 +-- local sl = string.len(str) 124 +-- if sl < pl then return false end 125 +-- for i=1,pl do 126 +-- if string.byte(str,i) ~= string.byte(pfx,i) then 127 +-- return false 128 +-- end 129 +-- end 130 +-- return true 104 131 end 105 132 133 +function ss.enum(syms) 134 + local e = {} 135 + for i,v in pairs(syms) do 136 + e[v] = i 137 + e[i] = v 138 + end 139 + return e 140 +end 141 + 142 +function ss.bitmask_bytes(n,ofs) 143 + ofs = ofs or 0 144 + local function rec(i) 145 + if i > n then return end 146 + return 1<<(i+ofs), rec(i+1) 147 + end 148 + return 1<<ofs, rec(1) 149 +end 150 + 151 +function ss.bitmask(tbl,ofs) 152 + local codes = {ss.bitmask_bytes(#tbl,ofs)} 153 + local m = {} 154 + local maxbit 155 + for i, s in ipairs(tbl) do 156 + m[s] = codes[i] 157 + m[codes[i]] = s 158 + maxbit = i 159 + end 160 + m[true] = {ofs or 0,maxbit} 161 + return m 162 +end 163 + 164 +ss.str.charclass = ss.enum { 165 + 'numeral'; 'letter'; 'symbol'; 'punct'; 166 + 'space'; 'ctl'; 'glyph'; -- hanji 167 +} 168 +ss.str.charprop = ss.bitmask({ 169 + 'hexnumeral', -- character that can be used to write hexadecimal notation 170 + 'upper', 'lower'; 171 + 'diac'; -- diacritic/modifier letter 172 + 'wordbreak'; -- char causes following characters to be treated as a separate word (e.g. punctuation) 173 + 'wordsep'; -- char causes previous and following characters to be treated as separate words; char constitutes a word of its own in between (e.g. interpunct) 174 + 'breakokay'; -- is it okay to break words at this character? 
(eg hyphen) 175 + 'mathop'; -- char is a mathematical operator 176 + 'disallow', -- char is not allowed in narrative text 177 + 'brack', 'right', 'left', -- brackets 178 + 'noprint', -- character deposits no ink 179 + 'superimpose' -- character is superimposed over previous 180 +}, 3) 181 + 182 +ss.str.enc_generics = { 183 + pfxescape = function(ch, enc, chain) 184 + local bytes = #ch 185 + local codes = enc.len(ch) 186 + return function(s) 187 + if s == ch then 188 + return 0, 0, ch 189 + elseif ss.str.begins(s, ch) then 190 + local nc = enc.char(enc.codepoint(s, bytes + 1)) 191 + return bytes, codes, nc 192 + elseif chain then 193 + return chain(s) 194 + end 195 + end 196 + end; 197 +}; 198 + 199 +local cc,cp = ss.str.charclass, ss.str.charprop 106 200 ss.str.enc = { 107 201 utf8 = { 108 202 char = utf8.char; 109 203 codepoint = utf8.codepoint; 204 + len = utf8.len; 205 + encodeUCS = function(str) return str end; 206 + iswhitespace = function(c) 207 + return (c == ' ') or (c == '\t') or (c == '\n') 208 + or (c == '\u{3000}') 209 + or (c == '\u{200B}') 210 + end; 211 + }; 212 + ascii = { 213 + len = string.len; char = string.char; codepoint = string.byte; 214 + iswhitespace = function(c) 215 + return (c == ' ') or (c == '\t') or (c == '\n') 216 + end; 217 + ranges = { 218 + {0x00,0x1a, cc.ctl}; 219 + {0x1b,0x1b, cc.ctl, cp.disallow}; 220 + {0x1c,0x1f, cc.ctl}; 221 + {0x20,0x20, cc.space}; 222 + {0x21,0x22, cc.punct}; 223 + {0x23,0x26, cc.symbol}; 224 + {0x27,0x29, cc.punct}; 225 + {0x2a,0x2b, cc.symbol}; 226 + {0x2c,0x2f, cc.punct}; 227 + {0x30,0x39, cc.numeral, cp.hexnumeral}; 228 + {0x3a,0x3b, cc.punct}; 229 + {0x3c,0x3e, cc.symbol, cp.mathop}; 230 + {0x3f,0x3f, cc.punct}; 231 + {0x40,0x40, cc.symbol}; 232 + {0x41,0x46, cc.letter, cp.ucase, cp.hexnumeral}; 233 + {0x47,0x5a, cc.letter, cp.ucase}; 234 + {0x5b,0x5d, cc.symbol, cp.mathop}; 235 + {0x5e,0x5e, cc.symbol, mathop}; 236 + {0x5f,0x60, cc.symbol}; 237 + {0x61,0x66, cc.letter, cp.lcase, cp.hexnumeral}; 238 + 
{0x67,0x7a, cc.letter, cp.lcase}; 239 + {0x7b,0x7e, cc.symbol}; 240 + {0x7f,0x7f, cc.ctl, cp.disallow}; 241 + } 110 242 }; 111 - c6b = {}; 112 - ascii = {}; 243 + raw = {len = string.len; char = string.char; codepoint = string.byte; 244 + encodeUCS = function(str) return str end; 245 + iswhitespace = function(c) 246 + return (c == ' ') or (c == '\t') or (c == '\n') 247 + end; 248 + }; 113 249 } 114 250 115 -function ss.str.enc.utf8.each(str, ascode) 251 +-- unicode ranges are optionally generated from consortium data 252 +-- files and injected through a generated source file. if this 253 +-- part of the build process is disabled (e.g. due to lack of 254 +-- internet access, or to keep the size of the executable as 255 +-- small as possible), we still at least can make the ascii 256 +-- ranges available to UTF8 (UTF8 being a superset of ascii) 257 +ss.str.enc.utf8.ranges = ss.delegate(ss.str.enc.ascii.ranges) 258 + 259 +function ss.str.enc.ascii.encodeUCS(str) 260 + local newstr = '' 261 + for c,p in ss.str.each(ss.str.enc.utf8, str, true) do 262 + if c > 0x7F then 263 + newstr = newstr .. '?' 264 + else 265 + newstr = newstr .. 
string.char(c) 266 + end 267 + end 268 +end 269 + 270 +for _, v in pairs{'utf8','ascii','raw'} do 271 + ss.str.enc[v].parse_escape = ss.str.enc_generics.pfxescape('\\',ss.str.enc[v]) 272 +end 273 + 274 +function ss.str.classify(enc, ch) 275 + if not enc.ranges then return {} end 276 + if type(ch)=='string' then ch = enc.codepoint(ch) end 277 + -- TODO 278 +end 279 + 280 + 281 +function ss.str.each(enc, str, ascode) 282 + if enc.each then return enc.each(enc,str,ascode) end 283 + local pm = { 284 + __index = { 285 + esc = function(self) 286 + local ba, bc, nc = enc.parse_escape(str:sub(self.byte)) 287 + if ba then 288 + self.next.byte = self.next.byte + ba - 1 289 + self.next.code = self.next.code + bc - 1 290 + return nc 291 + end 292 + end; 293 + }; 294 + } 116 295 local pos = { 117 296 code = 1; 118 297 byte = 1; 119 298 } 120 299 return function() 121 300 if pos.byte > #str then return nil end 122 - local thischar = utf8.codepoint(str, pos.byte) 123 - local lastpos = { 301 + local thischar = enc.codepoint(str, pos.byte) 302 + local lastpos = setmetatable({ 124 303 code = pos.code; 125 304 byte = pos.byte; 126 305 next = pos; 127 - } 306 + },pm) 128 307 if not ascode then 129 - thischar = utf8.char(thischar) 308 + thischar = enc.char(thischar) 130 309 pos.byte = pos.byte + #thischar 131 310 else 132 - pos.byte = pos.byte + #utf8.char(thischar) 311 + pos.byte = pos.byte + #enc.char(thischar) 133 312 end 134 313 pos.code = pos.code + 1 135 314 return thischar, lastpos 136 315 end 137 316 end 317 + 318 +function ss.str.breakwords(enc, str, max, opts) 319 + if enc.breakwords then return enc.breakwords(str) end 320 + local words = {} 321 + opts = opts or {} 322 + local buf = '' 323 + local flush = function() 324 + if buf ~= '' then table.insert(words,buf) buf = '' end 325 + end 326 + for c, p in ss.str.each(enc,str) do 327 + local nc 328 + if opts.escape then 329 + nc = p:esc() 330 + end 331 + if nc then 332 + buf = buf + nc 333 + elseif enc.iswhitespace(c) then 334 + 
flush() 335 + if max and #words == max then 336 + local rs = str:sub(p.next.byte) 337 + if rs ~= '' then 338 + table.insert(words, rs) 339 + end 340 + break 341 + end 342 + else 343 + buf = buf .. c 344 + end 345 + end 346 + flush() 347 + return words 348 +end 349 +function ss.str.mergewords(enc, lst) 350 + if enc.mergewords then return enc.mergewords(lst) end 351 + return table.concat(lst, enc.wordsep or ' ') 352 +end 353 +function ss.str.breaklines(enc, str, opts) 354 + if enc.breaklines then return enc.breaklines(lst,opts) end 355 + return ss.str.split(enc, str, enc.encodeUCS'\n', opts) 356 +end 357 + 358 +function ss.str.split(enc, str, delim, opts) 359 + if enc.split then return enc.split(str,delim,opts) end 360 + opts = opts or {} 361 + local elts = {} 362 + local buf = '' 363 + local flush = function() 364 + if buf ~= '' or opts.keep_empties then 365 + table.insert(elts,buf) 366 + buf = '' 367 + end 368 + end 369 + local esc = enc.parse_escape 370 + local tryesc if opts.escape then 371 + tryesc = function(str, p) 372 + local ba, ca, escd = enc.parse_escape(str:sub(p.byte)) 373 + if ba then 374 + p.next.byte = p.next.byte + ba 375 + p.next.code = p.next.code + ca 376 + buf = buf .. escd 377 + return true 378 + end 379 + end 380 + else 381 + tryesc = function(...) end 382 + end 383 + 384 + if type(delim) == 'function' then 385 + for c, p in ss.str.each(enc,str) do 386 + if not tryesc(str,p) then 387 + local skip = delim(str:sub(p.byte)) 388 + if skip then 389 + flush() 390 + p.next.byte = p.next.byte + skip - 1 391 + else 392 + buf = buf .. c 393 + end 394 + end 395 + end 396 + elseif enc.len(delim) == 1 then 397 + for c, p in ss.str.each(enc,str) do 398 + if not tryesc(str,p) then 399 + if c == delim then 400 + flush() 401 + else 402 + buf = buf .. 
c 403 + end 404 + end 405 + end 406 + else 407 + local dlcode = enc.len(delim) 408 + for c, p in ss.str.each(enc,str) do 409 + if not tryesc(str,p) then 410 + if str:sub(p.byte, p.byte+#delim-1) == delim then 411 + flush() 412 + p.next.byte = p.next.byte + #delim - 1 413 + p.next.code = p.next.code + dlcode 414 + else 415 + buf = buf .. c 416 + end 417 + end 418 + end 419 + end 420 + flush() 421 + return elts 422 +end 423 + 424 +function ss.str.langmatch(tbl, lang, enc) 425 + -- this performs primitive language matching. NOTE: THIS IS NOT 426 + -- STANDARDS COMPLIANT. it's "good enough" for now, but in the 427 + -- long term it needs to be rewritten to actually understand the 428 + -- format, primarily so that e.g. 'en-US-Latn' and 'en-Latn-US' 429 + -- match -- currently order is significant. it shouldn't be 430 + -- ref: IETF BCP 47 (RFC 5646) https://www.ietf.org/rfc/bcp/bcp47.html 431 + local dash = enc.encodeUCS'-' 432 + local tags = ss.str.split(enc, lang, dash, {escape=true}) 433 + local bestlen = 0 434 + local bestmatch 435 + for k,v in pairs(tbl) do 436 + if k ~= true then 437 + local kt = ss.str.split(enc, k, dash, {escape=true}) 438 + for i=1,math.min(#kt,#tags) do 439 + if kt[i] ~= tags[i] then goto skip end 440 + end 441 + if #kt > bestlen then 442 + -- match the most specific matching tag 443 + bestmatch = k 444 + bestlen = #kt 445 + end 446 + end 447 + ::skip::end 448 + return tbl[bestmatch] or tbl[true], bestmatch 449 +end 138 450 139 451 ss.math = {} 140 452 141 453 function ss.math.lerp(t, a, b) 142 454 return (1-t)*a + (t*b) 143 455 end 144 456 ................................................................................ 239 551 elseif to == 'int' then return math.floor(tonumber(self)) 240 552 elseif c.cast and c.cast[to] then 241 553 return c.cast[to](self, ...) 242 554 elseif type(to) == 'table' and getmetatable(to) and getmetatable(to).cvt and getmetatable(to).cvt[cls] then 243 555 else error((c.ident or 'class') .. 
' is not convertible to ' .. (type(to) == 'string' and to or tostring(to))) end 244 556 end 245 557 end 246 - if c.fns then return c.fns[k] end 558 + if c.fns and c.fns[k] then return c.fns[k] end 559 + if c.index then return c.index(self,k) end 247 560 end 248 561 249 562 if c.cast then 250 563 if c.cast.string then 251 564 cls.__tostring = c.cast.string 252 565 end 253 566 if c.cast.number then ................................................................................ 265 578 if c.construct then 266 579 c.construct(val, ...) 267 580 end 268 581 return val 269 582 end 270 583 getmetatable(cls).__call = function(_, ...) return cls.mk(...) end 271 584 cls.is = function(o) return getmetatable(o) == cls end 585 + cls.__metatable = cls -- lock metatable 272 586 return cls 273 587 end 274 588 275 589 -- tidy exceptions 276 590 277 591 ss.exn = ss.declare { 278 592 ident = 'exn'; ................................................................................ 302 616 } 303 617 end; 304 618 call = function(me, ...) 305 619 return ss.exn(me, ...) 306 620 end; 307 621 } 308 622 ss.str.exn = ss.exnkind 'failure while string munging' 623 +ss.bug = ss.exnkind 'tripped over bug' 309 624 310 625 function ss.str.delimit(encoding, start, stop, s) 311 626 local depth = 0 312 627 encoding = encoding or ss.str.enc.utf8 313 628 if not ss.str.begins(s, start) then return nil end 314 - for c,p in encoding.each(s) do 629 + for c,p in ss.str.each(encoding,s) do 315 630 if c == (encoding.escape or '\\') then 316 631 p.next.byte = p.next.byte + #encoding.char(encoding.codepoint(s, p.next.byte)) 317 632 p.next.code = p.next.code + 1 318 633 elseif c == start then 319 634 depth = depth + 1 320 635 elseif c == stop then 321 636 depth = depth - 1 ................................................................................ 384 699 return x 385 700 elseif select('#', ...) == 0 then 386 701 return nil 387 702 else 388 703 return ss.coalesce(...) 
389 704 end 390 705 end 706 + 707 +ss.tuple = {} 708 +function ss.tuple.any(...) 709 + return select('#',...) > 0 710 +end 711 + 712 +function ss.tuple.cat(...) 713 + local a = {...} 714 + return function(...) 715 + ss.push(a, ...) 716 + return table.unpack(a) 717 + end 718 +end 719 + 720 +function ss.tuple.suffix(sfx,n,...) 721 + if n ~= nil then 722 + return n, ss.tuple.suffix(...) 723 + else 724 + return sfx 725 + end 726 +end 727 + 728 +function ss.tuple.cdr(x, ...) return ... end 729 + 730 +ss.stack = ss.declare { 731 + ident = 'stack'; 732 + mk = function() return { 733 + top = 0; 734 + store = {}; 735 + } end; 736 + index = function(me, i) 737 + if i <= 0 then 738 + return me.store[me.top + i] 739 + else 740 + return me.store[i] 741 + end 742 + end; 743 + fns = { 744 + push = function(me, val, ...) 745 + if val~=nil then 746 + me.top = me.top + 1 747 + me.store[me.top] = val 748 + me:push(...) 749 + end 750 + return val, ... 751 + end; 752 + pop = function(me,n) n = n or 1 753 + local r = {} 754 + if n < me.top then 755 + for i = 0,n-1 do 756 + r[i+1] = me.store[me.top - i] 757 + me.store[me.top - i] = nil 758 + end 759 + me.top = me.top - n 760 + else 761 + r = me.store 762 + me.store = {} 763 + end 764 + return table.unpack(r) 765 + end; 766 + set = function(me,val) 767 + if me.top == 0 then 768 + me.top = me.top + 1 --autopush 769 + end 770 + me.store[me.top] = val 771 + end; 772 + all = function(me) return table.unpack(me.store) end; 773 + each = function(forward) 774 + if forward then 775 + local idx = 0 776 + return function() 777 + idx = idx + 1 778 + if idx > top 779 + then return nil 780 + else return me.store[idx], idx 781 + end 782 + end 783 + else 784 + local idx = top + 1 785 + return function() 786 + idx = idx - 1 787 + if idx == 0 788 + then return nil 789 + else return me.store[idx], idx 790 + end 791 + end 792 + end 793 + end; 794 + }; 795 +} 796 + 797 +ss.automat = ss.declare { 798 + ident = 'automat'; 799 + mk = function() return { 800 + 
state = ss.stack(); 801 + states = {}; 802 + ttns = {}; 803 + mem = {}; 804 + match = function(sym, ttn, mach) 805 + if ttn.pred and ttn:pred(mach, sym)~=true then 806 + return false 807 + end 808 + if ttn.on then 809 + return sym == ttn.on 810 + end 811 + return false 812 + end; 813 + } end; 814 + 815 + construct = function(me, def) 816 + for k,v in pairs{'states','ttns','mem','syms'} do 817 + if def[k] then me[k] = v end 818 + end 819 + end; 820 + 821 + fns = { 822 + react = function(me,sym) 823 + local s = me.states[me.state.id] 824 + if s and s.input then 825 + s:react(me, sym) 826 + end 827 + end; 828 + 829 + drop = function(me,n) 830 + for i = 0, math.min(n-1,me.state.top-1) do 831 + local s = me.states[me.state[-i].id] 832 + if s.exit then s:exit(s.mem, me) end 833 + end 834 + if n < me.state.top then 835 + local newtop = me.states[me.state[-n].id] 836 + if newtop.activate then newtop:activate(me.state[-n].mem, me, n) end 837 + end 838 + return me.state:pop(n) 839 + end; 840 + clear = function(me) return me:drop(me.state.top) end; 841 + 842 + transition = function(me,ttn,oldstates) 843 + local s = me.state:push {id = ttn.to, mem = {}} 844 + local to = me.states[ttn.to] 845 + if to.enter then 846 + to:enter(s.mem, me) 847 + end 848 + end; 849 + 850 + input = function(me,sym) 851 + local ttns = me.ttns[me.state.id] 852 + local _, ttn = ss.find(ttns, function(ttn) 853 + return me.match(sym, ttn, me) 854 + end) 855 + if ttn then 856 + if ttn.pop then 857 + local oldstates = {me.state:drop(ttn.pop)} 858 + me:transition(ttn, sym, oldstates) 859 + else 860 + me:transition(ttn, sym) 861 + end 862 + else 863 + me:react(sym) 864 + end 865 + end; 866 + }; 867 +}
Added tools/ucs.lua version [3976f4bc78].
-- [ʞ] tools/ucs.lua
--  ~ lexi hale <lexi@hale.su>
--  ? table generator for unicode character classes
--  🄯 AGPLv3
--
-- usage: lua tools/ucs.lua [database-file]
--
-- reads a semicolon-delimited character database (from the file named
-- by the first argument, or from stdin when no argument is given) and
-- writes a lua source file to stdout that assigns a list of
-- {first-codepoint, last-codepoint, category-mask} triples to
-- ss.str.enc.utf8.ranges. of each input line, field 1 (hex codepoint),
-- field 2 (name), field 3 (class) and field 5 (kind) are consumed.
-- presumably the input is the consortium's UnicodeData.txt, where those
-- fields are the general category and bidi class -- TODO confirm.

-- skeleton of the emitted source file; %s receives the range list
local tpl = [[
local ss = require 'sirsem'
ss.str.enc.utf8.ranges = {%s}
]]

-- builds a two-way lookup table: name -> index and index -> name
local enum = function(syms)
	local e = {}
	for i,v in pairs(syms) do
		e[v] = i
		e[i] = v
	end
	return e
end

-- select the input stream. a file that cannot be opened is reported
-- immediately (with the system error message) rather than surfacing
-- later as an attempt to index nil
local file = io.stdin
local path = arg[1]
if path then
	file = assert(io.open(path, 'rb'))
end

-- returns the single-bit values 1<<ofs, 1<<(ofs+1), ... 1<<(ofs+n)
-- as multiple results (n+1 values in total)
local bitmask_raw = function(n,ofs)
	ofs = ofs or 0
	local function rec(i)
		if i > n then return end
		return 1<<(i+ofs), rec(i+1)
	end
	return 1<<ofs, rec(1)
end

-- builds a two-way lookup between the names in tbl and consecutive
-- single-bit flags, the first name receiving bit `ofs`. the entry
-- under the key `true` records {ofs, number of names}
local bitmask = function(tbl,ofs)
	local codes = {bitmask_raw(#tbl,ofs)}
	local m = {}
	local maxbit
	for i, s in ipairs(tbl) do
		m[s] = codes[i]
		m[codes[i]] = s
		maxbit = i
	end
	m[true] = {ofs or 0,maxbit}
	return m
end

-- the basic type occupies the low three bits of a category mask
-- (seven values, 1 through 7)
local basictype = enum {
	'numeral';
	'alpha';
	'symbol';
	'punct';
	'space';
	'ctl';
	'glyph'; -- hanji
}

-- property flags start at bit 3, directly above the basic type.
-- NOTE(review): ss.str.charprop in sirsem lists 'breakokay' and
-- 'mathop' ahead of 'disallow'; this list has neither, so the bit
-- values from 'disallow' onward appear to differ between the two
-- tables. confirm which layout is authoritative before anything
-- consumes the generated masks.
local props = bitmask({
	'hex',
	'upper', 'lower', 'diac',
	'wordbreak', 'wordsep',
	'disallow',
	'brack', 'right', 'left',
	'noprint', 'superimpose'
}, 3)

-- per-codepoint categories that take precedence over the database
local overrides = {
	[0x200B] = basictype.space | props.wordsep; -- database entry is wrong
}

local mask = ~0 -- mask out irrelevant properties to compactify database

-- maps one database record (fields: codepoint, class, kind) to a
-- category mask. returns 0 for classes this tool does not track
local function parsecat(tbl)
	local c,p,b = 0,props,basictype
	if overrides[tbl.codepoint] then
		c = overrides[tbl.codepoint]
	elseif tbl.class == 'Nd' then c = b.numeral
	elseif tbl.class == 'No' then c = b.numeral | p.diac
	elseif tbl.class == 'Cc' then
		-- control characters whose kind marks them as separators or
		-- whitespace count as spaces; all others are disallowed
		if tbl.kind == 'S'
		or tbl.kind == 'WS'
		or tbl.kind == 'B' then c = b.space | p.wordsep
		else c = b.ctl | p.wordbreak | p.disallow end
	elseif tbl.class == 'Lu' then c = b.alpha | p.upper
	elseif tbl.class == 'Ll' then c = b.alpha | p.lower
	elseif tbl.class == 'Lo'
	    or tbl.class == 'Lt' then c = b.alpha
	elseif tbl.class == 'Po' then c = b.punct | p.wordbreak
	elseif tbl.class == 'Sm' then c = b.symbol | p.wordsep
	elseif tbl.class == 'Ps' then c = b.punct | p.brack | p.left
	elseif tbl.class == 'Pe' then c = b.punct | p.brack | p.right
	elseif tbl.class == 'Pc'
	    or tbl.class == 'Pd'
	    or tbl.class == 'Sk'
	    or tbl.class == 'Sc' then c = b.symbol
	elseif tbl.class == 'Zs' then c = b.space
		if tbl.kind == 'WS' then c=c|p.wordsep end
	elseif tbl.class == 'So' then c = b.glyph
	elseif tbl.class == 'Mn' then c = b.symbol | p.diac | p.superimpose
	end
	return c & mask
end

-- hard-coded records that are spliced into the record list ahead of
-- the first database record above 0xe390 (see the read loop below)
local ranuirAlpha = {0xe39d, 0xe39f, 0xe3ad, 0xe3af, 0xe3b5, 0xe3b7, 0xe3b9, 0xe3bb, 0xe3bd, 0xe3be, 0xe3bf, 0xe3c5, 0xe3c7, 0xe3c9, 0xe3cb, 0xe3cc, 0xe3cd, 0xe3ce, 0xe3cf}
local ranuirSpecial = {
	[0xe390] = basictype.space | props.wordsep;
}

local ranuir = {}
for _,v in ipairs(ranuirAlpha) do ranuir[v] = basictype.alpha end
for k,v in pairs(ranuirSpecial) do ranuir[k] = v end
local ranuirKeys = {}
for k in pairs(ranuir) do table.insert(ranuirKeys, k) end
table.sort(ranuirKeys)

-- pass 1: read the database into a flat list of categorized records
local recs = {}
local ranuirok = false
for ln in file:lines() do
	-- split on ';' keeping empty fields. appending a terminator and
	-- matching 'field;' behaves identically on every lua release,
	-- whereas gmatch('[^;]*') relies on how empty matches are
	-- handled, which has changed between releases and can shift the
	-- field indices
	local v = {}
	for s in (ln .. ';'):gmatch('([^;]*);') do
		v[#v + 1] = s
	end
	local codepoint = tonumber(v[1], 0x10)
	-- blank or malformed lines have no parseable codepoint: skip them
	-- instead of failing on a comparison against nil
	if codepoint and codepoint > 0x7f then -- discard ASCII, we already have that
		local code = {
			codepoint = codepoint;
			name = v[2];
			class = v[3];
			kind = v[5];
		}
		code.cat = parsecat(code)

		if (not ranuirok) and code.codepoint > 0xe390 then
			-- ipairs, not pairs: the keys were sorted above and must
			-- be inserted in that order
			for _,ri in ipairs(ranuirKeys) do
				table.insert(recs, {
					codepoint = ri;
					cat = ranuir[ri];
				})
			end
			ranuirok = true
		end

		-- records with an empty category carry no information
		if code.cat ~= 0 then
			table.insert(recs,code)
		end
	end
end
if file ~= io.stdin then file:close() end

if #recs == 0 then
	error('ucs.lua: no usable character records found in input', 0)
end

-- pass 2: collapse runs of records into {first, last, category} ranges.
-- NOTE(review): records are merged purely on category; codepoint
-- contiguity is never checked, so any gap between two merged records
-- (including records dropped above for having category 0) is absorbed
-- into the range. this looks deliberate for compactness -- confirm.
local ranges = {}
local last = recs[1]
local start = last
local altern = false -- true while inside an alternating-case run
local ambi = props.upper | props.lower
local flush = function()
	local new = {start.codepoint, last.codepoint, last.cat}
	if altern then
		-- an alternating run is emitted as a single range flagged
		-- with both cases
		new[3] = new[3] | ambi
	end
	table.insert(ranges, new)
	altern = false
end
for _, r in ipairs(recs) do
	if r.cat ~= last.cat then
		-- we can massively compactify this set with one weird trick:
		-- most non-ascii cased character sets are not in AAAAaaaa,
		-- but rather AaAaAa order. so we can look for this simple
		-- pattern and compress it, shaving c. 1/3rd off our dataset
		local casepair = (r.cat &~ ambi) == (last.cat &~ ambi)
		local runopen = altern
			or (start == last and (last.cat & props.upper) ~= 0)
		if runopen and casepair then
			altern = true
		else
			flush()
			start = r
		end
	elseif altern then
		-- same case twice in a row breaks the alternation
		flush()
		start = r
	end
	last = r
end
flush()

-- expand bitmask
	-- for k,v in pairs(ranges) do
	-- 	local basic = v[3] & ((1<<3) - 1) -- first three bits
	-- 	if basic ~= 0 then
	-- 		v[4] = basictype[basic]
	-- 	end
	-- 	local bitrange = props[true]
	-- 	for j=bitrange[1], bitrange[2] do
	-- 		if (v[3] & (1<<j)) ~= 0 then
	-- 			table.insert(v, props[1<<j])
	-- 		end
	-- 	end
	-- end

-- the data has been collected and formatted in the manner we
-- need; now we just need to emit it as a lua table

local tab = {}
-- ipairs guarantees the ranges are written in ascending order;
-- traversal order under pairs is unspecified
for i, v in ipairs(ranges) do
	tab[i] = string.format('{0x%x,0x%x,%u}',table.unpack(v))
end
io.stdout:write(string.format(tpl, table.concat(tab,',')))