Overview
Comment: | all kindsa shit |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA3-256: |
52b9bce7dd9317730dfccf2eefd17494 |
User & Date: | lexi on 2021-12-26 04:08:02 |
Other Links: | manifest | tags |
Context
2021-12-26
| ||
17:49 | get math parser working check-in: d1b7d2fd5f user: lexi tags: trunk | |
04:08 | all kindsa shit check-in: 52b9bce7dd user: lexi tags: trunk | |
2021-12-22
| ||
10:23 | fix bugged makefile check-in: 36024a43c5 user: lexi tags: trunk | |
Changes
Modified cli.lua from [a9857f9cb6] to [ad6ab18d31].
3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 .. 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 ... 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 ... 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 ... 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 |
local default_mode = { ['render:format'] = 'html'; ['html:gen-styles'] = true; } local function main(input, output, log, mode, suggestions, vars) local doc = ct.parse(input.stream, input.src, mode) input.stream:close() if mode['parse:show-tree'] then log:write(ss.dump(doc)) end -- the document has now had a chance to give its say; if it hasn't specified -- any modes of its own, we now merge in the 'weak modes' (suggestions) ................................................................................ ['mode-set'] = 1; ['mode-clear'] = 1; mode = 2; ['mode-set-weak'] = 1; ['mode-clear-weak'] = 1; ['mode-weak'] = 2; } return param_opts[o] or 0 end local optmap = { o = 'out'; l = 'log'; d = 'define'; V = 'version'; h = 'help'; y = 'mode-set', Y = 'mode-set-weak'; n = 'mode-clear', N = 'mode-clear-weak'; m = 'mode', M = 'mode-weak'; } local checkmodekey = function(key) if not key:match '[^:]+:.+' then ct.exns.cli('invalid mode key %s', key):throw() end return key end ................................................................................ mode = function(key,value) mode[checkmodekey(key)] = value end; ['mode-set'] = function(key) mode[checkmodekey(key)] = true end; ['mode-clear'] = function(key) mode[checkmodekey(key)] = false end; ['mode-weak'] = function(key,value) suggestions[checkmodekey(key)] = value end; ['mode-set-weak'] = function(key) suggestions[checkmodekey(key)] = true end; ['mode-clear-weak'] = function(key) suggestions[checkmodekey(key)] = false end; ['version'] = function() outp:write(ct.info:about()) if next(ct.ext.loaded) then outp:write('\nactive extensions:\n') for k,v in pairs(ct.ext.loaded) do outp:write(string.format(' * %s', v.id .. (v.version and (' ' .. v.version:string()) or ''))) ................................................................................ 
keepParsing = false else local longopt = v:match '^%-%-(.+)$' if keepParsing and longopt then execLongOpt(longopt) else if keepParsing and v:sub(1,1) == '-' then for c,p in ss.str.enc.utf8.each(v:sub(2)) do if optmap[c] then execLongOpt(optmap[c]) else ct.exns.cli('switch -%s unrecognized', c):throw() end end else ................................................................................ if args[1] and args[1] ~= '' then local file = io.open(args[1], "rb") if not file then error('unable to load file ' .. args[1]) end input.stream = file input.src.file = args[1] end return main(input, outp, log, mode, suggestions, vars) end local ok, e = pcall(entry_cli) -- local ok, e = true, entry_cli() if not ok then local str = 'translation failure' if ss.exn.is(e) then str = e.kind.desc end local color = false if log:seek() == nil then |
| | > > > > > > > > > > > > < > > > > > | | | | |
3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 .. 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 ... 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 ... 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 ... 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 |
local default_mode = { ['render:format'] = 'html'; ['html:gen-styles'] = true; } local function main(input, output, log, mode, suggestions, vars, extrule) local doc = ct.parse(input.stream, input.src, mode, function(c) c.doc.ext = extrule end) input.stream:close() if mode['parse:show-tree'] then log:write(ss.dump(doc)) end -- the document has now had a chance to give its say; if it hasn't specified -- any modes of its own, we now merge in the 'weak modes' (suggestions) ................................................................................ ['mode-set'] = 1; ['mode-clear'] = 1; mode = 2; ['mode-set-weak'] = 1; ['mode-clear-weak'] = 1; ['mode-weak'] = 2; ['use'] = 1; ['inhibit'] = 1; ['need'] = 1; ['load'] = 1; ['enc'] = 1; } return param_opts[o] or 0 end local optmap = { o = 'out'; l = 'log'; d = 'define'; V = 'version'; h = 'help'; y = 'mode-set', Y = 'mode-set-weak'; n = 'mode-clear', N = 'mode-clear-weak'; m = 'mode', M = 'mode-weak'; L = 'load', u = 'use', i = 'inhibit', r = 'require'; e = 'enc'; } local extrule = {use={},inhibit={},need={}} local checkmodekey = function(key) if not key:match '[^:]+:.+' then ct.exns.cli('invalid mode key %s', key):throw() end return key end ................................................................................ 
mode = function(key,value) mode[checkmodekey(key)] = value end; ['mode-set'] = function(key) mode[checkmodekey(key)] = true end; ['mode-clear'] = function(key) mode[checkmodekey(key)] = false end; ['mode-weak'] = function(key,value) suggestions[checkmodekey(key)] = value end; ['mode-set-weak'] = function(key) suggestions[checkmodekey(key)] = true end; ['mode-clear-weak'] = function(key) suggestions[checkmodekey(key)] = false end; ['use' ] = function(ext) extrule.use [ext] = true end; ['inhibit'] = function(ext) extrule.inhibit[ext] = true end; ['require'] = function(ext) extrule.need [ext] = true end; ['load'] = function(extpath) end; ['enc'] = function(enc) end; ['version'] = function() outp:write(ct.info:about()) if next(ct.ext.loaded) then outp:write('\nactive extensions:\n') for k,v in pairs(ct.ext.loaded) do outp:write(string.format(' * %s', v.id .. (v.version and (' ' .. v.version:string()) or ''))) ................................................................................ keepParsing = false else local longopt = v:match '^%-%-(.+)$' if keepParsing and longopt then execLongOpt(longopt) else if keepParsing and v:sub(1,1) == '-' then for c,p in ss.str.each(ss.str.enc.utf8, v:sub(2)) do if optmap[c] then execLongOpt(optmap[c]) else ct.exns.cli('switch -%s unrecognized', c):throw() end end else ................................................................................ if args[1] and args[1] ~= '' then local file = io.open(args[1], "rb") if not file then error('unable to load file ' .. args[1]) end input.stream = file input.src.file = args[1] end return main(input, outp, log, mode, suggestions, vars, extrule) end -- local ok, e = pcall(entry_cli) local ok, e = true, entry_cli() if not ok then local str = 'translation failure' if ss.exn.is(e) then str = e.kind.desc end local color = false if log:seek() == nil then |
Modified cortav.ct from [c71fe3a9e8] to [5df14cacc3].
|
dict: http://ʞ.cc/fic/spirals/glossary the cortav [!format] can be called [!cortavgil], or [!gil cortavi], to differentiate it from the reference implementation [!cortavsir] or [!sir cortavi]. %toc ## cortav vs. markdown the most important difference between cortav and markdown is that cortav is strictly line-oriented. this choice was made to ensure that cortav was relatively easy to parse. so while a simple [$.ct] file may look a bit like a [$.md] file, in reality it's a lot closer to gemtext than any flavor of markdown. ## encoding a cortav document is made up of a sequence of codepoints. UTF-8 must be supported, but other encodings (such as UTF-32 or C6B) may be supported as well. lines will be derived by splitting the codepoints at the linefeed character or equivalent. note that unearthly encodings like C6B or EBCDIC will need to select their own control sequences. ## file type a cortav source file is identified using a file extension, file type, and/or magic byte sequence. three file extensions are defined as identifying a cortav source file. where relevant, all must be recognized as indicating a cortav source file. * [$ct] is the shorthand extension * [$cortav] is the canonical disambiguation extension, for use in circumstances where [$*.ct] is already defined to mean a different file format. * [$] is the canonical Corran extension, a byte sequence comprising the unicode codepoints [$U+E3CE U+E3BD]. where the filesystem in question does not specify a filename encoding, the bytes should be expressed in UTF-8. three more extensions are reserved for identifying a cortav intent file. * [$ctc] is the shorthand extension * [$cortavcun] is the canonical disambiguation extension * [$] is the canonical Corran extension, a byte sequence comprising the unicode codepoints [$U+E3CE U+E3BD U+E3CE]. where the filesystem in question does not specify a filename encoding, the bytes should be expressed in UTF-8. 
on systems which use metadata to encode filetype, two values are defined to identify cortav source files * [$text/x-cortav] should be used when strings or arbitrary byte sequences are supported * [$CTAV] (that is, the byte sequence [$0x43 0x54 0x41 0x56]) should be used on systems that support only 32-bit file types/4-character type codes like Classic Mac OS. two more values are defined to identify cortav intent files. * [$text/x-cortav-intent] * [$CTVC] (the byte sequence [$0x43 0x54 0x56 0x43]) on systems which do not define a canonical way of encoding the filetype but support extended attributes of some kind, such as linux, an attribute named [$mime] may be created and given the value [$text/x-cortav] or [$text/x-cortav-intent]; alternatively, extensions may be used. it is also possible to indicate the nature of a cortav file without using filesystem metadata. this is done by prefixing the file with a magic byte sequence. the sequence used depends on the encoding. * for UTF-8 and ASCII, [$%ct[!\\n]] (that is, the byte sequence [$0x25 0x63 0x74 0x0A]) should be used * for C6B, the file should begin with the word [$] (that is, the byte sequence [$0x03 0x07 0x3E 0x2D]). consequently, this sequence should be ignored by a cortav parser at the start of a file (except as an indication of file format). for FreeDesktop-based systems, the [$velartrill-cortav.xml] file included in the repository supplies mappings for the extensions and magic byte sequences. a script is also included which can be registered with xdg-open so that double-clicking on a cortav file will render it out and open it in your default web browser. ## structure cortav is based on an HTML-like block model, where a document consists of sections, which are made up of blocks, which may contain a sequence of spans. flows of text are automatically conjoined into spans, and blocks are separated by one or more newlines. 
this means that, unlike in markdown, a single logical paragraph [*cannot] span multiple ASCII lines. the primary purpose of this was to ensure ease of parsing, but also, both markdown and cortav are supposed to be readable from within a plain text editor. this is the 21st century. every reasonable text editor supports soft word wrap, and if yours doesn't, that's entirely your own damn fault. the first character(s) of every line (the "control sequence") indicates the role of that line. if no control sequence is recognized, the sequence [$.] is implied instead. the standard line classes and their associated control sequences are listed below. some control sequences have alternate forms, in order to support modern, readable unicode characters as well as plain ascii text. * paragraphs (. ¶ ❡): a paragraph is a simple block of text. the period control sequence is only necessary if the paragraph text begins with something that would otherwise be interpreted as a control sequence. * newlines (\\): inserts a line break into the previous paragraph and attaches the following text. mostly useful for poetry or lyrics. * section starts (# §): starts a new section. all sections have an associated depth, determined by the number of sequence repetitions (e.g. "###" indicates depth three). sections may have headers and IDs; both are optional. IDs, if present, are a sequence of raw-text immediately following the hash marks. if the line has one or more space characters followed by styled-text, a header will be attached. the character immediately following the hashes can specify a particular type of section. e.g.: ** [$#] is a simple section break. ** [$#anchor] opens a new section with the ID [$anchor]. ** [$# header] opens a new section with the title "header". ** [$#anchor header] opens a new section with both the ID [$anchor] and the title "header". ** [$#>conversation] opens a blockquote section named [$conversation] without a header.
** [$#^id] opens a footnote section for the multiline footnote [$id]. the ID must be specified. ** [$#$id] opens the multiline macro [$id]. the ID must be specified. ** [$#&id mime] opens a new inline object [$id] of type [$mime]. useful for embedding SVGs. the ID and mime type must be specified. * lists (* :): these are like paragraph nodes, but list nodes that occur next to each other will be arranged so as to show they compose a sequence. depth is determined by the number of stars/colons. like headers, a list entry may have an ID that can be used to refer back to it; it is indicated in the same way. if colons are used, this indicates that the order of the items is significant. :-lists and *-lists may be intermixed; however, note that only the last character in the sequence actually controls the depth type. * directives (%): a directive issues a hint to the renderer in the form of an arbitrary string. directives are normally ignored if they are not supported, but you may cause a warning to be emitted where the directive is not supported with [$%!] or mark a directive critical with [$%!!] so that rendering will entirely fail if it cannot be parsed. * comments (%%): a comment is a line of text that is simply ignored by the renderer. * asides (!): indicates text that diverges from the narrative, and can be skipped without interrupting it. think of it like block-level parentheses. asides which follow one another are merged as paragraphs of the same aside, usually represented as a sort of box. if the first line of an aside contains a colon, the stretch of styled-text from the beginning of the aside to the colon will be treated as a "type heading," e.g. "Warning:" * code (~~~): a line beginning with ~~~ begins or terminates a block of code.
the opening line should look like one of the below ** [$~~~] ** [$~~~ language] (markdown-style shorthand syntax) ** [$~~~ \[language\] ~~~] (cortav syntax) ** [$~~~ \[language\] #id ~~~] ** [$~~~ title ~~~] ** [$~~~ title \[language\] ~~~] ** [$~~~ \[language\] title ~~~] ** [$~~~ title \[language\] #id ~~~] * reference (tab): a line beginning with a tab is treated as a "reference." references hold out-of-line metadata for preceding text like links and footnotes. a reference consists of an identifier followed by a colon and an arbitrary number of spaces or tabs, followed by text. whether this text is interpreted as raw-text or styled-text depends on the context in which the reference is used. * quotation (<): a line of the form [$<[!name]> [!quote]] denotes an utterance by [$name]. * blockquote (>): alternate blockquote syntax. can be nested by repeating the [$>] character. * subtitle (--): attaches a subtitle to the previous header * embed (&): embeds a referenced object. can be used to show images or repeat previously defined objects like lists or tables, optionally with a caption. ** &myimg All that remained of the unfortunate blood magic pageant contestants and audience (police photo) ** &$mymacro arg 1|arg 2|arg 3 * break (---): inserts a horizontal rule or other context break; does not end the section. must be followed by a newline. * table cells (+ |): see [>ex.tab table examples]. ## styled text most blocks contain a sequence of spans. these spans are produced by interpreting a stream of [*styled-text] following the control sequence. styled-text is a sequence of codepoints potentially interspersed with escapes. an escape is formed by an open square bracket [$\[] followed by a [*span control sequence], and arguments for that sequence like more styled-text. escapes can be nested. * strong \[*[!styled-text]\]: causes its text to stand out from the narrative, generally rendered as bold or a brighter color.
* emphatic \[![!styled-text]\]: indicates that its text should be spoken with emphasis, generally rendered as italics * literal \[$[!styled-text]\]: indicates that its text is a reference to a literal sequence of characters, variable name, or other discrete token. generally rendered in monospace * strikeout \[~[!styled-text]\]: indicates that its text should be struck through or otherwise indicated for deletion * insertion \[+[!styled-text]\]: indicates that its text should be indicated as a new addition to the text body. ** consider using a macro definition [$\edit: [~[#1]][+[#2]]] to save typing if you are doing editing work * link \[>[!ref] [!styled-text]\]: produces a hyperlink or cross-reference denoted by [$ref], which may be either a URL specified with a reference or the name of an object like an image or section elsewhere in the document. the unicode characters [$→] and [$🔗] can also be used instead of [$>] to denote a link. * footnote \[^[!ref] [!styled-text]\]: annotates the text with a defined footnote * raw \[\\[!raw-text]\]: causes all characters within to be interpreted literally, without expansion. the only special characters are square brackets, which must have a matching closing bracket * raw literal \[$\\[!raw-text]\]: shorthand for [\[$[\…]]] * macro \{[!name] [!arguments]\}: invokes a [>ex.mac macro], specified with a reference * argument \[#[!var]\]: in macros only, inserts the [$var]-th argument. otherwise, inserts a context variable provided by the renderer. * raw argument \[##[!var]\]: like above, but does not evaluate [$var]. * term \[&[!name] ([!label])\]: quotes a defined term with a link to its definition * inline image \[&@[!name]\]: shows a small image or other object inline. the unicode character [$🖼] can also be used instead of [$&@]. 
## identifiers any identifier (including a reference) that is defined within a named section must be referred to from outside that section as [$[!sec].[!obj]], where [$sec] is the ID of the containing section and [$obj] is the ID of the object one wishes to reference. ## context variables context variables are provided so that cortav renderers can process templates. certain context variables are provided for by the standard. you can test for the presence of a context variable with the directive [$%[*when] ctx [!var]]. * {def cortav.file} the name of the file currently being rendered * {def cortav.path} the absolute path of the file currently being rendered * {def cortav.time} the current system time in the form [$[#cortav.time]] * {def cortav.date} the current system date in the form [$[#cortav.date]] * {def cortav.datetime} the current system date and time represented in the locale or system-standard manner (e.g. [$[#cortav.datetime]]) * {def cortav.page} the number of the page currently being rendered * {def cortav.id} the identifier of the renderer * {def cortav.hash} the SHA3 hash of the source file being rendered def: [*[#1]]: on systems with environment variables, these may be accessed as context variables by prefixing their name with [$env.]. different renderers may provide context in different ways, such as from command line options or a context file. any predefined variables should carry an appropriate prefix to prevent conflation. ## directives d: [$%[*[##1]]] * {d author} encodes document authorship * {d cols} specifies the number of columns the next object should be rendered with * {d include} transcludes another file * {d quote} transcludes another file, without expanding the text except for paragraphs * {d embed}, where possible, embeds another file as an object within the current one. in HTML this could be accomplished with e.g. an iframe. 
* {d expand} causes the next object (usually a code block) to be fully expanded when it would otherwise not be * {d pragma} supplies semantic data about author intent, the kind of information the document contains, and hints about how it should be displayed to the user. think of them like offhand remarks to the renderer -- there's no guarantee that it'll pay any attention, but if it does, your document will look better. pragmas have no scope; they affect the entire document. the pragma function exists primarily as a means to allow parameters that would normally need to be specified on e.g. the command line to be encoded in the document instead in a way that multiple implementations can understand. a few standard pragmas are defined. ** {d pragma layout} gives a hint on how the document should be laid out. the first hint that is understood will be applied; all others will be discarded. standard hints include: *** essay *** narrative *** screenplay: uses asides to denote actions, quotes for dialogue *** stageplay: uses asides to denote actions, quotes for dialogue *** manual *** glossary *** news ** {d pragma accent} specifies an accent hue (in degrees around the color wheel) for renderers which support colorized output ** {d pragma accent-spread} is a factor that controls the "spread" of hues used in the document. if 0, only the accent color will be used; if larger, other hues will be used in addition to the primary accent color. ** {d pragma dark-on-light on|off} controls whether the color scheme used should be light-on-dark or dark-on-light ** {d pragma page-width} indicates how wide the pages should be ! note on pragmas: particularly when working with collections of documents, you should not keep formatting metadata in the documents themselves!
the best thing to do is to have a makefile for compiling the documents using whatever tools you want to support, and encoding the rendering options in this file (for the reference implementation this currently means as command line arguments, but eventually it will support intent files as well) so they can all be changed in one place; pragmas should instead be used for per-document [*overrides] of default settings. ! a workaround for the lack of intent files in the reference implementation is to have a single pseudo-stylesheet that contains only {d pragma} statements, and then import this file from each individual source file using the {d include} directive. this is suboptimal and recommended only when you need to ensure compatibility between different implementations. ! when creating HTML files, an even better alternative may be to turn off style generation entirely and link in an external, hand-written CSS stylesheet. this is generally the way you should compile sources for existing websites if you aren't going to write your own extension. ##ex examples ~~~ blockquotes #bq [cortav] ~~~ the following excerpts of text were recovered from a partially erased hard drive found in the Hawthorne manor in the weeks after the Incident. context is unknown. ................................................................................ +:english :| honor | +:ranuir :| tef | +:zia ţai :| pang | +:thalishte:| mbecheve | ~~~ ## extensions the cortav specification also specifies a number of extensions that do not have to be supported for a renderer to be compliant. the extension mechanism supports the following directives. * inhibits: prevents an extension from being used even where available * uses: turns on an extension that is not specified by the user operating the renderer (e.g. 
on the command line) * needs: causes rendering to fail with an error if the extensions are not available where possible, instead of [$needs x y z], the directive [$when has-ext x y z] should be used. this causes the next section to be rendered only if the named extensions are available. [$unless has-ext x y z] can be used to provide an alternative format. extensions are mainly interacted with through directives. all extension directives must be prefixed with the name of the extension. ### toc sections that have a title will be included in the table of contents. the table of contents is by default inserted at the break between the first level-1 section and the section immediately following it. you may instead place the directive [$toc] where you wish the TOC to be inserted, or suppress it entirely with [$inhibits toc]. note that some renderers may not display the TOC as part of the document itself. toc provides the directives: * [$%[*toc]]: inserts a table of contents in the specified position. this can be used more than once, but doing so may have confusing, incorrect, or nonsensical results under some renderers, and some may just ignore the directive entirely * [$%[*toc] mark [!styled-text]]: inserts a TOC entry with the label [!styled-text] pointing to the current location. this can be used to e.g. mark noteworthy images, instances of long quotes or literal blocks, or functions inside an expanded code block. * [$%[*toc] name [!id styled-text]]: like [$%[*toc] mark] but allows an additional [!id] parameter which specifies the ID the renderer will assign to an anchor element. this is not meaningful for all renderers and when it is, it is up to the renderer to decide what it means. ** the [*html] render backend interprets [!id] as the [$id] attribute of the anchor tag ** the [*groff] render backend ignores [!id] ### smart-quotes a cortav renderer may automatically translate punctuation marks to other punctuation marks depending on their context.
### hilite code can be highlighted according to the formal language it is written in. ### lua renderers with a lua interpreter available can evaluate lua code: * [$%lua use [!file]]: evaluates [$file] and makes its definitions available * [$\[%lua raw [!script]\]]: evaluates [$script] and emits the string it returns (if any) in raw span context. * [$\[%lua exp [!script]\]]: evaluates [$script] and emits the string it returns (if any) in expanded span context. * [$%lua raw [!script]]: evaluates [$script] and emits the string array it returns (if any) in raw block context. * [$%lua exp [!script]]: evaluates [$script] and emits the string array it returns (if any) in expanded block context. the interpreter should provide a [$cortav] table with the objects: * ctx: contains context variables used files should return a table with the following members * macros: an array of functions that return strings or arrays of strings when invoked. these will be injected into the global macro namespace. ### ts the [*ts] extension allows documents to be marked up for basic classification constraints and automatically redacted. if you are seriously relying on ts for confidentiality, make damn sure you start the file with [$%[*needs] ts], so that rendering will fail with an error if the extension isn't supported. ts enables the directives: * [$ts class [!scope] [!level] (styled-text)]: indicates a classification level for either the whole document (scope [!doc]) or the next section (scope [!sec]). if the ts level is below [$level], the section will be redacted or rendering will fail with an error, as appropriate. if styled-text is included, this will be treated as the name of the classification level. * [$ts word [!scope] [!word] (styled-text)]: indicates a codeword clearance that must be present for the text to render. if styled-text is present, this will be used to render the name of the codeword instead of [$word].
* [$when ts level [!level]] * [$when ts word [!word]] ts enables the spans: * [$\[🔒#[!level] [!styled-text]\]]: redacts the span if the security level is below that specified. * [$\[🔒.[!word] [!styled-text]\]]: redacts the span if the specified codeword clearance is not enabled. (the padlock emoji is shorthand for [$%ts].) ts redacts spans securely; that is, they are simply replaced with an indicator that they have been redacted, without visually leaking the length of the redacted text. ~~~#ts-example example [cortav] ~~~ %ts word doc sorrowful-pines SORROWFUL PINES # intercept R1440 TCT S3 ................................................................................ <B> Hyacinth, I told you not to contact me without— <A, shouting> god DAMMIT woman I am trying to SAVE your worthless skin <B> Hyacinth! your Godforsaken scrambler! <A> …oh, [!fuck]. (signal lost) ~~~ # reference implementation the cortav standard is implemented in [$cortav.lua], found in this repository. only the way [$cortav.lua] interprets the cortav language is defined as a reference implementation; other behaviors are simply how [$cortav.lua] implements the specification and may be copied, ignored, tweaked, violently assaulted, or used as inspiration by a compliant parser. the reference implementation can be used both as a lua library and from the command line. [$cortav.lua] contains the parser and renderers, [$ext/*] contain various extensions, [$sirsem.lua] contains utility functions, and [$cli.lua] contains the CLI driver. ## lua library there are various ways to use cortav from a lua script; the simplest however is probably to precompile your script with luac and link in the necessary components of the implementation. for instance, say we have the following program ~~~ stdin2html.lua [lua] ~~~ local ct = require 'cortav' local mode = {} local doc = ct.parse(io.stdin, {file = '(stdin)'}, mode) doc.stage = { ................................................................................ 
and the only extension we need is the table-of-contents extension. our script can be translated into a self-contained lua bytecode blob with the following command ~~~ $ luac -s -o stdin2html.lc $cortav_repo/{sirsem,cortav,ext/toc}.lua stdin2html.lua ~~~ and can then be operated with the command [$lua stdin2html.lc], with no further need for the cortav repository files. note that the order of the arguments to the [$luac] command is important! [$sirsem.lua] must come first, followed by [$cortav.lua], followed by any extensions. your driver script (i.e. the script with the entry point into the application) should always come last. ### building custom tools generally, most existing file-format conversion tools (cmark, pandoc, and so on) have a crucial limitation: they hardcode specific assumptions like document structure. this means that the files they output are generally not suitable as-is for the users' purposes, and require further munging, usually by hateful shell or perl scripts. some tools do provide libraries for end users to use as a basis for designing their own tools, but these are often limited, and in any case the user ends up having to write their own (non-standard) driver. it's no surprise that very few people end up doing this. [$cortav.lua]'s design lends itself to a more elegant solution. one can of course write their own driver using [$cortav] as a library, but most of the time when you're compiling document sources, you just want a binary you can run from the command line or a makefile. with [$cortav.lua], you can extend its capabilities easily while keeping the same driver. in the [$cortav] spec, extensions are mostly intended to give different implementations the ability to offer extra capabilities, but the reference implementation uses an extension architecture that makes it easy to write and add your own.
for each type of new behavior you want to implement, just create a new extension and list it on the make command line: ~~~ $ nvim ~/dev/my-cortav-exts/imperial-edict.lua $ make cortav extens+=$HOME/dev/my-cortav-exts/*.lua ~~~ the cortav binary this produces will have all the extra capabilities you personally need, without any need to fork [$cortav.lua] itself or even touch the repository. there's no reason [$cortav.lua] shouldn't be able to load extensions at runtime as well; i just haven't implemented this behavior yet. it probably would only take a few extra lines of code tho. i will eventually document the extension API, but for now, look at [$ext/toc.lua] for a simple example of how to register an extension. ## command line driver the [$cortav.lua] command line driver can be run from the repository directory with the command [$lua ./cli.lua], or by first compiling it into a bytecode form that links in all its dependencies. this is the preferred method for installation, as it produces a self-contained executable which loads more quickly, but running the driver in script form may be desirable for development or debugging. the repository contains a GNU makefile to automate compilation of the reference implementation on unix-like OSes. simply run [$$ make cortav] or [$$ gmake cortav] from the repository root to produce a self-contained bytecode executable that can be installed anywhere on your filesystem, with no dependencies other than the lua interpreter. ! note that the makefile strips debugging symbols to save space, so running [$cli.lua] directly as a script may be helpful if you encounter errors and need stacktraces or other debugging information. henceforth it will be assumed that you have produced the [$cortav] executable and placed it somewhere in your [$$PATH]; if you are instead running [$cortav.lua] directly as an interpreted script, you'll need to replace [$$ cortav] with [$$ lua ./cli.lua] in incantations. 
when run without commands, [$cortav.lua] will read input from standard input and write to standard output. alternately, a source file can be given as an argument. to write to a specific file instead of the standard output stream, use the [$-o [!file]] flag. ~~~ $ cortav readme.ct -o readme.html # reads from readme.ct, writes to readme.html $ cortav -o readme.html # reads from standard input, writes to readme.html $ cortav readme.ct # reads from readme.ct, writes to standard output ~~~ ### switches [$cortav.lua] offers various switches to control its behavior. + long + short + function + | [$--out [!file]] :|:[$-o]:| sets the output file (default stdout) | | [$--log [!file]] :|:[$-l]:| sets the log file (default stderr) | | [$--define [!var] [!val]] :|:[$-d]:| sets the context variable [$var] to [$val] | | [$--mode-set [!mode]] :|:[$-y]:| activates the [>refimpl-mode mode] with ID [!mode] | [$--mode-clear [!mode]] :|:[$-n]:| disables the mode with ID [!mode] | | [$--mode [!id] [!val]] :|:[$-m]:| configures mode [!id] with the value [!val] | | [$--mode-set-weak [!mode]] :|:[$-Y]:| activates the [>refimpl-mode mode] with ID [!mode] if the source file does not specify otherwise | [$--mode-clear-weak [!mode]] :|:[$-N]:| disables the mode with ID [!mode] if the source file does not specify otherwise | [$--mode-weak [!id] [!val]] :|:[$-M]:| configures mode [!id] with the value [!val] if the source file does not specify otherwise | [$--help] :|:[$-h]:| display online help | | [$--version] :|:[$-V]:| display the interpreter version | ###refimpl-mode modes most of [$cortav.lua]'s implementation-specific behavior is controlled by use of [!modes]. these are namespaced options which may have a boolean, string, or numeric value. boolean modes are set with the [$-y] [$-n] flags; other modes use the [$-m] flags. most modes are defined by the renderer backend. 
the following modes affect the behavior of the frontend: + ID + type + effect | [$render:format]:| string | selects the [>refimpl-rend renderer] (default [$html]) | [$parse:show-tree]:| flag | dumps the parse tree to the log after parsing completes ##refimpl-rend renderers [$cortav.lua] implements a frontend-backend architecture, separating the parsing stage from the rendering stage. this means new renderers can be added to [$cortav.lua] relatively easily. currently, only an [>refimpl-rend-html HTML renderer] is included; however, a [$groff] backend is planned at some point in the future, so that PDFs and manpages can be generated from cortav files. ###refimpl-rend-html html the HTML renderer is activated with the incantation [$-m render:format html]. it is currently the default backend. it produces a single HTML file, optionally with CSS styling data, from a [$.ct] input file. it supports the following modes: * string (css length) [$html:width] sets a maximum width for the body content in order to make the page more readable on large displays * number [$html:accent] applies an accent hue to the generated webpage. the hue is specified in degrees, e.g. [$-m html:accent 0] applies a red accent. * flag [$html:dark-on-light] uses dark-on-light styling, instead of the default light-on-dark * flag [$html:fossil-uv] outputs an HTML snippet suitable for use with the Fossil VCS webserver. this is intended to be used with the unversioned content mechanism to host rendered versions of documentation written in cortav that's stored in a Fossil repository. * number [$html:hue-spread] generates a color palette based on the supplied accent hue. the larger the value, the more the other colors diverge from the accent hue. * string [$html:link-css] generates a document linking to the named stylesheet * flag [$html:gen-styles] embeds appropriate CSS styles in the document (default on) * flag [$html:snippet] produces a snippet of html instead of an entire web page. 
note that proper CSS scoping is not yet implemented (and can't be implemented hygienically since [$scoped] was removed 😢) * string [$html:title] specifies the webpage titlebar contents (normally autodetected from the document based on headings or directives) ~~~ $ cortav readme.ct --out readme.html \ -m render:format html \ -m html:width 40em \ -m html:accent 80 \ -m html:hue-spread 35 \ -y html:dark-on-light # could also be written as: $ cortav readme.ct -ommmmy readme.html render:format html html:width 40em html:accent 80 html:hue-spread 35 html:dark-on-light ~~~ ## further directions ### additional backends it is eventually intended to support the following backends, if reasonably practicable. * [*html]: emit HTML and CSS code to typeset the document. [!in progress] * [*svg]: emit SVG, taking advantage of its precise layout features to produce a nicely formatted and paginated document. pagination can be accomplished through emitting multiple files or by assigning one layer to each page. [!long term] * [*groff]: the most important output backend, rivalling [*html]. will allow the document to be typeset in a wide variety of formats, including PDF and manpage. [!short term] * [*gemtext]: essentially a downrezzing of cortav to make it readable to Gemini clients some formats may eventually warrant their own renderer, but are not a priority: * [*text]: cortav source files are already plain text, but a certain amount of layout could be done using ascii art. * [*ansi]: emit sequences of ANSI escape codes to lay out a document in a terminal-friendly way * [*tex]: TeX is an unholy abomination and i neither like nor use it, but lots of people do and if cortav ever catches on, a TeX backend should probably be written eventually. PDF is not on either list because it's a nightmarish mess of a format and groff, which is installed on most linux systems already, can easily generate PDFs ### LCH support right now, the use of color in the HTML renderer is very unsatisfactory. 
the accent mechanism operates on the basis of the CSS HSL function, which is not perceptually uniform; different hues will present different mixes of brightness and some (yellows?) may be ugly or unreadable. the ideal solution would be to simply switch to using LCH based colors. unfortunately, only Safari actually supports the LCH color function right now, and it's unlikely (unless Lea Verou and her husband manage to work a miracle) that Colors Level 4 is going to be implemented very widely any time soon. this leaves us in an awkward position. we can of course do the math ourselves, working in LCH to implement the internal [$@tone] macro, and then "converting" these colors to HSL. unfortunately, you can't actually convert from LCH to HSL; it's like converting from pounds to kilograms. LCH can represent any color the human visual system can perceive; sRGB can't, and CSS HSL is implemented in sRGB. however, we could at least approximate something that would allow for perceptually uniform brightness, which would be an improvement, and this is probably the direction to go in, unless a miracle occurs and [$lch()] or [$color()] pop up in Blink. it may be possible to do a more reasonable job of handling colors in the postscript and TeX outputs. unsure about SVG but i assume it suffers the same problems HTML/CSS do. does groff even support color?? ### intent files there's currently no standard way to describe the intent and desired formatting of a document besides placing pragmas in the source file itself. this is extremely suboptimal, as when generating collections of documents, it's ideal to be able to keep all formatting information in one place. users should also be able to specify their own styling overrides that describe the way they prefer to read [$cortav] files, especially for uses like gemini or gopher integration. 
at some point soon [$cortav] needs to address this by adding intent files that can be activated from outside the source file, such as with a command line flag or a configuration file setting. these will probably consist of lines that are interpreted as pragmata. in addition to the standard intent format however, individual implementations should feel free to provide their own ways to provide intent metadata; e.g. the reference implementation, which has a lua interpreter available, should be able to take a lua script that runs after the parse stage and generates pragmata. this will be particularly useful for the end-user who wishes to specify a particular format she likes reading her files in without forcing that format on everyone she sends the compiled document to, as it will be able to interrogate the document and make intelligent decisions about what pragmata to apply. |
| | | | | | | | | | | | | | | | | | | | | | | | | < | > | | | | | | | | | | | | | | | | | | > > > | < > > > | > > | > > | | | | > > | | | | < > > > | | | | | | > > > > > > > > > | | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | | | | | | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | | | > > > > > > > | | | | | | | | > > > > | > | | | > > | | | | | | | < > | > > > > > > > > > > | | > > > > > > > > > > > > > > | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | > > > > > > > > > > > > > | | | | | | | | | | < > | | | | | | | > | | | | | | | | | | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | | | > > |
|
dict: http://ʞ.cc/fic/spirals/glossary the cortav [!format] can be called [!cortavgil], or [!gil cortavi], to differentiate it from the reference implementation [!cortavsir] or [!sir cortavi]. %toc ## cortav vs. markdown the most important difference between cortav and markdown is that cortav is strictly line-oriented. this choice was made to ensure that cortav was relatively easy to parse. so while a simple [`.ct] file may look a bit like a [`.md] file, in reality it's a lot closer to gemtext than any flavor of markdown. ## encoding a cortav document is made up of a sequence of codepoints. UTF-8 must be supported, but other encodings (such as UTF-32 or C6B) may be supported as well. lines will be derived by splitting the codepoints at the linefeed character or equivalent. note that unearthly encodings like C6B or EBCDIC will need to select their own control sequences. ## file type a cortav source file is identified using a file extension, file type, and/or magic byte sequence. three file extensions are defined as identifying a cortav source file. where relevant, all must be recognized as indicating a cortav source file. * [`ct] is the shorthand extension * [`cortav] is the canonical disambiguation extension, for use in circumstances where [`*.ct] is already defined to mean a different file format. * [`] is the canonical Corran extension, a byte sequence comprising the unicode codepoints [`U+E3CE U+E3BD]. where the filesystem in question does not specify a filename encoding, the bytes should be expressed in UTF-8. three more extensions are reserved for identifying a cortav intent file. * [`ctc] is the shorthand extension * [`cortavcun] is the canonical disambiguation extension * [`] is the canonical Corran extension, a byte sequence comprising the unicode codepoints [`U+E3CE U+E3BD U+E3CE]. where the filesystem in question does not specify a filename encoding, the bytes should be expressed in UTF-8. 
on systems which use metadata to encode filetype, two values are defined to identify cortav source files * [`text/x-cortav] should be used when strings or arbitrary byte sequences are supported * [`CTAV] (that is, the byte sequence [`0x43 54 41 56]) should be used on systems that support only 32-bit file types/4-character type codes like Classic Mac OS. two more values are defined to identify cortav intent files. * [`text/x-cortav-intent] * [`CTVC] (the byte sequence [`0x43 54 56 43]) on systems which do not define a canonical way of encoding the filetype but support extended attributes of some kind, such as linux, an attribute named [$mime] may be created and given the value [`text/x-cortav] or [`text/x-cortav-intent]; alternatively, extensions may be used. it is also possible to indicate the nature of a cortav file without using filesystem metadata. this is done by prefixing the file with a magic byte sequence. the sequence used depends on the encoding. * for UTF-8 and ASCII plain text files, [`%ct[!\\n]] (that is, the byte sequence [`0x25 63 74 0A]) should be used * for C6B+PS files (parastream), the file should begin with the paragraph [`], which equates to the byte sequence [` 0x3E 2E 14 0C 01 04 00 00 00 03 07 3E 2D], including the parastream header). consequently, this sequence should be ignored by a cortav parser at the start of a file (except as an indication of file format). for FreeDesktop-based systems, the [`build/velartrill-cortav.xml] file included in the repository supplies mappings for the extensions and magic byte sequences. a script is also included which can be registered with xdg-open so that double-clicking on a cortav file will render it out and open it in your default web browser. [`$ make install] will generate the necessary FreeDesktop XML files and register them, as well as install the script and the [`cortav] executable itself. for more information see [>refimpl-build building the reference implementation]. 
## structure cortav is based on an HTML-like block model, where a document consists of sections, which are made up of blocks, which may contain a sequence of spans. flows of text are automatically conjoined into spans, and blocks are separated by one or more newlines. this means that, unlike in markdown, a single logical paragraph [*cannot] span multiple ASCII lines. the primary purpose of this was to ensure ease of parsing, but also, both markdown and cortav are supposed to be readable from within a plain text editor. this is the 21st century. every reasonable text editor supports soft word wrap, and if yours doesn't, that's entirely your own damn fault. the first character(s) of every line (the "control sequence") indicates the role of that line. if no control sequence is recognized, the line is treated as a paragraph. the currently supported control sequences are listed below. some control sequences have alternate forms, in order to support modern, readable unicode characters as well as plain ascii text. * [*paragraphs] ([`.] [` ¶] [`❡]): a paragraph is a simple block of text. the period control sequence is only necessary if the paragraph text starts with text that would be interpreted as a control sequence otherwise * newlines [` \\]: inserts a line break into the previous paragraph and attaches the following text. mostly useful for poetry or lyrics * [*section starts] [`#] [`§]: starts a new section. all sections have an associated depth, determined by the number of sequence repetitions (e.g. "###" indicates depth-three). sections may have headers and IDs; both are optional. IDs, if present, are a sequence of raw-text immediately following the hash marks. if the line has one or more space characters followed by styled-text, a header will be attached. the character immediately following the hashes can specify a particular type of section. e.g.: ** [`#] is a simple section break. ** [`#anchor] opens a new section with the ID [`anchor]. 
** [`# header] opens a new section with the title "header". ** [`#anchor header] opens a new section with both the ID [`anchor] and the title "header". ** [`#>conversation] opens a blockquote section named [`conversation] without a header. * [*nonprinting sections] ([`^]): sometimes, you'll want to create a namespace without actually adding a visible new section to the document. you can achieve this by creating a [!nonprinting section] and defining resources within it. nonprinting sections can also be used to store comments, notes, or other information that is useful to have in the source file without it becoming a part of the output ** [`#&id mime] opens a new inline object [`id] of type [`mime]. useful for embedding SVGs. the ID and mime type must be specified. * [*resource] ([`@]): defines a [!resource]. a resource is a file or object that exists outside of the document but which will be included in the document somehow. common examples of resources include images, videos, iframes, or headers/footers. see [>rsrc resources] for more information. * [*lists] ([`*] [`:]): these are like paragraph nodes, but list nodes that occur next to each other will be arranged so as to show they compose a sequence. depth is determined by the number of stars/colons. like headers, a list entry may have an ID that can be used to refer back to it; it is indicated in the same way. if colons are used, this indicates that the order of the items is significant. :-lists and *-lists may be intermixed; however, note that only the last character in the sequence actually controls the depth type. * [*directives] ([`%]): a directive issues a hint to the renderer in the form of an arbitrary string. directives are normally ignored if they are not supported, but you may cause a warning to be emitted where the directive is not supported with [`%!] or mark a directive critical with [`%!!] so that rendering will entirely fail if it cannot be parsed. 
* [*comments] ([`%%]): a comment is a line of text that is simply ignored by the renderer. * [*asides] ([`!]): indicates text that diverges from the narrative, and can be skipped without interrupting it. think of it like block-level parentheses. asides which follow one another are merged as paragraphs of the same aside, usually represented as a sort of box. if the first line of an aside contains a colon, the stretch of styled-text from the beginning of the aside to the colon will be treated as a "type heading," e.g. "Warning:" * [*code] ([`~~~]): a line beginning with ~~~ begins or terminates a block of code. code blocks are by default not parsed, but parsing can be activated by preceding the code block with an [`%[*expand]] directive. the opening line should look like one of the below ** [`~~~] ** [`~~~ language] (markdown-style shorthand syntax) ** [`~~~ \[language\] ~~~] (cortav syntax) ** [`~~~ \[language\] #id ~~~] ** [`~~~ title ~~~] ** [`~~~ title \[language\] ~~~] ** [`~~~ \[language\] title ~~~] ** [`~~~ title \[language\] #id ~~~] *[*reference] (tab): a line beginning with a tab is treated as a "reference." references hold out-of-line metadata for preceding text like links and footnotes. a reference consists of an identifier followed by a colon and an arbitrary number of spaces or tabs, followed by text. whether this text is interpreted as raw-text or styled-text depends on the context in which the reference is used. in encodings without tab characters, two preceding blanks can be used instead. * [*quotation] ([`<]): a line of the form [`<[$name]> [$quote]] denotes an utterance by [$name]. * [*blockquote] ([`>]): alternate blockquote syntax. can be nested by repeating the [`>] character. * [*subtitle] ([`--]): attaches a subtitle to the previous header * [*embed] ([`&]): embeds a referenced object. can be used to show images or repeat previously defined objects like lists or tables, optionally with a caption. 
** [`&$[$macro] [$arg1]|[$arg2]|[$argn]…] invokes a block-level macro with the supplied arguments *** [`&$mymacro arg 1|arg 2|arg 3] ** [`&[$image]] embeds an image or other block-level object. [!image] can be a reference with a url or file path, or it can be an embed section (e.g. for SVG files) ***[`&myimg All that remained of the unfortunate blood magic pageant contestants and audience (police photo)] ** [`&-[$section]] embeds a closed disclosure element. in interactive outputs, this will display as a block [!section] which can be clicked on to view the full contents of the referenced section; in static outputs, it will display as an enclosed box with [$section] as the title text *** [`&-ex-a Prosecution Exhibit A (GRAPHIC CONTENT)] ** [`&+[$section]] is like the above, but the disclosure element is open by default * [*horizontal rule] ([`\---]): inserts a horizontal rule or other context break; does not end the section. must be followed by newline. underlines can also be used in place of dashes. * [*page break] ([`\^^]): for formats that support pagination, like HTML (when printed), indicates that the rest of the current page should be blank. for formats that do not, extra margins will be inserted. does not create a new section * [*page rule] ([`\^^-]): inserts a page break for formats that support them, and a horizontal rule for formats that do not. does not create a new section * [*table cells] ([`+ |]): see [>ex.tab table examples]. * [*equations] ([`=]) block-level equations can be inserted with the [`=] * [*empty lines] (that is, lines consisting of nothing but whitespace) constitute a [!break], which terminates multiline objects that do not have a dedicated termination sequence, for example lists and asides. ## styled text most blocks contain a sequence of spans. these spans are produced by interpreting a stream of [*styled-text] following the control sequence. styled-text is a sequence of codepoints potentially interspersed with escapes. 
an escape is formed by an open square bracket [`\[] followed by a [*span control sequence], and arguments for that sequence like more styled-text. escapes can be nested. * strong {obj *|styled-text}: causes its text to stand out from the narrative, generally rendered as bold or a brighter color. * emphatic {obj !|styled-text}: indicates that its text should be spoken with emphasis, generally rendered as italics * literal {obj `|styled-text}: indicates that its text is a reference to a literal sequence of characters or other discrete token. generally rendered in monospace * variable {obj $|styled-text}: indicates that its text is a stand-in that will be replaced with what it names. generally rendered in italic monospace, ideally of a different color * underline {obj _|styled-text}: underlines the text. use sparingly on text intended for webpages -- underlined text [!is] distinct from links, but underlining non-links is still a violation of convention. * strikeout {obj ~|styled-text}: indicates that its text should be struck through or otherwise indicated for deletion * insertion {obj +|styled-text}: indicates that its text should be indicated as a new addition to the text body. ** consider using a macro definition [`\edit: [~[#1]][+[#2]]] to save typing if you are doing editing work * link \[>[!ref] [!styled-text]\]: produces a hyperlink or cross-reference denoted by [$ref], which may be either a URL specified with a reference or the name of an object like an image or section elsewhere in the document. the unicode characters [`→] and [`🔗] can also be used instead of [`>] to denote a link. * footnote {span ^|ref|[$styled-text]}: annotates the text with a defined footnote. in interactive output media [`\[^citations.qtheo Quantum Theosophy: A Neophyte's Catechism]] will insert a link with the text [`Quantum Theosophy: A Neophyte's Catechism] that, when clicked, causes a footnote to pop up on the screen. 
for static output media, the text will simply have a superscript integer after it denoting where the footnote is to be found. * superscript {obj '|[$styled-text]}: * subscript {obj ,|[$styled-text]}: * raw \[\\[`raw-text]\]: causes all characters within to be interpreted literally, without expansion. the only special characters are square brackets, which must have a matching closing bracket * raw literal \[$\\[!raw-text]\]: shorthand for [\[$[\…]]] * macro [`\{[!name] [!arguments]\}]: invokes a [>ex.mac macro], specified with a reference * argument {obj #|var}: in macros only, inserts the [$var]-th argument. otherwise, inserts a context variable provided by the renderer. * raw argument {obj ##|var}: like above, but does not evaluate [$var]. * term {obj &|name}, {span &|name|[$expansion]}: quotes a defined term with a link to its definition, optionally with a custom expansion of the term (for instance, to expand the first use of an acronym) * inline image {obj &@|name}: shows a small image or other object inline. the unicode character [`🖼] can also be used instead of [`&@]. * unicode codepoint {obj U+|hex-integer}: inserts an arbitrary UCS codepoint in the output, specified by [$hex-integer]. lowercase [`u] is also legal. * math mode {obj =|equation}: activates additional transformations on the span to format it as a mathematical equation; e.g. [`*] becomes [`×] and [`/] --> [`÷]. * extension {span %|ext|…}: invokes extension named in [$ext]. [$ext] will usually be an extension name followed by a symbol (often a period) and then an extension-specific directive, although for some simple extensions it may just be the plain extension name. further syntax and semantics depend on the extension. this syntax can also be used to apply formatting specific to certain renderers, such as assigning a CSS class in the [`html] renderer ([`\[%html.myclass my [!styled] text]]). 
* critical extension {span %!|ext|…}: like [!extension], but will trigger an error if the requested extension is not available * extension text {span %:|ext|styled-text}: like [!extension], but when the requested extension is not present, [$styled-text] will be emitted as-is. this is a better way to apply CSS classes, as the text will still be visible when rendered to formats other than HTML. * inline comment {obj %%|...}: ignored. useful for editorial annotations not intended to be part of the rendered product. span: [` \[[*[#1]][$[#2]] [#3]\]] obj: [` \[[*[#1]][$[#2]]\]] ##ident identifiers any identifier (including a reference) that is defined within a named section must be referred to from outside that section as [`[!sec].[!obj]], where [$sec] is the ID of the containing section and [$obj] is the ID of the object one wishes to reference. ##rsrc resources a [!resource] represents content that is not encoded directly into the source file, but which is embedded by some means in the output. resources can either be [!embedded], in which case they are compiled into the final document itself, or they can be [!linked], in which case the final document only contains a URI or similar tag referencing the resource. not all render backends support both linking and embedding, nor do all backends support all object types (for instance, [`groff] does not support video embedding.) a resource definition is begun by a line consisting of an [`@] sign and an [>ident identifier]. this line is followed by any number of parameters. a parameter is a line beginning with a single tab, a keyword, a colon, and then a value. additional lines can be added to a parameter by following it with a line that consists of two tabs followed by the text you wish to add. (this is the same syntax used by references.) 
a resource definition is terminated by a break, or any line that does not begin with a tab a resource definition in use looks like this: ~~~ this is a demonstration of resources @smiley src: link image/webp http://cdn.example.net/img/smile.webp link image/png file:img/smile.png embed image/gif file img/smile.gif desc: the Smiling Man would like to see you in his office here is the resource in span context [&smiley] and here it is in block context: &smiley ~~~ rendered as HTML, this might produce the following: ~~~ <style> .res-smiley { content: image-set( url(http://cdn.example.net/img/smile.webp) type(image/webp), url(img/smile.png) type(image/png), url(/* … */) type(image/gif) ); /* this will actually be repeated with a -webkit- prefix */ } </style> <p>this is a demonstration of resources</p> <p>here is the resource in span context: <span class="res-smiley"></span></p> <p>and here it is in block context:</p> <div class="res-smiley"></div> ~~~ note that empty elements with CSS classes are used in the output, to avoid repeating long image definitions (especially base64 inline encoded ones!) ### supported parameters * [`src] (all): specifies where to find the file, what it is, and how to embed it. each line of [`src] should consist of three whitespace-separated words: embed method, MIME type, and URI. ** embed methods *** [`local]: loads the resource at build time and embeds it into the output file. not all implementations may allow loading remote network resources at build time. *** [`remote]: only embeds a reference to the location of the resource. use this for e.g. live iframes, dynamic images, or images hosted by a CDN. *** [`auto]: embeds a reference in file formats where that's practical, and use a remote reference otherwise. ** MIME types: which file types are supported depends on the individual implementation and renderer backend; additionally, extensions can add support for extra types. 
MIME-types that have no available handler will, where possible, result in an attachment that can be extracted by the user, usually by clicking on a link. however, the following should be usable with all compliant implementations *** [`image/*] (graphical outputs only) *** [`video/*] (interactive outputs only) *** [`image/svg+xml] is handled specially for HTML files, and may or may not be compatible with other renderer backends. *** [`font/*] can be used with the HTML backend to reference a web font *** [`font/woff2] can be used with the HTML backend to reference a web font *** [`text/plain] (will be inserted as a preformatted text block) *** [`text/css] (can be used when producing HTML files to link in an extra stylesheet, either by embedding it or referencing it from the header) *** [`text/x-cortav] (will be parsed and inserted as a formatted text block; context variables can be passed to the file with [`ctx.[$var]] parameters) *** any MIME-type that matches the type of file being generated by the renderer can be used to include a block of data that will be passed directly to the renderer. ** URI types: additional URI types can be added by extensions or different implementations, but every compliant implementation must support these URIs. *** [`http], [`https]: accesses resources over HTTP. add a [`file] fallback if possible for the benefit of renderers/viewers that do not have internet access abilities. *** [`file]: references local files. absolute paths should begin [`file:/]; the slash should be omitted for relative paths. note that this doesn't have quite the same meaning as in HTML -- [`file] can (and usually should be) used with HTML outputs to refer to resources that reside on the same server. a cortav URI of [`file:/etc/passwd] will actually result in the link [`/etc/passwd], not [`file:///etc/passwd] when converted to HTML. generally, you only should use [`http] when you're referring to a resource that exists on a different domain. 
*** [`name]: a special URI used generally for referencing resources that are already installed on a target system and do not need to be embedded or linked, the name and type are enough for a renderer on another machine to locate the correct resource. this is useful mostly for [>fonts fonts], where it's more typical to refer to fonts that are installed on your system rather than providing paths to font files. *** [`gemini]: accesses resources over the gemini protocol. currently you should really only use this for [`local] resources unless you're using the gemtext renderer backend, since nothing but gemini browsers are liable to support this protocol. * [`desc]: supplies a narrative description of the resources, for use as an "alt-text" when the image cannot be loaded and for screenreaders. * [`detail]: supplies extra narrative commentary that is displayed contextually, e.g. when the user hovers her mouse cursor over the embedded object. note that in certain cases, full MIME types do not need to be used. say you're defining a font with the [`name] URI -- you can't necessarily know what file type the system fonts on another computer are going to be. in this case, you can just write [`font] instead of [`font/ttf] or [`font/woff2] or similar. all cortav needs to know in this case is what abstract kind of object you're referencing. ##ctxvar context variables context variables are provided so that cortav renderers can process templates. certain context variables are provided for by the standard. you can test for the presence of a context variable with the directive [`%[*when] ctx [$var]]. context variables are accessed with the [` \[#[$name]\]] span. 
* {def cortav.file} the name of the file currently being rendered * {def cortav.path} the absolute path of the file currently being rendered * {def cortav.time} the current system time in the form [`[#cortav.time]] * {def cortav.date} the current system date in the form [`[#cortav.date]] * {def cortav.datetime} the current system date and time represented in the locale or system-standard manner (e.g. [`[#cortav.datetime]]) * {def cortav.page} the number of the page currently being rendered * {def cortav.id} the identifier of the renderer * {def cortav.hash} the SHA3 hash of the source file being rendered def: [*[#1]]: on systems with environment variables, these may be accessed as context variables by prefixing their name with [`env.]. different renderers may provide context in different ways, such as from command line options or a context file. any predefined variables should carry an appropriate prefix to prevent conflation. ##fonts fonts for output backends that support font specification, cortav provides a sophisticated font management system by means of the [!font stack]. when a document parse begins, the font stack is empty (unless a default font has already been loaded by an intent file). when the font stack is empty, cortav does not include font specifications in its output, and thus will use whatever the default of the various rendering programs is. to use fonts, we first have to define the fonts as [>rsrc resources]. ~~~cortav %% first, we create a new section to namespace the fonts #^fonts %% we then define each font as a resource @serif src: auto font name:Alegreya embed font/ttf file:project-fonts/alegreya.ttf link font/woff2 file:/assets/font/alegreya.woff2 auto font name:Times New Roman @sans src: link font name:Alegreya Sans link font name:Open Sans link font name:sans-serif ~~~ here we have defined two font families, [`fonts.serif] and [`fonts.sans]. each contains a list of references to fonts which will be tried in order. 
for example, this could be translated into the following CSS: ~~~css @font-face { font-family: "fontdef-serif"; src: local("Alegreya"), url("data:font/ttf;base64,…") format("font/ttf"), url("/assets/font/alegreya.woff2") format("font/woff2"), local("Times New Roman"); } @font-face { font-family: "fontdef-sans"; src: local("Alegreya Sans"), local("Open Sans"), local("sans-serif"); } ~~~ there are two things that aren't super clear from the CSS, however. notice how we used [`auto] on a couple of those specs? this means it's up to the renderer to decide whether to link or embed the font. for html, a font specified by name can't really be embedded, but for some file formats, it can be. [`auto] lets us produce valid HTML while still taking advantage of font embedding in other formats. now that we have our font families defined, we can use their identifiers with the [`%[*font]] directive to control the font stack. the first thing we need to do is push a new font context. there's two ways we can do this: fnd: [`%[*font] [#1]] * {fnd dup} will create a copy of the current font context, allowing us to make some changes and then revert later with the {fnd pop} command. this isn't useful in our case, however, because right now the stack is empty; there's nothing to duplicate. * {fnd new} will create a brand new empty context for us to work with and push it to the stack. this can also be used to temporarily revert to the system default fonts, and then switch back with {fnd pop}. * {fnd set} changes one or more entries in the current font context. it can take a space-separated list of arguments in the form [`[$entry]=[$font-id]]. the supported entries are: ** [`body]: the fallback font. 
if only this is set in a given font context, it will be used for everything ** [`paragraph]: the font used for normal paragraphs ** [`header]: the font used in headers ** [`subtitle]: the font used in subtitles ** [`list]: the font used in lists ** [`table]: the font used in tables ** [`caption]: the font used for captions * {fnd pop} removes the top context from the font stack. note that extensions may consult the font context for their entries specific to them. for instance, [>toc toc] checks for [`toc] before falling back to [`body] and then the default font. these commands are enough to give us a very flexible setup. consider the following: ~~~cortav %% let's pretend we've also defined the fonts 'title', 'cursive', and 'thin' %font new %font set body=sans header=serif %font dup %font header=title # lorem ipsum dolor %font pop %% we've now set up a default font context, created a new context for the title of the %% document, and then popped it back off after the title was inserted so that our %% first font context is active again. everything after that last '%font pop' will %% be printed in sans, except for headers, which will be printed in 'serif' lorem ipsum dolor sit amet, sed consectetur apiscing elit… %font dup %font set body=cursive > sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. > Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut %font pop %% above we created a blockquote whose text is printed in a cursive font; afterwards, %% we simply remove this new context— and everything is back the way it was at "lorem ipsum" %% the font mechanism is at its most powerful when used with multiline macros: cursive-quote: %font dup %font set body=cursive > [#1] %font pop %% now, whenever we want a block with a cursive body, we can simply invoke &$cursive-quote Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. 
Excepteur sint occaecat cupidatat non proident %% without affecting the overall font context. in fact, since 'cursive-quote' creates %% its context using 'dup', it would import all font specifications besides 'body' %% from the environment it is invoked in ~~~ ##dir directives d: [`%[*[##1]]] * {d author} encodes document authorship. multiple author directives can be issued to add additional coauthors * {d cols} specifies the number of columns the next object should be rendered with * {d include} transcludes another file * {d import} reads in the contents of another file as an embeddable section * {d quote} transcludes another file, without expanding the text except for paragraphs * {d embed}, where possible, embeds another file as an object within the current one. in HTML this could be accomplished with e.g. an iframe. * {d expand} causes the next object (usually a code block) to be fully expanded when it would otherwise not be * {d font} controls the font stack, for outputs that support changing fonts. see [>fonts fonts] for more information. * {d lang} changes the current language, which is used by extensions to e.g. control typographical conventions, and may be encoded into the output by certain renderers (e.g. HTML). note that quotes and blockquotes can be set to a separate language with a simpler syntax. the language should be notated using IETF language tags ** {d lang is x-ranuir-CR8} sets the current language to Ranuir as spoken in the Central Worlds, written in Corran and encoded using UTF-8. this might be used at the top of a document to set its primary language. ** {d lang push gsw-u-sd-chzh} temporarily switches to Zürich German, e.g. to quote a German passage in an otherwise Ranuir document ** {d lang sec en-US} switches to American English for the duration of a section. does not affect the language stack. ** {d lang pop} drops the current language off the language stack, returning to whatever was pushed or set before it. 
this would be used, for instance, at the end of a passage * {d pragma} supplies semantic data about author intent, the kind of information the document contains and hints about how it should be displayed to the user. think of them like offhand remarks to the renderer -- there's no guarantee that it'll pay any attention, but if it does, your document will look better. pragmas have no scope; they affect the entire document. the pragma function exists primarily as a means to allow parameters that would normally need to be specified on e.g. the command line to be encoded in the document instead in a way that multiple implementations can understand. a few standard pragmas are defined. ** {d pragma layout} gives a hint on how the document should be layed out. the first hint that is understood will be applied; all others will be discarded. standard hints include: *** [`essay] *** [`narrative] *** [`screenplay]: uses asides to denote actions, quotes for dialogue *** [`stageplay]: uses asides to denote actions, quotes for dialogue *** [`manual] *** [`glossary] *** [`news] *** [`book]: section depths 1-3 gain additional semantics ***: [*part]: the section gets a page to itself to announce the beginning of a new part or appendix ***: [*chapter]: the section is preceded by a page break ***: [*heading]: the section can occur on the same page as text and headings from other sections ** {d pragma accent} specifies an accent hue (in degrees around the color wheel) for renderers which support colorized output ** {d pragma accent-spread} is a factor that controls the "spread" of hues used in the document. if 0, only the accent color will be used; if larger, other hues will be used in addition to the primary accent color. 
** {d pragma dark-on-light on\|off} controls whether the color scheme used should be light-on-dark or dark-on-light ** {d pragma page-width} indicates how wide the pages should be ** {d pragma title-page} specifies a section to use as a title page, for renderer backends that support pagination ! note on pragmata: particularly when working with collections of documents, you should not keep formatting metadata in the documents themselves! the best thing to do is to have a makefile for compiling the documents using whatever tools you want to support, and encoding the rendering options in this file (for the reference implementation this currently means as command line arguments, but eventually it will support intent files as well) so they can all be changed in one place; pragmas should instead be used for per-document [*overrides] of default settings. ! a workaround for the lack of intent files in the reference implementation is to have a single pseudo-stylesheet that contains only {d pragma} statements, and then import this file from each individual source file using the {d include} directive. this is suboptimal and recommended only when you need to ensure compatibility between different implementations. ! when creating HTML files, an even better alternative may be to turn off style generation entirely and link in an external, hand-written CSS stylesheet. this is generally the way you should compile sources for existing websites if you aren't going to write your own extension. ##ex examples ~~~ blockquotes #bq [cortav] ~~~ the following excerpts of text were recovered from a partially erased hard drive found in the Hawthorne manor in the weeks after the Incident. context is unknown. ................................................................................ 
+:english :| honor | +:ranuir :| tef | +:zia ţai :| pang | +:thalishte:| mbecheve | ~~~ ##extns extensions the cortav specification also specifies a number of extensions that do not have to be supported for a renderer to be compliant. the extension mechanism supports the following directives. * inhibits: prevents an extension from being used even where available * uses: turns on an extension that is not specified by the user operating the renderer (e.g. on the command line) * needs: causes rendering to fail with an error if the extensions are not available where possible, instead of [`needs [$x y z]], the directive [`when has-ext [$x y z]] should be used instead. this causes the next section to be rendered only if the named extensions are available. [`unless has-ext [$x y z]] can be used to provide an alternative format. extensions are mainly interacted with through directives. all extension directives must be prefixed with the name of the extension. the reference implementation seeks to support all standardized extensions. it's not quite there yet, however. ###toc toc sections that have a title will be included in the table of contents. the table of contents is by default inserted at the break between the first level-1 section and the section immediately following it. you may instead place the directive [`toc] where you wish the TOC to be inserted, or suppress it entirely with [`inhibits toc]. note that some renderers may not display the TOC as part of the document itself. toc provides the directives: * [`%[*toc]]: insert a table of contents in the specified position. this can be used more than once, but doing so may have confusing, incorrect, or nonsensical results under some renderers, and some may just ignore the directive entirely * [`%[*toc] mark [$styled-text]]: inserts a TOC entry with the label [$styled-text] pointing to the current location. this can be used to e.g. 
mark noteworthy images, instances of long quotes or literal blocks, or functions inside an expanded code block. * [`%[*toc] name [$id styled-text]]: like [`%[*toc] mark] but allows an additional [$id] parameter which specifies the ID the renderer will assign to an anchor element. this is not meaningful for all renderers and when it is, it is up to the renderer to decide what it means. ** the [*html] render backend interprets [$id] as the [`id] attribute for the anchor tag ** the [*groff] render backend ignores [$id] ###tsmog transmogrify a cortav renderer may automatically translate punctuation marks or symbol sequences to superior representations depending on their context. to be compliant this extension should implement, at minimum: * smart quotes (with consideration for the typographical conventions of languages like German or Spanish) ** {dir.d transmogrify|language [$lang]} can be used to explicitly set the language; otherwise, it must be determined from the value of {dir.d pragma|lang}. if this is not present, implementations may fall back on their own methods for determining the language in use, such as command-line flags. * multigraph to glyph conversion, including at least: ** [`\--] --> "—" ** [`\-->] --> "→" ** [`\<--] --> "←" an escape character before any of the sequence characters should prevent the sequence from being rendered. raw nodes (that is, [`\[\…\]] and [`\[`\…\]]) should not be scanned for transmogrification, nor should the contents of code blocks unless marked with the [`%[*expand]] directive transmogrification shall only take place after all other parsing steps are completed. ###hilite hilite code can be highlighted according to the formal language it is written in. a compliant hilite implementation must implement basic keyword, symbol, comment, pragma, and literal highlighting for the following formal languages. 
* C * [>lua Lua] * [>html HTML] * [>scheme Scheme] * [>terra Terra] * [>libconfig libconfig] lua: https://lua.org scheme: https://call-cc.org terra: https://terralang.org html: https://dev.w3.org/html5/spec-LC/ libconfig: http://hyperrealm.github.io/libconfig/ the highlighter should make use of semantic HTML tags like [`<var>] where possible. ###lua lua renderers with a lua interpreter available can evaluate lua code: * [`%lua use [!file]]: evaluates [$file] and makes its definitions available * [`\[%lua raw [!script]\]]: evaluates [$script] and emits the string it returns (if any) in raw span context. * [` \[%lua exp [!script]\]]: evaluates [$script] and emits the string it returns (if any) in expanded span context. * [`%lua raw [!script]]: evaluates [$script] and emits the string array it returns (if any) in raw block context. * [`%lua exp [!script]]: evaluates [$script] and emits the string array it returns (if any) in expanded block context. the interpreter should provide a [`cortav] table with the objects: * ctx: contains context variables used files should return a table with the following members * macros: an array of functions that return strings or arrays of strings when invoked. these will be injected into the global macro namespace. ###ts ts the [*ts] extension allows documents to be marked up for basic classification constraints and automatically redacted. if you are seriously relying on ts for confidentiality, make damn sure you start the file with [$%[*requires] ts], so that rendering will fail with an error if the extension isn't supported. ts enables the directives: * [`%[*ts] class [$scope level] ([$styled-text])]: indicates a classification level for either the whole document (scope [$doc]) or the next section (scope [$sec]). if the ts level is below [$level], the section will be redacted or rendering will fail with an error, as appropriate. if styled-text is included, this will be treated as the name of the classification level. 
* [`%[*ts] word [$scope word] ([$styled-text])]: indicates a codeword clearance that must be present for the text to render. if styled-text is present, this will be used to render the name of the codeword instead of [$word]. * [`%[*when] ts level [$level]] * [`%[*when] ts word [$word]] ts enables the spans: * [`\[🔒#[!level] [$styled-text]\]]: redacts the span if the security level is below that specified. * [`\[🔒.[!word] [$styled-text]\]]: redacts the span if the specified codeword clearance is not enabled. (the padlock emoji is shorthand for [`%[*ts]].) ts redacts spans securely; that is, they are simply replaced with an indicator that they have been redacted, without visually leaking the length of the redacted text. ~~~#ts-example example [cortav] ~~~ %ts word doc sorrowful-pines SORROWFUL PINES # intercept R1440 TCT S3 ................................................................................ <B> Hyacinth, I told you not to contact me without— <A, shouting> god DAMMIT woman I am trying to SAVE your worthless skin <B> Hyacinth! your Godforsaken scrambler! <A> …oh, [!fuck]. (signal lost) ~~~ #refimpl reference implementation the cortav standard is implemented in [`cortav.lua], found in this repository. only the way [`cortav.lua] interprets the cortav language is defined as a reference implementation; other behaviors are simply how [`cortav.lua] implements the specification and may be copied, ignored, tweaked, violently assaulted, or used as inspiration by a compliant parser. the reference implementation can be used both as a lua library and from the command line. [`cortav.lua] contains the parser and renderers, [`ext/*] contain various extensions, [`sirsem.lua] contains utility functions, and [`cli.lua] contains the CLI driver. ##refimpl-lib lua library there are various ways to use cortav from a lua script; the simplest however is probably to precompile your script with luac and link in the necessary components of the implementation. 
for instance, say we have the following program ~~~ stdin2html.lua [lua] ~~~ local ct = require 'cortav' local mode = {} local doc = ct.parse(io.stdin, {file = '(stdin)'}, mode) doc.stage = { ................................................................................ and the only extension we need is the table-of-contents extension. our script can be translated into a self-contained lua bytecode blob with the following command ~~~ $ luac -s -o stdin2html.lc $cortav_repo/{sirsem,cortav,ext/toc}.lua stdin2html.lua ~~~ and can then be operated with the command [`lua stdin2html.lc], with no further need for the cortav repository files. note that the order of the arguments to the [`luac] command is important! [`sirsem.lua] must come first, followed by [`cortav.lua], followed by any extensions. your driver script (i.e. the script with the entry point into the application) should always come last. ###refimpl-tools building custom tools generally, most existing file-format conversion tools (cmark, pandoc, and so on) have a crucial limitation: they hardcode specific assumptions like document structure. this means that the files they output are generally not suitable as-is for the users' purposes, and require further munging, usually by hateful shell or perl scripts. some tools do provide libraries for end users to use as a basis for designing their own tools, but these are often limited, and in any case the user ends up having to write their own (non-standard) driver. it's no surprise that very few people end up doing this. [`cortav.lua]'s design lends itself to a more elegant solution. one can of course write their own driver using [`cortav] as a library, but most of the time when you're compiling document sources, you just want a binary you can run from the command line or a makefile. with [`cortav.lua], you can extend its capabilities easily while keeping the same driver. 
in the [`cortav] spec, extensions are mostly intended to give different implementations the ability to offer extra capabilities, but the reference implementation uses an extension architecture that makes it easy to write and add your own. for each type of new behavior you want to implement, just create a new extension and list it on the make command line: ~~~ $ nvim ~/dev/my-cortav-exts/imperial-edict.lua $ make cortav extens+=$HOME/dev/my-cortav-exts/*.lua ~~~ the cortav binary this produces will have all the extra capabilities you personally need, without any need to fork [`cortav.lua] itself or even touch the repository. there's no reason [`cortav.lua] shouldn't be able to load extensions at runtime as well; i just haven't implemented this behavior yet. it probably would only take a few extra lines of code tho. i will eventually document the extension API, but for now, look at [`ext/toc.lua] for a simple example of how to register an extension. ##refimpl-cli command line driver the [$cortav.lua] command line driver can be run from the repository directory with the command [`lua ./cli.lua], or by first compiling it into a bytecode form that links in all its dependencies. this is the preferred method for installation, as it produces a self-contained executable which loads more quickly, but running the driver in script form may be desirable for development or debugging. the repository contains a GNU makefile to automate compilation of the reference implementation on unix-like OSes. simply run [`$ make cortav] or [`$ gmake cortav] from the repository root to produce a self-contained bytecode executable that can be installed anywhere on your filesystem, with no dependencies other than the lua interpreter. ! note that the makefile strips debugging symbols to save space, so running [`cli.lua] directly as a script may be helpful if you encounter errors and need stacktraces or other debugging information. 
henceforth it will be assumed that you have produced the [`cortav] executable and placed it somewhere in your [$$PATH]; if you are instead running [`cortav.lua] directly as an interpreted script, you'll need to replace [`$ cortav] with [`$ lua ./cli.lua] in incantations. when run without arguments, [`cortav.lua] will read input from standard input and write to standard output. alternately, a source file can be given as an argument. to write to a specific file instead of the standard output stream, use the [`-o [!file]] flag. ~~~ $ cortav readme.ct -o readme.html # reads from readme.ct, writes to readme.html $ cortav -o readme.html # reads from standard input, writes to readme.html $ cortav readme.ct # reads from readme.ct, writes to standard output ~~~ ###refimpl-build building the command line driver is built and installed with a GNU [$make] script. this script accepts the variables shown below with their default values: + prefix | [`[$$HOME]/.local] | the path under which the package will be installed + build | [`build] | the directory where generated objects will be placed; useful for out-of-tree builds + bin-prefix | [`[$$prefix]/bin] | directory to install the executables to + default-format-flags | [`-m html:width 35em] | a list of flags that will be passed by the viewer script to [`cortav] when generating an HTML file the following targets are supplied to automate the build: * [`install] builds everything, installs the executable and the viewer script to [$$bin-prefix], and registers the viewer script with XDG * [`excise] deletes everything installed and deregisters the file handlers (note that the same variables must be passed to [`excise] as were passed to [`install]!) * [`clean] deletes build artifacts from the [$$build] directory like it was never there * [`wipe] is equivalent to [`$ make excise && make clean] ###refimpl-switches switches [`cortav.lua] offers various switches to control its behavior. 
+ long + short + function + | [`--out [!file]] :|:[`-o]:| sets the output file (default stdout) | | [`--log [!file]] :|:[`-l]:| sets the log file (default stderr) | | [`--define [!var] [!val]] :|:[`-d]:| sets the context variable [$var] to [$val] | | [`--mode-set [!mode]] :|:[`-y]:| activates the [>refimpl-mode mode] with ID [!mode] | [`--mode-clear [!mode]] :|:[`-n]:| disables the mode with ID [!mode] | | [`--mode [!id] [!val]] :|:[`-m]:| configures mode [!id] with the value [!val] | | [`--mode-set-weak [!mode]] :|:[`-Y]:| activates the [>refimpl-mode mode] with ID [!mode] if the source file does not specify otherwise | [`--mode-clear-weak [!mode]] :|:[`-N]:| disables the mode with ID [$mode] if the source file does not specify otherwise | [`--mode-weak [!id] [!val]] :|:[`-M]:| configures mode [$id] with the value [$val] if the source file does not specify otherwise | [`--help] :|:[`-h]:| display online help | | [`--version] :|:[`-V]:| display the interpreter version | ###refimpl-mode modes most of [`cortav.lua]'s implementation-specific behavior is controlled by use of [!modes]. these are namespaced options which may have a boolean, string, or numeric value. boolean modes are set with the [`-y] [`-n] flags; other modes use the [`-m] flags. most modes are defined by the renderer backend. the following modes affect the behavior of the frontend: + ID + type + effect | [`render:format]:| string | selects the [>refimpl-rend renderer] (default [`html]) | [`parse:show-tree]:| flag | dumps the parse tree to the log after parsing completes ##refimpl-rend renderers [`cortav.lua] implements a frontend-backend architecture, separating the parsing stage from the rendering stage. this means new renderers can be added to [`cortav.lua] relatively easily. currently, only an [>refimpl-rend-html HTML renderer] is included; however, a [`groff] backend is planned at some point in the future, so that PDFs and manpages can be generated from cortav files. 
###refimpl-rend-html html the HTML renderer is activated with the incantation [`-m render:format html]. it is currently the default backend. it produces a single HTML file, optionally with CSS styling data, from a [`.ct] input file. ####refimpl-rend-html-modes modes [`html] supports the following modes: * string (css length) [`html:width] sets a maximum width for the body content in order to make the page more readable on large displays * number [`html:accent] applies an accent hue to the generated webpage. the hue is specified in degrees, e.g. [$-m html:accent 0] applies a red accent. * flag [`html:dark-on-light] uses dark-on-light styling, instead of the default light-on-dark * flag [`html:fossil-uv] outputs an HTML snippet suitable for use with the Fossil VCS webserver. this is intended to be used with the unversioned content mechanism to host rendered versions of documentation written in cortav that's stored in a Fossil repository. * number [`html:hue-spread] generates a color palette based on the supplied accent hue. the larger the value, the more the other colors diverge from the accent hue. * string [`html:link-css] generates a document linking to the named stylesheet * flag [`html:gen-styles] embeds appropriate CSS styles in the document (default on) * flag [`html:snippet] produces a snippet of html instead of an entire web page. note that proper CSS scoping is not yet implemented (and can't be implemented hygienically since [$scoped] was removed 😢) * string [`html:title] specifies the webpage titlebar contents (normally autodetected from the document based on headings or directives) * string [`html:font] specifies the default font to use when rendering as a CSS font specification (e.g. 
[`-m html:font 'Alegreya, Junicode, Georgia, "Times New Roman"']) ~~~ $ cortav readme.ct --out readme.html \ -m render:format html \ -m html:width 40em \ -m html:accent 80 \ -m html:hue-spread 35 \ -y html:dark-on-light # could also be written as: $ cortav readme.ct -ommmmy readme.html render:format html html:width 40em html:accent 80 html:hue-spread 35 html:dark-on-light ~~~ #### directives [`html] supplies the following render directives. * [`%[*html] link [$rel] [$mime] [$href]]: inserts a [`<link>] tag in the header, for example, to link in an alternate stylesheet, or help feed readers find your atom or rss feed. ** [`%[*html] link alternate\\ stylesheet text/css /res/style2.css] ** [`%[*html] link alternate application/atom+xml /feed.atom] * [`%[*html] style [$id]]: adds the stylesheet referenced by [$id] into the document stylesheet. the stylesheet is specified using a [>rsrc resource]. #### stylesheets the [`html] backend offers some additional directives for external CSS files that are embedded into the document, in order to simplify integration with the accent mechanism. these are: * [`@[*fg]]: resolves to a color expression denoting the selected foreground color. equivalent to [`[*tone](1)] * [`@[*bg]]: resolves to a color expression denoting the selected background color. equivalent to [`[*tone](0)] * [`@[*tone]\[/[$alpha]\]([$fac] \[[$shift] \[[$saturate]\]\] )]: resolves to a color expression. [$fac] is a floating-point value scaling from the background color to the foreground color. [$shift] is a value in degrees controlling how far the hue will shift relative to the accent. [$saturate] is a floating-point value controlling how saturated the color is. ###refimpl-rend-groff groff the [`groff] backend produces a text file suitable for supplying to a [`groff] compiler. 
[`groff] is the GNU implementation of a venerable typesetting system from the early days of UNIX. as a convenience, the groff backend supports two modes of operation: it can write a [`groff] file directly to disk, or it can automatically launch a [`groff] process with the appropriate command line options and environment variables. this second mode is recommended unless you're rendering very large files to multiple formats, as [`groff] invocation is nontrivial and it's best to let the renderer handle that for you. ####refimpl-rend-groff-modes modes [`groff] supports the following modes: * string [`groff:annotate] controls how footnotes will be handled. ** [`footnote] places footnotes at the end of the page they are referenced on. if the same footnote is used on multiple pages, it will be duplicated on each. ** [`secnote] places footnotes at the end of each section. footnotes used in multiple sections will be duplicated for each ** [`endnote] places all footnotes at the end of the rendered document. * string [`groff:dev] names an output device (such as [`dvi] or [`pdf]). if this mode is present, [`groff] will be automatically invoked * string [`groff:title-page] takes an identifier that names a section. this section will be treated as the title page for the document. ### directives * [`%[*pragma] title-page [$id]] sets the title page to section [$id]. this causes it to be specially formatted, with a large, centered title and subtitle. ### quirks if the [`toc] extension is active but no [`%[*toc]] directive is provided, the table of contents will be given its own section at the start of the document (after the title page, if any). ## further directions ### additional backends it is eventually intended to support the following backends, if reasonably practicable. * [*html]: emit HTML and CSS code to typeset the document. [!in progress] * [*svg]: emit SVG, taking advantage of its precise layout features to produce a nicely formatted and paginated document. 
pagination can be accomplished through emitting multiple files or by assigning one layer to each page. [!long term] * [*groff]: the most important output backend, rivalling [*html]. will allow the document to be typeset in a wide variety of formats, including PDF and manpage. [!short term] * [*gemtext]: essentially a downrezzing of cortav to make it readable to Gemini clients * [*ast]: produces a human- and/or machine-readable dump of the document's syntax tree, to aid in debugging or for interoperation with systems that do not support [`cortav] directly. mode [`ast:repr] will allow selecting formats for the dump. [`ast:rel] can be [`tree] (the default) to emit a hierarchical representation, or [`flat] to emit an array of nodes that convey hierarchy [^flatdoc by naming one another], rather than being placed inside one another. [`tree] is easier for humans to parse; [`flat] is easier for computers. origin information can be included for each node with the flag [`ast:debug-syms], but be aware this will greatly increase file size. ** [`tabtree] [!(default)]: a hierarchical tree view, with the number of tabs preceding an item showing its depth in the tree ** [`sexp] ** [`binary]: emit a raw binary format that is easier for programs to read. maybe an lmdb or cdb file? ** [`json] flatdoc: ~~~flat sexp example output [scheme]~~~ (nodes (section (id . "section1") (anchor "introduction") (kind . "ordinary") (label . "section1-heading") (nodes "section1-heading" "para1" "para2" "hzrule" "para3")) (section (id . "section2") (kind . "ordinary") (label . "section2-heading") (nodes "para4" "hzrule" "para5" "list1")) (block list (id . "list1") (kind . "ordered") (nodes "para6" "list2" "para7")) (block list (id . "list2") (kind . "unordered") (nodes "para8" "para9" "para10")) (block para (id . "para1") (nodes "text1" "format1" "text3" "footnote1" "text4")) (block label (id . "section1-heading") (nodes "section1-heading-text")) (text (id . 
"section1-heading-text") "Contemplating the Anathema") (text (id . "text1") "Disquieting information has recently been disclosed to virtual journalists of the Giedi Prime infomatrix by sources close to the Hyperion Entity regarding the catastrophic Year of Schisms and the unidentified agents believed to be responsible for memetically engineering the near-collapse of the Church Galactic.") (span format (id . "format1") (style . "emph") (nodes "text2")) (text (id . "text2") "Curiously,") (text (id . "text3") "his Cyber-Holiness") (text (id . "footnote1-caption-text") "Pope Chewbacca III") (span footnote (id . "footnote1") (note . "footnote1-text") (ref . "papal-disclaimer") (nodes "footnote1-caption-text")) (text (id . "text4") "has thus far had little to say on the matter, provoking rampant speculation among the faithful.") (footnote-def (id . "footnote1-def") (nodes "footnote1-text")) (text (id . "footnote1-text") "Currently recognized as legitimate successor to Peter of Terra by 2,756 sects, rejected by 678 of mostly Neo-Lutheran origin, and decried as an antipope by 73, most notably Pope Peter II of Centaurus Secundus, leader of the ongoing relativistic crusade against star systems owned by Microsoft.") ;;; snip ;;; (document (nodes "section1" "section2"))) ~~~ some formats may eventually warrant their own renderer, but are not a priority: * [*text]: cortav source files are already plain text, but a certain amount of layout could be done using ascii art. * [*ansi]: emit sequences of ANSI escape codes to lay out a document in a terminal-friendly way * [*tex]: TeX is an unholy abomination and i neither like nor use it, but lots of people do and if cortav ever catches on, a TeX backend should probably be written eventually. PDF is not on either list because it's a nightmarish mess of a format and groff, which is installed on most linux systems already, can easily generate PDFs ### LCH support right now, the use of color in the HTML renderer is very unsatisfactory. 
the accent mechanism operates on the basis of the CSS HSL function, which is not perceptually uniform; different hues will present different mixes of brightness and some (yellows?) may be ugly or unreadable. the ideal solution would be to simply switch to using LCH based colors. unfortunately, only Safari actually supports the LCH color function right now, and it's unlikely (unless Lea Verou and her husband manage to work a miracle) that Colors Level 4 is going to be implemented very widely any time soon. this leaves us in an awkward position. we can of course do the math ourselves, working in LCH to implement the internal [`@tone] macro, and then "converting" these colors to HSL. unfortunately, you can't actually convert from LCH to HSL; it's like converting from pounds to kilograms. LCH can represent any color the human visual system can perceive; sRGB can't, and CSS HSL is implemented in sRGB. however, we could at least approximate something that would allow for perceptually uniform brightness, which would be an improvement, and this is probably the direction to go in, unless a miracle occurs and [`lch()] or [`color()] pop up in Blink. it may be possible to do a more reasonable job of handling colors in the postscript and TeX outputs. unsure about SVG but i assume it suffers the same problems HTML/CSS do. does groff even support color?? ### intent files there's currently no standard way to describe the intent and desired formatting of a document besides placing pragmata in the source file itself. this is extremely suboptimal, as when generating collections of documents, it's ideal to be able to keep all formatting information in one place. users should also be able to specify their own styling overrides that describe the way they prefer to read [`cortav] files, especially for uses like gemini or gopher integration. 
at some point soon [`cortav] needs to address this by adding intent files that can be activated from outside the source file, such as with a command line flag or a configuration file setting. these will probably consist of lines that are interpreted as pragmata. in addition to the standard intent format however, individual implementations should feel free to provide their own ways to provide intent metadata; e.g. the reference implementation, which has a lua interpreter available, should be able to take a lua script that runs after the parse stage and generates pragmata. this will be particularly useful for the end-user who wishes to specify a particular format she likes reading her files in without forcing that format on everyone she sends the compiled document to, as it will be able to interrogate the document and make intelligent decisions about what pragmata to apply. intent files should also be able to define [>rsrc resources], [>ctxvar context variables], and macros. |
Modified cortav.lua from [eb3cc08f95] to [028f351fed].
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 .. 81 82 83 84 85 86 87 88 89 90 91 92 93 94 ... 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 ... 217 218 219 220 221 222 223 224 225 226 227 228 229 230 ... 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 ... 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 ... 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 ... 686 687 688 689 690 691 692 693 694 695 696 697 698 699 ... 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 ... 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 ... 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 .... 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 .... 
1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 .... 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 .... 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 .... 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 .... 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 .... 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 .... 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 .... 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 .... 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 .... 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 .... 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 .... 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 .... 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 |
-- ~ lexi hale <lexi@hale.su> -- © AGPLv3 -- ? reference implementation of the cortav document language local ss = require 'sirsem' -- aliases for commonly used sirsem funcs local startswith = ss.str.begins local eachcode = ss.str.enc.utf8.each local dump = ss.dump local declare = ss.declare -- make this module available to require() when linked into a lua bytecode program with luac local ct = ss.namespace 'cortav' ct.info = { version = ss.version {0,1; 'devel'}; ................................................................................ end); cli = ss.exnkind 'command line parse error'; mode = ss.exnkind('bad mode', function(msg, ...) return string.format("mode “%s” "..msg, ...) end); unimpl = ss.exnkind 'feature not implemented'; ext = ss.exnkind 'extension error'; } ct.ctx = declare { mk = function(src) return {src = src} end; ident = 'context'; cast = { string = function(me) ................................................................................ table.insert(self.sec.blocks,block) return block end; ref = function(self,id) if not id:find'%.' then local rid = self.sec.refs[id] if self.sec.refs[id] then return self.sec.refs[id] else self:fail("no such ref %s in current section", id or '') end else local sec, ref = string.match(id, "(.-)%.(.+)") local s = self.doc.sections[sec] if s then if s.refs[ref] then return s.refs[ref] else self:fail("no such ref %s in section %s", ref, sec) end else self:fail("no such section %s", sec) end end end }; } ................................................................................ meta = {}; vars = {}; ext = { inhibit = {}; need = {}; use = {}; }; } end; construct = function(me) me.docjob = ct.ext.job('doc', me, nil) end; } -- FP helper functions ................................................................................ 
-- renderer engines function ct.render.html(doc, opts) local doctitle = opts['title'] local f = string.format local ids = {} local canonicalID = {} local function getSafeID(obj) if canonicalID[obj] then return canonicalID[obj] elseif obj.id and ids[obj.id] then local newid local i = 1 repeat newid = obj.id .. string.format('-%x', i) i = i + 1 until not ids[newid] ids[newid] = obj canonicalID[obj] = newid return newid else local cid = obj.id if not cid then local i = 1 repeat cid = string.format('x-%x', i) i = i + 1 until not ids[cid] end ids[cid] = obj canonicalID[obj] = cid return cid end end local langsused = {} local langpairs = { lua = { color = 0x9377ff }; terra = { color = 0xff77c8 }; c = { name = 'C', color = 0x77ffe8 }; html = { color = 0xfff877 }; scheme = { color = 0x77ff88 }; lisp = { color = 0x77ff88 }; fortran = { color = 0xff779a }; python = { color = 0xffd277 }; python = { color = 0xcdd6ff }; } local stylesets = { header = [[ h1,h2,h3,h4,h5,h6 { border-bottom: 1px solid @tone(0.7); } h1 { font-size: 200%; border-bottom-style: double !important; border-bottom-width: 3px !important; margin: 0em -1em; } h2 { font-size: 130%; margin: 0em -0.7em; } h3 { font-size: 110%; margin: 0em -0.5em; } h4 { font-size: 100%; font-weight: normal; margin: 0em -0.2em; } h5 { font-size: 90%; font-weight: normal; } h6 { font-size: 80%; font-weight: normal; } ................................................................................ 
section:target > :is(h1,h2,h3,h4,h5,h6) { } ]]; paragraph = [[ p { margin: 0.7em 0; } section { margin: 1.2em 0; } section:first-child { margin-top: 0; } ]]; accent = [[ body { background: @bg; color: @fg } a[href] { color: @tone(0.7 30); text-decoration-color: @tone/0.4(0.7 30); } a[href]:hover { color: @tone(0.9 30); text-decoration-color: @tone/0.7(0.7 30); } h1 { color: @tone(2); } h2 { color: @tone(1.5); } h3 { color: @tone(1.2); } h4 { color: @tone(1); } h5,h6 { color: @tone(0.8); } ]]; code = [[ code { background: @fg; color: @bg; font-family: monospace; font-size: 90%; padding: 3px 5px; } ]]; abbr = [[ abbr[title] { cursor: help; } ]]; editors_markup = [[]]; block_code_listing = [[ section > figure.listing { font-family: monospace; background: @tone(0.05); color: @fg; padding: 0; margin: 0.3em 0; counter-reset: line-number; position: relative; border: 1px solid @fg; } section > figure.listing>div { white-space: pre-wrap; counter-increment: line-number; text-indent: -2.3em; margin-left: 2.3em; } section > figure.listing>:is(div,hr)::before { width: 1.0em; padding: 0.2em 0.4em; text-align: right; display: inline-block; background-color: @tone(0.2); border-right: 1px solid @fg; content: counter(line-number); margin-right: 0.3em; } section > figure.listing>hr::before { color: transparent; padding-top: 0; padding-bottom: 0; } section > figure.listing>div::before { color: @fg; } section > figure.listing>div:last-child::before { padding-bottom: 0.5em; } section > figure.listing>figcaption:first-child { border: none; border-bottom: 1px solid @fg; } section > figure.listing>figcaption::after { display: block; float: right; font-weight: normal; font-style: italic; font-size: 70%; padding-top: 0.3em; } section > figure.listing>figcaption { font-family: sans-serif; font-size: 120%; padding: 0.2em 0.4em; border: none; color: @tone(2); } section > figure.listing > hr { border: none; margin: 0; height: 0.7em; counter-increment: line-number; } ]]; } 
................................................................................ stylesets = stylesets; stylesets_active = stylesNeeded; obj_htmlid = getSafeID; -- remaining fields added later } local renderJob = doc:job('render_html', nil, render_state_handle) local runhook = function(h, ...) return renderJob:hook(h, render_state_handle, ...) end local function getSpanRenderers(procs) local tag, elt, catenate = procs.tag, procs.elt, procs.catenate local htmlDoc = function(title, head, body) return [[<!doctype html>]] .. tag('html',nil, tag('head', nil, elt('meta',{charset = 'utf-8'}) .. (title and tag('title', nil, title) or '') .. (head or '')) .. tag('body', nil, body or '')) end local span_renderers = {} local function htmlSpan(spans, block, sec) local text = {} for k,v in pairs(spans) do if type(v) == 'string' then table.insert(text,(v:gsub('[<>&"]', function(x) return string.format('&#%02u;', string.byte(x)) end))) else table.insert(text, span_renderers[v.kind](v, block, sec)) end end return table.concat(text) end function span_renderers.format(sp,...) local tags = { strong = 'strong', emph = 'em', strike = 'del', insert = 'ins', literal = 'code' } if sp.style == 'literal' and not opts['fossil-uv'] then stylesNeeded.code = true end if sp.style == 'del' or sp.style == 'ins' then stylesNeeded.editors_markup = true end return tag(tags[sp.style],nil,htmlSpan(sp.spans,...)) end function span_renderers.term(t,b,s) local r = b.origin:ref(t.ref) local name = t.ref if name:find'%.' 
then name = name:match '^[^.]*%.(.+)$' end if type(r) ~= 'string' then b.origin:fail('%s is an object, not a reference', t.ref) end stylesNeeded.abbr = true return tag('abbr',{title=r},next(t.spans) and htmlSpan(t.spans,b,s) or name) end function span_renderers.macro(m,b,s) local r = b.origin:ref(m.macro) if type(r) ~= 'string' then b.origin:fail('%s is an object, not a reference', t.ref) end local mctx = b.origin:clone() mctx.invocation = m return htmlSpan(ct.parse_span(r, mctx),b,s) end function span_renderers.var(v,b,s) local val if v.pos then if not v.origin.invocation then v.origin:fail 'positional arguments can only be used in a macro invocation' ................................................................................ end if v.raw then return val else return htmlSpan(ct.parse_span(val, v.origin), b, s) end end function span_renderers.link(sp,b,s) local href if b.origin.doc.sections[sp.ref] then href = '#' .. sp.ref else if sp.addr then href = sp.addr else ................................................................................ if type(r) == 'table' then href = '#' .. getSafeID(r) else href = r end end end return tag('a',{href=href},next(sp.spans) and htmlSpan(sp.spans,b,s) or href) end return { span_renderers = span_renderers; htmlSpan = htmlSpan; htmlDoc = htmlDoc; } end local function getBlockRenderers(procs, sr) local tag, elt, catenate = procs.tag, procs.elt, procs.catenate local null = function() return catenate{} end local block_renderers = { anchor = function(b,s) ................................................................................ 
if #l > 0 then return tag('div',nil,sr.htmlSpan(l, b, s)) else return elt('hr') end end, b.lines) if b.title then table.insert(nodes,1,tag('figcaption',nil,sr.htmlSpan(b.title))) end if b.lang then langsused[b.lang] = true end return tag('figure', {class='listing', lang=b.lang, id=b.id and getSafeID(b)}, catenate(nodes)) end; aside = function(b,s) local bn = {} for _,v in pairs(b.lines) do table.insert(bn, tag('p', {}, sr.htmlSpan(v, b, s))) end return tag('aside', {}, bn) end; ['break'] = function() --[[nop]] end; } return block_renderers; end local function getRenderers(procs) local r = getSpanRenderers(procs) r.block_renderers = getBlockRenderers(procs, r) return r end local tagproc do local elt = function(t,attrs) return f('<%s%s>', t, attrs and ss.reduce(function(a,b) return a..b end, '', ss.map(function(v,k) if v == true then return ' '..k elseif v then return f(' %s="%s"', k, v) end end, attrs)) or '') end tagproc = { toTXT = { tag = function(t,a,v) return v end; elt = function(t,a) return '' end; catenate = table.concat; }; toIR = { tag = function(t,a,v,o) return { tag = t, attrs = a; nodes = type(v) == 'string' and {v} or v, src = o } end; elt = function(t,a,o) return { tag = t, attrs = a, src = o } end; catenate = function(...) return ... end; }; toHTML = { elt = elt; tag = function(t,attrs,body) return f('%s%s</%s>', elt(t,attrs), body, t) end; catenate = table.concat; }; } end local astproc = { toHTML = getRenderers(tagproc.toHTML); toTXT = getRenderers(tagproc.toTXT); toIR = { }; } ................................................................................ 
local ir = {} local dr = astproc.toHTML -- default renderers local plainr = astproc.toTXT local irBlockRdrs = astproc.toIR.block_renderers; render_state_handle.ir = ir; runhook('ir_assemble', ir) for i, sec in ipairs(doc.secorder) do if doctitle == nil and sec.depth == 1 and sec.heading_node then doctitle = astproc.toTXT.htmlSpan(sec.heading_node.spans, sec.heading_node, sec) end local irs if sec.kind == 'ordinary' then if #(sec.blocks) > 0 then irs = {tag='section',attrs={id = getSafeID(sec)},nodes={}} runhook('ir_section_build', irs, sec) for i, block in ipairs(sec.blocks) do local rd if irBlockRdrs[block.kind] then rd = irBlockRdrs[block.kind](block,sec) else local rdr = renderJob:proc('render',block.kind,'html') if rdr then rd = rdr({ state = render_state_handle; tagproc = tagproc.toIR; astproc = astproc.toIR; }, block, sec) end end if rd then if opts['heading-anchors'] and block == sec.heading_node then stylesNeeded.headingAnchors = true table.insert(rd.nodes, ' ') table.insert(rd.nodes, { tag = 'a'; attrs = {href = '#' .. irs.attrs.id, class='anchor'}; nodes = {type(opts['heading-anchors'])=='string' and opts['heading-anchors'] or '§'}; }) end table.insert(irs.nodes, rd) runhook('ir_section_node_insert', rd, irs, sec) end end end elseif sec.kind == 'blockquote' then elseif sec.kind == 'listing' then elseif sec.kind == 'embed' then end if irs then table.insert(ir, irs) end end -- restructure passes runhook('ir_restructure_pre', ir) ---- list insertion pass local lists = {} for _, sec in pairs(ir) do ................................................................................ 
local tonespan = opts.accent and .1 or 0 local tbg = opts['dark-on-light'] and 1.0 - tonespan or tonespan local tfg = opts['dark-on-light'] and tonespan or 1.0 - tonespan if var == 'bg' then return tone(tbg,nil,nil,tonumber(alpha)) elseif var == 'fg' then return tone(tfg,nil,nil,tonumber(alpha)) elseif var == 'tone' then local l, sep, sat for i=1,3 do -- 🙄 l,sep,sat = param:match('^%('..string.rep('([^%s]*)%s*',i)..'%)$') if l then break end end l = ss.math.lerp(tonumber(l), tbg, tfg) ................................................................................ kind = 'var'; pos = pos; raw = raw; var = not pos and s or nil; origin = c:clone(); } end end ct.spanctls = { {seq = '!', parse = formatter 'emph'}; {seq = '*', parse = formatter 'strong'}; {seq = '~', parse = formatter 'strike'}; {seq = '+', parse = formatter 'inser'}; {seq = '\\', parse = function(s, c) -- raw return s end}; {seq = '$\\', parse = function(s, c) -- raw return { kind = 'format'; style = 'literal'; spans = {s}; origin = c:clone(); } end}; {seq = '$', parse = formatter 'literal'}; {seq = '&', parse = function(s, c) local r, t = s:match '^([^%s]+)%s*(.-)$' return { kind = 'term'; spans = (t and t ~= "") and ct.parse_span(t, c) or {}; ref = r; origin = c:clone(); } end}; {seq = '^', parse = function(s, c) local fn, t = s:match '^([^%s]+)%s*(.-)$' ................................................................................ 
} end}; {seq = '>', parse = insert_link}; {seq = '→', parse = insert_link}; {seq = '🔗', parse = insert_link}; {seq = '##', parse = insert_var_ref(true)}; {seq = '#', parse = insert_var_ref(false)}; } end function ct.parse_span(str,ctx) local function delimited(start, stop, s) local r = { pcall(ss.str.delimit, nil, start, stop, s) } if r[1] then return table.unpack(r, 2) end ctx:fail(tostring(r[2])) end local buf = "" local spans = {} local function flush() if buf ~= "" then table.insert(spans, buf) buf = "" end end local skip = false for c,p in eachcode(str) do if skip == true then skip = false buf = buf .. c elseif c == '\\' then skip = true elseif c == '{' then flush() local substr, following = delimited('{','}',str:sub(p.byte)) local splitstart, splitstop = substr:find'%s+' local id, argstr if splitstart then id, argstr = substr:sub(1,splitstart-1), substr:sub(splitstop+1) ................................................................................ local i = 1 while i <= #argstr do while i<=#argstr and (argstr:sub(i,i) ~= '|' or argstr:sub(i-1,i) == '\\|') do i = i + 1 end local arg = argstr:sub(start, i == #argstr and i or i-1) start = i+1 table.insert(o.args, arg) i = i + 1 end end p.next.byte = p.next.byte + following - 1 table.insert(spans,o) ................................................................................ table.insert(spans, i.parse(substr:sub(1+#i.seq), ctx)) break end end if not found then ctx:fail('no recognized control sequence in [%s]', substr) end else buf = buf .. c end end flush() return spans end local function blockwrap(fn) return function(l,c,j) local block = fn(l,c,j) block.origin = c:clone(); table.insert(c.sec.blocks, block); j:hook('block_insert', c, block, l) end end local insert_paragraph = blockwrap(function(l,c) if l:sub(1,1) == '.' then l = l:sub(2) end return { kind = "paragraph"; ................................................................................ 
if t and t ~= "" then local heading = { kind = "label"; spans = ct.parse_span(t,c); origin = s.origin; captions = s; } table.insert(s.blocks, heading) s.heading_node = heading end c.sec = s j:hook('section_attach', c, s) end ................................................................................ c.doc.meta[key] = val j:hook('metadata_set', key, val) end local dextctl = function(w,c) local mode, exts = w(1) for e in exts:gmatch '([^%s]+)' do if mode == 'uses' then elseif mode == 'needs' then elseif mode == 'inhibits' then end end end local dcond = function(w,c) local mode, cond, exp = w(2) c.hide_next = mode == 'unless' end; ................................................................................ ct.directives = { author = dsetmeta; license = dsetmeta; keywords = dsetmeta; desc = dsetmeta; when = dcond; unless = dcond; expand = function(w,c) local _, m = w(1) if m ~= 'off' then c.expand_next = 1 else c.expand_next = 0 end end; } local function insert_table_row(l,c,j) local row = {} local buf ................................................................................ local flush = function() if buf then buf.str = buf.str:gsub('%s+$','') table.insert(row, buf) end buf = { str = '' } end for c,p in eachcode(l) do if c == '|' or c == '+' and (p.code == 1 or l:sub(p.byte-1,p.byte-1)~='\\') then flush() buf.header = c == '+' elseif c == ':' then local lst = l:sub(p.byte-#c,p.byte-#c) local nxt = l:sub(p.next.byte,p.next.byte) if lst == '|' or lst == '+' and l:sub(p.byte-2,p.byte-2) ~= '\\' then ................................................................................ else buf.str = buf.str .. 
c end end if buf.str ~= '' then flush() end for _,v in pairs(row) do v.spans = ct.parse_span(v.str, c) end if #c.sec.blocks > 1 and c.sec.blocks[#c.sec.blocks].kind == 'table' then local tbl = c.sec.blocks[#c.sec.blocks] table.insert(tbl.rows, row) j:hook('block_table_attach', c, tbl, row, l) j:hook('block_table_row_insert', c, tbl, row, l) else ................................................................................ {seq = '¶', fn = insert_paragraph}; {seq = '❡', fn = insert_paragraph}; {seq = '#', fn = insert_section}; {seq = '§', fn = insert_section}; {seq = '+', fn = insert_table_row}; {seq = '|', fn = insert_table_row}; {seq = '│', fn = insert_table_row}; {seq = '!', fn = function(l,c,j) local last = c.sec.blocks[#c.sec.blocks] local txt = l:match '^%s*!%s*(.-)$' if (not last) or last.kind ~= 'aside' then local aside = { kind = 'aside'; lines = { ct.parse_span(txt, c) } } c:insert(aside) j:hook('block_aside_insert', c, aside, l) j:hook('block_aside_line_insert', c, aside, aside.lines[1], l) j:hook('block_insert', c, aside, l) else local sp = ct.parse_span(txt, c) table.insert(last.lines, sp) j:hook('block_aside_attach', c, last, sp, l) j:hook('block_aside_line_insert', c, last, sp, l) end end}; {pred = function(s,c) return s:match'^[*:]' end, fn = blockwrap(function(l,c) -- list local stars = l:match '^([*:]+)' ................................................................................ 
return { kind = 'list-item'; depth = depth; ordered = ordered; spans = ct.parse_span(txt, c); } end)}; {seq = '\t', fn = function(l,c,j) local ref, val = l:match '\t+([^:]+):%s*(.*)$' c.sec.refs[ref] = val j:hook('section_ref_attach', c, ref, val, l) end}; {seq = '%', fn = function(l,c,j) -- directive local crit, cmdline = l:match '^%%([!%%]?)%s*(.*)$' local words = function(i) local wds = {} if i == 0 then return cmdline end for w,pos in cmdline:gmatch '([^%s]+)()' do table.insert(wds, w) i = i - 1 if i == 0 then table.insert(wds,cmdline:sub(pos)) return table.unpack(wds) end end end local cmd, rest = words(1) if ct.directives[cmd] then ................................................................................ ct.directives[cmd](words,c,j) elseif cmd == c.doc.stage.mode['render:format'] then -- this is a directive for the renderer; insert it into the tree as is local dir = { kind = 'directive'; critical = crit == '!'; words = words; } c:insert(dir) j:hook('block_directive_render', j, c, dir) elseif c.doc:allow_ext(cmd) then -- extension directives begin with their id local ext = ct.ext.loaded[cmd] if ext.directives then local _, topcmd = words(2) if ext.directives[topcmd] then ext.directives[topcmd](j:delegate(ext), c, words) ................................................................................ 
kind = 'code'; listing = { kind = 'listing'; lang = lang, id = id, title = title and ct.parse_span(title,c); lines = {}; } } j:hook('mode_switch', c, mode) c.mode = mode if id then if c.sec.refs[id] then c:fail('duplicate ID %s', id) end c.sec.refs[id] = c.mode.listing end j:hook('block_insert', c, mode.listing, l) return c.mode.listing; end)}; {pred = function(s,c) if s:match '^[%-_][*_%-%s]+' then return true end if startswith(s, '—') then for c, p in eachcode(s) do if ({ ['—'] = true, ['-'] = true, [' '] = true; ['*'] = true, ['_'] = true, ['\t'] = true; })[c] ~= true then return false end end return true end end; fn = blockwrap(function() return { kind = 'horiz-rule' } end)}; {fn = insert_paragraph}; } function ct.parse(file, src, mode) local function is_whitespace(cp) return cp == 0x20 or cp == 0xe390 end local ctx = ct.ctx.mk(src) ctx.line = 0 ctx.doc = ct.doc.mk() ctx.doc.src = src ctx.doc.stage = { kind = 'parse'; mode = mode; } ctx.sec = ctx.doc:mksec() -- toplevel section ctx.sec.origin = ctx:clone() -- create states for extension hooks local job = ctx.doc:job('parse',nil,ctx) for full_line in file:lines() do ctx.line = ctx.line + 1 local l for p, c in utf8.codes(full_line) do if not is_whitespace(c) then l = full_line:sub(p) break end end job:hook('line_read',ctx,l) if ctx.mode then if ctx.mode.kind == 'code' then if l and l:match '^~~~%s*$' then job:hook('block_listing_end',ctx,ctx.mode.listing) job:hook('mode_switch', c, nil) ctx.mode = nil else -- TODO handle formatted code local newline = {l} table.insert(ctx.mode.listing.lines, newline) job:hook('block_listing_newline',ctx,ctx.mode.listing,newline) end else ctx:fail('unimplemented syntax mode %s', ctx.mode.kind) end else if l then local function tryseqs(seqs, ...) for _, i in pairs(seqs) do if ((not i.seq ) or startswith(l, i.seq)) and ((not i.pred) or i.pred (l, ctx )) then i.fn(l, ctx, job, ...) 
return true end end return false end if not tryseqs(ct.ctlseqs) then local found = false for eb, ext, state in job:each('blocks') do if tryseqs(eb, state) then found = true break end end if not found then ctx:fail 'incomprehensible input line' end end else if next(ctx.sec.blocks) and ctx.sec.blocks[#ctx.sec.blocks].kind ~= 'break' then local brk = {kind='break'} job:hook('block_break', ctx, brk, l) table.insert(ctx.sec.blocks, brk) end end end job:hook('line_end',ctx,l) end return ctx.doc end |
< > > > | | > | > | > | | > > > | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | | | | | | | | | | | | | | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | > > > > > > > > > > > > > > > > > > > | | > > | | | | | | | | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | < < | | > > > > | > > > > > > > > > > > > > > > | < | > > | | < < | | | > > > > > > | < < < < > > > > | < < < > > > > > > > > > > > > > > > > | < < < > | > > > > > > | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | > > > > | | > | > > > | > < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > < < | < < < < < < < < < < < < < < < < < < < < < < < < < < < > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | | > > > > | > > > > > > > > > > > | > > | > > | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | > > > > > > > | | | | | | > > > > > > > > > | | | > > > > > > > > > > > > > > > > > > > > > > > > > | | | > | | | > > | > | > > > > > > > > > > > > > > > > > > | > > > > > > > | | | > | > > > > | > > > > > > > > > > > > > > | > > > > > > > > > > > > > > > > > > > > > > > > > > > > | < < > > > > > | > > > > > > > > > > > > > > > > > > > > > > > > > > > < < < < > > > > > > > > > > > > > > > > > > > > > > > | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | < < < < < < < < < < < < < < |
2 3 4 5 6 7 8 9 10 11 12 13 14 15 .. 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 ... 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 ... 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 ... 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 ... 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 ... 
739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 ... 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 ... 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 .... 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 .... 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 .... 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 .... 
1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 .... 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 .... 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 .... 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 .... 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 .... 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 .... 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 .... 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 .... 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 .... 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 .... 
1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 .... 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 .... 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 |
-- ~ lexi hale <lexi@hale.su> -- © AGPLv3 -- ? reference implementation of the cortav document language local ss = require 'sirsem' -- aliases for commonly used sirsem funcs local startswith = ss.str.begins local dump = ss.dump local declare = ss.declare -- make this module available to require() when linked into a lua bytecode program with luac local ct = ss.namespace 'cortav' ct.info = { version = ss.version {0,1; 'devel'}; ................................................................................ end); cli = ss.exnkind 'command line parse error'; mode = ss.exnkind('bad mode', function(msg, ...) return string.format("mode “%s” "..msg, ...) end); unimpl = ss.exnkind 'feature not implemented'; ext = ss.exnkind 'extension error'; enc = ss.exnkind('encoding error', function(msg, ...) return string.format('[%s]' .. msg, ...) end); } ct.ctx = declare { mk = function(src) return {src = src} end; ident = 'context'; cast = { string = function(me) ................................................................................ table.insert(self.sec.blocks,block) return block end; ref = function(self,id) if not id:find'%.' then local rid = self.sec.refs[id] if self.sec.refs[id] then return self.sec.refs[id], id, self.sec else self:fail("no such ref %s in current section", id or '') end else local sec, ref = string.match(id, "(.-)%.(.+)") local s = self.doc.sections[sec] if s then if s.refs[ref] then return s.refs[ref], ref, sec else self:fail("no such ref %s in section %s", ref, sec) end else self:fail("no such section %s", sec) end end end }; } ................................................................................ meta = {}; vars = {}; ext = { inhibit = {}; need = {}; use = {}; }; enc = ss.str.enc.utf8; } end; construct = function(me) me.docjob = ct.ext.job('doc', me, nil) end; } -- FP helper functions ................................................................................ 
-- renderer engines function ct.render.html(doc, opts) local doctitle = opts['title'] local f = string.format local ids = {} local canonicalID = {} local function getSafeID(obj,pfx) pfx = pfx or '' if canonicalID[obj] then return canonicalID[obj] elseif obj.id and ids[pfx .. obj.id] then local objid = pfx .. obj.id local newid local i = 1 repeat newid = objid .. string.format('-%x', i) i = i + 1 until not ids[newid] ids[newid] = obj canonicalID[obj] = newid return newid else local cid = obj.id if not cid then local i = 1 repeat cid = string.format('%sx-%x', pfx, i) i = i + 1 until not ids[cid] end ids[cid] = obj canonicalID[obj] = cid return cid end end local footnotes = {} local footnotecount = 0 local langsused = {} local langpairs = { lua = { color = 0x9377ff }; terra = { color = 0xff77c8 }; c = { name = 'C', color = 0x77ffe8 }; html = { color = 0xfff877 }; scheme = { color = 0x77ff88 }; lisp = { color = 0x77ff88 }; fortran = { color = 0xff779a }; python = { color = 0xffd277 }; ruby = { color = 0xcdd6ff }; } local stylesets = { footnote = [[ div.footnote { font-family: 90%; display: none; grid-template-columns: 1em 1fr min-content; grid-template-rows: 1fr min-content; position: fixed; padding: 1em; background: @tone(0.05); border: black; margin:auto; } div.footnote:target { display:grid; } @media screen { div.footnote { left: 10em; right: 10em; max-width: calc(@width + 2em); max-height: 30vw; bottom: 1em; } } @media print { div.footnote { position: relative; } div.footnote:first-of-type { border-top: 1px solid black; } } div.footnote > a[href="#0"]{ grid-row: 2/3; grid-column: 3/4; display: block; padding: 0.2em 0.7em; text-align: center; text-decoration: none; background: @tone(0.2); color: @tone(1); border: 1px solid black; margin-top: 0.6em; -webkit-user-select: none; -ms-user-select: none; user-select: none; -webkit-user-drag: none; user-drag: none; } div.footnote > a[href="#0"]:hover { background: @tone(0.3); color: @tone(2); } div.footnote > 
a[href="#0"]:active { background: @tone(0.05); color: @tone(0.4); } @media print { div.footnote > a[href="#0"]{ display:none; } } div.footnote > div.number { text-align:right; grid-row: 1/2; grid-column: 1/2; } div.footnote > div.text { grid-row: 1/2; grid-column: 2/4; padding-left: 1em; overflow-y: scroll; } ]]; header = [[ body { padding: 0 2.5em !important } h1,h2,h3,h4,h5,h6 { border-bottom: 1px solid @tone(0.7); } h1 { font-size: 200%; border-bottom-style: double !important; border-bottom-width: 3px !important; margin: 0em -1em; } h2 { font-size: 130%; margin: 0em -0.7em; } h3 { font-size: 110%; margin: 0em -0.5em; } h4 { font-size: 100%; font-weight: normal; margin: 0em -0.2em; } h5 { font-size: 90%; font-weight: normal; } h6 { font-size: 80%; font-weight: normal; } ................................................................................ section:target > :is(h1,h2,h3,h4,h5,h6) { } ]]; paragraph = [[ p { margin: 0.7em 0; text-align: justify; } section { margin: 1.2em 0; } section:first-child { margin-top: 0; } ]]; accent = [[ @media screen { body { background: @bg; color: @fg } a[href] { color: @tone(0.7 30); text-decoration-color: @tone/0.4(0.7 30); } a[href]:hover { color: @tone(0.9 30); text-decoration-color: @tone/0.7(0.7 30); } h1 { color: @tone(2); } h2 { color: @tone(1.5); } h3 { color: @tone(1.2); } h4 { color: @tone(1); } h5,h6 { color: @tone(0.8); } } @media print { a[href] { text-decoration: none; color: black; font-weight: bold; } h1,h2,h3,h4,h5,h6 { border-bottom: 1px black; } } ]]; aside = [[ section > aside { text-align: justify; margin: 0 1.5em; padding: 0.5em 0.8em; background: @tone(0.05); font-size: 90%; border-left: 5px solid @tone(0.2 15); border-right: 5px solid @tone(0.2 15); } section > aside p { margin: 0; margin-top: 0.6em; } section > aside p:first-child { margin: 0; } ]]; code = [[ code { display: inline-block; background: @tone(0.9); color: @bg; font-family: monospace; font-size: 90%; padding: 3px 5px; } ]]; var = [[ var { 
font-style: italic; font-family: monospace; color: @tone(0.7); } code var { color: @tone(0.25); } ]]; math = [[ span.equation { display: inline-block; background: @tone(0.08); color: @tone(2); padding: 0.1em 0.3em; border: 1px solid @tone(0.5); } ]]; abbr = [[ abbr[title] { cursor: help; } ]]; editors_markup = [[]]; block_code_listing = [[ figure.listing { font-family: monospace; background: @tone(0.05); color: @fg; padding: 0; margin: 0.3em 0; counter-reset: line-number; position: relative; border: 1px solid @fg; } figure.listing>div { white-space: pre-wrap; tab-size: 3; -moz-tab-size: 3; counter-increment: line-number; text-indent: -2.3em; margin-left: 2.3em; } figure.listing>:is(div,hr)::before { width: 1.0em; padding: 0.2em 0.4em; text-align: right; display: inline-block; background-color: @tone(0.2); border-right: 1px solid @fg; content: counter(line-number); margin-right: 0.3em; } figure.listing>hr::before { color: transparent; padding-top: 0; padding-bottom: 0; } figure.listing>div::before { color: @fg; } figure.listing>div:last-child::before { padding-bottom: 0.5em; } figure.listing>figcaption:first-child { border: none; border-bottom: 1px solid @fg; } figure.listing>figcaption::after { display: block; float: right; font-weight: normal; font-style: italic; font-size: 70%; padding-top: 0.3em; } figure.listing>figcaption { font-family: sans-serif; font-size: 120%; padding: 0.2em 0.4em; border: none; color: @tone(2); } figure.listing > hr { border: none; margin: 0; height: 0.7em; counter-increment: line-number; } ]]; } ................................................................................ stylesets = stylesets; stylesets_active = stylesNeeded; obj_htmlid = getSafeID; -- remaining fields added later } local renderJob = doc:job('render_html', nil, render_state_handle) doc.stage.job = renderJob; local runhook = function(h, ...) return renderJob:hook(h, render_state_handle, ...) 
end local tagproc do local elt = function(t,attrs) return f('<%s%s>', t, attrs and ss.reduce(function(a,b) return a..b end, '', ss.map(function(v,k) if v == true then return ' '..k elseif v then return f(' %s="%s"', k, v) end end, attrs)) or '') end tagproc = { toTXT = { tag = function(t,a,v) return v end; elt = function(t,a) return '' end; catenate = table.concat; }; toIR = { tag = function(t,a,v,o) return { tag = t, attrs = a; nodes = type(v) == 'string' and {v} or v, src = o } end; elt = function(t,a,o) return { tag = t, attrs = a, src = o } end; catenate = function(...) return ... end; }; toHTML = { elt = elt; tag = function(t,attrs,body) return f('%s%s</%s>', elt(t,attrs), body, t) end; catenate = table.concat; }; } end local function getBaseRenderers(procs, span_renderers) local tag, elt, catenate = procs.tag, procs.elt, procs.catenate local htmlDoc = function(title, head, body) return [[<!doctype html>]] .. tag('html',nil, tag('head', nil, elt('meta',{charset = 'utf-8'}) .. (title and tag('title', nil, title) or '') .. (head or '')) .. tag('body', nil, body or '')) end local function htmlSpan(spans, block, sec) local text = {} for k,v in pairs(spans) do if type(v) == 'string' then v=v:gsub('[<>&"]', function(x) return string.format('&#%02u;', string.byte(x)) end) for fn, ext in renderJob:each('hook','render_html_sanitize') do v = fn(renderJob:delegate(ext), v) end table.insert(text,v) else table.insert(text, (span_renderers[v.kind](v, block, sec))) end end return table.concat(text) end return {htmlDoc=htmlDoc, htmlSpan=htmlSpan} end local spanparse = function(...) local s = ct.parse_span(...) doc.docjob:hook('meddle_span', s) return s end local cssRulesFor = {} local function getSpanRenderers(procs) local tag, elt, catenate = procs.tag, procs.elt, procs.catenate local span_renderers = {} local plainrdr = getBaseRenderers(tagproc.toTXT, span_renderers) local htmlSpan = getBaseRenderers(procs, span_renderers).htmlSpan function span_renderers.format(sp,...) 
local tags = { strong = 'strong', emph = 'em', strike = 'del', insert = 'ins', literal = 'code', variable = 'var'} if sp.style == 'literal' and not opts['fossil-uv'] then stylesNeeded.code = true elseif sp.style == 'strike' or sp.style == 'insert' then stylesNeeded.editors_markup = true elseif sp.style == 'variable' then stylesNeeded.var = true end return tag(tags[sp.style],nil,htmlSpan(sp.spans,...)) end function span_renderers.deref(t,b,s) local r = b.origin:ref(t.ref) local name = t.ref if name:find'%.' then name = name:match '^[^.]*%.(.+)$' end if type(r) == 'string' then stylesNeeded.abbr = true return tag('abbr',{title=r},next(t.spans) and htmlSpan(t.spans,b,s) or name) end if r.kind == 'resource' then local rid = getSafeID(r, 'res-') if r.class == 'image' then if not cssRulesFor[r] then local css = prepcss(string.format([[ section p > .%s { } ]], rid)) stylesets[r] = css cssRulesFor[r] = css stylesNeeded[r] = true end return tag('div',{class=rid},catenate{'blaah'}) elseif r.class == 'video' then local vid = {} return tag('video',nil,vid) elseif r.class == 'font' then b.origin:fail('fonts cannot be instantiated, use %font directive instead') end else b.origin:fail('%s is not an object that can be embedded', t.ref) end end function span_renderers.var(v,b,s) local val if v.pos then if not v.origin.invocation then v.origin:fail 'positional arguments can only be used in a macro invocation' ................................................................................ end if v.raw then return val else return htmlSpan(ct.parse_span(val, v.origin), b, s) end end function span_renderers.raw(v,b,s) return htmlSpan(v.spans, b, s) end function span_renderers.link(sp,b,s) local href if b.origin.doc.sections[sp.ref] then href = '#' .. sp.ref else if sp.addr then href = sp.addr else ................................................................................ if type(r) == 'table' then href = '#' .. 
getSafeID(r) else href = r end end end return tag('a',{href=href},next(sp.spans) and htmlSpan(sp.spans,b,s) or href) end span_renderers['line-break'] = function(sp,b,s) return elt('br') end function span_renderers.macro(m,b,s) local macroname = plainrdr.htmlSpan( ct.parse_span(m.macro, b.origin), b,s) local r = b.origin:ref(macroname) if type(r) ~= 'string' then b.origin:fail('%s is an object, not a reference', t.ref) end local mctx = b.origin:clone() mctx.invocation = m return htmlSpan(ct.parse_span(r, mctx),b,s) end function span_renderers.math(m,b,s) stylesNeeded.math = true return tag('span',{class='equation'},htmlSpan(m.spans, b, s)) end; function span_renderers.directive(d,b,s) if d.ext == 'html' then elseif b.origin.doc:allow_ext(d.ext) then elseif d.crit then b.origin:fail('critical extension %s unavailable', d.ext) elseif d.failthru then return htmlSpan(d.spans, b, s) end end function span_renderers.footnote(f,b,s) stylesNeeded.footnote = true local source, sid, ssec = b.origin:ref(f.ref) local cnc = getSafeID(ssec) .. ' ' .. sid local fn if footnotes[cnc] then fn = footnotes[cnc] else footnotecount = footnotecount + 1 fn = {num = footnotecount, origin = b.origin, fnid=cnc, source = source} fn.id = getSafeID(fn) footnotes[cnc] = fn end return tag('a', {href='#'..fn.id}, htmlSpan(f.spans) .. tag('sup',nil, fn.num)) end return span_renderers end local function getBlockRenderers(procs, sr) local tag, elt, catenate = procs.tag, procs.elt, procs.catenate local null = function() return catenate{} end local block_renderers = { anchor = function(b,s) ................................................................................ 
if #l > 0 then return tag('div',nil,sr.htmlSpan(l, b, s)) else return elt('hr') end end, b.lines) if b.title then table.insert(nodes,1, tag('figcaption',nil,sr.htmlSpan(b.title))) end if b.lang then langsused[b.lang] = true end return tag('figure', {class='listing', lang=b.lang, id=b.id and getSafeID(b)}, catenate(nodes)) end; aside = function(b,s) local bn = {} stylesNeeded.aside = true if #b.lines == 1 then bn[1] = sr.htmlSpan(b.lines[1], b, s) else for _,v in pairs(b.lines) do table.insert(bn, tag('p', {}, sr.htmlSpan(v, b, s))) end end return tag('aside', {}, bn) end; ['break'] = function() -- HACK -- lists need to be rewritten to work like asides return ''; end; } return block_renderers; end local function getRenderers(procs) local span_renderers = getSpanRenderers(procs) local r = getBaseRenderers(procs,span_renderers) r.block_renderers = getBlockRenderers(procs, r) return r end local astproc = { toHTML = getRenderers(tagproc.toHTML); toTXT = getRenderers(tagproc.toTXT); toIR = { }; } ................................................................................ local ir = {} local dr = astproc.toHTML -- default renderers local plainr = astproc.toTXT local irBlockRdrs = astproc.toIR.block_renderers; render_state_handle.ir = ir; local function renderBlocks(blocks, irs) for i, block in ipairs(blocks) do local rd if irBlockRdrs[block.kind] then rd = irBlockRdrs[block.kind](block,sec) else local rdr = renderJob:proc('render',block.kind,'html') if rdr then rd = rdr({ state = render_state_handle; tagproc = tagproc.toIR; astproc = astproc.toIR; }, block, sec) end end if rd then if opts['heading-anchors'] and block == sec.heading_node then stylesNeeded.headingAnchors = true table.insert(rd.nodes, ' ') table.insert(rd.nodes, { tag = 'a'; attrs = {href = '#' .. 
irs.attrs.id, class='anchor'}; nodes = {type(opts['heading-anchors'])=='string' and opts['heading-anchors'] or '§'}; }) end if rd.src and rd.src.origin.lang then if not rd.attrs then rd.attrs = {} end rd.attrs.lang = rd.src.origin.lang end table.insert(irs.nodes, rd) runhook('ir_section_node_insert', rd, irs, sec) end end end runhook('ir_assemble', ir) for i, sec in ipairs(doc.secorder) do if doctitle == nil and sec.depth == 1 and sec.heading_node then doctitle = astproc.toTXT.htmlSpan(sec.heading_node.spans, sec.heading_node, sec) end local irs if sec.kind == 'ordinary' then if #(sec.blocks) > 0 then irs = {tag='section',attrs={id = getSafeID(sec)},nodes={}} runhook('ir_section_build', irs, sec) renderBlocks(sec.blocks, irs) end elseif sec.kind == 'blockquote' then elseif sec.kind == 'listing' then elseif sec.kind == 'embed' then end if irs then table.insert(ir, irs) end end for _, fn in pairs(footnotes) do local tag = tagproc.toIR.tag local body = {nodes={}} local ftir = {} for l in fn.source:gmatch('([^\n]*)') do ct.parse_line(l, fn.origin, ftir) end renderBlocks(ftir,body) local note = tag('div',{class='footnote',id=fn.id}, { tag('div',{class='number'}, tostring(fn.num)), tag('div',{class='text'}, body.nodes), tag('a',{href='#0'},'close') }) table.insert(ir, note) end -- restructure passes runhook('ir_restructure_pre', ir) ---- list insertion pass local lists = {} for _, sec in pairs(ir) do ................................................................................ 
local tonespan = opts.accent and .1 or 0 local tbg = opts['dark-on-light'] and 1.0 - tonespan or tonespan local tfg = opts['dark-on-light'] and tonespan or 1.0 - tonespan if var == 'bg' then return tone(tbg,nil,nil,tonumber(alpha)) elseif var == 'fg' then return tone(tfg,nil,nil,tonumber(alpha)) elseif var == 'width' then return opts['width'] or '100vw' elseif var == 'tone' then local l, sep, sat for i=1,3 do -- 🙄 l,sep,sat = param:match('^%('..string.rep('([^%s]*)%s*',i)..'%)$') if l then break end end l = ss.math.lerp(tonumber(l), tbg, tfg) ................................................................................ kind = 'var'; pos = pos; raw = raw; var = not pos and s or nil; origin = c:clone(); } end end local function insert_span_directive(crit, failthru) return function(s,c) local args = ss.str.breakwords(d.doc.enc, s, 1) local brksyms = map(enc.encodeUCS, { '.', ',', ':', ';', '!', '$', '&', '^', '/', '?', '@', '=' }) local brkhash = {} for _,s in pairs(brksyms) do brkhash[s] = true end local extname = '' local sym local cmd = '' for ch,p in ss.str.each(c.doc.enc, args[1]) do if sym == nil then if brkhash[ch] then sym = ch else extname = extname .. ch end elseif brkhash[ch] then sym = sym + ch else cmd = cmd + ch end end if cmd == '' then cmd = nil end local spans if failthru then spans = ct.parse_span(args[2], c) end return { kind = 'directive'; ext = extname; cmd = cmd; args = args; crit = crit; failthru = failthru; spans = spans; } end end ct.spanctls = { {seq = '!', parse = formatter 'emph'}; {seq = '*', parse = formatter 'strong'}; {seq = '~', parse = formatter 'strike'}; {seq = '+', parse = formatter 'insert'}; {seq = '\\', parse = function(s, c) -- raw return { kind = 'raw'; spans = {s}; origin = c:clone(); } end}; {seq = '`\\', parse = function(s, c) -- raw local o = c:clone(); local str = '' for c, p in ss.str.each(c.doc.enc, s) do local q = p:esc() if q then str = str .. q p.next.byte = p.next.byte + #q else str = str .. 
c end end return { kind = 'format'; style = 'literal'; spans = {{ kind = 'raw'; spans = {str}; origin = o; }}; origin = o; } end}; {seq = '`', parse = formatter 'literal'}; {seq = '$', parse = formatter 'variable'}; {seq = '^', parse = function(s,c) --footnotes local r, t = s:match '^([^%s]+)%s*(.-)$' return { kind = 'footnote'; ref = r; spans = ct.parse_span(t, c); origin = c:clone(); } -- TODO support for footnote sections end}; {seq = '=', parse = function(s,c) --math mode local tx = { ['%*'] = '×'; ['/'] = '÷'; } for k,v in pairs(tx) do s = s:gsub(k,v) end s=s:gsub('%^([0-9]+)', function(num) local sup = {'⁰','¹','²','³','⁴','⁵','⁶','⁷','⁸','⁹'}; local r = '' for i=1,#num do r = r .. sup[1 + (num:byte(i) - 0x30)] end return r end) local m = {s} --TODO return { kind = 'math'; original = s; spans = m; origin = c:clone(); }; end}; {seq = '&', parse = function(s, c) local r, t = s:match '^([^%s]+)%s*(.-)$' return { kind = 'deref'; spans = (t and t ~= "") and ct.parse_span(t, c) or {}; ref = r; origin = c:clone(); } end}; {seq = '^', parse = function(s, c) local fn, t = s:match '^([^%s]+)%s*(.-)$' ................................................................................ 
} end}; {seq = '>', parse = insert_link}; {seq = '→', parse = insert_link}; {seq = '🔗', parse = insert_link}; {seq = '##', parse = insert_var_ref(true)}; {seq = '#', parse = insert_var_ref(false)}; {seq = '%%', parse = function() --[[NOP]] end}; {seq = '%!', parse = insert_span_directive(true,false)}; {seq = '%:', parse = insert_span_directive(false,true)}; {seq = '%', parse = insert_span_directive(false,false)}; } end function ct.parse_span(str,ctx) local function delimited(start, stop, s) local r = { pcall(ss.str.delimit, nil, start, stop, s) } if r[1] then return table.unpack(r, 2) end ctx:fail(tostring(r[2])) end local buf = "" local spans = {} local function flush() if buf ~= "" then -- for fn, ext in ctx.doc.docjob:each('hook','doc_meddle_string') do -- buf = fn(ctx.doc.docjob:delegate(ext), ctx, buf) -- end table.insert(spans, buf) buf = "" end end local skip = false for c,p in ss.str.each(ctx.doc.enc,str) do local ba, ca, es = ctx.doc.enc.parse_escape(str:sub(p.byte)) if es then flush() table.insert(spans, { kind = 'raw'; spans = {es}; origin = ctx:clone() }) p.next.byte = p.next.byte + ba; p.next.code = p.next.code + ca; elseif c == '{' then flush() local substr, following = delimited('{','}',str:sub(p.byte)) local splitstart, splitstop = substr:find'%s+' local id, argstr if splitstart then id, argstr = substr:sub(1,splitstart-1), substr:sub(splitstop+1) ................................................................................ local i = 1 while i <= #argstr do while i<=#argstr and (argstr:sub(i,i) ~= '|' or argstr:sub(i-1,i) == '\\|') do i = i + 1 end local arg = argstr:sub(start, i == #argstr and i or i-1) start = i+1 arg=arg:gsub('\\|','|') table.insert(o.args, arg) i = i + 1 end end p.next.byte = p.next.byte + following - 1 table.insert(spans,o) ................................................................................ 
table.insert(spans, i.parse(substr:sub(1+#i.seq), ctx)) break end end if not found then ctx:fail('no recognized control sequence in [%s]', substr) end elseif c == '\n' then flush() table.insert(spans,{kind='line-break',origin=ctx:clone()}) else buf = buf .. c end end flush() return spans end local function blockwrap(fn) return function(l,c,j,d) local block = fn(l,c,j,d) block.origin = c:clone(); table.insert(d, block); j:hook('block_insert', c, block, l) if block.spans then c.doc.docjob:hook('meddle_span', block.spans, block) end end end local insert_paragraph = blockwrap(function(l,c) if l:sub(1,1) == '.' then l = l:sub(2) end return { kind = "paragraph"; ................................................................................ if t and t ~= "" then local heading = { kind = "label"; spans = ct.parse_span(t,c); origin = s.origin; captions = s; } c.doc.docjob:hook('meddle_span', heading.spans, heading) table.insert(s.blocks, heading) s.heading_node = heading end c.sec = s j:hook('section_attach', c, s) end ................................................................................ c.doc.meta[key] = val j:hook('metadata_set', key, val) end local dextctl = function(w,c) local mode, exts = w(1) for e in exts:gmatch '([^%s]+)' do if mode == 'uses' then c.doc.ext.use[e] = true elseif mode == 'needs' then c.doc.ext.need[e] = true elseif mode == 'inhibits' then c.doc.ext.inhibit[e] = true end end end local dcond = function(w,c) local mode, cond, exp = w(2) c.hide_next = mode == 'unless' end; ................................................................................ 
ct.directives = { author = dsetmeta; license = dsetmeta; keywords = dsetmeta; desc = dsetmeta; when = dcond; unless = dcond; pragma = function(w,c) end; lang = function(w,c) local _, op, l = w(2) local langstack = c.doc.stage.langstack if op == 'is' then langstack[math.max(1, #langstack)] = l elseif op == 'push' then table.insert(langstack, l) elseif op == 'pop' then if next(langstack) then langstack[#langstack] = nil end elseif op == 'sec' then c.sec.lang = l else c:fail('bad language directive “%s”', op) end c.lang = langstack[#langstack] end; expand = function(w,c) local _, m = w(1) if m ~= 'off' then c.doc.stage.expand_next = 1 else c.doc.stage.expand_next = 0 end end; } local function insert_table_row(l,c,j) local row = {} local buf ................................................................................ local flush = function() if buf then buf.str = buf.str:gsub('%s+$','') table.insert(row, buf) end buf = { str = '' } end for c,p in ss.str.each(c.doc.enc,l) do if c == '|' or c == '+' and (p.code == 1 or l:sub(p.byte-1,p.byte-1)~='\\') then flush() buf.header = c == '+' elseif c == ':' then local lst = l:sub(p.byte-#c,p.byte-#c) local nxt = l:sub(p.next.byte,p.next.byte) if lst == '|' or lst == '+' and l:sub(p.byte-2,p.byte-2) ~= '\\' then ................................................................................ else buf.str = buf.str .. c end end if buf.str ~= '' then flush() end for _,v in pairs(row) do v.spans = ct.parse_span(v.str, c) c.doc.docjob:hook('meddle_span', v.spans, v) end if #c.sec.blocks > 1 and c.sec.blocks[#c.sec.blocks].kind == 'table' then local tbl = c.sec.blocks[#c.sec.blocks] table.insert(tbl.rows, row) j:hook('block_table_attach', c, tbl, row, l) j:hook('block_table_row_insert', c, tbl, row, l) else ................................................................................ 
{seq = '¶', fn = insert_paragraph}; {seq = '❡', fn = insert_paragraph}; {seq = '#', fn = insert_section}; {seq = '§', fn = insert_section}; {seq = '+', fn = insert_table_row}; {seq = '|', fn = insert_table_row}; {seq = '│', fn = insert_table_row}; {seq = '!', fn = function(l,c,j,d) local last = d[#d] local txt = l:match '^%s*!%s*(.-)$' if (not last) or last.kind ~= 'aside' then local aside = { kind = 'aside'; lines = { ct.parse_span(txt, c) }; origin = c:clone(); } c.doc.docjob:hook('meddle_span', aside.lines[1], aside) table.insert(d,aside) j:hook('block_aside_insert', c, aside, l) j:hook('block_aside_line_insert', c, aside, aside.lines[1], l) j:hook('block_insert', c, aside, l) else local sp = ct.parse_span(txt, c) c.doc.docjob:hook('meddle_span', sp, last) table.insert(last.lines, sp) j:hook('block_aside_attach', c, last, sp, l) j:hook('block_aside_line_insert', c, last, sp, l) end end}; {pred = function(s,c) return s:match'^[*:]' end, fn = blockwrap(function(l,c) -- list local stars = l:match '^([*:]+)' ................................................................................ return { kind = 'list-item'; depth = depth; ordered = ordered; spans = ct.parse_span(txt, c); } end)}; {seq = '\t\t', fn = function(l,c,j,d) local last = d[#d] if (not last) or (last.kind ~= 'reference') then c:fail('reference continuations must immediately follow a reference') end local str = l:match '^\t\t(.-)%s*$' last.val = last.val .. '\n' .. 
str c.sec.refs[last.key] = last.val end}; {seq = '\t', fn = blockwrap(function(l,c,j,d) local ref, val = l:match '\t+([^:]+):%s*(.*)$' local last = d[#d] local rsrc if last and last.kind == 'resource' then last.props[ref] = val rsrc = last elseif last and last.kind == 'reference' and last.rsrc then last.rsrc.props[ref] = val rsrc = last.rsrc else c.sec.refs[ref] = val end j:hook('section_ref_attach', c, ref, val, l) return { kind = 'reference'; rsrc = rsrc; key = ref; val = val; } end)}; {seq = '%', fn = function(l,c,j,d) -- directive local crit, cmdline = l:match '^%%([!%%]?)%s*(.*)$' local words = function(i) local wds = {} if i == 0 then return cmdline end for w,pos in cmdline:gmatch '([^%s]+)()' do table.insert(wds, w) i = i - 1 if i == 0 then table.insert(wds,(cmdline:sub(pos):match('^%s*(.-)%s*$'))) return table.unpack(wds) end end end local cmd, rest = words(1) if ct.directives[cmd] then ................................................................................ ct.directives[cmd](words,c,j) elseif cmd == c.doc.stage.mode['render:format'] then -- this is a directive for the renderer; insert it into the tree as is local dir = { kind = 'directive'; critical = crit == '!'; words = words; origin = c; } table.insert(d, dir) j:hook('block_directive_render', j, c, dir) elseif c.doc:allow_ext(cmd) then -- extension directives begin with their id local ext = ct.ext.loaded[cmd] if ext.directives then local _, topcmd = words(2) if ext.directives[topcmd] then ext.directives[topcmd](j:delegate(ext), c, words) ................................................................................ 
kind = 'code'; listing = { kind = 'listing'; lang = lang, id = id, title = title and ct.parse_span(title,c); lines = {}; } } if c.doc.stage.expand_next and c.doc.stage.expand_next > 0 then c.doc.stage.expand_next = c.doc.stage.expand_next - 1 mode.expand = true end j:hook('mode_switch', c, mode) c.mode = mode if id then if c.sec.refs[id] then c:fail('duplicate ID %s', id) end c.sec.refs[id] = c.mode.listing end j:hook('block_insert', c, mode.listing, l) return c.mode.listing; end)}; {pred = function(s,c) if s:match '^[%-_][*_%-%s]+' then return true end if startswith(s, '—') then for c, p in ss.str.each(c.doc.enc,s) do if ({ ['—'] = true, ['-'] = true, [' '] = true; ['*'] = true, ['_'] = true, ['\t'] = true; })[c] ~= true then return false end end return true end end; fn = blockwrap(function() return { kind = 'horiz-rule' } end)}; {seq='@', fn=blockwrap(function(s,c) local id = s:match '^@%s*(.-)%s*$' local rsrc = { kind = 'resource'; props = {}; id = id; } if c.sec.refs[id] then c:fail('an object with id “%s” already exists in that section',id) else c.sec.refs[id] = rsrc end return rsrc end)}; {fn = insert_paragraph}; } function ct.parse_line(l, ctx, dest) local newspan local job = ctx.doc.stage.job job:hook('line_read',ctx,l) if ctx.mode then if ctx.mode.kind == 'code' then if l and l:match '^~~~%s*$' then job:hook('block_listing_end',ctx,ctx.mode.listing) job:hook('mode_switch', c, nil) ctx.mode = nil else -- TODO handle formatted code local newline if ctx.mode.expand then newline = ct.parse_span(l, ctx) else newline = {l} end table.insert(ctx.mode.listing.lines, newline) job:hook('block_listing_newline',ctx,ctx.mode.listing,newline) end else local mf = job:proc('modes', ctx.mode.kind) if not mf then ctx:fail('unimplemented syntax mode %s', ctx.mode.kind) end mf(job, ctx, l, dest) --NOTE: you are responsible for triggering the appropriate hooks if you insert anything! end else if l then local function tryseqs(seqs, ...) 
for _, i in pairs(seqs) do if ((not i.seq ) or startswith(l, i.seq)) and ((not i.pred) or i.pred (l, ctx )) then i.fn(l, ctx, job, dest, ...) return true end end return false end if not tryseqs(ct.ctlseqs) then local found = false for eb, ext, state in job:each('blocks') do if tryseqs(eb, state) then found = true break end end if not found then ctx:fail 'incomprehensible input line' end end else if next(dest) and dest[#dest].kind ~= 'break' then local brk = {kind='break', origin = ctx:clone()} job:hook('block_break', ctx, brk, l) table.insert(dest, brk) end end end job:hook('line_end',ctx,l) end function ct.parse(file, src, mode, setup) local ctx = ct.ctx.mk(src) ctx.line = 0 ctx.doc = ct.doc.mk() ctx.doc.src = src ctx.sec = ctx.doc:mksec() -- toplevel section ctx.sec.origin = ctx:clone() ctx.lang = mode['meta:lang'] if mode['parse:enc'] then local e = ss.str.enc[mode['parse:enc']] if not e then ct.exns.enc('requested encoding not supported',mode['parse:enc']):throw() end ctx.doc.enc = e end -- create states for extension hooks local job = ctx.doc:job('parse',nil,ctx) ctx.doc.stage = { kind = 'parse'; mode = mode; job = job; langstack = {ctx.lang}; fontstack = {}; } local function is_whitespace(cp) return ctx.doc.enc.iswhitespace(cp) end if setup then setup(ctx) end for full_line in file:lines() do ctx.line = ctx.line + 1 local l for p, c in utf8.codes(full_line) do if not is_whitespace(c) then l = full_line:sub(p) break end end ct.parse_line(l, ctx, ctx.sec.blocks) end for i, sec in ipairs(ctx.doc.secorder) do for refid, r in ipairs(sec.refs) do if type(r) == 'table' and r.kind == 'resource' and r.props.src then local lines = ss.str.breaklines(ctx.doc.enc, r.props.src) local srcs = {} for i,l in ipairs(lines) do local args = ss.str.breakwords(ctx.doc.enc, l, 2, {escape=true}) if #args < 3 then r.origin:fail('invalid syntax for resource %s', t.ref) end local mimebreak = function(s) local wds = ss.str.split(ctx.doc.enc, s, '/', 1, {escape=true}) return wds end local 
mime = mimebreak(args[2]); local mimeclasses = { ['application/svg+xml'] = 'image'; } local class = mimeclasses[mime] table.insert(srcs, { mode = args[1]; mime = mime; uri = args[3]; class = class or mime[1]; }) end --ideally move this into its own mimetype lib local kind = r.props.as or srcs[1].class r.class = kind r.srcs = srcs end end end ctx.doc.stage = nil ctx.doc.docjob:hook('meddle_ast') return ctx.doc end |
Modified desk/cortav.xml from [8189edad17] to [b82e1b14f3].
8 9 10 11 12 13 14 15 16 17 18 19 20 21 .. 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 .. 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 ... 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 |
--> <language name='Cortav' version='1' kateversion='2.4' section='Markup' extensions='*.ct'> <highlighting> <list name='extension-directives'> <item>uses</item> <item>needs</item> <item>inhibits</item> </list> <list name='renderer-directives'> <item>html</item> <item>groff</item> <item>ps</item> <item>tex</item> <item>plaintext</item> ................................................................................ </list> <contexts> <context name='init' attribute='Normal Text' lineEndContext='#pop' fallthroughContext='text'> <RegExpr String='\\.' attribute='Escaped Char'/> <RegExpr attribute='Section Cue' context='sec-ident' String='(#|§)+' firstNonSpace='true' /> <StringDetect String='~~~' attribute='Literal Block Cue' firstNonSpace='true' context='literal-block-cue'/> <RegExpr attribute='List' String='[\*:]+' firstNonSpace='true' context='text' /> <Detect2Chars char='%' char1='!' attribute='Critical Directive Cue' context='directive'/> <DetectChar char='%' attribute='Directive Cue' context='directive'/> <DetectChar char='	' attribute='Normal Text' context='refdef-id'/> </context> <context name='sec-ident' attribute='Identifier' lineEndContext='#pop'> <DetectSpaces context='#pop!sec' attribute='Normal Text'/> </context> <context name='sec' attribute='Header' lineEndContext='#pop'> <IncludeRules context='text'/> ................................................................................ <IncludeRules context='span'/> </context> <context name='span-del' attribute='Deleted Text' lineEndContext='#pop'> <IncludeRules context='span'/> </context> <context name='span-cue' attribute='Span Cue' lineEndContext='#pop'> <StringDetect attribute='Span Cue' String='$\' context='#pop!flat-span' /> <DetectChar attribute='Span Cue' char='!' 
context='#pop!span-emph' /> <DetectChar attribute='Span Cue' char='*' context='#pop!span-strong' /> <DetectChar attribute='Span Cue' char='~' context='#pop!span-del' /> <AnyChar attribute='Span Cue' String='$+🔒' context='#pop!span' /> <StringDetect attribute='Span Cue' String='→' context='#pop!ref' /> <StringDetect attribute='Span Cue' String='🔗' context='#pop!ref' /> <DetectChar attribute='Span Cue' char='>' context='#pop!ref' /> <DetectChar attribute='Span Cue' char='&' context='#pop!ref' /> <DetectChar attribute='Span Cue' char='#' context='#pop!var-ref' /> <DetectChar attribute='Span Cue' char='\' context='#pop!flat-span' /> </context> <context name='flat-span' attribute='Unstyled Text' lineEndContext='#pop'> <Detect2Chars attribute='Escaped Char' context='#stay' char='\' char1=']'/> <DetectChar attribute='Span Delimiter' context='#pop' char=']'/> </context> <context name='ref' attribute='Reference' lineEndContext='#pop'> <DetectSpaces context='#pop!span'/> </context> <context name='var-ref' attribute='Reference' lineEndContext='#pop'> <WordDetect String="cortav" attribute='Standard Namespace'/> <WordDetect String="env" attribute='Standard Namespace'/> <DetectChar attribute='Span Delimiter' context='#pop' char=']'/> ................................................................................ 
</context> </contexts> <itemDatas> <itemData name='Normal Text' defStyleNum='dsNormal'/> <itemData name='Styled Text' defStyleNum='dsNormal'/> <itemData name='Emphatic Text' defStyleNum='dsNormal' italic='true'/> <itemData name='Strong Text' defStyleNum='dsNormal' bold='true'/> <itemData name='Deleted Text' defStyleNum='dsNormal' strikeout='true'/> <itemData name='Section Cue' defStyleNum='dsKeyword' bold='true'/> <itemData name='Header' defStyleNum='dsControlFlow' underline='true'/> <itemData name='Identifier' defStyleNum='dsVariable'/> <itemData name='Unstyled Text' defStyleNum='dsVerbatimString'/> <itemData name='Escaped Char' defStyleNum='dsSpecialChar'/> <itemData name='Reference' defStyleNum='dsControlFlow' underline='true'/> <itemData name='Span Cue' defStyleNum='dsKeyword' bold='true'/> <itemData name='Span Delimiter' defStyleNum='dsKeyword'/> <itemData name='Directive' defStyleNum='dsAttribute' bold='true'/> <itemData name='Directive Cue' defStyleNum='dsAttribute'/> <itemData name='Critical Directive Cue' defStyleNum='dsImport' bold='true'/> <itemData name='Extension Directive' defStyleNum='dsImport' bold='true'/> <itemData name='Renderer Directive' defStyleNum='dsExtension' bold='true'/> <itemData name='Standard Namespace' defStyleNum='dsBuiltIn' bold='true'/> <itemData name='Comment' defStyleNum='dsComment'/> <itemData name='Macro' defStyleNum='dsPreprocessor' bold='true'/> <itemData name='Macro Delimiter' defStyleNum='dsPreprocessor'/> <itemData name='Field Delimiter' defStyleNum='dsPreprocessor' bold='true'/> <itemData name='List' defStyleNum='dsOperator'/> <itemData name='Literal Block' defStyleNum='dsSpecialString'/> <itemData name='Literal Block Cue' defStyleNum='dsPreprocessor' bold='true'/> |
> > > > > > > > > > > > > > > > > > > > > > > > | | | > > > > > > > > > > > > > > > | > > > |
8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 .. 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 ... 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 ... 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 |
--> <language name='Cortav' version='1' kateversion='2.4' section='Markup' extensions='*.ct'> <highlighting> <list name='extension-directives'> <item>uses</item> <item>needs</item> <item>inhibits</item> </list> <list name='meta-directives'> <item>author</item> <item>lang</item> <item>pragma</item> </list> <list name='ctl-directives'> <item>when</item> <item>unless</item> <item>cols</item> <item>quote</item> <item>include</item> <item>embed</item> </list> <list name='renderer-directives'> <item>html</item> <item>groff</item> <item>ps</item> <item>tex</item> <item>plaintext</item> ................................................................................ </list> <contexts> <context name='init' attribute='Normal Text' lineEndContext='#pop' fallthroughContext='text'> <RegExpr String='\\.' attribute='Escaped Char'/> <RegExpr attribute='Section Cue' context='sec-ident' String='(#|§)+' firstNonSpace='true' /> <StringDetect String='~~~' attribute='Literal Block Cue' firstNonSpace='true' context='literal-block-cue'/> <RegExpr attribute='List' String='[\*:]+' firstNonSpace='true' context='text' /> <Detect2Chars char='%' char1='%' attribute='Comment' context='comment'/> <Detect2Chars char='%' char1='!' 
attribute='Critical Directive Cue' context='directive'/> <DetectChar char='%' attribute='Directive Cue' context='directive'/> <DetectChar char='@' attribute='Resource Cue' context='resource'/> <DetectChar char='	' attribute='Normal Text' context='refdef-id'/> </context> <context name='comment' attribute='Comment' lineEndContext='#pop'> </context> <context name='error' attribute='Error' lineEndContext='#pop'> </context> <context name='resource' attribute='Resource Identifier' lineEndContext='#pop'> <DetectSpaces context='#pop!error' attribute='Error'/> </context> <context name='sec-ident' attribute='Identifier' lineEndContext='#pop'> <DetectSpaces context='#pop!sec' attribute='Normal Text'/> </context> <context name='sec' attribute='Header' lineEndContext='#pop'> <IncludeRules context='text'/> ................................................................................ <IncludeRules context='span'/> </context> <context name='span-del' attribute='Deleted Text' lineEndContext='#pop'> <IncludeRules context='span'/> </context> <context name='span-cue' attribute='Span Cue' lineEndContext='#pop' fallthroughContext="error"> <StringDetect attribute='Span Cue' String='`\' context='#pop!flat-span' /> <DetectChar attribute='Span Cue' char='!' 
context='#pop!span-emph' /> <DetectChar attribute='Span Cue' char='*' context='#pop!span-strong' /> <DetectChar attribute='Span Cue' char='~' context='#pop!span-del' /> <AnyChar attribute='Span Cue' String='`$+🔒' context='#pop!span' /> <StringDetect attribute='Span Cue' String='→' context='#pop!ref' /> <StringDetect attribute='Span Cue' String='🔗' context='#pop!ref' /> <DetectChar attribute='Span Cue' char='>' context='#pop!ref' /> <DetectChar attribute='Span Cue' char='^' context='#pop!ref' /> <DetectChar attribute='Span Cue' char='&' context='#pop!ref' /> <DetectChar attribute='Span Cue' char='#' context='#pop!var-ref' /> <DetectChar attribute='Span Cue' char='\' context='#pop!flat-span' /> <Detect2Chars attribute='Comment' char='%' char1='%' context='#pop!inline-comment' /> <Detect2Chars attribute='Critical Directive Cue' char='%' char1='!' context='#pop!inline-directive' /> <DetectChar attribute='Directive Cue' char='%' context='#pop!inline-directive' /> </context> <context name='flat-span' attribute='Unstyled Text' lineEndContext='#pop'> <Detect2Chars attribute='Escaped Char' context='#stay' char='\' char1=']'/> <DetectChar attribute='Span Delimiter' context='#pop' char=']'/> </context> <context name='inline-comment' attribute='Comment' lineEndContext='#pop'> <IncludeRules context='flat-span'/> </context> <context name='inline-directive' attribute='Directive' lineEndContext='#pop'> <IncludeRules context='flat-span'/> <AnyChar String=".:!#$%@~'"" attribute='Directive Cue'/> <DetectSpaces context='#pop!span'/> </context> <context name='ref' attribute='Reference' lineEndContext='#pop'> <IncludeRules context='flat-span'/> <DetectSpaces context='#pop!span'/> </context> <context name='var-ref' attribute='Reference' lineEndContext='#pop'> <WordDetect String="cortav" attribute='Standard Namespace'/> <WordDetect String="env" attribute='Standard Namespace'/> <DetectChar attribute='Span Delimiter' context='#pop' char=']'/> 
................................................................................ </context> </contexts> <itemDatas> <itemData name='Normal Text' defStyleNum='dsNormal'/> <itemData name='Styled Text' defStyleNum='dsNormal'/> <itemData name='Emphatic Text' defStyleNum='dsNormal' italic='true'/> <itemData name='Strong Text' defStyleNum='dsNormal' bold='true'/> <itemData name='Deleted Text' defStyleNum='dsNormal' strikeOut='true'/> <itemData name='Section Cue' defStyleNum='dsKeyword' bold='true'/> <itemData name='Header' defStyleNum='dsControlFlow' underline='true'/> <itemData name='Identifier' defStyleNum='dsVariable'/> <itemData name='Unstyled Text' defStyleNum='dsVerbatimString'/> <itemData name='Escaped Char' defStyleNum='dsSpecialChar'/> <itemData name='Reference' defStyleNum='dsControlFlow' underline='true'/> <itemData name='Span Cue' defStyleNum='dsKeyword' bold='true'/> <itemData name='Resource Cue' defStyleNum='dsKeyword' bold='true'/> <itemData name='Resource Identifier' defStyleNum='dsVariable' bold='true'/> <itemData name='Span Delimiter' defStyleNum='dsKeyword'/> <itemData name='Directive' defStyleNum='dsAttribute' bold='true'/> <itemData name='Directive Cue' defStyleNum='dsAttribute'/> <itemData name='Critical Directive Cue' defStyleNum='dsImport' bold='true'/> <itemData name='Extension Directive' defStyleNum='dsImport' bold='true'/> <itemData name='Renderer Directive' defStyleNum='dsExtension' bold='true'/> <itemData name='Standard Namespace' defStyleNum='dsBuiltIn' bold='true'/> <itemData name='Comment' defStyleNum='dsComment'/> <itemData name='Error' defStyleNum='dsError'/> <itemData name='Macro' defStyleNum='dsPreprocessor' bold='true'/> <itemData name='Macro Delimiter' defStyleNum='dsPreprocessor'/> <itemData name='Field Delimiter' defStyleNum='dsPreprocessor' bold='true'/> <itemData name='List' defStyleNum='dsOperator'/> <itemData name='Literal Block' defStyleNum='dsSpecialString'/> <itemData name='Literal Block Cue' defStyleNum='dsPreprocessor' 
bold='true'/> |
Modified desk/velartrill-cortav.xml from [356c2a8842] to [51a69a6dad].
17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 |
<expanded-acronym>Cortav</expanded-acronym> <generic-icon>x-office-document</generic-icon> <glob pattern="*.ct"/> <glob pattern="*."/> <glob pattern="*.cortav"/> <magic> <match value="%ct\n" offset="0" type="string"/> <match value="\x03\x07\x3E\x2D" offset="0" type="string"/> </magic> </mime-type> <mime-type type="text/x-cortav-intent"> <comment xml:lang="en">Cortav rendering intent file</comment> <comment xml:lang="x-ranuir-Latn">tav cunloci Cortavi</comment> <comment xml:lang="x-ranuir-CR8"> </comment> |
| |
17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 |
<expanded-acronym>Cortav</expanded-acronym>
<generic-icon>x-office-document</generic-icon>
<glob pattern="*.ct"/> <glob pattern="*."/>
<glob pattern="*.cortav"/>
<magic>
<match value="%ct\n" offset="0" type="string"/>
<match value="\x3E\x2E\x14\x0C\x01\x04\x00\x00\x00\x03\x07\x3E\x2D" offset="0" type="string"/>
</magic>
</mime-type>
<mime-type type="text/x-cortav-intent">
<comment xml:lang="en">Cortav rendering intent file</comment>
<comment xml:lang="x-ranuir-Latn">tav cunloci Cortavi</comment>
<comment xml:lang="x-ranuir-CR8"> </comment>
|
Added ext/transmogrify.lua version [ffa0ca0a64].
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 |
-- ext/transmogrify.lua
-- cortav extension that rewrites ASCII shorthand in text spans into
-- typographic characters: arrow/dash/symbol digraphs are replaced from
-- the `patterns` table and straight quotes are replaced with the curly
-- (or language-appropriate) quotes from the `quotes` table.

local ct = require 'cortav'
local ss = require 'sirsem'

-- replacement tables, keyed by encoding. each encoding holds an ordered
-- list of groups; groups are tried in order so that longer sequences
-- (e.g. '<-->') win over their shorter prefixes (e.g. '<->', '--').
local patterns = {
	[ss.str.enc.utf8] = {
		{
			['<-->'] = '⟷';
			['--->'] = '⟶';
			['<---'] = '⟵';
			['----'] = '⸻';
		};
		{
			['<==>'] = '⟺';
			-- long rightwards double arrow; this previously mapped to
			-- '⇐', a short arrow pointing the wrong way
			['===>'] = '⟹';
			['<==='] = '⟸';
		};
		{
			['<->'] = '↔';
			['-->'] = '→';
			['<--'] = '←';
			['==>'] = '⇒';
			['<=>'] = '⇔';
			['<=='] = '⇐';
			['=/='] = '≠';
			['---'] = '⸺';
		};
		{
			['-:-'] = '÷';
			['--'] = '—';
			['(C)'] = '©';
			['(>)'] = '🄯';
			['(R)'] = '®';
			['(TM)'] = '™';
			['(SM)'] = '℠';
		};
	};
}

-- quote glyphs, keyed by encoding and then by language. each entry is
-- laid out as { open-double, close-double, open-single, close-single };
-- the [true] entry is the fallback used when no language matches.
local quotes = {
	[ss.str.enc.utf8] = {
		['en'] = {'“', '”'; '‘', '’'};
		['de'] = {'„', '“'; '‚', '‘'};
		['sp'] = {'«', '»'; '‹', '›'};
		['ja'] = {'「', '」'; '『', '』'};
		['fr'] = {'« ', ' »'; '‹ ', ' ›'};
		[true] = {'“', '”'; '‘', '’'};
	};
}

-- rewrite a single string `t` according to the tables above.
--   ctx: parse context; ctx.doc.enc selects the tables and supplies the
--        encoding helpers, ctx.lang (optional) selects the quote style
--   t:   the text to transform
-- returns the transformed string, or `t` unchanged when the document
-- encoding has no pattern table.
local function meddle(ctx, t)
	local enc = ctx.doc.enc
	local ptns = patterns[enc]
	if not ptns then return t end

	local str = ''
	local lastchar
	local dquo = enc.encodeUCS'"'
	local squo = enc.encodeUCS"'"
	local forceRight = enc.encodeUCS'`'

	-- classify a character: 1 for a double quote, 2 for a single
	-- quote, nil for anything else
	local function quo(c)
		if c == dquo then return 1
		elseif c == squo then return 2 end
	end

	local qtbl
	if quotes[enc] then
		if ctx.lang then
			qtbl = ss.str.langmatch(quotes[enc], ctx.lang, enc)
				or quotes[enc][true]
		else
			qtbl = quotes[enc][true]
		end
	end

	for c, p in ss.str.each(enc, t) do
		local n = t:sub(p.byte)
		-- NOTE(review): parse_escape is used here as returning
		-- (bytes to skip, codepoints to skip, replacement text) when `n`
		-- begins with an escape, and nothing otherwise; confirm
		-- against its definition in sirsem
		local ba, ca, nt = enc.parse_escape(n)
		if ba then
			p.next.byte = p.next.byte + ba
			p.next.code = p.next.code + ca
			str = str .. nt
			lastchar = nt
		else
			local found = false
			local quote = quo(c)
			local force
			-- a backtick directly before a quote character forces the
			-- closing form of that quote; the quote character itself
			-- is skipped by advancing the iterator position
			if not quote and c == forceRight and #t >= p.next.byte then
				quote = quo(enc.char(enc.codepoint(t, p.next.byte)))
				if quote then
					force = 2
					p.next.byte = p.next.byte + #forceRight
					p.next.code = p.next.code + enc.len(forceRight)
				end
			end

			if qtbl and quote then
				found = true
				if force then
					str = str .. qtbl[quote * force]
				elseif lastchar == nil or enc.iswhitespace(lastchar) then
					-- opening quote: slots 1 (double) and 3 (single).
					-- indexing with `quote` alone selected slot 2, the
					-- closing double quote, for an opening single quote
					str = str .. qtbl[quote * 2 - 1]
				else
					-- closing quote: slots 2 (double) and 4 (single)
					str = str .. qtbl[quote * 2]
				end
			else
				for _, order in ipairs(ptns) do
					for k, v in pairs(order) do
						if ss.str.begins(n, k) then
							found = true
							str = str .. v
							-- skip the remainder of the matched sequence;
							-- the iterator has already advanced past its
							-- first character
							p.next.byte = p.next.byte + string.len(k) - 1
							p.next.code = p.next.code + utf8.len(k) - 1
							goto stopsearch
						end
					end
				end
				::stopsearch::
			end

			if not found then
				str = str .. c
			end
			lastchar = c
		end
	end
	return str
end

-- walk a span list in place: plain strings are run through `meddle`,
-- nested span tables are descended into unless they are marked 'raw'.
local function enterspan(origin, spans)
	for i, v in pairs(spans) do
		if type(v) == 'string' then
			spans[i] = meddle(origin, v)
		elseif v.kind ~= 'raw' and v.spans then
			enterspan(v.origin, v.spans)
		end
	end
end

ct.ext.install {
	id = 'transmogrify';
	version = ss.version {0,1; 'devel'};
	contributors = {{name='lexi hale', handle='velartrill', mail='lexi@hale.su', homepage='https://hale.su'}};
	default = true; -- on unless inhibited
	slow = true;
	hook = {
		-- once the AST is complete, transform the spans of every block
		-- in ordinary, blockquote and footnote sections
		doc_meddle_ast = function(job)
			for _, sec in pairs(job.doc.secorder) do
				if sec.kind=='ordinary' or sec.kind=='blockquote' or sec.kind=='footnote' then
					for _, block in pairs(sec.blocks) do
						if type(block.spans) == 'table' then
							enterspan(block.origin, block.spans)
						elseif type(block.spans) == 'string' then
							block.spans = meddle(block.origin, block.spans)
						end
					end
				end
			end
		end;
	};
}
Modified makefile from [42776f3212] to [4482353657].
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
..
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
|
lua != which lua luac != which luac sh != which sh extens = $(wildcard ext/*.lua) extens_names ?= $(basename $(notdir $(extens))) build = build executable = cortav default-format-flags = -m html:width 40em prefix = $(HOME)/.local bin_prefix = $(prefix)/bin share_prefix = $(prefix)/share/$(executable) $(build)/$(executable): sirsem.lua cortav.lua $(extens) cli.lua | $(build)/ @echo ' » building with extensions $(extens_names)' echo '#!$(lua)' > $@ luac -o - $^ >> $@ chmod +x $@ $(build)/cortav.html: cortav.ct $(build)/$(executable) | $(build)/ $(build)/$(executable) $< -o $@ -m render:format html -y html:fossil-uv ................................................................................ .PHONY: clean clean: rm -f $(build)/cortav $(build)/cortav.html $(build)/velartrill-cortav-view.desktop $(build)/cortav-view.sh $(build)/%.sh: desk/%.sh echo >$@ "#!$(sh)" echo >>$@ 'cortav_exec="$(bin_prefix)/$(executable)"' echo >>$@ 'cortav_flags="$${ct_format_flags-$(default-format-flags)}"' cat $< >> $@ chmod +x $@ $(build)/velartrill-cortav-view.desktop: desk/cortav-view.desktop cp $< $@ echo "Exec=$(bin_prefix)/cortav-view.sh" >>$@ %/: mkdir -p $@ .PHONY: install install: $(build)/cortav $(build)/cortav-view.sh $(build)/velartrill-cortav-view.desktop | $(bin_prefix)/ install $(build)/$(executable) $(bin_prefix) install $(build)/cortav-view.sh $(bin_prefix) xdg-mime install desk/velartrill-cortav.xml xdg-desktop-menu install $(build)/velartrill-cortav-view.desktop xdg-mime default velartrill-cortav-view.desktop text/x-cortav .PHONY: excise excise: $(build)/velartrill-cortav-view.desktop xdg-mime uninstall desk/velartrill-cortav.xml xdg-desktop-menu uninstall $(build)/velartrill-cortav-view.desktop rm $(bin_prefix)/$(executable) rm $(bin_prefix)/cortav-view.sh .PHONY: wipe wipe: excise clean |
|
|
|
>
>
>
>
>
>
>
>
>
>
|
|
|
|
>
>
>
>
>
|
|
|
|
|
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
..
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
|
lua != which lua luac != which luac sh != which sh extens = $(wildcard ext/*.lua) extens-names ?= $(basename $(notdir $(extens))) build = build executable = cortav default-format-flags = -m html:width 40em prefix = $(HOME)/.local bin-prefix = $(prefix)/bin share-prefix = $(prefix)/share/$(executable) # by default, we fetch and parse information about encodings we # support so that cortav can do fancy things like format math # equations by character class (e.g. italicizing variables) # this is not necessary for parsing the format, and can be # disabled by blanking the encoding-data list when building # ($ make encoding-data=) encoding-data = ucstbls encoding-files = $(patsubst %,$(build)/%.lc,$(encoding-data)) encoding-data-ucs = https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt $(build)/$(executable): sirsem.lua $(encoding-files) cortav.lua $(extens) cli.lua | $(build)/ @echo ' » building with extensions $(extens-names)' echo '#!$(lua)' > $@ luac -o - $^ >> $@ chmod +x $@ $(build)/cortav.html: cortav.ct $(build)/$(executable) | $(build)/ $(build)/$(executable) $< -o $@ -m render:format html -y html:fossil-uv ................................................................................ 
.PHONY: clean clean: rm -f $(build)/cortav $(build)/cortav.html $(build)/velartrill-cortav-view.desktop $(build)/cortav-view.sh $(build)/%.sh: desk/%.sh echo >$@ "#!$(sh)" echo >>$@ 'cortav_exec="$(bin-prefix)/$(executable)"' echo >>$@ 'cortav_flags="$${ct_format_flags-$(default-format-flags)}"' cat $< >> $@ chmod +x $@ $(build)/velartrill-cortav-view.desktop: desk/cortav-view.desktop cp $< $@ echo "Exec=$(bin-prefix)/cortav-view.sh" >>$@ %/: mkdir -p $@ $(build)/unicode.txt: | $(build)/ curl $(encoding-data-ucs) > $@ $(build)/ucstbls.lc: $(build)/unicode.txt | $(build)/ $(lua) tools/ucs.lua $< | $(luac) -o $@ - .PHONY: install install: $(build)/cortav $(build)/cortav-view.sh $(build)/velartrill-cortav-view.desktop | $(bin-prefix)/ install $(build)/$(executable) $(bin-prefix) install $(build)/cortav-view.sh $(bin-prefix) xdg-mime install desk/velartrill-cortav.xml xdg-desktop-menu install $(build)/velartrill-cortav-view.desktop xdg-mime default velartrill-cortav-view.desktop text/x-cortav .PHONY: excise excise: $(build)/velartrill-cortav-view.desktop xdg-mime uninstall desk/velartrill-cortav.xml xdg-desktop-menu uninstall $(build)/velartrill-cortav-view.desktop rm $(bin-prefix)/$(executable) rm $(bin-prefix)/cortav-view.sh .PHONY: wipe wipe: excise clean |
Modified sirsem.lua from [1f16b393f5] to [581e1b0127].
86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 ... 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 ... 265 266 267 268 269 270 271 272 273 274 275 276 277 278 ... 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 ... 384 385 386 387 388 389 390 |
end else new[k] = v end end return new end function ss.delegate(tbl,tpl) -- returns a table that looks up keys it lacks from -- tbl (lightweight alternative to shallow copies) tpl = tpl or {} return setmetatable({}, {__index=tbl}) end ss.str = {} function ss.str.begins(str, pfx) return string.sub(str, 1, #pfx) == pfx end ss.str.enc = { utf8 = { char = utf8.char; codepoint = utf8.codepoint; }; c6b = {}; ascii = {}; } function ss.str.enc.utf8.each(str, ascode) local pos = { code = 1; byte = 1; } return function() if pos.byte > #str then return nil end local thischar = utf8.codepoint(str, pos.byte) local lastpos = { code = pos.code; byte = pos.byte; next = pos; } if not ascode then thischar = utf8.char(thischar) pos.byte = pos.byte + #thischar else pos.byte = pos.byte + #utf8.char(thischar) end pos.code = pos.code + 1 return thischar, lastpos end end ss.math = {} function ss.math.lerp(t, a, b) return (1-t)*a + (t*b) end ................................................................................ elseif to == 'int' then return math.floor(tonumber(self)) elseif c.cast and c.cast[to] then return c.cast[to](self, ...) elseif type(to) == 'table' and getmetatable(to) and getmetatable(to).cvt and getmetatable(to).cvt[cls] then else error((c.ident or 'class') .. ' is not convertible to ' .. (type(to) == 'string' and to or tostring(to))) end end end if c.fns then return c.fns[k] end end if c.cast then if c.cast.string then cls.__tostring = c.cast.string end if c.cast.number then ................................................................................ if c.construct then c.construct(val, ...) end return val end getmetatable(cls).__call = function(_, ...) return cls.mk(...) end cls.is = function(o) return getmetatable(o) == cls end return cls end -- tidy exceptions ss.exn = ss.declare { ident = 'exn'; ................................................................................ } end; call = function(me, ...) return ss.exn(me, ...) 
end; } ss.str.exn = ss.exnkind 'failure while string munging' function ss.str.delimit(encoding, start, stop, s) local depth = 0 encoding = encoding or ss.str.enc.utf8 if not ss.str.begins(s, start) then return nil end for c,p in encoding.each(s) do if c == (encoding.escape or '\\') then p.next.byte = p.next.byte + #encoding.char(encoding.codepoint(s, p.next.byte)) p.next.code = p.next.code + 1 elseif c == start then depth = depth + 1 elseif c == stop then depth = depth - 1 ................................................................................ return x elseif select('#', ...) == 0 then return nil else return ss.coalesce(...) end end |
> > > > > > > > > > > > > > > > > | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > < | > > > > > > > > > > > > > > > > > > > > > > > > > > > > | > > > > > > > | > > > > > > > > > | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | | | | | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | > > > | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > |
|
end else new[k] = v end end return new end function ss.push(tbl, ...) local idx = #tbl + 1 local function rec(v, ...) tbl[idx] = v idx = idx + 1 if ss.tuple.any(...) then rec(...) end end rec(...) return tbl end function ss.delegate(tbl,tpl) -- returns a table that looks up keys it lacks from -- tbl (lightweight alternative to shallow copies) tpl = tpl or {} return setmetatable({}, {__index=tbl}) end ss.str = {} function ss.str.begins(str, pfx) -- appallingly, this is actually ~2/5ths faster than either -- of the below. i hate scripting languages so much return string.find(str, pfx, 1, true) == 1 -- to my shock, disgust, and horror, even writing my own -- string scanning library for lua IN C only sped this up by -- a tiny fraction. i am just speechless. -- return string.sub(str, 1, #pfx) == pfx -- local pl = string.len(pfx) -- local sl = string.len(str) -- if sl < pl then return false end -- for i=1,pl do -- if string.byte(str,i) ~= string.byte(pfx,i) then -- return false -- end -- end -- return true end function ss.enum(syms) local e = {} for i,v in pairs(syms) do e[v] = i e[i] = v end return e end function ss.bitmask_bytes(n,ofs) ofs = ofs or 0 local function rec(i) if i > n then return end return 1<<(i+ofs), rec(i+1) end return 1<<ofs, rec(1) end function ss.bitmask(tbl,ofs) local codes = {ss.bitmask_bytes(#tbl,ofs)} local m = {} local maxbit for i, s in ipairs(tbl) do m[s] = codes[i] m[codes[i]] = s maxbit = i end m[true] = {ofs or 0,maxbit} return m end ss.str.charclass = ss.enum { 'numeral'; 'letter'; 'symbol'; 'punct'; 'space'; 'ctl'; 'glyph'; -- hanji } ss.str.charprop = ss.bitmask({ 'hexnumeral', -- character that can be used to write hexadecimal notation 'upper', 'lower'; 'diac'; -- diacritic/modifier letter 'wordbreak'; -- char causes following characters to be treated as a separate word (e.g. punctuation) 'wordsep'; -- char causes previous and following characters to be treated as separate words; char constitutes a word of its own in between (e.g. 
interpunct) 'breakokay'; -- is it okay to break words at this character? (eg hyphen) 'mathop'; -- char is a mathematical operator 'disallow', -- char is not allowed in narrative text 'brack', 'right', 'left', -- brackets 'noprint', -- character deposits no ink 'superimpose' -- character is superimposed over previous }, 3) ss.str.enc_generics = { pfxescape = function(ch, enc, chain) local bytes = #ch local codes = enc.len(ch) return function(s) if s == ch then return 0, 0, ch elseif ss.str.begins(s, ch) then local nc = enc.char(enc.codepoint(s, bytes + 1)) return bytes, codes, nc elseif chain then return chain(s) end end end; }; local cc,cp = ss.str.charclass, ss.str.charprop ss.str.enc = { utf8 = { char = utf8.char; codepoint = utf8.codepoint; len = utf8.len; encodeUCS = function(str) return str end; iswhitespace = function(c) return (c == ' ') or (c == '\t') or (c == '\n') or (c == '\u{3000}') or (c == '\u{200B}') end; }; ascii = { len = string.len; char = string.char; codepoint = string.byte; iswhitespace = function(c) return (c == ' ') or (c == '\t') or (c == '\n') end; ranges = { {0x00,0x1a, cc.ctl}; {0x1b,0x1b, cc.ctl, cp.disallow}; {0x1c,0x1f, cc.ctl}; {0x20,0x20, cc.space}; {0x21,0x22, cc.punct}; {0x23,0x26, cc.symbol}; {0x27,0x29, cc.punct}; {0x2a,0x2b, cc.symbol}; {0x2c,0x2f, cc.punct}; {0x30,0x39, cc.numeral, cp.hexnumeral}; {0x3a,0x3b, cc.punct}; {0x3c,0x3e, cc.symbol, cp.mathop}; {0x3f,0x3f, cc.punct}; {0x40,0x40, cc.symbol}; {0x41,0x46, cc.letter, cp.ucase, cp.hexnumeral}; {0x47,0x5a, cc.letter, cp.ucase}; {0x5b,0x5d, cc.symbol, cp.mathop}; {0x5e,0x5e, cc.symbol, mathop}; {0x5f,0x60, cc.symbol}; {0x61,0x66, cc.letter, cp.lcase, cp.hexnumeral}; {0x67,0x7a, cc.letter, cp.lcase}; {0x7b,0x7e, cc.symbol}; {0x7f,0x7f, cc.ctl, cp.disallow}; } }; raw = {len = string.len; char = string.char; codepoint = string.byte; encodeUCS = function(str) return str end; iswhitespace = function(c) return (c == ' ') or (c == '\t') or (c == '\n') end; }; } -- unicode ranges 
are optionally generated from consortium data -- files and injected through a generated source file. if this -- part of the build process is disabled (e.g. due to lack of -- internet access, or to keep the size of the executable as -- small as possible), we still at least can make the ascii -- ranges available to UTF8 (UTF8 being a superset of ascii) ss.str.enc.utf8.ranges = ss.delegate(ss.str.enc.ascii.ranges) function ss.str.enc.ascii.encodeUCS(str) local newstr = '' for c,p in ss.str.each(ss.str.enc.utf8, str, true) do if c > 0x7F then newstr = newstr .. '?' else newstr = newstr .. string.char(c) end end end for _, v in pairs{'utf8','ascii','raw'} do ss.str.enc[v].parse_escape = ss.str.enc_generics.pfxescape('\\',ss.str.enc[v]) end function ss.str.classify(enc, ch) if not enc.ranges then return {} end if type(ch)=='string' then ch = enc.codepoint(ch) end -- TODO end function ss.str.each(enc, str, ascode) if enc.each then return enc.each(enc,str,ascode) end local pm = { __index = { esc = function(self) local ba, bc, nc = enc.parse_escape(str:sub(self.byte)) if ba then self.next.byte = self.next.byte + ba - 1 self.next.code = self.next.code + bc - 1 return nc end end; }; } local pos = { code = 1; byte = 1; } return function() if pos.byte > #str then return nil end local thischar = enc.codepoint(str, pos.byte) local lastpos = setmetatable({ code = pos.code; byte = pos.byte; next = pos; },pm) if not ascode then thischar = enc.char(thischar) pos.byte = pos.byte + #thischar else pos.byte = pos.byte + #enc.char(thischar) end pos.code = pos.code + 1 return thischar, lastpos end end function ss.str.breakwords(enc, str, max, opts) if enc.breakwords then return enc.breakwords(str) end local words = {} opts = opts or {} local buf = '' local flush = function() if buf ~= '' then table.insert(words,buf) buf = '' end end for c, p in ss.str.each(enc,str) do local nc if opts.escape then nc = p:esc() end if nc then buf = buf + nc elseif enc.iswhitespace(c) then flush() if max and 
#words == max then local rs = str:sub(p.next.byte) if rs ~= '' then table.insert(words, rs) end break end else buf = buf .. c end end flush() return words end function ss.str.mergewords(enc, lst) if enc.mergewords then return enc.mergewords(lst) end return table.concat(lst, enc.wordsep or ' ') end function ss.str.breaklines(enc, str, opts) if enc.breaklines then return enc.breaklines(lst,opts) end return ss.str.split(enc, str, enc.encodeUCS'\n', opts) end function ss.str.split(enc, str, delim, opts) if enc.split then return enc.split(str,delim,opts) end opts = opts or {} local elts = {} local buf = '' local flush = function() if buf ~= '' or opts.keep_empties then table.insert(elts,buf) buf = '' end end local esc = enc.parse_escape local tryesc if opts.escape then tryesc = function(str, p) local ba, ca, escd = enc.parse_escape(str:sub(p.byte)) if ba then p.next.byte = p.next.byte + ba p.next.code = p.next.code + ca buf = buf .. escd return true end end else tryesc = function(...) end end if type(delim) == 'function' then for c, p in ss.str.each(enc,str) do if not tryesc(str,p) then local skip = delim(str:sub(p.byte)) if skip then flush() p.next.byte = p.next.byte + skip - 1 else buf = buf .. c end end end elseif enc.len(delim) == 1 then for c, p in ss.str.each(enc,str) do if not tryesc(str,p) then if c == delim then flush() else buf = buf .. c end end end else local dlcode = enc.len(delim) for c, p in ss.str.each(enc,str) do if not tryesc(str,p) then if str:sub(p.byte, p.byte+#delim-1) == delim then flush() p.next.byte = p.next.byte + #delim - 1 p.next.code = p.next.code + dlcode else buf = buf .. c end end end end flush() return elts end function ss.str.langmatch(tbl, lang, enc) -- this performs primitive language matching. NOTE: THIS IS NOT -- STANDARDS COMPLIANT. it's "good enough" for now, but in the -- long term it needs to be rewritten to actually understand the -- format, primarily so that e.g. 
'en-US-Latn' and 'en-Latn-US' -- match -- currently order is significant. it shouldn't be -- ref: IETF BCP 47 (RFC 5646) https://www.ietf.org/rfc/bcp/bcp47.html local dash = enc.encodeUCS'-' local tags = ss.str.split(enc, lang, dash, {escape=true}) local bestlen = 0 local bestmatch for k,v in pairs(tbl) do if k ~= true then local kt = ss.str.split(enc, k, dash, {escape=true}) for i=1,math.min(#kt,#tags) do if kt[i] ~= tags[i] then goto skip end end if #kt > bestlen then -- match the most specific matching tag bestmatch = k bestlen = #kt end end ::skip::end return tbl[bestmatch] or tbl[true], bestmatch end ss.math = {} function ss.math.lerp(t, a, b) return (1-t)*a + (t*b) end ................................................................................ elseif to == 'int' then return math.floor(tonumber(self)) elseif c.cast and c.cast[to] then return c.cast[to](self, ...) elseif type(to) == 'table' and getmetatable(to) and getmetatable(to).cvt and getmetatable(to).cvt[cls] then else error((c.ident or 'class') .. ' is not convertible to ' .. (type(to) == 'string' and to or tostring(to))) end end end if c.fns and c.fns[k] then return c.fns[k] end if c.index then return c.index(self,k) end end if c.cast then if c.cast.string then cls.__tostring = c.cast.string end if c.cast.number then ................................................................................ if c.construct then c.construct(val, ...) end return val end getmetatable(cls).__call = function(_, ...) return cls.mk(...) end cls.is = function(o) return getmetatable(o) == cls end cls.__metatable = cls -- lock metatable return cls end -- tidy exceptions ss.exn = ss.declare { ident = 'exn'; ................................................................................ } end; call = function(me, ...) return ss.exn(me, ...) 
end; } ss.str.exn = ss.exnkind 'failure while string munging' ss.bug = ss.exnkind 'tripped over bug' function ss.str.delimit(encoding, start, stop, s) local depth = 0 encoding = encoding or ss.str.enc.utf8 if not ss.str.begins(s, start) then return nil end for c,p in ss.str.each(encoding,s) do if c == (encoding.escape or '\\') then p.next.byte = p.next.byte + #encoding.char(encoding.codepoint(s, p.next.byte)) p.next.code = p.next.code + 1 elseif c == start then depth = depth + 1 elseif c == stop then depth = depth - 1 ................................................................................ return x elseif select('#', ...) == 0 then return nil else return ss.coalesce(...) end end ss.tuple = {} function ss.tuple.any(...) return select('#',...) > 0 end function ss.tuple.cat(...) local a = {...} return function(...) ss.push(a, ...) return table.unpack(a) end end function ss.tuple.suffix(sfx,n,...) if n ~= nil then return n, ss.tuple.suffix(...) else return sfx end end function ss.tuple.cdr(x, ...) return ... end ss.stack = ss.declare { ident = 'stack'; mk = function() return { top = 0; store = {}; } end; index = function(me, i) if i <= 0 then return me.store[me.top + i] else return me.store[i] end end; fns = { push = function(me, val, ...) if val~=nil then me.top = me.top + 1 me.store[me.top] = val me:push(...) end return val, ... 
end; pop = function(me,n) n = n or 1 local r = {} if n < me.top then for i = 0,n-1 do r[i+1] = me.store[me.top - i] me.store[me.top - i] = nil end me.top = me.top - n else r = me.store me.store = {} end return table.unpack(r) end; set = function(me,val) if me.top == 0 then me.top = me.top + 1 --autopush end me.store[me.top] = val end; all = function(me) return table.unpack(me.store) end; each = function(forward) if forward then local idx = 0 return function() idx = idx + 1 if idx > top then return nil else return me.store[idx], idx end end else local idx = top + 1 return function() idx = idx - 1 if idx == 0 then return nil else return me.store[idx], idx end end end end; }; } ss.automat = ss.declare { ident = 'automat'; mk = function() return { state = ss.stack(); states = {}; ttns = {}; mem = {}; match = function(sym, ttn, mach) if ttn.pred and ttn:pred(mach, sym)~=true then return false end if ttn.on then return sym == ttn.on end return false end; } end; construct = function(me, def) for k,v in pairs{'states','ttns','mem','syms'} do if def[k] then me[k] = v end end end; fns = { react = function(me,sym) local s = me.states[me.state.id] if s and s.input then s:react(me, sym) end end; drop = function(me,n) for i = 0, math.min(n-1,me.state.top-1) do local s = me.states[me.state[-i].id] if s.exit then s:exit(s.mem, me) end end if n < me.state.top then local newtop = me.states[me.state[-n].id] if newtop.activate then newtop:activate(me.state[-n].mem, me, n) end end return me.state:pop(n) end; clear = function(me) return me:drop(me.state.top) end; transition = function(me,ttn,oldstates) local s = me.state:push {id = ttn.to, mem = {}} local to = me.states[ttn.to] if to.enter then to:enter(s.mem, me) end end; input = function(me,sym) local ttns = me.ttns[me.state.id] local _, ttn = ss.find(ttns, function(ttn) return me.match(sym, ttn, me) end) if ttn then if ttn.pop then local oldstates = {me.state:drop(ttn.pop)} me:transition(ttn, sym, oldstates) else me:transition(ttn, 
sym) end else me:react(sym) end end; }; } |
Added tools/ucs.lua version [3976f4bc78].
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 |
-- [ʞ] tools/ucs.lua
--  ~ lexi hale <lexi@hale.su>
--  ? table generator for unicode character classes
--  🄯 AGPLv3
--
-- reads a semicolon-separated character database (one codepoint per
-- line) from the file named by the first argument, or from stdin when
-- no argument is given, and writes a lua source file to stdout that
-- assigns a compact list of {first, last, category} ranges to
-- ss.str.enc.utf8.ranges.
-- NOTE(review): the field positions used below (1 = hex codepoint,
-- 2 = name, 3 = general category, 5 = bidi class) look like the
-- consortium's UnicodeData.txt layout -- confirm against the build.

local tpl = [[
local ss = require 'sirsem'
ss.str.enc.utf8.ranges = {%s}
]]

-- builds a two-way lookup table: name -> index and index -> name
local enum = function(syms)
	local e = {}
	for i,v in pairs(syms) do
		e[v] = i
		e[i] = v
	end
	return e
end

-- input source: the named file if a path was supplied, else stdin
local file = io.stdin
local path = arg and arg[1]
if path then
	-- fail immediately with the OS error instead of crashing later on
	-- a nil file handle
	file = assert(io.open(path, 'rb'))
end

-- returns the n+1 single-bit values 1<<ofs, 1<<(ofs+1), ... 1<<(ofs+n)
local bitmask_raw = function(n,ofs)
	ofs = ofs or 0
	local function rec(i)
		if i > n then return end
		return 1<<(i+ofs), rec(i+1)
	end
	return 1<<ofs, rec(1)
end

-- builds a two-way lookup between flag names and single-bit values,
-- starting at bit `ofs`. the entry under the key `true` records
-- {ofs, number of names}
local bitmask = function(tbl,ofs)
	local codes = {bitmask_raw(#tbl,ofs)}
	local m = {}
	local maxbit
	for i, s in ipairs(tbl) do
		m[s] = codes[i]
		m[codes[i]] = s
		maxbit = i
	end
	m[true] = {ofs or 0,maxbit}
	return m
end

-- basic character type; the values 1..7 fit in the low three bits of
-- a category word
local basictype = enum {
	'numeral';
	'alpha';
	'symbol';
	'punct';
	'space';
	'ctl';
	'glyph'; -- hanji
}

-- property flags, occupying the bits above the basic type.
-- NOTE(review): ss.str.charprop in sirsem declares 'breakokay' and
-- 'mathop' between 'wordsep' and 'disallow', which this list lacks,
-- so every flag from 'disallow' onward lands on a different bit
-- there. confirm which layout is authoritative before decoding the
-- generated table with ss.str.charprop.
local props = bitmask({
	'hex',
	'upper', 'lower', 'diac',
	'wordbreak', 'wordsep',
	'disallow',
	'brack', 'right', 'left',
	'noprint', 'superimpose'
}, 3)

local overrides = {
	[0x200B] = basictype.space | props.wordsep; -- database entry is wrong
}

local mask = ~0 -- mask out irrelevant properties to compactify database

-- maps one database record (codepoint, class, kind) to a category
-- word; returns 0 for classes that are not tracked
local function parsecat(tbl)
	local c,p,b = 0,props,basictype
	if overrides[tbl.codepoint] then
		c = overrides[tbl.codepoint]
	elseif tbl.class == 'Nd' then c = b.numeral
	elseif tbl.class == 'No' then c = b.numeral | p.diac
	elseif tbl.class == 'Cc' then
		if tbl.kind == 'S' or tbl.kind == 'WS' or tbl.kind == 'B' then
			c = b.space | p.wordsep
		else
			c = b.ctl | p.wordbreak | p.disallow
		end
	elseif tbl.class == 'Lu' then c = b.alpha | p.upper
	elseif tbl.class == 'Ll' then c = b.alpha | p.lower
	elseif tbl.class == 'Lo'
	    or tbl.class == 'Lt' then c = b.alpha
	elseif tbl.class == 'Po' then c = b.punct | p.wordbreak
	elseif tbl.class == 'Sm' then c = b.symbol | p.wordsep
	elseif tbl.class == 'Ps' then c = b.punct | p.brack | p.left
	elseif tbl.class == 'Pe' then c = b.punct | p.brack | p.right
	elseif tbl.class == 'Pc'
	    or tbl.class == 'Pd'
	    or tbl.class == 'Sk'
	    or tbl.class == 'Sc' then c = b.symbol
	elseif tbl.class == 'Zs' then
		c = b.space
		if tbl.kind == 'WS' then c = c | p.wordsep end
	elseif tbl.class == 'So' then c = b.glyph
	elseif tbl.class == 'Mn' then c = b.symbol | p.diac | p.superimpose
	end
	return c & mask
end

-- extra codepoints that are injected into the record list by hand
-- rather than read from the database
local ranuirAlpha = {0xe39d, 0xe39f, 0xe3ad, 0xe3af, 0xe3b5, 0xe3b7,
	0xe3b9, 0xe3bb, 0xe3bd, 0xe3be, 0xe3bf, 0xe3c5, 0xe3c7, 0xe3c9,
	0xe3cb, 0xe3cc, 0xe3cd, 0xe3ce, 0xe3cf}
local ranuirSpecial = {
	[0xe390] = basictype.space | props.wordsep;
}

local ranuir = {}
for _,v in pairs(ranuirAlpha) do ranuir[v] = basictype.alpha end
for k,v in pairs(ranuirSpecial) do ranuir[k] = v end
local ranuirKeys = {}
for k in pairs(ranuir) do table.insert(ranuirKeys, k) end
table.sort(ranuirKeys)

local recs = {}
local ranuirok = false
for ln in file:lines() do
	-- split on ';'. anchoring every field on its terminating ';'
	-- keeps empty fields in position without depending on how gmatch
	-- treats empty matches
	local v = {}
	for s in (ln .. ';'):gmatch('([^;]*);') do
		table.insert(v, s)
	end

	-- blank or malformed lines yield no codepoint; skip them rather
	-- than aborting on a nil comparison
	local cp = tonumber(v[1], 0x10)
	if cp and cp > 0x7f then -- discard ASCII, we already have that
		local code = {
			codepoint = cp;
			name = v[2];
			class = v[3];
			kind = v[5];
		}
		code.cat = parsecat(code)

		-- splice the hand-written records in, once, just before the
		-- first database record that sorts after them
		if (not ranuirok) and code.codepoint > 0xe390 then
			-- ipairs: ranuirKeys is sorted and must be appended in order
			for _,ri in ipairs(ranuirKeys) do
				table.insert(recs, {
					codepoint = ri;
					cat = ranuir[ri];
				})
			end
			ranuirok = true
		end

		if code.cat ~= 0 then
			table.insert(recs,code)
		end
	end
end
if file ~= io.stdin then file:close() end

-- collapse runs of consecutive records that share a category into
-- {first codepoint, last codepoint, category} triples
local ranges = {}
local last = recs[1]
local start = last
local altern = false
local flush = function()
	local new = {start.codepoint, last.codepoint, last.cat}
	if altern then
		-- an alternating-case run is stored with both case bits set
		new[3] = new[3] | props.upper | props.lower
	end
	table.insert(ranges, new)
	altern = false
end
for _, r in ipairs(recs) do
	if r.cat ~= last.cat then
		-- we can massively compactify this set with one weird trick:
		-- most non-ascii cased character sets are not in AAAAaaaa,
		-- but rather AaAaAa order. so we can look for this simple
		-- pattern and compress it, shaving c. 1/3rd off our dataset
		local ambi = props.upper | props.lower
		if (altern or (start == last and (last.cat & props.upper) ~= 0)) and
		   ((r.cat &~ ambi) == (last.cat &~ ambi)) then
			altern = true
			last = r
		else
			flush()
			start = r
		end
	elseif altern then
		flush()
		start = r
	end
	last = r
end
-- nothing is pending when the input produced no records at all
if start then flush() end

-- expand bitmask
-- for k,v in pairs(ranges) do
-- 	local basic = v[3] & ((1<<3) - 1) -- first three bits
-- 	if basic ~= 0 then
-- 		v[4] = basictype[basic]
-- 	end
-- 	local bitrange = props[true]
-- 	for j=bitrange[1], bitrange[2] do
-- 		if (v[3] & (1<<j)) ~= 0 then
-- 			table.insert(v, props[1<<j])
-- 		end
-- 	end
-- end

-- the data has been collected and formatted in the manner we
-- need; now we just need to emit it as a lua table

local tab = {}
-- ipairs: emit the ranges in the order they were built
for i,v in ipairs(ranges) do
	tab[i] = string.format('{0x%x,0x%x,%u}',table.unpack(v))
end
io.stdout:write(string.format(tpl, table.concat(tab,',')))