cortav  Check-in [52b9bce7dd]

Overview
Comment:all kindsa shit
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA3-256: 52b9bce7dd9317730dfccf2eefd17494b43d4f37936044c542ad0dce7a9d6b92
User & Date: lexi on 2021-12-26 04:08:02
Other Links: manifest | tags
Context
2021-12-26
17:49
get math parser working check-in: d1b7d2fd5f user: lexi tags: trunk
04:08
all kindsa shit check-in: 52b9bce7dd user: lexi tags: trunk
2021-12-22
10:23
fix bugged makefile check-in: 36024a43c5 user: lexi tags: trunk
Changes

Modified cli.lua from [a9857f9cb6] to [ad6ab18d31].

     3      3   
     4      4   local default_mode = {
     5      5   	['render:format'] = 'html';
     6      6   	['html:gen-styles'] = true;
     7      7   }
     8      8   
     9      9   local function
    10         -main(input, output, log, mode, suggestions, vars)
    11         -	local doc = ct.parse(input.stream, input.src, mode)
           10  +main(input, output, log, mode, suggestions, vars, extrule)
           11  +	local doc = ct.parse(input.stream, input.src, mode, function(c)
           12  +		                     c.doc.ext = extrule
           13  +	                     end)
    12     14   	input.stream:close()
    13     15   	if mode['parse:show-tree'] then
    14     16   		log:write(ss.dump(doc))
    15     17   	end
    16     18   
    17     19   	-- the document has now had a chance to give its say; if it hasn't specified
    18     20   	-- any modes of its own, we now merge in the 'weak modes' (suggestions)
................................................................................
    70     72   			['mode-set'] = 1;
    71     73   			['mode-clear'] = 1;
    72     74   			mode = 2;
    73     75   
    74     76   			['mode-set-weak'] = 1;
    75     77   			['mode-clear-weak'] = 1;
    76     78   			['mode-weak'] = 2;
           79  +			['use'] = 1;
           80  +			['inhibit'] = 1;
           81  +			['need'] = 1;
           82  +			['load'] = 1;
           83  +			['enc'] = 1;
    77     84   		}
    78     85   		return param_opts[o] or 0
    79     86   	end
    80     87   
    81     88   	local optmap = {
    82     89   		o = 'out';
    83     90   		l = 'log';
    84     91   		d = 'define';
    85     92   		V = 'version';
    86     93   		h = 'help';
    87     94   		y = 'mode-set',   Y = 'mode-set-weak';
    88     95   		n = 'mode-clear', N = 'mode-clear-weak';
    89     96   		m = 'mode',       M = 'mode-weak';
           97  +		L = 'load',
           98  +		u = 'use', i = 'inhibit', r = 'require';
           99  +		e = 'enc';
    90    100   	}
          101  +
          102  +	local extrule = {use={},inhibit={},need={}}
    91    103   
    92    104   	local checkmodekey = function(key)
    93    105   		if not key:match '[^:]+:.+' then
    94    106   			ct.exns.cli('invalid mode key %s', key):throw()
    95    107   		end
    96    108   		return key
    97    109   	end
................................................................................
   117    129   		mode = function(key,value) mode[checkmodekey(key)] = value end;
   118    130   		['mode-set'] = function(key) mode[checkmodekey(key)] = true end;
   119    131   		['mode-clear'] = function(key) mode[checkmodekey(key)] = false end;
   120    132   
   121    133   		['mode-weak'] = function(key,value) suggestions[checkmodekey(key)] = value end;
   122    134   		['mode-set-weak'] = function(key) suggestions[checkmodekey(key)] = true end;
   123    135   		['mode-clear-weak'] = function(key) suggestions[checkmodekey(key)] = false end;
   124         -
          136  +		['use'    ] = function(ext) extrule.use    [ext] = true end;
          137  +		['inhibit'] = function(ext) extrule.inhibit[ext] = true end;
          138  +		['require'] = function(ext) extrule.need   [ext] = true end;
          139  +		['load'] = function(extpath) end;
          140  +		['enc'] = function(enc) end;
   125    141   		['version'] = function()
   126    142   			outp:write(ct.info:about())
   127    143   			if next(ct.ext.loaded) then
   128    144   				outp:write('\nactive extensions:\n')
   129    145   				for k,v in pairs(ct.ext.loaded) do
   130    146   					outp:write(string.format(' * %s', v.id ..
   131    147   						(v.version and (' ' .. v.version:string()) or '')))
................................................................................
   175    191   			keepParsing = false
   176    192   		else
   177    193   			local longopt = v:match '^%-%-(.+)$'
   178    194   			if keepParsing and longopt then
   179    195   				execLongOpt(longopt)
   180    196   			else
   181    197   				if keepParsing and v:sub(1,1) == '-' then
   182         -					for c,p in ss.str.enc.utf8.each(v:sub(2)) do
          198  +					for c,p in ss.str.each(ss.str.enc.utf8, v:sub(2)) do
   183    199   						if optmap[c] then
   184    200   							execLongOpt(optmap[c])
   185    201   						else
   186    202   							ct.exns.cli('switch -%s unrecognized', c):throw()
   187    203   						end
   188    204   					end
   189    205   				else
................................................................................
   197    213   	if args[1] and args[1] ~= '' then
   198    214   		local file = io.open(args[1], "rb")
   199    215   		if not file then error('unable to load file ' .. args[1]) end
   200    216   		input.stream = file
   201    217   		input.src.file = args[1]
   202    218   	end
   203    219   
   204         -	return main(input, outp, log, mode, suggestions, vars)
          220  +	return main(input, outp, log, mode, suggestions, vars, extrule)
   205    221   end
   206    222   
   207         -local ok, e = pcall(entry_cli)
   208         --- local ok, e = true, entry_cli()
          223  +-- local ok, e = pcall(entry_cli)
          224  +local ok, e = true, entry_cli()
   209    225   if not ok then
   210    226   	local str = 'translation failure'
   211    227   	if ss.exn.is(e) then
   212    228   		str = e.kind.desc
   213    229   	end
   214    230   	local color = false
   215    231   	if log:seek() == nil then

Modified cortav.ct from [c71fe3a9e8] to [5df14cacc3].

     4      4   	dict: http://ʞ.cc/fic/spirals/glossary
     5      5   
     6      6   the cortav [!format] can be called [!cortavgil], or [!gil cortavi], to differentiate it from the reference implementation [!cortavsir] or [!sir cortavi].
     7      7   
     8      8   %toc
     9      9   
    10     10   ## cortav vs. markdown
    11         -the most important difference between cortav and markdown is that cortav is strictly line-oriented. this choice was made to ensure that cortav was relatively easy to parse. so while a simple [$.ct] file may look a bit like a [$.md] file, in reality it's a lot closer to gemtext than any flavor of markdown.
           11  +the most important difference between cortav and markdown is that cortav is strictly line-oriented. this choice was made to ensure that cortav was relatively easy to parse. so while a simple [`.ct] file may look a bit like a [`.md] file, in reality it's a lot closer to gemtext than any flavor of markdown.
    12     12   
    13     13   ## encoding
    14     14   a cortav document is made up of a sequence of codepoints. UTF-8 must be supported, but other encodings (such as UTF-32 or C6B) may be supported as well. lines will be derived by splitting the codepoints at the linefeed character or equivalent. note that unearthly encodings like C6B or EBCDIC will need to select their own control sequences.
    15     15   
    16     16   ## file type
    17     17   a cortav source file is identified using a file extension, file type, and/or magic byte sequence.
    18     18   
    19     19   three file extensions are defined as identifying a cortav source file. where relevant, all must be recognized as indicating a cortav source file.
    20         -* [$ct] is the shorthand extension
    21         -* [$cortav] is the canonical disambiguation extension, for use in circumstances where [$*.ct] is already defined to mean a different file format.
    22         -* [$] is the canonical Corran extension, a byte sequence comprising the unicode codepoints [$U+E3CE U+E3BD]. where the filesystem in question does not specify a filename encoding, the bytes should be expressed in UTF-8.
           20  +* [`ct] is the shorthand extension
           21  +* [`cortav] is the canonical disambiguation extension, for use in circumstances where [`*.ct] is already defined to mean a different file format.
           22  +* [`] is the canonical Corran extension, a byte sequence comprising the unicode codepoints [`U+E3CE U+E3BD]. where the filesystem in question does not specify a filename encoding, the bytes should be expressed in UTF-8.
    23     23   
    24     24   three more extensions are reserved for identifying a cortav intent file.
    25         -* [$ctc] is the shorthand extension
    26         -* [$cortavcun] is the canonical disambiguation extension
    27         -* [$] is the canonical Corran extension, a byte sequence comprising the unicode codepoints [$U+E3CE U+E3BD U+E3CE]. where the filesystem in question does not specify a filename encoding, the bytes should be expressed in UTF-8.
           25  +* [`ctc] is the shorthand extension
           26  +* [`cortavcun] is the canonical disambiguation extension
           27  +* [`] is the canonical Corran extension, a byte sequence comprising the unicode codepoints [`U+E3CE U+E3BD U+E3CE]. where the filesystem in question does not specify a filename encoding, the bytes should be expressed in UTF-8.
    28     28   
    29     29   on systems which use metadata to encode filetype, two values are defined to identify cortav source files
    30         -* [$text/x-cortav] should be used when strings or arbitrary byte sequences are supported
    31         -* [$CTAV] (that is, the byte sequence [$0x43 0x54 0x41 0x56]) should be used on systems that support only 32-bit file types/4-character type codes like Classic Mac OS.
           30  +* [`text/x-cortav] should be used when strings or arbitrary byte sequences are supported
           31  +* [`CTAV] (that is, the byte sequence [`0x43 54 41 56]) should be used on systems that support only 32-bit file types/4-character type codes like Classic Mac OS.
    32     32   
    33     33   two more values are defined to identify cortav intent files.
    34         -* [$text/x-cortav-intent] 
    35         -* [$CTVC] (the byte sequence [$0x43 0x54 0x56 0x43])
           34  +* [`text/x-cortav-intent]
           35  +* [`CTVC] (the byte sequence [`0x43 54 56 43])
    36     36   
    37         -on systems which do not define a canonical way of encoding the filetype but support extended attributes of some kind, such as linux, an attribute named [$mime] may be created and given the value [$text/x-cortav] or [$text/x-cortav-intent]; alternatively, extensions may be used.
           37  +on systems which do not define a canonical way of encoding the filetype but support extended attributes of some kind, such as linux, an attribute named [`mime] may be created and given the value [`text/x-cortav] or [`text/x-cortav-intent]; alternatively, extensions may be used.
    38     38   
    39     39   it is also possible to indicate the nature of a cortav file without using filesystem metadata. this is done by prefixing the file with a magic byte sequence. the sequence used depends on the encoding.
    40         -* for UTF-8 and ASCII, [$%ct[!\\n]] (that is, the byte sequence [$0x25 0x63 0x74 0x0A]) should be used
    41         -* for C6B, the file should begin with the word [$] (that is, the byte sequence [$0x03 0x07 0x3E 0x2D]).
           40  +* for UTF-8 and ASCII plain text files, [`%ct[!\\n]] (that is, the byte sequence [`0x25 63 74 0A]) should be used
           41  +* for C6B+PS files (parastream), the file should begin with the paragraph [`], which equates to the byte sequence [` 0x3E 2E 14 0C 01 04 00 00 00 03 07 3E 2D], including the parastream header).
    42     42   consequently, this sequence should be ignored by a cortav parser at the start of a file (except as an indication of file format).
    43     43   
    44         -for FreeDesktop-based systems, the [$velartrill-cortav.xml] file included in the repository supplies mappings for the extensions and magic byte sequences. a script is also included which can be registered with xdg-open so that double-clicking on a cortav file will render it out and open it in your default web browser.
           44  +for FreeDesktop-based systems, the [`build/velartrill-cortav.xml] file included in the repository supplies mappings for the extensions and magic byte sequences. a script is also included which can be registered with xdg-open so that double-clicking on a cortav file will render it out and open it in your default web browser. [`$ make install] will generate the necessary FreeDesktop XML files and register them, as well as install the script and the [`cortav] executable itself. for more information see [>refimpl-build building the reference implementation].
    45     45   
    46     46   ## structure
    47     47   cortav is based on an HTML-like block model, where a document consists of sections, which are made up of blocks, which may contain a sequence of spans. flows of text are automatically conjoined into spans, and blocks are separated by one or more newlines. this means that, unlike in markdown, a single logical paragraph [*cannot] span multiple ASCII lines. the primary purpose of this was to ensure ease of parsing, but also, both markdown and cortav are supposed to be readable from within a plain text editor. this is the 21st century. every reasonable text editor supports soft word wrap, and if yours doesn't, that's entirely your own damn fault.
    48     48   
    49         -the first character(s) of every line (the "control sequence") indicates the role of that line. if no control sequence is recognized, the sequence [$.] is implied instead. the standard line classes and their associated control sequences are listed below. some control sequences have alternate forms, in order to support modern, readable unicode characters as well as plain ascii text.
    50         -
    51         -* paragraphs (. ¶ ❡): a paragraph is a simple block of text. the period control sequence is only necessary if the paragraph text begins with something that would otherwise be interpreted as a control sequence.
    52         -* newlines (\\): inserts a line break into previous paragraph and attaches the following text. mostly useful for poetry or lyrics.
    53         -* section starts (# §): starts a new section. all sections have an associated depth, determined by the number of sequence repetitions (e.g. "###" indicates depth-three"). sections may have headers and IDs; both are optional. IDs, if present, are a sequence of raw-text immediately following the hash marks. if the line has one or more space character followed by styled-text, a header will be attached. the character immediately following the hashes can specify a particular type of section. e.g.:
    54         -** [$#] is a simple section break.
    55         -** [$#anchor] opens a new section with the ID [$anchor].
    56         -** [$# header] opens a new section with the title "header".
    57         -** [$#anchor header] opens a new section with both the ID [$anchor] and the title "header".
    58         -** [$#>conversation] opens a blockquote section named [$conversation] without a header.
    59         -** [$#^id] opens a footnote section for the multiline footnote [$id]. the ID must be specified.
    60         -** [$#$id] opens the multiline macro [$id]. the ID must be specified.
    61         -** [$#&id mime] opens a new inline object [$id] of type [$mime]. useful for embedding SVGs. the ID and mime type must be specified.
    62         -* lists (* :): these are like paragraph nodes, but list nodes that occur next to each other will be arranged so as to show they compose a sequence. depth is determined by the number of stars/colons. like headers, a list entry may have an ID that can be used to refer back to it; it is indicated in the same way. if colons are used, this indicates that the order of the items is signifiant. :-lists and *-lists may be intermixed; however, note than only the last character in the sequence actually controls the depth type.
    63         -* directives (%): a directive issues a hint to the renderer in the form of an arbitrary string. directives are normally ignored if they are not supported, but you may cause a warning to be emitted where the directive is not supported with [$%!] or mark a directive critical with [$%!!] so that rendering will entirely fail if it cannot be parsed.
    64         -* comments (%%): a comment is a line of text that is simply ignored by the renderer. 
    65         -* asides (!): indicates text that diverges from the narrative, and can be skipped without interrupting it. think of it like block-level parentheses. asides which follow one another are merged as paragraphs of the same aside, usually represented as a sort of box. if the first line of an aside contains a colon, the stretch of styled-text from the beginning to the aside to the colon will be treated as a "type heading," e.g. "Warning:"
    66         -* code (~~~): a line beginning with ~~~ begins or terminates a block of code. the opening line should look like one of the below
    67         -** [$~~~]
    68         -** [$~~~ language] (markdown-style shorthand syntax)
    69         -** [$~~~ \[language\] ~~~] (cortav syntax)
    70         -** [$~~~ \[language\] #id ~~~]
    71         -** [$~~~ title ~~~]
    72         -** [$~~~ title \[language\] ~~~]
    73         -** [$~~~ \[language\] title ~~~]
    74         -** [$~~~ title \[language\] #id ~~~]
    75         -* reference (tab): a line beginning with a tab is treated as a "reference." references hold out-of-line metadata for preceding text like links and footnotes. a reference consists of an identifier followed by a colon and an arbitrary number of spaces or tabs, followed by text. whether this text is interpreted as raw-text or styled-text depends on the context in which the reference is used.
    76         -* quotation (<): a line of the form [$<[!name]> [!quote]] denotes an utterance by [$name].
    77         -* blockquote (>): alternate blockquote syntax. can be nested by repeating the 
    78         -* subtitle (--): attaches a subtitle to the previous header
    79         -* embed (&): embeds a referenced object. can be used to show images or repeat previously defined objects like lists or tables, optionally with a caption.
    80         -** &myimg All that remained of the unfortunate blood magic pageant contestants and audience (police photo)
    81         -** &$mymacro arg 1|arg 2|arg 3
    82         -* break (---): inserts a horizontal rule or other context break; does not end the section. must be followed by newline.
    83         -* table cells (+ |): see [>ex.tab table examples].
           49  +the first character(s) of every line (the "control sequence") indicates the role of that line. if no control sequence is recognized, the line is treated as a paragraph. the currently supported control sequences are listed below. some control sequences have alternate forms, in order to support modern, readable unicode characters as well as plain ascii text.
           50  +
           51  +* [*paragraphs] ([`.] [` ¶] [`❡]): a paragraph is a simple block of text. the period control sequence is only necessary if the paragraph text starts with text that would be interpreted as a control sequence otherwise
           52  +* newlines [` \\]: inserts a line break into the previous paragraph and attaches the following text. mostly useful for poetry or lyrics
           53  +* [*section starts] [`#] [`§]: starts a new section. all sections have an associated depth, determined by the number of sequence repetitions (e.g. "###" indicates depth three). sections may have headers and IDs; both are optional. IDs, if present, are a sequence of raw-text immediately following the hash marks. if the line has one or more space characters followed by styled-text, a header will be attached. the character immediately following the hashes can specify a particular type of section. e.g.:
           54  +** [`#] is a simple section break.
           55  +** [`#anchor] opens a new section with the ID [`anchor].
           56  +** [`# header] opens a new section with the title "header".
           57  +** [`#anchor header] opens a new section with both the ID [`anchor] and the title "header".
           58  +** [`#>conversation] opens a blockquote section named [`conversation] without a header.
           59  +* [*nonprinting sections] ([`^]): sometimes, you'll want to create a namespace without actually adding a visible new section to the document. you can achieve this by creating a [!nonprinting section] and defining resources within it. nonprinting sections can also be used to store comments, notes, or other information that is useful to have in the source file without it becoming a part of the output
           60  +** [`#&id mime] opens a new inline object [`id] of type [`mime]. useful for embedding SVGs. the ID and mime type must be specified.
           61  +* [*resource] ([`@]): defines a [!resource]. a resource is a file or object that exists outside of the document but which will be included in the document somehow. common examples of resources include images, videos, iframes, or headers/footers. see [>rsrc resources] for more information.
           62  +* [*lists] ([`*] [`:]): these are like paragraph nodes, but list nodes that occur next to each other will be arranged so as to show they compose a sequence. depth is determined by the number of stars/colons. like headers, a list entry may have an ID that can be used to refer back to it; it is indicated in the same way. if colons are used, this indicates that the order of the items is significant. :-lists and *-lists may be intermixed; however, note that only the last character in the sequence actually controls the depth type.
           63  +* [*directives] ([`%]): a directive issues a hint to the renderer in the form of an arbitrary string. directives are normally ignored if they are not supported, but you may cause a warning to be emitted where the directive is not supported with [`%!] or mark a directive critical with [`%!!] so that rendering will entirely fail if it cannot be parsed.
           64  +* [*comments] ([`%%]): a comment is a line of text that is simply ignored by the renderer.
           65  +* [*asides] ([`!]): indicates text that diverges from the narrative, and can be skipped without interrupting it. think of it like block-level parentheses. asides which follow one another are merged as paragraphs of the same aside, usually represented as a sort of box. if the first line of an aside contains a colon, the stretch of styled-text from the beginning of the aside to the colon will be treated as a "type heading," e.g. "Warning:"
           66  +* [*code] ([`~~~]): a line beginning with ~~~ begins or terminates a block of code. code blocks are by default not parsed, but parsing can be activated by preceding the code block with an [`%[*expand]] directive. the opening line should look like one of the below
           67  +** [`~~~]
           68  +** [`~~~ language] (markdown-style shorthand syntax)
           69  +** [`~~~ \[language\] ~~~] (cortav syntax)
           70  +** [`~~~ \[language\] #id ~~~]
           71  +** [`~~~ title ~~~]
           72  +** [`~~~ title \[language\] ~~~]
           73  +** [`~~~ \[language\] title ~~~]
           74  +** [`~~~ title \[language\] #id ~~~]
           75  +* [*reference] (tab): a line beginning with a tab is treated as a "reference." references hold out-of-line metadata for preceding text like links and footnotes. a reference consists of an identifier followed by a colon and an arbitrary number of spaces or tabs, followed by text. whether this text is interpreted as raw-text or styled-text depends on the context in which the reference is used. in encodings without tab characters, two preceding blanks can be used instead.
           76  +* [*quotation] ([`<]): a line of the form [`<[$name]> [$quote]] denotes an utterance by [$name].
           77  +* [*blockquote] ([`>]): alternate blockquote syntax. can be nested by repeating the [`>] character.
           78  +* [*subtitle] ([`--]): attaches a subtitle to the previous header
           79  +* [*embed] ([`&]): embeds a referenced object. can be used to show images or repeat previously defined objects like lists or tables, optionally with a caption.
           80  +** [`&$[$macro] [$arg1]|[$arg2]|[$argn]…] invokes a block-level macro with the supplied arguments
           81  +*** [`&$mymacro arg 1|arg 2|arg 3]
           82  +** [`&[$image]] embeds an image or other block-level object. [!image] can be a reference with a url or file path, or it can be an embed section (e.g. for SVG files)
           83  +***[`&myimg All that remained of the unfortunate blood magic pageant contestants and audience (police photo)]
           84  +** [`&-[$section]] embeds a closed disclosure element. in interactive outputs, this will display as a block [!section] which can be clicked on to view the full contents of the referenced section; in static outputs, it will display as an enclosed box with [$section] as the title text
           85  +*** [`&-ex-a Prosecution Exhibit A (GRAPHIC CONTENT)]
           86  +** [`&+[$section]] is like the above, but the disclosure element is open by default
           87  +* [*horizontal rule] ([`\---]): inserts a horizontal rule or other context break; does not end the section. must be followed by newline. underlines can also be used in place of dashes.
           88  +* [*page break] ([`\^^]): for formats that support pagination, like HTML (when printed), indicates that the rest of the current page should be blank. for formats that do not, extra margins will be inserted. does not create a new section
           89  +* [*page rule] ([`\^^-]): inserts a page break for formats that support them, and a horizontal rule for formats that do not. does not create a new section
           90  +* [*table cells] ([`+ |]): see [>ex.tab table examples].
           91  +* [*equations] ([`=]): block-level equations can be inserted with the [`=] control sequence.
           92  +* [*empty lines] (that is, lines consisting of nothing but whitespace) constitute a [!break], which terminates multiline objects that do not have a dedicated termination sequence, for example lists and asides.
    84     93   
    85     94   ## styled text
    86         -most blocks contain a sequence of spans. these spans are produced by interpreting a stream of [*styled-text] following the control sequence. styled-text is a sequence of codepoints potentially interspersed with escapes. an escape is formed by an open square bracket [$\[] followed by a [*span control sequence], and arguments for that sequence like more styled-text. escapes can be nested.
           95  +most blocks contain a sequence of spans. these spans are produced by interpreting a stream of [*styled-text] following the control sequence. styled-text is a sequence of codepoints potentially interspersed with escapes. an escape is formed by an open square bracket [`\[] followed by a [*span control sequence], and arguments for that sequence like more styled-text. escapes can be nested.
    87     96   
    88         -* strong \[*[!styled-text]\]: causes its text to stand out from the narrative, generally rendered as bold or a brighter color.
    89         -* emphatic \[![!styled-text]\]: indicates that its text should be spoken with emphasis, generally rendered as italics
    90         -* literal \[$[!styled-text]\]: indicates that its text is a reference to a literal sequence of characters, variable name, or other discrete token. generally rendered in monospace
    91         -* strikeout \[~[!styled-text]\]: indicates that its text should be struck through or otherwise indicated for deletion
    92         -* insertion \[+[!styled-text]\]: indicates that its text should be indicated as a new addition to the text body. 
    93         -** consider using a macro definition [$\edit: [~[#1]][+[#2]]] to save typing if you are doing editing work
    94         -* link \[>[!ref] [!styled-text]\]: produces a hyperlink or cross-reference denoted by [$ref], which may be either a URL specified with a reference or the name of an object like an image or section elsewhere in the document. the unicode characters [$→] and [$🔗] can also be used instead of [$>] to denote a link.
    95         -* footnote \[^[!ref] [!styled-text]\]: annotates the text with a defined footnote
    96         -* raw \[\\[!raw-text]\]: causes all characters within to be interpreted literally, without expansion. the only special characters are square brackets, which must have a matching closing bracket
           97  +* strong {obj *|styled-text}: causes its text to stand out from the narrative, generally rendered as bold or a brighter color.
           98  +* emphatic {obj !|styled-text}: indicates that its text should be spoken with emphasis, generally rendered as italics
           99  +* literal {obj `|styled-text}: indicates that its text is a reference to a literal sequence of characters or other discrete token. generally rendered in monospace
          100  +* variable {obj $|styled-text}: indicates that its text is a stand-in that will be replaced with what it names. generally rendered in italic monospace, ideally of a different color
          101  +* underline {obj _|styled-text}: underlines the text. use sparingly on text intended for webpages -- underlined text  [!is] distinct from links, but underlining non-links is still a violation of convention.
          102  +* strikeout {obj ~|styled-text}: indicates that its text should be struck through or otherwise indicated for deletion
          103  +* insertion {obj +|styled-text}: indicates that its text should be indicated as a new addition to the text body.
          104  +** consider using a macro definition [`\edit: [~[#1]][+[#2]]] to save typing if you are doing editing work
          105  +* link \[>[!ref] [!styled-text]\]: produces a hyperlink or cross-reference denoted by [$ref], which may be either a URL specified with a reference or the name of an object like an image or section elsewhere in the document. the unicode characters [`→] and [`🔗] can also be used instead of [`>] to denote a link.
          106  +* footnote {span ^|ref|[$styled-text]}: annotates the text with a defined footnote. in interactive output media [`\[^citations.qtheo Quantum Theosophy: A Neophyte's Catechism]] will insert a link with the text [`Quantum Theosophy: A Neophyte's Catechism] that, when clicked, causes a footnote to pop up on the screen. for static output media, the text will simply have a superscript integer after it denoting where the footnote is to be found.
          107  +* superscript {obj '|[$styled-text]}:
          108  +* subscript {obj ,|[$styled-text]}:
          109  +* raw \[\\[`raw-text]\]: causes all characters within to be interpreted literally, without expansion. the only special characters are square brackets, which must have a matching closing bracket
    97    110   * raw literal \[$\\[!raw-text]\]: shorthand for [\[$[\…]]]
    98         -* macro \{[!name] [!arguments]\}: invokes a [>ex.mac macro], specified with a reference
    99         -* argument \[#[!var]\]: in macros only, inserts the [$var]-th argument. otherwise, inserts a context variable provided by the renderer.
   100         -* raw argument \[##[!var]\]: like above, but does not evaluate [$var].
   101         -* term \[&[!name] ([!label])\]: quotes a defined term with a link to its definition
   102         -* inline image \[&@[!name]\]: shows a small image or other object inline. the unicode character [$🖼] can also be used instead of [$&@].
   103         -
   104         -## identifiers
   105         -any identifier (including a reference) that is defined within a named section must be referred to from outside that section as [$[!sec].[!obj]], where [$sec] is the ID of the containing section and [$obj] is the ID of the object one wishes to reference.
   106         -
   107         -## context variables
   108         -context variables are provided so that cortav renderers can process templates. certain context variables are provided for by the standard. you can test for the presence of a context variable with the directive [$%[*when] ctx [!var]].
          111  +* macro [`\{[!name] [!arguments]\}]: invokes a [>ex.mac macro], specified with a reference
          112  +* argument {obj #|var}: in macros only, inserts the [$var]-th argument. otherwise, inserts a context variable provided by the renderer.
          113  +* raw argument {obj ##|var}: like above, but does not evaluate [$var].
          114  +* term {obj &|name}, {span &|name|[$expansion]}: quotes a defined term with a link to its definition, optionally with a custom expansion of the term (for instance, to expand the first use of an acronym)
          115  +* inline image {obj &@|name}: shows a small image or other object inline. the unicode character [`🖼] can also be used instead of [`&@].
          116  +* unicode codepoint {obj U+|hex-integer}: inserts an arbitrary UCS codepoint in the output, specified by [$hex-integer]. lowercase [`u] is also legal.
          117  +* math mode {obj =|equation}: activates additional transformations on the span to format it as a mathematical equation; e.g. [`*] becomes [`×] and [`/] --> [`÷].
          118  +* extension {span %|ext|…}: invokes extension named in [$ext]. [$ext] will usually be an extension name followed by a symbol (often a period) and then an extension-specific directive, although for some simple extensions it may just be the plain extension name. further syntax and semantics depend on the extension. this syntax can also be used to apply formatting specific to certain renderers, such as assigning a CSS class in the [`html] renderer ([`\[%html.myclass my [!styled] text]]).
          119  +* critical extension {span %!|ext|…}: like [!extension], but will trigger an error if the requested extension is not available
          120  +* extension text {span %:|ext|styled-text}: like [!extension], but when the requested extension is not present, [$styled-text] will be emitted as-is. this is a better way to apply CSS classes, as the text will still be visible when rendered to formats other than HTML.
          121  +* inline comment {obj %%|...}: ignored. useful for editorial annotations not intended to be part of the rendered product.
          122  +
          123  +	span: [` \[[*[#1]][$[#2]] [#3]\]]
          124  +	obj: [` \[[*[#1]][$[#2]]\]]
          125  +
          126  +##ident identifiers
          127  +any identifier (including a reference) that is defined within a named section must be referred to from outside that section as [`[!sec].[!obj]], where [$sec] is the ID of the containing section and [$obj] is the ID of the object one wishes to reference.
          128  +
          129  +##rsrc resources
          130  +a [!resource] represents content that is not encoded directly into the source file, but which is embedded by some means in the output. resources can either be [!embedded], in which case they are compiled into the final document itself, or they can be [!linked], in which case the final document only contains a URI or similar tag referencing the resource. not all render backends support both linking and embedding, nor do all backends support all object types (for instance, [`groff] does not support video embedding.)
          131  +
          132  +a resource definition is begun by a line consisting of an [`@] sign and an [>ident identifier]. this line is followed by any number of parameters. a parameter is a line beginning with a single tab, a keyword, a colon, and then a value. additional lines can be added to a parameter by following it with a line that consists of two tabs followed by the text you wish to add. (this is the same syntax used by references.) a resource definition is terminated by a break, or any line that does not begin with a tab.
          133  +
          134  +a resource definition in use looks like this:
          135  +
          136  +~~~
          137  +this is a demonstration of resources
          138  +@smiley
          139  +	src: link image/webp http://cdn.example.net/img/smile.webp
          140  +		  link image/png file:img/smile.png
          141  +		  embed image/gif file:img/smile.gif
          142  +	desc: the Smiling Man would like to see you in his office
          143  +here is the resource in span context [&smiley]
          144  +and here it is in block context:
          145  +&smiley
          146  +~~~
          147  +
          148  +rendered as HTML, this might produce the following:
          149  +
          150  +~~~
          151  +<style>
          152  +	.res-smiley {
          153  +		content: image-set(
          154  +			url(http://cdn.example.net/img/smile.webp) type(image/webp),
          155  +			url(img/smile.png) type(image/png),
          156  +			url(* … */) type(image/gif)
          157  +		); /* this will actually be repeated with a -webkit- prefix */
          158  +	}
          159  +</style>
          160  +<p>this is a demonstration of resources</p>
          161  +<p>here is the resource in span context: <span class="res-smiley"></span></p>
          162  +<p>and here it is in block context:</p>
          163  +<div class="res-smiley"></div>
          164  +~~~
          165  +
          166  +note that empty elements with CSS classes are used in the output, to avoid repeating long image definitions (especially base64 inline encoded ones!)
          167  +
          168  +### supported parameters
          169  +* [`src] (all): specifies where to find the file, what it is, and how to embed it. each line of [`src] should consist of three whitespace-separated words: embed method, MIME type, and URI.
          170  +** embed methods
          171  +*** [`local]: loads the resource at build time and embeds it into the output file. not all implementations may allow loading remote network resources at build time.
          172  +*** [`remote]: only embeds a reference to the location of the resource. use this for e.g. live iframes, dynamic images, or images hosted by a CDN.
          173  +*** [`auto]: embeds the resource in file formats where that's practical, and uses a remote reference otherwise.
          174  +** MIME types: which file types are supported depends on the individual implementation and renderer backend; additionally, extensions can add support for extra types. MIME-types that have no available handler will, where possible, result in an attachment that can be extracted by the user, usually by clicking on a link. however, the following should be usable with all compliant implementations
          175  +*** [`image/*] (graphical outputs only)
          176  +*** [`video/*] (interactive outputs only)
          177  +*** [`image/svg+xml] is handled specially for HTML files, and may or may not be compatible with other renderer backends.
          178  +*** [`font/*] can be used with the HTML backend to reference a web font
          179  +*** [`font/woff2] can be used with the HTML backend to reference a web font
          180  +*** [`text/plain] (will be inserted as a preformatted text block)
          181  +*** [`text/css] (can be used when producing HTML files to link in an extra stylesheet, either by embedding it or referencing it from the header)
          182  +*** [`text/x-cortav] (will be parsed and inserted as a formatted text block; context variables can be passed to the file with [`ctx.[$var]] parameters)
          183  +*** any MIME-type that matches the type of file being generated by the renderer can be used to include a block of data that will be passed directly to the renderer.
          184  +** URI types: additional URI types can be added by extensions or different implementations, but every compliant implementation must support these URIs.
          185  +*** [`http], [`https]: accesses resources over HTTP. add a [`file] fallback if possible for the benefit of renderers/viewers that do not have internet access abilities.
          186  +*** [`file]: references local files. absolute paths should begin [`file:/]; the slash should be omitted for relative paths. note that this doesn't have quite the same meaning as in HTML -- [`file] can (and usually should) be used with HTML outputs to refer to resources that reside on the same server. a cortav URI of [`file:/etc/passwd] will actually result in the link [`/etc/passwd], not [`file:///etc/passwd] when converted to HTML. generally, you should only use [`http] when you're referring to a resource that exists on a different domain.
          187  +*** [`name]: a special URI used generally for referencing resources that are already installed on a target system and do not need to be embedded or linked; the name and type are enough for a renderer on another machine to locate the correct resource. this is useful mostly for [>fonts fonts], where it's more typical to refer to fonts that are installed on your system rather than providing paths to font files.
          188  +*** [`gemini]: accesses resources over the gemini protocol. currently you should really only use this for [`local] resources unless you're using the gemtext renderer backend, since nothing but gemini browsers are liable to support this protocol.
          189  +* [`desc]: supplies a narrative description of the resources, for use as an "alt-text" when the image cannot be loaded and for screenreaders.
          190  +* [`detail]: supplies extra narrative commentary that is displayed contextually, e.g. when the user hovers her mouse cursor over the embedded object.
          191  +
          192  +note that in certain cases, full MIME types do not need to be used. say you're defining a font with the [`name] URI -- you can't necessarily know what file type the system fonts on another computer are going to be. in this case, you can just write [`font] instead of [`font/ttf] or [`font/woff2] or similar. all cortav needs to know in this case is what abstract kind of object you're referencing.
          193  +
          194  +
          195  +##ctxvar context variables
          196  +context variables are provided so that cortav renderers can process templates. certain context variables are provided for by the standard. you can test for the presence of a context variable with the directive [`%[*when] ctx [$var]]. context variables are accessed with the [` \[#[$name]\]] span.
   109    197   
   110    198   * {def cortav.file} the name of the file currently being rendered
   111    199   * {def cortav.path} the absolute path of the file currently being rendered
   112         -* {def cortav.time} the current system time in the form [$[#cortav.time]]
   113         -* {def cortav.date} the current system date in the form [$[#cortav.date]]
   114         -* {def cortav.datetime} the current system date and time represented in the locale or system-standard manner (e.g. [$[#cortav.datetime]])
          200  +* {def cortav.time} the current system time in the form [`[#cortav.time]]
          201  +* {def cortav.date} the current system date in the form [`[#cortav.date]]
          202  +* {def cortav.datetime} the current system date and time represented in the locale or system-standard manner (e.g. [`[#cortav.datetime]])
   115    203   * {def cortav.page} the number of the page currently being rendered
   116    204   * {def cortav.id} the identifier of the renderer
   117    205   * {def cortav.hash} the SHA3 hash of the source file being rendered
   118    206   	def: [*[#1]]:
   119    207   
   120         -on systems with environment variables, these may be accessed as context variables by prefixing their name with [$env.].
          208  +on systems with environment variables, these may be accessed as context variables by prefixing their name with [`env.].
   121    209   
   122    210   different renderers may provide context in different ways, such as from command line options or a context file. any predefined variables should carry an appropriate prefix to prevent conflation. 
   123    211   
   124         -## directives
   125         -	d: [$%[*[##1]]]
   126         -* {d author} encodes document authorship
          212  +##fonts fonts
          213  +for output backends that support font specification, cortav provides a sophisticated font management system by means of the [!font stack].
          214  +
          215  +when a document parse begins, the font stack is empty (unless a default font has already been loaded by an intent file).
          216  +when the font stack is empty, cortav does not include font specifications in its output, and thus will use whatever the default of the various rendering programs is.
          217  +
          218  +to use fonts, we first have to define the fonts as [>rsrc resources].
          219  +
          220  +~~~cortav
          221  +%% first, we create a new section to namespace the fonts
          222  +#^fonts
          223  +%% we then define each font as a resource
          224  +@serif
          225  +	src: auto font name:Alegreya
          226  +		embed  font/ttf file:project-fonts/alegreya.ttf
          227  +		link font/woff2 file:/assets/font/alegreya.woff2
          228  +		auto font name:Times New Roman
          229  +@sans
          230  +	src: link font name:Alegreya Sans
          231  +		link font name:Open Sans
          232  +		link font name:sans-serif
          233  +~~~
          234  +
          235  +here we have defined two font families, [`fonts.serif] and [`fonts.sans]. each contains a list of references to fonts which will be tried in order. for example, this could be translated into the following CSS:
          236  +
          237  +~~~css
          238  +@font-face {
          239  +	font-family: "fontdef-serif";
          240  +	src: local("Alegreya"),
          241  +		url("data:font/ttf;base64,…") format("font/ttf"),
          242  +		url("/assets/font/alegreya.woff2") format("font/woff2"),
          243  +		local("Times New Roman");
          244  +}
          245  +@font-face {
          246  +	font-family: "fontdef-sans";
          247  +	src: local("Alegreya Sans"),
          248  +		local("Open Sans"),
          249  +		local("sans-serif");
          250  +}
          251  +~~~
          252  +
          253  +there are two things that aren't super clear from the CSS, however. notice how we used [`auto] on a couple of those specs? this means it's up to the renderer to decide whether to link or embed the font. for html, a font specified by name can't really be embedded, but for some file formats, it can be. [`auto] lets us produce valid HTML while still taking advantage of font embedding in other formats.
          254  +
          255  +now that we have our font families defined, we can use their identifiers with the [`%[*font]] directive to control the font stack. the first thing we need to do is push a new font context. there's two ways we can do this:
          256  +	fnd: [`%[*font] [#1]]
          257  +* {fnd dup} will create a copy of the current font context, allowing us to make some changes and then revert later with the {fnd pop} command. this isn't useful in our case, however, because right now the stack is empty; there's nothing to duplicate.
          258  +* {fnd new} will create a brand new empty context for us to work with and push it to the stack. this can also be used to temporarily revert to the system default fonts, and then switch back with {fnd pop}.
          259  +* {fnd set} changes one or more entries in the current font context. it can take a space-separated list of arguments in the form [`[$entry]=[$font-id]]. the supported entries are:
          260  +** [`body]: the fallback font. if only this is set in a given font context, it will be used for everything
          261  +** [`paragraph]: the font used for normal paragraphs
          262  +** [`header]: the font used in headers
          263  +** [`subtitle]: the font used in subtitles
          264  +** [`list]: the font used in lists
          265  +** [`table]: the font used in tables
          266  +** [`caption]: the font used for captions
          267  +* {fnd pop} removes the top context from the font stack.
          268  +
          269  +note that extensions may consult the font context for entries specific to them. for instance, [>toc toc] checks for [`toc] before falling back to [`body] and then the default font.
          270  +
          271  +these commands are enough to give us a very flexible setup. consider the following:
          272  +
          273  +~~~cortav
          274  +%% let's pretend we've also defined the fonts 'title', 'cursive', and 'thin'
          275  +
          276  +%font new
          277  +%font set body=sans header=serif
          278  +%font dup
          279  +%font set header=title
          280  +# lorem ipsum dolor
          281  +%font pop
          282  +
          283  +%% we've now set up a default font context, created a new context for the title of the
          284  +%% document, and then popped it back off after the title was inserted so that our
          285  +%% first font context is active again. everything after that last '%font pop' will
          286  +%% be printed in sans, except for headers, which will be printed in 'serif'
          287  +
          288  +lorem ipsum dolor sit amet, sed consectetur adipiscing elit…
          289  +
          290  +%font dup
          291  +%font set body=cursive
          292  +> sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.
          293  +> Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut
          294  +%font pop
          295  +
          296  +%% above we created a blockquote whose text is printed in a cursive font; afterwards,
          297  +%% we simply remove this new context—
          298  +
          299  +and everything is back the way it was at "lorem ipsum"
          300  +
          301  +%% the font mechanism is at its most powerful when used with multiline macros:
          302  +
          303  +	cursive-quote: %font dup
          304  +		%font set body=cursive
          305  +		> [#1]
          306  +		%font pop
          307  +
          308  +%% now, whenever we want a block with a cursive body, we can simply invoke
          309  +
          310  +&$cursive-quote Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident
          311  +
          312  +%% without affecting the overall font context. in fact, since 'cursive-quote' creates
          313  +%% its context using 'dup', it would import all font specifications besides 'body'
          314  +%% from the environment it is invoked in
          315  +~~~
          316  +
          317  +##dir directives
          318  +	d: [`%[*[##1]]]
          319  +* {d author} encodes document authorship. multiple author directives can be issued to add additional coauthors
   127    320   * {d cols} specifies the number of columns the next object should be rendered with
   128    321   * {d include} transcludes another file
          322  +* {d import} reads in the contents of another file as an embeddable section
   129    323   * {d quote} transcludes another file, without expanding the text except for paragraphs 
   130    324   * {d embed}, where possible, embeds another file as an object within the current one. in HTML this could be accomplished with e.g. an iframe.
   131    325   * {d expand} causes the next object (usually a code block) to be fully expanded when it would otherwise not be
   132         -* {d pragma} supplies semantic data about author intent, the kind of information document contains and hints about how it should be displayed to the user. think of them like offhand remarks to the renderer -- there's no guarantee that it'll pay any attention, but if it does, your document will look better. pragmas have no scope; they affect the entire document. the pragma function exists primarily as a means to allow parameters that would normally need to be specified on e.g. the command line to be encoded in the document instead in a way that multiple implementations can understand. a few standard pragmas are defined.
          326  +* {d font} controls the font stack, for outputs that support changing fonts. see [>fonts fonts] for more information.
          327  +* {d lang} changes the current language, which is used by extensions to e.g. control typographical conventions, and may be encoded into the output by certain renderers (e.g. HTML). note that quotes and blockquotes can be set to a separate language with a simpler syntax. the language should be notated using IETF language tags
          328  +** {d lang is x-ranuir-CR8} sets the current language to Ranuir as spoken in the Central Worlds, written in Corran and encoded using UTF-8. this might be used at the top of a document to set its primary language.
          329  +** {d lang push gsw-u-sd-chzh} temporarily switches to Zürich German, e.g. to quote a German passage in an otherwise Ranuir document
          330  +** {d lang sec en-US} switches to American English for the duration of a section. does not affect the language stack.
          331  +** {d lang pop} drops the current language off the language stack, returning to whatever was pushed or set before it. this would be used, for instance, at the end of a passage
          332  +* {d pragma} supplies semantic data about author intent, the kind of information the document contains and hints about how it should be displayed to the user. think of them like offhand remarks to the renderer -- there's no guarantee that it'll pay any attention, but if it does, your document will look better. pragmas have no scope; they affect the entire document. the pragma function exists primarily as a means to allow parameters that would normally need to be specified on e.g. the command line to be encoded in the document instead in a way that multiple implementations can understand. a few standard pragmas are defined.
   133    333   ** {d pragma layout} gives a hint on how the document should be laid out. the first hint that is understood will be applied; all others will be discarded. standard hints include:
   134         -*** essay
   135         -*** narrative
   136         -*** screenplay: uses asides to denote actions, quotes for dialogue
   137         -*** stageplay: uses asides to denote actions, quotes for dialogue
   138         -*** manual
   139         -*** glossary
   140         -*** news
          334  +*** [`essay]
          335  +*** [`narrative]
          336  +*** [`screenplay]: uses asides to denote actions, quotes for dialogue
          337  +*** [`stageplay]: uses asides to denote actions, quotes for dialogue
          338  +*** [`manual]
          339  +*** [`glossary]
          340  +*** [`news]
          341  +*** [`book]: section depths 1-3 gain additional semantics
          342  +***: [*part]: the section gets a page to itself to announce the beginning of a new part or appendix
          343  +***: [*chapter]: the section is preceded by a page break
          344  +***: [*heading]: the section can occur on the same page as text and  headings from other sections
   141    345   ** {d pragma accent} specifies an accent hue (in degrees around the color wheel) for renderers which support colorized output
   142    346   ** {d pragma accent-spread} is a factor that controls the "spread" of hues used in the document. if 0, only the accent color will be used; if larger, other hues will be used in addition to the primary accent color.
   143         -** {d pragma dark-on-light on|off} controls whether the color scheme used should be light-on-dark or dark-on-light
          347  +** {d pragma dark-on-light on\|off} controls whether the color scheme used should be light-on-dark or dark-on-light
   144    348   ** {d pragma page-width} indicates how wide the pages should be
          349  +** {d pragma title-page} specifies a section to use as a title page, for renderer backends that support pagination
   145    350   
   146         -! note on pragmas: particularly when working with collections of documents, you should not keep formatting metadata in the documents themselves! the best thing to do is to have a makefile for compiling the documents using whatever tools you want to support, and encoding the rendering options in this file (for the reference implementation this currently means as command line arguments, but eventually it will support intent files as well) so they can all be changed in one place; pragmas should instead be used for per-document [*overrides] of default settings.
          351  +! note on pragmata: particularly when working with collections of documents, you should not keep formatting metadata in the documents themselves! the best thing to do is to have a makefile for compiling the documents using whatever tools you want to support, and encoding the rendering options in this file (for the reference implementation this currently means as command line arguments, but eventually it will support intent files as well) so they can all be changed in one place; pragmas should instead be used for per-document [*overrides] of default settings.
   147    352   ! a workaround for the lack of intent files in the reference implementation is to have a single pseudo-stylesheet that contains only {d pragma} statements, and then import this file from each individual source file using the {d include} directive. this is suboptimal and recommended only when you need to ensure compatibility between different implementations.
   148    353   ! when creating HTML files, an even better alternative may be to turn off style generation entirely and link in an external, hand-written CSS stylesheet. this is generally the way you should compile sources for existing websites if you aren't going to write your own extension.
   149    354   
   150    355   ##ex examples
   151    356   
   152    357   ~~~ blockquotes #bq [cortav] ~~~
   153    358   the following excerpts of text were recovered from a partially erased hard drive found in the Hawthorne manor in the weeks after the Incident. context is unknown.
................................................................................
   185    390   
   186    391   +:english  :| honor |
   187    392   +:ranuir   :| tef   |
   188    393   +:zia ţai  :| pang  |
   189    394   +:thalishte:| mbecheve |
   190    395   ~~~
   191    396   
   192         -## extensions
          397  +##extns extensions
   193    398   the cortav specification also specifies a number of extensions that do not have to be supported for a renderer to be compliant. the extension mechanism supports the following directives.
   194    399   
   195    400   * inhibits: prevents an extension from being used even where available
   196    401   * uses: turns on an extension that is not specified by the user operating the renderer (e.g. on the command line)
   197    402   * needs: causes rendering to fail with an error if the extensions are not available
   198    403   
   199         -where possible, instead of [$needs x y z], the directive [$when has-ext x y z] should be used instead. this causes the next section to be rendered only if the named extensions are available. [$unless has-ext x y z] can be used to provide an alternative format.
          404  +where possible, instead of [`needs [$x y z]], the directive [`when has-ext [$x y z]] should be used instead. this causes the next section to be rendered only if the named extensions are available. [`unless has-ext [$x y z]] can be used to provide an alternative format.
   200    405   
   201    406   extensions are mainly interacted with through directives. all extension directives must be prefixed with the name of the extension.
   202    407   
   203         -### toc
   204         -sections that have a title will be included in the table of contents. the table of contents is by default inserted at the break between the first level-1 section and the section immediately following it. you may instead place the directive [$toc] where you wish the TOC to be inserted, or suppress it entirely with [$inhibits toc]. note that some renderers may not display the TOC as part of the document itself.
          408  +the reference implementation seeks to support all standardized extensions. it's not quite there yet, however.
          409  +
          410  +###toc toc
          411  +sections that have a title will be included in the table of contents. the table of contents is by default inserted at the break between the first level-1 section and the section immediately following it. you may instead place the directive [`toc] where you wish the TOC to be inserted, or suppress it entirely with [`inhibits toc]. note that some renderers may not display the TOC as part of the document itself.
   205    412   
   206    413   toc provides the directives:
   207    414   
   208         -* [$%[*toc]]: insert a table of contents in the specified position. this can be used more than once, but doing so may have confusing, incorrect, or nonsensical results under some renderers, and some may just ignore the directive entirely
   209         -* [$%[*toc] mark [!styled-text]]: inserts a TOC entry with the label [!styled-text]  pointing to the current location. this can be used to e.g. mark noteworthy images, instances of long quotes or literal blocks, or functions inside an expanded code block.
   210         -* [$%[*toc] name [!id styled-text]]: like [$%[*toc] mark] but allows an additional [!id] parameter which specifies the ID the renderer will assign to an anchor element. this is not meaningful for all renderers and when it is, it is up to the renderer to decide what it means.
   211         -** the [*html] render backend interprets [!id] as the [$id] element for the anchor tag
   212         -** the [*groff] render backend ignores [!id]
   213         -
   214         -### smart-quotes
   215         -a cortav renderer may automatically translate punctuation marks to other punctuation marks depending on their context. 
   216         -
   217         -### hilite
   218         -code can be highlighted according to the formal language it is written in.
   219         -
   220         -### lua
          415  +* [`%[*toc]]: insert a table of contents in the specified position. this can be used more than once, but doing so may have confusing, incorrect, or nonsensical results under some renderers, and some may just ignore the directive entirely
          416  +* [`%[*toc] mark [$styled-text]]: inserts a TOC entry with the label [$styled-text]  pointing to the current location. this can be used to e.g. mark noteworthy images, instances of long quotes or literal blocks, or functions inside an expanded code block.
          417  +* [`%[*toc] name [$id styled-text]]: like [`%[*toc] mark] but allows an additional [$id] parameter which specifies the ID the renderer will assign to an anchor element. this is not meaningful for all renderers and when it is, it is up to the renderer to decide what it means.
          418  +** the [*html] render backend interprets [$id] as the [`id] element for the anchor tag
          419  +** the [*groff] render backend ignores [$id]
          420  +
          421  +###tsmog transmogrify
          422  +a cortav renderer may automatically translate punctuation marks or symbol sequences to superior representations depending on their context. to be compliant this extension should implement, at minimum:
          423  +* smart quotes (with consideration for the typographical conventions of languages like German or Spanish)
          424  +** {dir.d transmogrify|language [$lang]} can be used to explicitly set the language; otherwise, it must be determined from the value of {dir.d pragma|lang}. if this is not present, implementations may fall back on their own methods for determining the language in use, such as command-line flags.
          425  +* multigraph to glyph conversion, including at least:
          426  +** [`\--] --> "—"
          427  +** [`\-->] --> "→"
          428  +** [`\<--] -->  "←"
          429  +
          430  +an escape character before any of the sequence characters should prevent the sequence from being rendered. raw nodes (that is, [`\[\…\]] and [`\[`\…\]]) should not be scanned for transmogrification, nor should the contents of code blocks unless marked with the [`%[*expand]] directive
          431  +
          432  +transmogrification shall only take place after all other parsing steps are completed.
          433  +
          434  +###hilite hilite
          435  +code can be highlighted according to the formal language it is written in. a compliant hilite implementation must implement basic keyword, symbol, comment, pragma, and literal highlighting for the following formal languages.
          436  +* C
          437  +* [>lua Lua]
          438  +* [>html HTML]
          439  +* [>scheme Scheme]
          440  +* [>terra Terra]
          441  +* [>libconfig libconfig]
          442  +
          443  +	lua: https://lua.org
          444  +	scheme: https://call-cc.org
          445  +	terra: https://terralang.org
          446  +	html: https://dev.w3.org/html5/spec-LC/
          447  +	libconfig: http://hyperrealm.github.io/libconfig/
          448  +
          449  +the highlighter should make use of semantic HTML tags like [`<var>] where possible.
          450  +
          451  +###lua lua
   221    452   renderers with a lua interpreter available can evaluate lua code:
   222         -* [$%lua use [!file]]: evaluates [$file] and makes its definitions available
   223         -* [$\[%lua raw [!script]\]]: evaluates [$script] and emits the string it returns (if any) in raw span context.
   224         -* [$\[%lua exp [!script]\]]: evaluates [$script] and emits the string it returns (if any) in expanded span context.
   225         -* [$%lua raw [!script]]: evaluates [$script] and emits the string array it returns (if any) in raw block context.
   226         -* [$%lua exp [!script]]: evaluates [$script] and emits the string array it returns (if any) in expanded block context.
          453  +* [`%lua use [!file]]: evaluates [$file] and makes its definitions available
          454  +* [`\[%lua raw [!script]\]]: evaluates [$script] and emits the string it returns (if any) in raw span context.
          455  +* [`\[%lua exp [!script]\]]: evaluates [$script] and emits the string it returns (if any) in expanded span context.
          456  +* [`%lua raw [!script]]: evaluates [$script] and emits the string array it returns (if any) in raw block context.
          457  +* [`%lua exp [!script]]: evaluates [$script] and emits the string array it returns (if any) in expanded block context.
   227    458   
   228         -the interpreter should provide a [$cortav] table with the objects:
          459  +the interpreter should provide a [`cortav] table with the objects:
   229    460   * ctx: contains context variables
   230    461   
   231    462   used files should return a table with the following members
   232    463   * macros: an array of functions that return strings or arrays of strings when invoked. these will be injected into the global macro namespace.
   233    464   
   234         -### ts
          465  +###ts ts
   235    466   the [*ts] extension allows documents to be marked up for basic classification constraints and automatically redacted. if you are seriously relying on ts for confidentiality, make damn sure you start the file with [$%[*requires] ts], so that rendering will fail with an error if the extension isn't supported.
   236    467   
   237    468   ts enables the directives:
   238         -* [$ts class [!scope] [!level] (styled-text)]: indicates a classification level for either the while document (scope [!doc]) or the next section (scope [!sec]). if the ts level is below [$level], the section will be redacted or rendering will fail with an error, as appropriate. if styled-text is included, this will be treated as the name of the classification level.
   239         -* [$ts word [!scope] [!word] (styled-text)]: indicates a codeword clearance that must be present for the text to render. if styled-text is present, this will be used to render the name of the codeword instead of [$word].
   240         -* [$when ts level [!level]]
   241         -* [$when ts word [!word]]
          469  +* [`%[*ts] class [$scope level] ([$styled-text])]: indicates a classification level for either the whole document (scope [$doc]) or the next section (scope [$sec]). if the ts level is below [$level], the section will be redacted or rendering will fail with an error, as appropriate. if styled-text is included, this will be treated as the name of the classification level.
          470  +* [`%[*ts] word [$scope word] ([$styled-text])]: indicates a codeword clearance that must be present for the text to render. if styled-text is present, this will be used to render the name of the codeword instead of [$word].
          471  +* [`%[*when] ts level [$level]]
          472  +* [`%[*when] ts word [$word]]
   242    473   
   243    474   ts enables the spans:
   244         -* [$\[🔒#[!level] [!styled-text]\]]: redacts the span if the security level is below that specified.
   245         -* [$\[🔒.[!word] [!styled-text]\]]: redacts the span if the specified codeword clearance is not enabled.
   246         -(the padlock emoji is shorthand for [$%ts].)
          475  +* [`\[🔒#[!level] [$styled-text]\]]: redacts the span if the security level is below that specified.
          476  +* [`\[🔒.[!word] [$styled-text]\]]: redacts the span if the specified codeword clearance is not enabled.
          477  +(the padlock emoji is shorthand for [`%[*ts]].)
   247    478   
   248    479   ts redacts spans securely; that is, they are simply replaced with an indicator that they have been redacted, without visually leaking the length of the redacted text.
   249    480   
   250    481   ~~~#ts-example example [cortav] ~~~
   251    482   %ts word doc sorrowful-pines SORROWFUL PINES
   252    483   
   253    484   # intercept R1440 TCT S3
................................................................................
   266    497   <B> Hyacinth, I told you not to contact me without—
   267    498   <A, shouting> god DAMMIT woman I am trying to SAVE your worthless skin
   268    499   <B> Hyacinth! your Godforsaken scrambler!
   269    500   <A> …oh, [!fuck].
   270    501   (signal lost)
   271    502   ~~~
   272    503   
   273         -# reference implementation
   274         -the cortav standard is implemented in [$cortav.lua], found in this repository. only the way [$cortav.lua] interprets the cortav language is defined as a reference implementation; other behaviors are simply how [$cortav.lua] implements the specification and may be copied, ignored, tweaked, violently assaulted, or used as inspiration by a compliant parser.
          504  +#refimpl reference implementation
          505  +the cortav standard is implemented in [`cortav.lua], found in this repository. only the way [`cortav.lua] interprets the cortav language is defined as a reference implementation; other behaviors are simply how [`cortav.lua] implements the specification and may be copied, ignored, tweaked, violently assaulted, or used as inspiration by a compliant parser.
   275    506   
   276         -the reference implementation can be used both as a lua library and from the command line. [$cortav.lua] contains the parser and renderers, [$ext/*] contain various extensions, [$sirsem.lua] contains utility functions, and [$cli.lua] contains the CLI driver.
          507  +the reference implementation can be used both as a lua library and from the command line. [`cortav.lua] contains the parser and renderers, [`ext/*] contain various extensions, [`sirsem.lua] contains utility functions, and [`cli.lua] contains the CLI driver.
   277    508   
   278         -## lua library
          509  +##refimpl-lib lua library
   279    510   there are various ways to use cortav from a lua script; the simplest however is probably to precompile your script with luac and link in the necessary components of the implementation. for instance, say we have the following program
   280    511   
   281    512   ~~~ stdin2html.lua [lua] ~~~
   282    513   local ct = require 'cortav'
   283    514   local mode = {}
   284    515   local doc = ct.parse(io.stdin, {file = '(stdin)'}, mode)
   285    516   doc.stage = {
................................................................................
   292    523   
   293    524   and the only extension we need is the table-of-contents extension. our script can be translated into a self-contained lua bytecode blob with the following command
   294    525   
   295    526   ~~~
   296    527   $ luac -s -o stdin2html.lc $cortav_repo/{sirsem,cortav,ext/toc}.lua stdin2html.lua
   297    528   ~~~
   298    529   
   299         -and can then be operated with the command [$lua stdin2html.lc], with no further need for the cortav repository files. note that the order of the [$luac] command is important! [$sirsem.lua] must come first, followed by [$cortav.lua], followed by any extensions. your driver script (i.e. the script with the entry point into the application) should always come last.
          530  +and can then be operated with the command [`lua stdin2html.lc], with no further need for the cortav repository files. note that the order of the [`luac] command is important! [`sirsem.lua] must come first, followed by [`cortav.lua], followed by any extensions. your driver script (i.e. the script with the entry point into the application) should always come last.
   300    531   
   301         -### building custom tools
          532  +###refimpl-tools building custom tools
   302    533   generally, most existing file-format conversion tools (cmark, pandoc, and so on) have a crucial limitation: they hardcode specific assumptions like document structure. this means that the files they output are generally not suitable as-is for the users' purposes, and require further munging, usually by hateful shell or perl scripts. some tools do provide libraries for end users to use as a basis for designing their own tools, but these are often limited, and in any case the user ends up having to write their own (non-standard) driver. it's no surprise that very few people end up doing this.
   303    534   
   304         -[$cortav.lua]'s design lends itself to a more elegant solution. one can of course write their own driver using [$cortav] as a library, but most of the time when you're compiling document sources, you just want a binary you can run from the command line or a makefile. with [$cortav.lua], you can extend its capabilities easily while keeping the same driver.
          535  +[`cortav.lua]'s design lends itself to a more elegant solution. one can of course write their own driver using [`cortav] as a library, but most of the time when you're compiling document sources, you just want a binary you can run from the command line or a makefile. with [`cortav.lua], you can extend its capabilities easily while keeping the same driver.
   305    536   
   306         -in the [$cortav] spec, extensions are mostly intended to give different implementations the ability to offer extra capabilities, but the reference implementation uses an extension architecture that makes it easy to write and add your own. for each type of new behavior you want to implement, just create a new extension and list it on the make command line:
          537  +in the [`cortav] spec, extensions are mostly intended to give different implementations the ability to offer extra capabilities, but the reference implementation uses an extension architecture that makes it easy to write and add your own. for each type of new behavior you want to implement, just create a new extension and list it on the make command line:
   307    538   
   308    539   ~~~
   309    540   $ nvim ~/dev/my-cortav-exts/imperial-edict.lua
   310    541   $ make cortav extens+=$HOME/dev/my-cortav-exts/*.lua
   311    542   ~~~
   312    543   
   313         -the cortav binary this produces will have all the extra capabilities you personally need, without any need to fork [$cortav.lua] itself or even touch the repository.
          544  +the cortav binary this produces will have all the extra capabilities you personally need, without any need to fork [`cortav.lua] itself or even touch the repository.
   314    545   
   315         -there's no reason [$cortav.lua] shouldn't be able to load extensions at runtime as well; i just haven't implemented this behavior yet. it probably would only take a few extra lines of code tho.
          546  +there's no reason [`cortav.lua] shouldn't be able to load extensions at runtime as well; i just haven't implemented this behavior yet. it probably would only take a few extra lines of code tho.
   316    547   
   317         -i will eventually document the extension API, but for now, look at [$ext/toc.lua] for a simple example of how to register an extension.
          548  +i will eventually document the extension API, but for now, look at [`ext/toc.lua] for a simple example of how to register an extension.
   318    549   
   319         -## command line driver
   320         -the [$cortav.lua] command line driver can be run from the repository directory with the command [$lua ./cli.lua], or by first compiling it into a bytecode form that links in all its dependencies. this is the preferred method for installation, as it produces a self-contained executable which loads more quickly, but running the driver in script form may be desirable for development or debugging.
          550  +##refimpl-cli command line driver
          551  +the [`cortav.lua] command line driver can be run from the repository directory with the command [`lua ./cli.lua], or by first compiling it into a bytecode form that links in all its dependencies. this is the preferred method for installation, as it produces a self-contained executable which loads more quickly, but running the driver in script form may be desirable for development or debugging.
   321    552   
   322         -the repository contains a GNU makefile to automate compilation of the reference implementation on unix-like OSes. simply run [$$ make cortav] or [$$ gmake cortav] from the repository root to produce a self-contained bytecode executable that can be installed anywhere on your filesystem, with no dependencies other than the lua interpreter.
          553  +the repository contains a GNU makefile to automate compilation of the reference implementation on unix-like OSes. simply run [`$ make cortav] or [`$ gmake cortav] from the repository root to produce a self-contained bytecode executable that can be installed anywhere on your filesystem, with no dependencies other than the lua interpreter.
   323    554   
   324         -! note that the makefile strips debugging symbols to save space, so running [$cli.lua] directly as a script may be helpful if you encounter errors and need stacktraces or other debugging information.
          555  +! note that the makefile strips debugging symbols to save space, so running [`cli.lua] directly as a script may be helpful if you encounter errors and need stacktraces or other debugging information.
   325    556   
   326         -henceforth it will be assumed that you have produced the [$cortav] executable and placed it somewhere in your [$$PATH]; if you are instead running [$cortav.lua] directly as an interpreted script, you'll need to replace [$$ cortav] with [$$ lua ./cli.lua] in incantations.
          557  +henceforth it will be assumed that you have produced the [`cortav] executable and placed it somewhere in your [$$PATH]; if you are instead running [`cortav.lua] directly as an interpreted script, you'll need to replace [`$ cortav] with [`$ lua ./cli.lua] in incantations.
   327    558   
   328         -when run without commands, [$cortav.lua] will read input from standard input and write to standard output. alternately, a source file can be given as an argument. to write to a specific file instead of the standard output stream, use the [$-o [!file]] flag.
          559  +when run without commands, [`cortav.lua] will read input from standard input and write to standard output. alternately, a source file can be given as an argument. to write to a specific file instead of the standard output stream, use the [`-o [!file]] flag.
   329    560   
   330    561   ~~~
   331    562   $ cortav readme.ct -o readme.html
   332    563   	# reads from readme.ct, writes to readme.html
   333    564   $ cortav -o readme.html
   334    565   	# reads from standard input, writes to readme.html
   335    566   $ cortav readme.ct
   336    567   	# reads from readme.ct, writes to standard output
   337    568   ~~~
   338    569   
   339         -### switches
   340         -[$cortav.lua] offers various switches to control its behavior.
          570  +###refimpl-build building
          571  +the command line driver is built and installed with a GNU [$make] script. this script accepts the variables shown below with their default values:
          572  ++ prefix | [`[$$HOME]/.local] | the path under which the package will be installed
          573  ++ build | [`build] |  the directory where generated objects will be placed; useful for out-of-tree builds
          574  ++ bin-prefix | [`[$$prefix]/bin] | directory to install the executables to
          575  ++ default-format-flags | [`-m html:width 35em] | a list of flags that will be passed by the viewer script to [`cortav] when generating an HTML file
          576  +
          577  +the following targets are supplied to automate the build:
          578  +* [`install] builds everything, installs the executable and the viewer script to [$$bin_prefix], and registers the viewer script with XDG
          579  +* [`excise] deletes everything installed and deregisters the file handlers (note that the same variables must be passed to [`excise] as were passed to [`install]!)
          580  +* [`clean] deletes build artifacts from the [$$build] directory like it was never there
          581  +* [`wipe] is equivalent to [`$ make excise && make clean]
          582  +
          583  +###refimpl-switches switches
          584  +[`cortav.lua] offers various switches to control its behavior.
   341    585   + long                      + short + function                                    +
   342         -| [$--out [!file]]              :|:[$-o]:| sets the output file (default stdout)       |
   343         -| [$--log [!file]]              :|:[$-l]:| sets the log file (default stderr)          |
   344         -| [$--define [!var] [!val]]     :|:[$-d]:| sets the context variable [$var] to [$val]  |
   345         -| [$--mode-set [!mode]]         :|:[$-y]:| activates the [>refimpl-mode mode] with ID [!mode]
   346         -| [$--mode-clear [!mode]]       :|:[$-n]:| disables the mode with ID [!mode]           |
   347         -| [$--mode [!id] [!val]]        :|:[$-m]:| configures mode [!id] with the value [!val] |
   348         -| [$--mode-set-weak [!mode]]    :|:[$-Y]:| activates the [>refimpl-mode mode] with ID [!mode] if the source file does not specify otherwise
   349         -| [$--mode-clear-weak [!mode]]  :|:[$-N]:| disables the mode with ID [!mode] if the source file does not specify otherwise
   350         -| [$--mode-weak [!id] [!val]]   :|:[$-M]:| configures mode [!id] with the value [!val] if the source file does not specify otherwise
   351         -| [$--help]                     :|:[$-h]:| display online help                         |
   352         -| [$--version]                  :|:[$-V]:| display the interpreter version             |
          586  +| [`--out [!file]]              :|:[`-o]:| sets the output file (default stdout)       |
          587  +| [`--log [!file]]              :|:[`-l]:| sets the log file (default stderr)          |
          588  +| [`--define [!var] [!val]]     :|:[`-d]:| sets the context variable [$var] to [$val]  |
          589  +| [`--mode-set [!mode]]         :|:[`-y]:| activates the [>refimpl-mode mode] with ID [!mode]
          590  +| [`--mode-clear [!mode]]       :|:[`-n]:| disables the mode with ID [!mode]           |
          591  +| [`--mode [!id] [!val]]        :|:[`-m]:| configures mode [!id] with the value [!val] |
          592  +| [`--mode-set-weak [!mode]]    :|:[`-Y]:| activates the [>refimpl-mode mode] with ID [!mode] if the source file does not specify otherwise
          593  +| [`--mode-clear-weak [!mode]]  :|:[`-N]:| disables the mode with ID [$mode] if the source file does not specify otherwise
          594  +| [`--mode-weak [!id] [!val]]   :|:[`-M]:| configures mode [$id] with the value [$val] if the source file does not specify otherwise
          595  +| [`--help]                     :|:[`-h]:| display online help                         |
          596  +| [`--version]                  :|:[`-V]:| display the interpreter version             |
   353    597   
   354    598   ###refimpl-mode modes
   355         -most of [$cortav.lua]'s implementation-specific behavior is controlled by use of [!modes]. these are namespaced options which may have a boolean, string, or numeric value. boolean modes are set with the [$-y] [$-n] flags; other modes use the [$-m] flags.
          599  +most of [`cortav.lua]'s implementation-specific behavior is controlled by use of [!modes]. these are namespaced options which may have a boolean, string, or numeric value. boolean modes are set with the [`-y] [`-n] flags; other modes use the [`-m] flags.
   356    600   
   357    601   most modes are defined by the renderer backend. the following modes affect the behavior of the frontend:
   358    602   
   359    603   + ID                 + type   + effect
   360         -|   [$render:format]:| string | selects the [>refimpl-rend renderer] (default [$html])
   361         -| [$parse:show-tree]:| flag   | dumps the parse tree to the log after parsing completes
          604  +|   [`render:format]:| string | selects the [>refimpl-rend renderer] (default [`html])
          605  +| [`parse:show-tree]:| flag   | dumps the parse tree to the log after parsing completes
   362    606   
   363    607   ##refimpl-rend renderers
   364         -[$cortav.lua] implements a frontend-backend architecture, separating the parsing stage from the rendering stage. this means new renderers can be added to [$cortav.lua] relatively easily. currently, only an [>refimpl-rend-html HTML renderer] is included; however, a [$groff] backend is planned at some point in the future, so that PDFs and manpages can be generated from cortav files.
          608  +[`cortav.lua] implements a frontend-backend architecture, separating the parsing stage from the rendering stage. this means new renderers can be added to [`cortav.lua] relatively easily. currently, only an [>refimpl-rend-html HTML renderer] is included; however, a [`groff] backend is planned at some point in the future, so that PDFs and manpages can be generated from cortav files.
   365    609   
   366    610   ###refimpl-rend-html html
   367         -the HTML renderer is activated with the incantation [$-m render:format html]. it is currently the default backend. it produces a single HTML file, optionally with CSS styling data, from a [$.ct] input file.
          611  +the HTML renderer is activated with the incantation [`-m render:format html]. it is currently the default backend. it produces a single HTML file, optionally with CSS styling data, from a [`.ct] input file.
   368    612   
   369         -it supports the following modes:
          613  +####refimpl-rend-html-modes modes
          614  +[`html] supports the following modes:
   370    615   
   371         -* string (css length) [$html:width] sets a maximum width for the body content in order to make the page more readable on large displays
   372         -* number [$html:accent] applies an accent hue to the generated webpage. the hue is specified in degrees, e.g. [$-m html:accent 0] applies a red accent.
   373         -* flag [$html:dark-on-light] uses dark-on-light styling, instead of the default light-on-dark
   374         -* flag [$html:fossil-uv] outputs an HTML snippet suitable for use with the Fossil VCS webserver. this is intended to be used with the unversioned content mechanism to host rendered versions of documentation written in cortav that's stored in a Fossil repository.
   375         -* number [$html:hue-spread] generates a color palette based on the supplied accent hue. the larger the value, the more the other colors diverge from the accent hue.
   376         -* string [$html:link-css] generates a document linking to the named stylesheet
   377         -* flag [$html:gen-styles] embeds appropriate CSS styles in the document (default on)
   378         -* flag [$html:snippet] produces a snippet of html instead of an entire web page. note that proper CSS scoping is not yet implemented (and can't be implemented hygienically since [$scoped] was removed 😢)
   379         -* string [$html:title] specifies the webpage titlebar contents (normally autodetected from the document based on headings or directives)
          616  +* string (css length) [`html:width] sets a maximum width for the body content in order to make the page more readable on large displays
          617  +* number [`html:accent] applies an accent hue to the generated webpage. the hue is specified in degrees, e.g. [`-m html:accent 0] applies a red accent.
          618  +* flag [`html:dark-on-light] uses dark-on-light styling, instead of the default light-on-dark
          619  +* flag [`html:fossil-uv] outputs an HTML snippet suitable for use with the Fossil VCS webserver. this is intended to be used with the unversioned content mechanism to host rendered versions of documentation written in cortav that's stored in a Fossil repository.
          620  +* number [`html:hue-spread] generates a color palette based on the supplied accent hue. the larger the value, the more the other colors diverge from the accent hue.
          621  +* string [`html:link-css] generates a document linking to the named stylesheet
          622  +* flag [`html:gen-styles] embeds appropriate CSS styles in the document (default on)
          623  +* flag [`html:snippet] produces a snippet of html instead of an entire web page. note that proper CSS scoping is not yet implemented (and can't be implemented hygienically since [$scoped] was removed 😢)
          624  +* string [`html:title] specifies the webpage titlebar contents (normally autodetected from the document based on headings or directives)
          625  +* string [`html:font] specifies the default font to use when rendering as a CSS font specification (e.g. [`-m html:font 'Alegreya, Junicode, Georgia, "Times New Roman"'])
   380    626   
   381    627   ~~~
   382    628   $ cortav readme.ct --out readme.html \
   383    629   	-m render:format html \
   384    630   	-m html:width 40em \
   385    631   	-m html:accent 80 \
   386    632   	-m html:hue-spread 35 \
   387    633   	-y html:dark-on-light # could also be written as:
   388    634   $ cortav readme.ct -ommmmy readme.html render:format html html:width 40em html:accent 80 html:hue-spread 35 html:dark-on-light
   389    635   ~~~
   390    636   
          637  +#### directives
          638  +[`html] supplies the following render directives.
          639  +
          640  +* [`%[*html] link [$rel] [$mime] [$href]]: inserts a [`<link>] tag in the header, for example, to link in an alternate stylesheet, or help feed readers find your atom or rss feed.
          641  +** [`%[*html] link alternate\\ stylesheet text/css /res/style2.css]
          642  +** [`%[*html] link alternate application/atom+xml /feed.atom]
          643  +* [`%[*html] style [$id]]: adds the stylesheet referenced by [$id] into the document stylesheet. the stylesheet is specified using a [>rsrc resource].
          644  +
          645  +#### stylesheets
          646  +the [`html] backend offers some additional directives for external CSS files that are embedded into the document, in order to simplify integration with the accent mechanism. these are:
          647  +
          648  +* [`@[*fg]]: resolves to a color expression denoting the selected foreground color. equivalent to [`[*tone](1)]
          649  +* [`@[*bg]]: resolves to a color expression denoting the selected background color. equivalent to [`[*tone](0)]
          650  +* [`@[*tone]\[/[$alpha]\]([$fac] \[[$shift] \[[$saturate]\]\] )]: resolves to a color expression. [$fac] is a floating-point value scaling from the background color to the foreground color. [$shift] is a value in degrees controlling how far the hue will shift relative to the accent. [$saturate] is a floating-point value controlling how saturated the color is.
          651  +
          652  +###refimpl-rend-groff groff
          653  +the [`groff] backend produces a text file suitable for supplying to a [`groff] compiler. [`groff] is the GNU implementation of a venerable typesetting system from the early days of UNIX.
          654  +
          655  +as a convenience, the groff backend supports two modes of operation: it can write a [`groff] file directly to disk, or it can automatically launch a [`groff] process with the appropriate command line options and environment variables. this second mode is recommended unless you're rendering very large files to multiple formats, as [`groff] invocation is nontrivial and it's best to let the renderer handle that for you.
          656  +
          657  +####refimpl-rend-groff-modes modes
          658  +[`groff] supports the following modes:
          659  +
          660  +* string [`groff:annotate] controls how footnotes will be handled.
          661  +** [`footnote] places footnotes at the end of the page they are referenced on. if the same footnote is used on multiple pages, it will be duplicated on each.
          662  +** [`secnote] places footnotes at the end of each section. footnotes used in multiple sections will be duplicated for each
          663  +** [`endnote] places all footnotes at the end of the rendered document.
          664  +* string [`groff:dev] names an output device (such as [`dvi] or [`pdf]). if this mode is present, [`groff] will be automatically invoked
          665  +* string [`groff:title-page] takes an identifier that names a section. this section will be treated as the title page for the document.
          666  +
          667  +### directives
          668  +* [`%[*pragma] title-page [$id]] sets the title page to section [$id]. this causes it to be specially formatted, with a large, centered title and subtitle.
          669  +
          670  +### quirks
          671  +if the [`toc] extension is active but no [`%[*toc]] directive is provided, the table of contents will be given its own section at the start of the document (after the title page, if any).
          672  +
   391    673   ## further directions
   392    674   
   393    675   ### additional backends
   394    676   it is eventually intended to support the following backends, if reasonably practicable.
   395    677   * [*html]: emit HTML and CSS code to typeset the document. [!in progress]
   396    678   * [*svg]: emit SVG, taking advantage of its precise layout features to produce a nicely formatted and paginated document. pagination can be accomplished through emitting multiple files or by assigning one layer to each page. [!long term]
   397    679   * [*groff]: the most important output backend, rivalling [*html]. will allow the document to be typeset in a wide variety of formats, including PDF and manpage. [!short term]
   398    680   * [*gemtext]: essentially a downrezzing of cortav to make it readable to Gemini clients
          681  +* [*ast]: produces a human- and/or machine-readable dump of the document's syntax tree, to aid in debugging or for interoperation with systems that do not support `cortav` directly. mode [`ast:repr] will allow selecting formats for the dump. [`ast:rel] can be [`tree] (the default) to emit a hierarchical representation, or [`flat] to emit an array of nodes that convey hierarchy [^flatdoc by naming one another], rather than being placed inside one another. [`tree] is easier for humans to parse; [`flat] is easier for computers. origin information can be included for each node with the flag [`ast:debug-syms], but be aware this will greatly increase file size.
          682  +** [`tabtree] [!(default)]: a hierarchical tree view, with the number of tabs preceding an item showing its depth in the tree
          683  +** [`sexp]
          684  +** [`binary]: emit a raw binary format that is easier for programs to read. maybe an lmdb or cdb file?
          685  +** [`json]
          686  +
          687  +	flatdoc: ~~~flat sexp example output [scheme]~~~
          688  +		(nodes
          689  +			(section (id . "section1")
          690  +				(anchor "introduction")
          691  +				(kind . "ordinary")
          692  +				(label . "section1-heading")
          693  +				(nodes
          694  +					"section1-heading"
          695  +					"para1"
          696  +					"para2"
          697  +					"hzrule"
          698  +					"para3"))
          699  +			(section (id . "section2")
          700  +				(kind . "ordinary")
          701  +				(label . "section2-heading")
          702  +				(nodes
          703  +					"para4"
          704  +					"hzrule"
          705  +					"para5"
          706  +					"list1"))
          707  +			(block list (id . "list1")
          708  +				(kind . "ordered")
          709  +				(nodes
          710  +					"para6"
          711  +					"list2"
          712  +					"para7"))
          713  +			(block list (id . "list2")
          714  +				(kind . "unordered")
          715  +				(nodes
          716  +					"para8"
          717  +					"para9"
          718  +					"para10"))
          719  +			(block para (id . "para1")
          720  +				(nodes "text1" "format1" "text3" "footnote1" "text4"))
          721  +			(block label (id . "section1-heading") (nodes "section1-heading-text"))
          722  +			(text (id . "section1-heading-text") "Contemplating the Anathema")
          723  +			(text (id . "text1")
          724  +				"Disquieting information has recently been disclosed to virtual journalists of the Giedi Prime infomatrix by sources close to the Hyperion Entity regarding the catastrophic Year of Schisms and the unidentified agents believed to be responsible for memetically engineering the near-collapse of the Church Galactic.")
          725  +			(span format (id . "format1")
          726  +				(style . "emph")
          727  +				(nodes . "text2"))
          728  +			(text (id . "text2") "Curiously,")
          729  +			(text (id . "text3") "his Cyber-Holiness")
          730  +			(text (id . "footnote1-caption-text") "Pope Chewbacca III")
          731  +			(span footnote (id . "footnote1")
          732  +				(note . "footnote1-text")
          733  +				(ref . "papal-disclaimer")
          734  +				(nodes
          735  +					"footnote1-caption-text"))
          736  +			(text (id . "text4") "has thus far had little to say on the matter, provoking rampant speculation among the faithful.")
          737  +			(footnote-def (id . "footnote1-def")
          738  +				(nodes "footnote1-text"))
          739  +			(text (id . "footnote1-text") "Currently recognized as legitimate successor to Peter of Terra by 2,756 sects, rejected by 678 of mostly Neo-Lutheran origin, and decried as an antipope by 73, most notably Pope Peter II of Centaurus Secundus, leader of the ongoing relativistic crusade against star systems owned by Microsoft.")
          740  +			;;; snip ;;;
          741  +			(document
          742  +				(nodes
          743  +					"section1" "section2")))
          744  +		~~~
   399    745   
   400    746   some formats may eventually warrant their own renderer, but are not a priority:
   401    747   * [*text]: cortav source files are already plain text, but a certain amount of layout could be done using ascii art.
   402    748   * [*ansi]: emit sequences of ANSI escape codes to lay out a document in a terminal-friendly way
   403    749   * [*tex]: TeX is an unholy abomination and i neither like nor use it, but lots of people do and if cortav ever catches on, a TeX backend should probably be written eventually.
   404    750   
   405    751   PDF is not on either list because it's a nightmarish mess of a format and groff, which is installed on most linux systems already, can easily generate PDFs
   406    752   
   407    753   ### LCH support
   408    754   right now, the use of color in the HTML renderer is very unsatisfactory. the accent mechanism operates on the basis of the CSS HSL function, which is not perceptually uniform; different hues will present different mixes of brightness and some (yellows?) may be ugly or unreadable.
   409    755   
   410    756   the ideal solution would be to simply switch to using LCH based colors. unfortunately, only Safari actually supports the LCH color function right now, and it's unlikely (unless Lea Verou and her husband manage to work a miracle) that Colors Level 4 is going to be implemented very widely any time soon.
   411    757   
   412         -this leaves us in an awkward position. we can of course do the math ourselves, working in LCH to implement the internal [$@tone] macro, and then "converting" these colors to HSL. unfortunately, you can't actually convert from LCH to HSL; it's like converting from pounds to kilograms. LCH can represent any color the human visual system can perceive; sRGB can't, and CSS HSL is implemented in sRGB. however, we could at least approximate something that would allow for perceptually uniform brightness, which would be an improvement, and this is probably the direction to go in, unless a miracle occurs and [$lch()] or [$color()] pop up in Blink.
          758  +this leaves us in an awkward position. we can of course do the math ourselves, working in LCH to implement the internal [`@tone] macro, and then "converting" these colors to HSL. unfortunately, you can't actually convert from LCH to HSL; it's like converting from pounds to kilograms. LCH can represent any color the human visual system can perceive; sRGB can't, and CSS HSL is implemented in sRGB. however, we could at least approximate something that would allow for perceptually uniform brightness, which would be an improvement, and this is probably the direction to go in, unless a miracle occurs and [`lch()] or [`color()] pop up in Blink.
   413    759   
   414    760   it may be possible to do a more reasonable job of handling colors in the postscript and TeX outputs. unsure about SVG but i assume it suffers the same problems HTML/CSS do. does groff even support color??
   415    761   
   416    762   ### intent files
   417         -there's currently no standard way to describe the intent and desired formatting of a document besides placing pragmas in the source file itself. this is extremely suboptimal, as when generating collections of documents, it's ideal to be able to keep all formatting information in one place. users should also be able to specify their own styling overrides that describe the way they prefer to read [$cortav] files, especially for uses like gemini or gopher integration.
          763  +there's currently no standard way to describe the intent and desired formatting of a document besides placing pragmata in the source file itself. this is extremely suboptimal, as when generating collections of documents, it's ideal to be able to keep all formatting information in one place. users should also be able to specify their own styling overrides that describe the way they prefer to read [`cortav] files, especially for uses like gemini or gopher integration.
          764  +
          765  +at some point soon [`cortav] needs to address this by adding intent files that can be activated from outside the source file, such as with a command line flag or a configuration file setting. these will probably consist of lines that are interpreted as pragmata. in addition to the standard intent format however, individual implementations should feel free to provide their own ways to provide intent metadata; e.g. the reference implementation, which has a lua interpreter available, should be able to take a lua script that runs after the parse stage and generates pragmata. this will be particularly useful for the end-user who wishes to specify a particular format she likes reading her files in without forcing that format on everyone she sends the compiled document to, as it will be able to interrogate the document and make intelligent decisions about what pragmata to apply.
   418    766   
   419         -at some point soon [$cortav] needs to address this by adding intent files that can be activated from outside the source file, such as with a command line flag or a configuration file setting. these will probably consist of lines that are interpreted as pragmata. in addition to the standard intent format however, individual implementations should feel free to provide their own ways to provide intent metadata; e.g. the reference implementation, which has a lua interpreter available, should be able to take a lua script that runs after the parse stage and generates . this will be particularly useful for the end-user who wishes to specify a particular format she likes reading her files in without forcing that format on everyone she sends the compiled document to, as it will be able to interrogate the document and make intelligent decisions about what pragmata to apply.
          767  +intent files should also be able to define [>rsrc resources], [>ctxvar context variables], and macros.

Modified cortav.lua from [eb3cc08f95] to [028f351fed].

     2      2   --  ~ lexi hale <lexi@hale.su>
     3      3   --  © AGPLv3
     4      4   --  ? reference implementation of the cortav document language
     5      5   
     6      6   local ss = require 'sirsem'
     7      7   -- aliases for commonly used sirsem funcs
     8      8   local startswith = ss.str.begins
     9         -local eachcode = ss.str.enc.utf8.each
    10      9   local dump = ss.dump
    11     10   local declare = ss.declare
    12     11   
    13     12   -- make this module available to require() when linked into a lua bytecode program with luac
    14     13   local ct = ss.namespace 'cortav'
    15     14   ct.info = {
    16     15   	version = ss.version {0,1; 'devel'};
................................................................................
    81     80   	end);
    82     81   	cli = ss.exnkind 'command line parse error';
    83     82   	mode = ss.exnkind('bad mode', function(msg, ...)
    84     83   		return string.format("mode “%s” "..msg, ...)
    85     84   	end);
    86     85   	unimpl = ss.exnkind 'feature not implemented';
    87     86   	ext = ss.exnkind 'extension error';
           87  +	enc = ss.exnkind('encoding error', function(msg, ...)
           88  +		return string.format('[%s]' .. msg, ...)
           89  +	end);
    88     90   }
    89     91   
    90     92   ct.ctx = declare {
    91     93   	mk = function(src) return {src = src} end;
    92     94   	ident = 'context';
    93     95   	cast = {
    94     96   		string = function(me)
................................................................................
   112    114   			table.insert(self.sec.blocks,block)
   113    115   			return block
   114    116   		end;
   115    117   		ref = function(self,id)
   116    118   			if not id:find'%.' then
   117    119   				local rid = self.sec.refs[id]
   118    120   				if self.sec.refs[id] then
   119         -					return self.sec.refs[id]
          121  +					return self.sec.refs[id], id, self.sec
   120    122   				else self:fail("no such ref %s in current section", id or '') end
   121    123   			else
   122    124   				local sec, ref = string.match(id, "(.-)%.(.+)")
   123    125   				local s = self.doc.sections[sec]
   124    126   				if s then
   125    127   					if s.refs[ref] then
   126         -						return s.refs[ref]
          128  +						return s.refs[ref], ref, sec
   127    129   					else self:fail("no such ref %s in section %s", ref, sec) end
   128    130   				else self:fail("no such section %s", sec) end
   129    131   			end
   130    132   		end
   131    133   	};
   132    134   }
   133    135   
................................................................................
   217    219   		meta = {};
   218    220   		vars = {};
   219    221   		ext = {
   220    222   			inhibit = {};
   221    223   			need = {};
   222    224   			use = {};
   223    225   		};
          226  +		enc = ss.str.enc.utf8;
   224    227   	} end;
   225    228   	construct = function(me)
   226    229   		me.docjob = ct.ext.job('doc', me, nil)
   227    230   	end;
   228    231   }
   229    232   
   230    233   -- FP helper functions
................................................................................
   397    400   
   398    401   -- renderer engines
   399    402   function ct.render.html(doc, opts)
   400    403   	local doctitle = opts['title']
   401    404   	local f = string.format
   402    405   	local ids = {}
   403    406   	local canonicalID = {}
   404         -	local function getSafeID(obj)
          407  +	local function getSafeID(obj,pfx)
          408  +		pfx = pfx or ''
   405    409   		if canonicalID[obj] then
   406    410   			return canonicalID[obj]
   407         -		elseif obj.id and ids[obj.id] then
          411  +		elseif obj.id and ids[pfx .. obj.id] then
          412  +			local objid = pfx .. obj.id
   408    413   			local newid
   409    414   			local i = 1
   410         -			repeat newid = obj.id .. string.format('-%x', i)
          415  +			repeat newid = objid .. string.format('-%x', i)
   411    416   				i = i + 1 until not ids[newid]
   412    417   			ids[newid] = obj
   413    418   			canonicalID[obj] = newid
   414    419   			return newid
   415    420   		else
   416    421   			local cid = obj.id
   417    422   			if not cid then
   418    423   				local i = 1
   419         -				repeat cid = string.format('x-%x', i)
          424  +				repeat cid = string.format('%sx-%x', pfx, i)
   420    425   					i = i + 1 until not ids[cid]
   421    426   			end
   422    427   			ids[cid] = obj
   423    428   			canonicalID[obj] = cid
   424    429   			return cid
   425    430   		end
   426    431   	end
   427    432   
          433  +	local footnotes = {}
          434  +	local footnotecount = 0
          435  +
   428    436   	local langsused = {}
   429    437   	local langpairs = {
   430    438   		lua = { color = 0x9377ff };
   431    439   		terra = { color = 0xff77c8 };
   432    440   		c = { name = 'C', color = 0x77ffe8 };
   433    441   		html = { color = 0xfff877 };
   434    442   		scheme = { color = 0x77ff88 };
   435    443   		lisp = { color = 0x77ff88 };
   436    444   		fortran = { color = 0xff779a };
   437    445   		python = { color = 0xffd277 };
   438         -		python = { color = 0xcdd6ff };
          446  +		ruby = { color = 0xcdd6ff };
   439    447   	}
   440    448   
   441    449   	local stylesets = {
          450  +		footnote = [[
          451  +			div.footnote {
          452  +				font-size: 90%;
          453  +				display: none;
          454  +				grid-template-columns: 1em 1fr min-content;
          455  +				grid-template-rows: 1fr min-content;
          456  +				position: fixed;
          457  +				padding: 1em;
          458  +				background: @tone(0.05);
          459  +				border: black;
          460  +				margin:auto;
          461  +			}
          462  +			div.footnote:target { display:grid; }
          463  +			@media screen {
          464  +				div.footnote {
          465  +					left: 10em;
          466  +					right: 10em;
          467  +					max-width: calc(@width + 2em);
          468  +					max-height: 30vw;
          469  +					bottom: 1em;
          470  +				}
          471  +			}
          472  +			@media print {
          473  +				div.footnote {
          474  +					position: relative;
          475  +				}
          476  +				div.footnote:first-of-type {
          477  +					border-top: 1px solid black;
          478  +				}
          479  +			}
          480  +
          481  +			div.footnote > a[href="#0"]{
          482  +				grid-row: 2/3;
          483  +				grid-column: 3/4;
          484  +				display: block;
          485  +				padding: 0.2em 0.7em;
          486  +				text-align: center;
          487  +				text-decoration: none;
          488  +				background: @tone(0.2);
          489  +				color: @tone(1);
          490  +				border: 1px solid black;
          491  +				margin-top: 0.6em;
          492  +				-webkit-user-select: none;
          493  +				-ms-user-select: none;
          494  +				user-select: none;
          495  +				-webkit-user-drag: none;
          496  +				user-drag: none;
          497  +			}
          498  +			div.footnote > a[href="#0"]:hover {
          499  +				background: @tone(0.3);
          500  +				color: @tone(2);
          501  +			}
          502  +			div.footnote > a[href="#0"]:active {
          503  +				background: @tone(0.05);
          504  +				color: @tone(0.4);
          505  +			}
          506  +			@media print {
          507  +				div.footnote > a[href="#0"]{
          508  +					display:none;
          509  +				}
          510  +			}
          511  +			div.footnote > div.number {
          512  +				text-align:right;
          513  +				grid-row: 1/2;
          514  +				grid-column: 1/2;
          515  +			}
          516  +			div.footnote > div.text {
          517  +				grid-row: 1/2;
          518  +				grid-column: 2/4;
          519  +				padding-left: 1em;
          520  +				overflow-y: scroll;
          521  +			}
          522  +		]];
   442    523   		header = [[
          524  +			body { padding: 0 2.5em !important }
   443    525   			h1,h2,h3,h4,h5,h6 { border-bottom: 1px solid @tone(0.7); }
   444    526   			h1 { font-size: 200%; border-bottom-style: double !important; border-bottom-width: 3px !important; margin: 0em -1em; }
   445    527   			h2 { font-size: 130%; margin: 0em -0.7em; }
   446    528   			h3 { font-size: 110%; margin: 0em -0.5em; }
   447    529   			h4 { font-size: 100%; font-weight: normal; margin: 0em -0.2em; }
   448    530   			h5 { font-size: 90%; font-weight: normal; }
   449    531   			h6 { font-size: 80%; font-weight: normal; }
................................................................................
   490    572   			section:target > :is(h1,h2,h3,h4,h5,h6) {
   491    573   
   492    574   			}
   493    575   		]];
   494    576   		paragraph = [[
   495    577   			p {
   496    578   				margin: 0.7em 0;
          579  +				text-align: justify;
   497    580   			}
   498    581   			section {
   499    582   				margin: 1.2em 0;
   500    583   			}
   501    584   			section:first-child { margin-top: 0; }
   502    585   		]];
   503    586   		accent = [[
   504         -			body { background: @bg; color: @fg }
   505         -			a[href] {
   506         -				color: @tone(0.7 30);
   507         -				text-decoration-color: @tone/0.4(0.7 30);
          587  +			@media screen {
          588  +				body { background: @bg; color: @fg }
          589  +				a[href] {
          590  +					color: @tone(0.7 30);
          591  +					text-decoration-color: @tone/0.4(0.7 30);
          592  +				}
          593  +				a[href]:hover {
          594  +					color: @tone(0.9 30);
          595  +					text-decoration-color: @tone/0.7(0.7 30);
          596  +				}
          597  +				h1 { color: @tone(2); }
          598  +				h2 { color: @tone(1.5); }
          599  +				h3 { color: @tone(1.2); }
          600  +				h4 { color: @tone(1); }
          601  +				h5,h6 { color: @tone(0.8); }
   508    602   			}
   509         -			a[href]:hover {
   510         -				color: @tone(0.9 30);
   511         -				text-decoration-color: @tone/0.7(0.7 30);
          603  +			@media print {
          604  +				a[href] {
          605  +					text-decoration: none;
          606  +					color: black;
          607  +					font-weight: bold;
          608  +				}
          609  +				h1,h2,h3,h4,h5,h6 {
          610  +					border-bottom: 1px black;
          611  +				}
   512    612   			}
   513         -			h1 { color: @tone(2); }
   514         -			h2 { color: @tone(1.5); }
   515         -			h3 { color: @tone(1.2); }
   516         -			h4 { color: @tone(1); }
   517         -			h5,h6 { color: @tone(0.8); }
   518    613   		]];
          614  +		aside = [[
          615  +			section > aside {
          616  +				text-align: justify;
          617  +				margin: 0 1.5em;
          618  +				padding: 0.5em 0.8em;
          619  +				background: @tone(0.05);
          620  +				font-size: 90%;
          621  +				border-left: 5px solid @tone(0.2 15);
          622  +				border-right: 5px solid @tone(0.2 15);
          623  +			}
          624  +			section > aside p {
          625  +				margin: 0;
          626  +				margin-top: 0.6em;
          627  +			}
          628  +			section > aside p:first-child {
          629  +				margin: 0;
          630  +			}
          631  +      ]];
   519    632   		code = [[
   520    633   			code {
   521         -				background: @fg;
          634  +				display: inline-block;
          635  +				background: @tone(0.9);
   522    636   				color: @bg;
   523    637   				font-family: monospace;
   524    638   				font-size: 90%;
   525    639   				padding: 3px 5px;
   526    640   			}
          641  +		]];
          642  +		var = [[
          643  +			var {
          644  +				font-style: italic;
          645  +				font-family: monospace;
          646  +				color: @tone(0.7);
          647  +			}
          648  +			code var {
          649  +				color: @tone(0.25);
          650  +			}
          651  +		]];
          652  +		math = [[
          653  +			span.equation {
          654  +				display: inline-block;
          655  +				background: @tone(0.08);
          656  +				color: @tone(2);
          657  +				padding: 0.1em 0.3em;
          658  +				border: 1px solid @tone(0.5);
          659  +			}
   527    660   		]];
   528    661   		abbr = [[
   529    662   			abbr[title] { cursor: help; }
   530    663   		]];
   531    664   		editors_markup = [[]];
   532    665   		block_code_listing = [[
   533         -			section > figure.listing {
          666  +			figure.listing {
   534    667   				font-family: monospace;
   535    668   				background: @tone(0.05);
   536    669   				color: @fg;
   537    670   				padding: 0;
   538    671   				margin: 0.3em 0;
   539    672   				counter-reset: line-number;
   540    673   				position: relative;
   541    674   				border: 1px solid @fg;
   542    675   			}
   543         -			section > figure.listing>div {
          676  +			figure.listing>div {
   544    677   				white-space: pre-wrap;
          678  +				tab-size: 3;
          679  +				-moz-tab-size: 3;
   545    680   				counter-increment: line-number;
   546    681   				text-indent: -2.3em;
   547    682   				margin-left: 2.3em;
   548    683   			}
   549         -			section > figure.listing>:is(div,hr)::before {
          684  +			figure.listing>:is(div,hr)::before {
   550    685   				width: 1.0em;
   551    686   				padding: 0.2em 0.4em;
   552    687   				text-align: right;
   553    688   				display: inline-block;
   554    689   				background-color: @tone(0.2);
   555    690   				border-right: 1px solid @fg;
   556    691   				content: counter(line-number);
   557    692   				margin-right: 0.3em;
   558    693   			}
   559         -			section > figure.listing>hr::before {
          694  +			figure.listing>hr::before {
   560    695   				color: transparent;
   561    696   				padding-top: 0;
   562    697   				padding-bottom: 0;
   563    698   			}
   564         -			section > figure.listing>div::before {
          699  +			figure.listing>div::before {
   565    700   				color: @fg;
   566    701   			}
   567         -			section > figure.listing>div:last-child::before {
          702  +			figure.listing>div:last-child::before {
   568    703   				padding-bottom: 0.5em;
   569    704   			}
   570         -			section > figure.listing>figcaption:first-child {
          705  +			figure.listing>figcaption:first-child {
   571    706   				border: none;
   572    707   				border-bottom: 1px solid @fg;
   573    708   			}
   574         -			section > figure.listing>figcaption::after {
          709  +			figure.listing>figcaption::after {
   575    710   				display: block;
   576    711   				float: right;
   577    712   				font-weight: normal;
   578    713   				font-style: italic;
   579    714   				font-size: 70%;
   580    715   				padding-top: 0.3em;
   581    716   			}
   582         -			section > figure.listing>figcaption {
          717  +			figure.listing>figcaption {
   583    718   				font-family: sans-serif;
   584    719   				font-size: 120%;
   585    720   				padding: 0.2em 0.4em;
   586    721   				border: none;
   587    722   				color: @tone(2);
   588    723   			}
   589         -			section > figure.listing > hr {
          724  +			figure.listing > hr {
   590    725   				border: none;
   591    726   				margin: 0;
   592    727   				height: 0.7em;
   593    728   				counter-increment: line-number;
   594    729   			}
   595    730   		]];
   596    731   	}
................................................................................
   604    739   		stylesets = stylesets;
   605    740   		stylesets_active = stylesNeeded;
   606    741   		obj_htmlid = getSafeID;
   607    742   		-- remaining fields added later
   608    743   	}
   609    744   
   610    745   	local renderJob = doc:job('render_html', nil, render_state_handle)
          746  +	doc.stage.job = renderJob;
   611    747   
   612    748   	local runhook = function(h, ...)
   613    749   		return renderJob:hook(h, render_state_handle, ...)
   614    750   	end
   615    751   
   616         -	local function getSpanRenderers(procs)
          752  +	local tagproc do
          753  +		local elt = function(t,attrs)
          754  +			return f('<%s%s>', t,
          755  +				attrs and ss.reduce(function(a,b) return a..b end, '',
          756  +					ss.map(function(v,k)
          757  +						if v == true
          758  +							then          return ' '..k
          759  +							elseif v then return f(' %s="%s"', k, v)
          760  +						end
          761  +					end, attrs)) or '')
          762  +		end
          763  +
          764  +		tagproc = {
          765  +			toTXT = {
          766  +				tag = function(t,a,v) return v  end;
          767  +				elt = function(t,a)   return '' end;
          768  +				catenate = table.concat;
          769  +			};
          770  +			toIR = {
          771  +				tag = function(t,a,v,o) return {
          772  +					tag = t, attrs = a;
          773  +					nodes = type(v) == 'string' and {v} or v, src = o
          774  +				} end;
          775  +
          776  +				elt = function(t,a,o) return {
          777  +					tag = t, attrs = a, src = o
          778  +				} end;
          779  +
          780  +				catenate = function(...) return ... end;
          781  +			};
          782  +			toHTML = {
          783  +				elt = elt;
          784  +				tag = function(t,attrs,body)
          785  +					return f('%s%s</%s>', elt(t,attrs), body, t)
          786  +				end;
          787  +				catenate = table.concat;
          788  +			};
          789  +		}
          790  +	end
          791  +
          792  +	local function getBaseRenderers(procs, span_renderers)
   617    793   		local tag, elt, catenate = procs.tag, procs.elt, procs.catenate
   618    794   		local htmlDoc = function(title, head, body)
   619    795   			return [[<!doctype html>]] .. tag('html',nil,
   620    796   				tag('head', nil,
   621    797   					elt('meta',{charset = 'utf-8'}) ..
   622    798   					(title and tag('title', nil, title) or '') ..
   623    799   					(head or '')) ..
   624    800   				tag('body', nil, body or ''))
   625    801   		end
   626    802   
   627         -		local span_renderers = {}
   628    803   		local function htmlSpan(spans, block, sec)
   629    804   			local text = {}
   630    805   			for k,v in pairs(spans) do
   631    806   				if type(v) == 'string' then
   632         -					table.insert(text,(v:gsub('[<>&"]',
   633         -						function(x)
          807  +					v=v:gsub('[<>&"]', function(x)
   634    808   							return string.format('&#%02u;', string.byte(x))
   635         -						end)))
          809  +						end)
          810  +					for fn, ext in renderJob:each('hook','render_html_sanitize') do
          811  +						v = fn(renderJob:delegate(ext), v)
          812  +					end
          813  +					table.insert(text,v)
   636    814   				else
   637         -					table.insert(text, span_renderers[v.kind](v, block, sec))
          815  +					table.insert(text, (span_renderers[v.kind](v, block, sec)))
   638    816   				end
   639    817   			end
   640    818   			return table.concat(text)
   641    819   		end
          820  +		return {htmlDoc=htmlDoc, htmlSpan=htmlSpan}
          821  +	end
          822  +
          823  +	local spanparse = function(...)
          824  +		local s = ct.parse_span(...)
          825  +		doc.docjob:hook('meddle_span', s)
          826  +		return s
          827  +	end
          828  +
          829  +	local cssRulesFor = {}
          830  +	local function getSpanRenderers(procs)
          831  +		local tag, elt, catenate = procs.tag, procs.elt, procs.catenate
          832  +		local span_renderers = {}
          833  +		local plainrdr = getBaseRenderers(tagproc.toTXT, span_renderers)
          834  +		local htmlSpan = getBaseRenderers(procs, span_renderers).htmlSpan
   642    835   
   643    836   		function span_renderers.format(sp,...)
   644         -			local tags = { strong = 'strong', emph = 'em', strike = 'del', insert = 'ins', literal = 'code' }
          837  +			local tags = { strong = 'strong', emph = 'em', strike = 'del', insert = 'ins', literal = 'code', variable = 'var'}
   645    838   			if sp.style == 'literal' and not opts['fossil-uv'] then
   646    839   				stylesNeeded.code = true
   647         -			end
   648         -			if sp.style == 'del' or sp.style == 'ins' then
          840  +			elseif sp.style == 'strike' or sp.style == 'insert' then
   649    841   				stylesNeeded.editors_markup = true
          842  +			elseif sp.style == 'variable' then
          843  +				stylesNeeded.var = true
   650    844   			end
   651    845   			return tag(tags[sp.style],nil,htmlSpan(sp.spans,...))
   652    846   		end
   653    847   
   654         -		function span_renderers.term(t,b,s)
          848  +		function span_renderers.deref(t,b,s) -- render a dereference span: string refs become <abbr>, resource refs are embedded
   655    849   			local r = b.origin:ref(t.ref)
   656    850   			local name = t.ref
   657    851   			if name:find'%.' then name = name:match '^[^.]*%.(.+)$' end
   658         -			if type(r) ~= 'string' then
   659         -				b.origin:fail('%s is an object, not a reference', t.ref)
          852  +			if type(r) == 'string' then
          853  +				stylesNeeded.abbr = true
          854  +				return tag('abbr',{title=r},next(t.spans) and htmlSpan(t.spans,b,s) or name)
   660    855   			end
   661         -			stylesNeeded.abbr = true
   662         -			return tag('abbr',{title=r},next(t.spans) and htmlSpan(t.spans,b,s) or name)
   663         -		end
   664         -
   665         -		function span_renderers.macro(m,b,s)
   666         -			local r = b.origin:ref(m.macro)
   667         -			if type(r) ~= 'string' then
   668         -				b.origin:fail('%s is an object, not a reference', t.ref)
          856  +			if r.kind == 'resource' then
          857  +				local rid = getSafeID(r, 'res-')
          858  +				if r.class == 'image' then
          859  +					if not cssRulesFor[r] then
          860  +						local css = prepcss(string.format([[
          861  +							section p > .%s {
          862  +							}
          863  +						]], rid))
          864  +						stylesets[r] = css
          865  +						cssRulesFor[r] = css
          866  +						stylesNeeded[r] = true
          867  +					end
          868  +					return tag('div',{class=rid},catenate{'blaah'})
          869  +				elseif r.class == 'video' then
          870  +					local vid = {}
          871  +					return tag('video',nil,vid)
          872  +				elseif r.class == 'font' then
          873  +					b.origin:fail('fonts cannot be instantiated, use %s directive instead', '%font') -- pass the literal '%font' as an argument: a bare %f in the message is a float conversion
          874  +				end
          875  +			else
          876  +				b.origin:fail('%s is not an object that can be embedded', t.ref)
   669    877   			end
   670         -			local mctx = b.origin:clone()
   671         -			mctx.invocation = m
   672         -			return htmlSpan(ct.parse_span(r, mctx),b,s)
   673    878   		end
   674    879   
   675    880   		function span_renderers.var(v,b,s)
   676    881   			local val
   677    882   			if v.pos then
   678    883   				if not v.origin.invocation then
   679    884   					v.origin:fail 'positional arguments can only be used in a macro invocation'
................................................................................
   686    891   			end
   687    892   			if v.raw then
   688    893   				return val
   689    894   			else
   690    895   				return htmlSpan(ct.parse_span(val, v.origin), b, s)
   691    896   			end
   692    897   		end
          898  +
          899  +		function span_renderers.raw(v,b,s) -- 'raw' span: render its child spans through htmlSpan
          900  +			return htmlSpan(v.spans, b, s)
          901  +		end
   693    902   
   694    903   		function span_renderers.link(sp,b,s)
   695    904   			local href
   696    905   			if b.origin.doc.sections[sp.ref] then
   697    906   				href = '#' .. sp.ref
   698    907   			else
   699    908   				if sp.addr then href = sp.addr else
................................................................................
   701    910   					if type(r) == 'table' then
   702    911   						href = '#' .. getSafeID(r)
   703    912   					else href = r end
   704    913   				end
   705    914   			end
   706    915   			return tag('a',{href=href},next(sp.spans) and htmlSpan(sp.spans,b,s) or href)
   707    916   		end
   708         -		return {
   709         -			span_renderers = span_renderers;
   710         -			htmlSpan = htmlSpan;
   711         -			htmlDoc = htmlDoc;
   712         -		}
          917  +
          918  +		span_renderers['line-break'] = function(sp,b,s) -- 'line-break' span: emit a childless <br> element via elt
          919  +			return elt('br')
          920  +		end
          921  +
          922  +		function span_renderers.macro(m,b,s) -- expand a macro span: resolve its name, then parse and render the referenced text
          923  +			local macroname = plainrdr.htmlSpan( -- the macro name is itself span-parsed and rendered with the plain-text renderer
          924  +				ct.parse_span(m.macro, b.origin), b,s)
          925  +			local r = b.origin:ref(macroname)
          926  +			if type(r) ~= 'string' then
          927  +				b.origin:fail('%s is an object, not a reference', macroname) -- report the resolved name; there is no `t` in this function
          928  +			end
          929  +			local mctx = b.origin:clone()
          930  +			mctx.invocation = m -- the cloned context carries the invocation (var spans check origin.invocation)
          931  +			return htmlSpan(ct.parse_span(r, mctx),b,s)
          932  +		end
          933  +		function span_renderers.math(m,b,s) -- 'math' span: wrap the rendered child spans in <span class="equation">
          934  +			stylesNeeded.math = true -- flag the 'math' entry in stylesNeeded
          935  +			return tag('span',{class='equation'},htmlSpan(m.spans, b, s))
          936  +		end;
          937  +		function span_renderers.directive(d,b,s) -- span directive: returns output only for the fall-through case
          938  +			if d.ext == 'html' then -- empty branch: directives addressed to 'html' currently produce nothing
          939  +			elseif b.origin.doc:allow_ext(d.ext) then -- empty branch: nothing is produced here for an allowed extension
          940  +			elseif d.crit then
          941  +				b.origin:fail('critical extension %s unavailable', d.ext)
          942  +			elseif d.failthru then
          943  +				return htmlSpan(d.spans, b, s) -- render the fallback spans stored on the directive
          944  +			end
          945  +		end
          946  +		function span_renderers.footnote(f,b,s) -- footnote reference: link text plus superscript number; registers the note in `footnotes`
          947  +			stylesNeeded.footnote = true
          948  +			local source, sid, ssec = b.origin:ref(f.ref)
          949  +			local cnc = getSafeID(ssec) .. ' ' .. sid -- key used to look up / register this note in `footnotes`
          950  +			local fn
          951  +			if footnotes[cnc] then
          952  +				fn = footnotes[cnc] -- already registered: reuse the stored note and its number
          953  +			else
          954  +				footnotecount = footnotecount + 1
          955  +				fn = {num = footnotecount, origin = b.origin, fnid=cnc, source = source}
          956  +				fn.id = getSafeID(fn)
          957  +				footnotes[cnc] = fn
          958  +			end
          959  +			return tag('a', {href='#'..fn.id}, htmlSpan(f.spans, b, s) .. -- forward block and section so nested renderers can read b.origin
          960  +						tag('sup',nil, fn.num))
          961  +		end
          962  +
          963  +		return span_renderers
   713    964   	end
   714         -
   715    965   
   716    966   	local function getBlockRenderers(procs, sr)
   717    967   		local tag, elt, catenate = procs.tag, procs.elt, procs.catenate
   718    968   		local null = function() return catenate{} end
   719    969   
   720    970   		local block_renderers = {
   721    971   			anchor = function(b,s)
................................................................................
   766   1016   					if #l > 0 then
   767   1017   						return tag('div',nil,sr.htmlSpan(l, b, s))
   768   1018   					else
   769   1019   						return elt('hr')
   770   1020   					end
   771   1021   				end, b.lines)
   772   1022   				if b.title then
   773         -					table.insert(nodes,1,tag('figcaption',nil,sr.htmlSpan(b.title)))
         1023  +					table.insert(nodes,1, tag('figcaption',nil,sr.htmlSpan(b.title)))
   774   1024   				end
   775   1025   				if b.lang then langsused[b.lang] = true end
   776   1026   				return tag('figure', {class='listing', lang=b.lang, id=b.id and getSafeID(b)}, catenate(nodes))
   777   1027   			end;
   778   1028   			aside = function(b,s)
   779   1029   				local bn = {}
   780         -				for _,v in pairs(b.lines) do
   781         -					table.insert(bn, tag('p', {}, sr.htmlSpan(v, b, s)))
         1030  +				stylesNeeded.aside = true
         1031  +				if #b.lines == 1 then
         1032  +					bn[1] = sr.htmlSpan(b.lines[1], b, s)
         1033  +				else
         1034  +					for _,v in pairs(b.lines) do
         1035  +						table.insert(bn, tag('p', {}, sr.htmlSpan(v, b, s)))
         1036  +					end
   782   1037   				end
   783   1038   				return tag('aside', {}, bn)
   784   1039   			end;
   785         -			['break'] = function() --[[nop]] end;
         1040  +			['break'] = function() -- HACK
         1041  +				-- lists need to be rewritten to work like asides
         1042  +				return '';
         1043  +			end;
   786   1044   		}
   787   1045   		return block_renderers;
   788   1046   	end
   789   1047   
   790   1048   	local function getRenderers(procs)
   791         -		local r = getSpanRenderers(procs)
         1049  +		local span_renderers = getSpanRenderers(procs)
         1050  +		local r = getBaseRenderers(procs,span_renderers)
   792   1051   		r.block_renderers = getBlockRenderers(procs, r)
   793   1052   		return r
   794         -	end
   795         -
   796         -	local tagproc do
   797         -		local elt = function(t,attrs)
   798         -			return f('<%s%s>', t,
   799         -				attrs and ss.reduce(function(a,b) return a..b end, '', 
   800         -					ss.map(function(v,k)
   801         -						if v == true
   802         -							then          return ' '..k
   803         -							elseif v then return f(' %s="%s"', k, v)
   804         -						end
   805         -					end, attrs)) or '')
   806         -		end
   807         -
   808         -		tagproc = {
   809         -			toTXT = {
   810         -				tag = function(t,a,v) return v  end;
   811         -				elt = function(t,a)   return '' end;
   812         -				catenate = table.concat;
   813         -			};
   814         -			toIR = {
   815         -				tag = function(t,a,v,o) return {
   816         -					tag = t, attrs = a;
   817         -					nodes = type(v) == 'string' and {v} or v, src = o
   818         -				} end;
   819         -				
   820         -				elt = function(t,a,o) return {
   821         -					tag = t, attrs = a, src = o
   822         -				} end;
   823         -
   824         -				catenate = function(...) return ... end;
   825         -			};
   826         -			toHTML = {
   827         -				elt = elt;
   828         -				tag = function(t,attrs,body)
   829         -					return f('%s%s</%s>', elt(t,attrs), body, t)
   830         -				end;
   831         -				catenate = table.concat;
   832         -			};
   833         -		}
   834   1053   	end
   835   1054   
   836   1055   	local astproc = {
   837   1056   		toHTML = getRenderers(tagproc.toHTML);
   838   1057   		toTXT  = getRenderers(tagproc.toTXT);
   839   1058   		toIR   = { };
   840   1059   	}
................................................................................
   853   1072   	local ir = {}
   854   1073   	local dr = astproc.toHTML -- default renderers
   855   1074   	local plainr = astproc.toTXT
   856   1075   	local irBlockRdrs = astproc.toIR.block_renderers;
   857   1076   
   858   1077   	render_state_handle.ir = ir;
   859   1078   
         1079  +	local function renderBlocks(blocks, irs, sec) -- render `blocks` to IR nodes appended to irs.nodes; `sec` is the owning section, nil when there is none
         1080  +		for i, block in ipairs(blocks) do
         1081  +			local rd
         1082  +			if irBlockRdrs[block.kind] then
         1083  +				rd = irBlockRdrs[block.kind](block,sec)
         1084  +			else
         1085  +				local rdr = renderJob:proc('render',block.kind,'html') -- no built-in renderer: ask the render job for one
         1086  +				if rdr then
         1087  +					rd = rdr({
         1088  +						state = render_state_handle;
         1089  +						tagproc = tagproc.toIR;
         1090  +						astproc = astproc.toIR;
         1091  +					}, block, sec)
         1092  +				end
         1093  +			end
         1094  +			if rd then
         1095  +				if opts['heading-anchors'] and sec and block == sec.heading_node then -- only section headings get anchors; skipped when no section is given
         1096  +					stylesNeeded.headingAnchors = true
         1097  +					table.insert(rd.nodes, ' ')
         1098  +					table.insert(rd.nodes, {
         1099  +						tag = 'a';
         1100  +						attrs = {href = '#' .. irs.attrs.id, class='anchor'};
         1101  +						nodes = {type(opts['heading-anchors'])=='string' and opts['heading-anchors'] or '&sect;'};
         1102  +					})
         1103  +				end
         1104  +				if rd.src and rd.src.origin.lang then
         1105  +					if not rd.attrs then rd.attrs = {} end
         1106  +					rd.attrs.lang = rd.src.origin.lang
         1107  +				end
         1108  +				table.insert(irs.nodes, rd)
         1109  +				runhook('ir_section_node_insert', rd, irs, sec)
         1110  +			end
         1111  +		end
         1112  +	end
   860   1113   	runhook('ir_assemble', ir)
   861   1114   	for i, sec in ipairs(doc.secorder) do
   862   1115   		if doctitle == nil and sec.depth == 1 and sec.heading_node then
   863   1116   			doctitle = astproc.toTXT.htmlSpan(sec.heading_node.spans, sec.heading_node, sec)
   864   1117   		end
   865   1118   		local irs
   866   1119   		if sec.kind == 'ordinary' then
   867   1120   			if #(sec.blocks) > 0 then
   868   1121   				irs = {tag='section',attrs={id = getSafeID(sec)},nodes={}}
   869         -
   870   1122   				runhook('ir_section_build', irs, sec)
   871         -				
   872         -				for i, block in ipairs(sec.blocks) do
   873         -					local rd
   874         -					if irBlockRdrs[block.kind] then
   875         -						rd = irBlockRdrs[block.kind](block,sec)
   876         -					else
   877         -						local rdr = renderJob:proc('render',block.kind,'html')
   878         -						if rdr then
   879         -							rd = rdr({
   880         -								state = render_state_handle;
   881         -								tagproc = tagproc.toIR;
   882         -								astproc = astproc.toIR;
   883         -							}, block, sec)
   884         -						end
   885         -					end
   886         -					if rd then
   887         -						if opts['heading-anchors'] and block == sec.heading_node then
   888         -							stylesNeeded.headingAnchors = true
   889         -							table.insert(rd.nodes, ' ')
   890         -							table.insert(rd.nodes, {
   891         -								tag = 'a';
   892         -								attrs = {href = '#' .. irs.attrs.id, class='anchor'};
   893         -								nodes = {type(opts['heading-anchors'])=='string' and opts['heading-anchors'] or '&sect;'};
   894         -							})
   895         -						end
   896         -						table.insert(irs.nodes, rd)
   897         -						runhook('ir_section_node_insert', rd, irs, sec)
   898         -					end
   899         -				end
         1123  +				renderBlocks(sec.blocks, irs, sec)
   900   1124   			end
   901   1125   		elseif sec.kind == 'blockquote' then
   902   1126   		elseif sec.kind == 'listing' then
   903   1127   		elseif sec.kind == 'embed' then
   904   1128   		end
   905   1129   		if irs then table.insert(ir, irs) end
   906   1130   	end
         1131  +
         1132  +	for _, fn in pairs(footnotes) do
         1133  +		local tag = tagproc.toIR.tag
         1134  +		local body = {nodes={}}
         1135  +		local ftir = {}
         1136  +		for l in fn.source:gmatch('([^\n]*)') do
         1137  +			ct.parse_line(l, fn.origin, ftir)
         1138  +		end
         1139  +		renderBlocks(ftir,body)
         1140  +		local note = tag('div',{class='footnote',id=fn.id}, {
         1141  +			tag('div',{class='number'}, tostring(fn.num)),
         1142  +			tag('div',{class='text'}, body.nodes),
         1143  +			tag('a',{href='#0'},'close')
         1144  +		})
         1145  +		table.insert(ir, note)
         1146  +	end
   907   1147   
   908   1148   	-- restructure passes
   909   1149   	runhook('ir_restructure_pre', ir)
   910   1150   	
   911   1151   	---- list insertion pass
   912   1152   	local lists = {}
   913   1153   	for _, sec in pairs(ir) do
................................................................................
  1033   1273   			local tonespan = opts.accent and .1 or 0
  1034   1274   			local tbg = opts['dark-on-light'] and 1.0 - tonespan or tonespan
  1035   1275   			local tfg = opts['dark-on-light'] and tonespan or 1.0 - tonespan
  1036   1276   			if var == 'bg' then
  1037   1277   				return tone(tbg,nil,nil,tonumber(alpha))
  1038   1278   			elseif var == 'fg' then
  1039   1279   				return tone(tfg,nil,nil,tonumber(alpha))
         1280  +			elseif var == 'width' then
         1281  +				return opts['width'] or '100vw'
  1040   1282   			elseif var == 'tone' then
  1041   1283   				local l, sep, sat
  1042   1284   				for i=1,3 do -- 🙄
  1043   1285   					l,sep,sat = param:match('^%('..string.rep('([^%s]*)%s*',i)..'%)$')
  1044   1286   					if l then break end
  1045   1287   				end
  1046   1288   				l = ss.math.lerp(tonumber(l), tbg, tfg)
................................................................................
  1124   1366   				kind = 'var';
  1125   1367   				pos = pos;
  1126   1368   				raw = raw;
  1127   1369   				var = not pos and s or nil;
  1128   1370   				origin = c:clone();
  1129   1371   			}
  1130   1372   		end
         1373  +	end
         1374  +	local function insert_span_directive(crit, failthru) -- build a span parser for %-directives; crit/failthru are stored on the resulting span
         1375  +		return function(s,c) -- s: directive text; c: parse context (c.doc.enc is used as the encoding)
         1376  +			local args = ss.str.breakwords(c.doc.enc, s, 1)
         1377  +			local brksyms = map(c.doc.enc.encodeUCS, {
         1378  +				'.', ',', ':', ';', '!', '$', '&', '^',
         1379  +				'/', '?', '@', '='
         1380  +			})
         1381  +			local brkhash = {} for _,s in pairs(brksyms) do
         1382  +				brkhash[s] = true
         1383  +			end
         1384  +
         1385  +			local extname = ''
         1386  +			local sym
         1387  +			local cmd = ''
         1388  +			for ch,p in ss.str.each(c.doc.enc, args[1]) do -- split args[1] into extname, a run of break symbols, and cmd
         1389  +				if sym == nil then
         1390  +					if brkhash[ch] then
         1391  +						sym = ch
         1392  +					else
         1393  +						extname = extname .. ch
         1394  +					end
         1395  +				elseif brkhash[ch] then
         1396  +					sym = sym .. ch -- string concatenation; `+` would attempt arithmetic on the characters
         1397  +				else
         1398  +					cmd = cmd .. ch
         1399  +				end
         1400  +			end
         1401  +			if cmd == '' then cmd = nil end
         1402  +			local spans if failthru then -- fall-through spans are only parsed when failthru is set
         1403  +				spans = ct.parse_span(args[2], c)
         1404  +			end
         1405  +			return {
         1406  +				kind = 'directive';
         1407  +				ext = extname;
         1408  +				cmd = cmd;
         1409  +				args = args;
         1410  +				crit = crit;
         1411  +				failthru = failthru;
         1412  +				spans = spans;
         1413  +			}
         1414  +		end
  1131   1415   	end
  1132   1416   	ct.spanctls = {
  1133   1417   		{seq = '!', parse = formatter 'emph'};
  1134   1418   		{seq = '*', parse = formatter 'strong'};
  1135   1419   		{seq = '~', parse = formatter 'strike'};
  1136         -		{seq = '+', parse = formatter 'inser'};
         1420  +		{seq = '+', parse = formatter 'insert'};
  1137   1421   		{seq = '\\', parse = function(s, c) -- raw
  1138         -			return s
  1139         -		end};
  1140         -		{seq = '$\\', parse = function(s, c) -- raw
  1141   1422   			return {
  1142         -				kind = 'format';
  1143         -				style = 'literal';
         1423  +				kind = 'raw';
  1144   1424   				spans = {s};
  1145   1425   				origin = c:clone();
  1146   1426   			}
  1147   1427   		end};
  1148         -		{seq = '$', parse = formatter 'literal'};
         1428  +		{seq = '`\\', parse = function(s, c) -- raw
         1429  +			local o = c:clone();
         1430  +			local str = ''
         1431  +			for c, p in ss.str.each(c.doc.enc, s) do
         1432  +				local q = p:esc()
         1433  +				if q then
         1434  +					str = str ..  q
         1435  +					p.next.byte = p.next.byte + #q
         1436  +				else
         1437  +					str = str .. c
         1438  +				end
         1439  +			end
         1440  +			return {
         1441  +				kind = 'format';
         1442  +				style = 'literal';
         1443  +				spans = {{
         1444  +					kind = 'raw';
         1445  +					spans = {str};
         1446  +					origin = o;
         1447  +				}};
         1448  +				origin = o;
         1449  +			}
         1450  +		end};
         1451  +		{seq = '`', parse = formatter 'literal'};
         1452  +		{seq = '$', parse = formatter 'variable'};
         1453  +		{seq = '^', parse = function(s,c) --footnotes
         1454  +			local r, t = s:match '^([^%s]+)%s*(.-)$'
         1455  +			return {
         1456  +				kind = 'footnote';
         1457  +				ref = r;
         1458  +				spans = ct.parse_span(t, c);
         1459  +				origin = c:clone();
         1460  +			}
         1461  +		-- TODO support for footnote sections
         1462  +		end};
         1463  +		{seq = '=', parse = function(s,c) --math mode
         1464  +			local tx = {
         1465  +				['%*'] = '×';
         1466  +				['/'] = '÷';
         1467  +			}
         1468  +			for k,v in pairs(tx) do s = s:gsub(k,v) end
         1469  +			s=s:gsub('%^([0-9]+)', function(num)
         1470  +				local sup = {'⁰','¹','²','³','⁴','⁵','⁶','⁷','⁸','⁹'};
         1471  +				local r = ''
         1472  +				for i=1,#num do
         1473  +					r = r .. sup[1 + (num:byte(i) - 0x30)]
         1474  +				end
         1475  +				return r
         1476  +			end)
         1477  +			local m = {s} --TODO
         1478  +			return {
         1479  +				kind = 'math';
         1480  +				original = s;
         1481  +				spans = m;
         1482  +				origin = c:clone();
         1483  +			};
         1484  +		end};
  1149   1485   		{seq = '&', parse = function(s, c)
  1150   1486   			local r, t = s:match '^([^%s]+)%s*(.-)$'
  1151   1487   			return {
  1152         -				kind = 'term';
         1488  +				kind = 'deref';
  1153   1489   				spans = (t and t ~= "") and ct.parse_span(t, c) or {};
  1154   1490   				ref = r; 
  1155   1491   				origin = c:clone();
  1156   1492   			}
  1157   1493   		end};
  1158   1494   		{seq = '^', parse = function(s, c)
  1159   1495   			local fn, t = s:match '^([^%s]+)%s*(.-)$'
................................................................................
  1165   1501   			}
  1166   1502   		end};
  1167   1503   		{seq = '>', parse = insert_link};
  1168   1504   		{seq = '→', parse = insert_link};
  1169   1505   		{seq = '🔗', parse = insert_link};
  1170   1506   		{seq = '##', parse = insert_var_ref(true)};
  1171   1507   		{seq = '#', parse = insert_var_ref(false)};
         1508  +		{seq = '%%', parse = function() --[[NOP]] end};
         1509  +		{seq = '%!', parse = insert_span_directive(true,false)};
         1510  +		{seq = '%:', parse = insert_span_directive(false,true)};
         1511  +		{seq = '%', parse = insert_span_directive(false,false)};
  1172   1512   	}
  1173   1513   end
  1174   1514   
  1175   1515   function ct.parse_span(str,ctx)
  1176   1516   	local function delimited(start, stop, s)
  1177   1517   		local r = { pcall(ss.str.delimit, nil, start, stop, s) }
  1178   1518   		if r[1] then return table.unpack(r, 2) end
  1179   1519   		ctx:fail(tostring(r[2]))
  1180   1520   	end
  1181   1521   	local buf = ""
  1182   1522   	local spans = {}
  1183   1523   	local function flush()
  1184   1524   		if buf ~= "" then
         1525  +	-- 			for fn, ext in ctx.doc.docjob:each('hook','doc_meddle_string') do
         1526  +	-- 				buf = fn(ctx.doc.docjob:delegate(ext), ctx, buf)
         1527  +	-- 			end
  1185   1528   			table.insert(spans, buf)
  1186   1529   			buf = ""
  1187   1530   		end
  1188   1531   	end
  1189   1532   	local skip = false
  1190         -	for c,p in eachcode(str) do
  1191         -		if skip == true then
  1192         -			skip = false
  1193         -			buf = buf .. c
  1194         -		elseif c == '\\' then
  1195         -			skip = true
         1533  +	for c,p in ss.str.each(ctx.doc.enc,str) do
         1534  +		local ba, ca, es = ctx.doc.enc.parse_escape(str:sub(p.byte))
         1535  +		if es then
         1536  +			flush()
         1537  +			table.insert(spans, {
         1538  +				kind = 'raw';
         1539  +				spans = {es};
         1540  +				origin = ctx:clone()
         1541  +			})
         1542  +			p.next.byte = p.next.byte + ba;
         1543  +			p.next.code = p.next.code + ca;
  1196   1544   		elseif c == '{' then
  1197   1545   			flush()
  1198   1546   			local substr, following = delimited('{','}',str:sub(p.byte))
  1199   1547   			local splitstart, splitstop = substr:find'%s+'
  1200   1548   			local id, argstr
  1201   1549   			if splitstart then
  1202   1550   				id, argstr = substr:sub(1,splitstart-1), substr:sub(splitstop+1)
................................................................................
  1214   1562   				local i = 1
  1215   1563   				while i <= #argstr do
  1216   1564   					while i<=#argstr and (argstr:sub(i,i) ~= '|' or argstr:sub(i-1,i) == '\\|') do
  1217   1565   						i = i + 1
  1218   1566   					end
  1219   1567   					local arg = argstr:sub(start, i == #argstr and i or i-1)
  1220   1568   					start = i+1
         1569  +					arg=arg:gsub('\\|','|')
  1221   1570   					table.insert(o.args, arg)
  1222   1571   					i = i + 1
  1223   1572   				end
  1224   1573   			end
  1225   1574   
  1226   1575   			p.next.byte = p.next.byte + following - 1
  1227   1576   			table.insert(spans,o)
................................................................................
  1236   1585   					table.insert(spans, i.parse(substr:sub(1+#i.seq), ctx))
  1237   1586   					break
  1238   1587   				end
  1239   1588   			end
  1240   1589   			if not found then
  1241   1590   				ctx:fail('no recognized control sequence in [%s]', substr)
  1242   1591   			end
         1592  +		elseif c == '\n' then
         1593  +			flush()
         1594  +			table.insert(spans,{kind='line-break',origin=ctx:clone()})
  1243   1595   		else
  1244   1596   			buf = buf .. c
  1245   1597   		end
  1246   1598   	end
  1247   1599   	flush()
  1248   1600   	return spans
  1249   1601   end
  1250   1602   
  1251   1603   local function
  1252   1604   blockwrap(fn)
  1253         -	return function(l,c,j)
  1254         -		local block = fn(l,c,j)
         1605  +	return function(l,c,j,d)
         1606  +		local block = fn(l,c,j,d)
  1255   1607   		block.origin = c:clone();
  1256         -		table.insert(c.sec.blocks, block);
         1608  +		table.insert(d, block);
  1257   1609   		j:hook('block_insert', c, block, l)
         1610  +		if block.spans then
         1611  +			c.doc.docjob:hook('meddle_span', block.spans, block)
         1612  +		end
  1258   1613   	end
  1259   1614   end
  1260   1615   
  1261   1616   local insert_paragraph = blockwrap(function(l,c)
  1262   1617   	if l:sub(1,1) == '.' then l = l:sub(2) end
  1263   1618   	return {
  1264   1619   		kind = "paragraph";
................................................................................
  1282   1637   	if t and t ~= "" then
  1283   1638   		local heading = {
  1284   1639   			kind = "label";
  1285   1640   			spans = ct.parse_span(t,c);
  1286   1641   			origin = s.origin;
  1287   1642   			captions = s;
  1288   1643   		}
         1644  +		c.doc.docjob:hook('meddle_span', heading.spans, heading)
  1289   1645   		table.insert(s.blocks, heading)
  1290   1646   		s.heading_node = heading
  1291   1647   	end
  1292   1648   	c.sec = s
  1293   1649   
  1294   1650   	j:hook('section_attach', c, s)
  1295   1651   end
................................................................................
  1299   1655   	c.doc.meta[key] = val
  1300   1656   	j:hook('metadata_set', key, val)
  1301   1657   end
  1302   1658   local dextctl = function(w,c)
  1303   1659   	local mode, exts = w(1)
  1304   1660   	for e in exts:gmatch '([^%s]+)' do
  1305   1661   		if mode == 'uses' then
         1662  +			c.doc.ext.use[e] = true
  1306   1663   		elseif mode == 'needs' then
         1664  +			c.doc.ext.need[e] = true
  1307   1665   		elseif mode == 'inhibits' then
         1666  +			c.doc.ext.inhibit[e] = true
  1308   1667   		end
  1309   1668   	end
  1310   1669   end
  1311   1670   local dcond = function(w,c)
  1312   1671   	local mode, cond, exp = w(2)
  1313   1672   	c.hide_next = mode == 'unless'
  1314   1673   end;
................................................................................
  1315   1674   ct.directives = {
  1316   1675   	author = dsetmeta;
  1317   1676   	license = dsetmeta;
  1318   1677   	keywords = dsetmeta;
  1319   1678   	desc = dsetmeta;
  1320   1679   	when = dcond;
  1321   1680   	unless = dcond;
         1681  +	pragma = function(w,c)
         1682  +	end;
         1683  +	lang = function(w,c)
         1684  +		local _, op, l = w(2)
         1685  +		local langstack = c.doc.stage.langstack
         1686  +		if op == 'is' then
         1687  +			langstack[math.max(1, #langstack)] = l
         1688  +		elseif op == 'push' then
         1689  +			table.insert(langstack, l)
         1690  +		elseif op == 'pop' then
         1691  +			if next(langstack) then
         1692  +				langstack[#langstack] = nil
         1693  +			end
         1694  +		elseif op == 'sec' then
         1695  +			c.sec.lang = l
         1696  +		else c:fail('bad language directive “%s”', op) end
         1697  +		c.lang = langstack[#langstack]
         1698  +	end;
  1322   1699   	expand = function(w,c)
  1323   1700   		local _, m = w(1)
  1324   1701   		if m ~= 'off' then
  1325         -			c.expand_next = 1
         1702  +			c.doc.stage.expand_next = 1
  1326   1703   		else
  1327         -			c.expand_next = 0
         1704  +			c.doc.stage.expand_next = 0
  1328   1705   		end
  1329   1706   	end;
  1330   1707   }
  1331   1708   
  1332   1709   local function insert_table_row(l,c,j)
  1333   1710   	local row = {}
  1334   1711   	local buf
................................................................................
  1335   1712   	local flush = function()
  1336   1713   		if buf then
  1337   1714   			buf.str = buf.str:gsub('%s+$','')
  1338   1715   			table.insert(row, buf)
  1339   1716   		end
  1340   1717   		buf = { str = '' }
  1341   1718   	end
  1342         -	for c,p in eachcode(l) do
         1719  +	for c,p in ss.str.each(c.doc.enc,l) do
  1343   1720   		if c == '|' or c == '+' and (p.code == 1 or l:sub(p.byte-1,p.byte-1)~='\\') then
  1344   1721   			flush()
  1345   1722   			buf.header = c == '+'
  1346   1723   		elseif c == ':' then
  1347   1724   			local lst = l:sub(p.byte-#c,p.byte-#c)
  1348   1725   			local nxt = l:sub(p.next.byte,p.next.byte)
  1349   1726   			if lst == '|' or lst == '+' and l:sub(p.byte-2,p.byte-2) ~= '\\' then
................................................................................
  1371   1748   		else
  1372   1749   			buf.str = buf.str .. c
  1373   1750   		end
  1374   1751   	end
  1375   1752   	if buf.str ~= '' then flush() end 
  1376   1753   	for _,v in pairs(row) do
  1377   1754   		v.spans = ct.parse_span(v.str, c)
         1755  +		c.doc.docjob:hook('meddle_span', v.spans, v)
  1378   1756   	end
  1379   1757   	if #c.sec.blocks > 1 and c.sec.blocks[#c.sec.blocks].kind == 'table' then
  1380   1758   		local tbl = c.sec.blocks[#c.sec.blocks]
  1381   1759   		table.insert(tbl.rows, row)
  1382   1760   		j:hook('block_table_attach', c, tbl, row, l)
  1383   1761   		j:hook('block_table_row_insert', c, tbl, row, l)
  1384   1762   	else
................................................................................
  1398   1776   	{seq = '¶', fn = insert_paragraph};
  1399   1777   	{seq = '❡', fn = insert_paragraph};
  1400   1778   	{seq = '#', fn = insert_section};
  1401   1779   	{seq = '§', fn = insert_section};
  1402   1780   	{seq = '+', fn = insert_table_row};
  1403   1781   	{seq = '|', fn = insert_table_row};
  1404   1782   	{seq = '│', fn = insert_table_row};
  1405         -	{seq = '!', fn = function(l,c,j) 
  1406         -		local last = c.sec.blocks[#c.sec.blocks]
         1783  +	{seq = '!', fn = function(l,c,j,d)
         1784  +		local last = d[#d]
  1407   1785   		local txt = l:match '^%s*!%s*(.-)$'
  1408   1786   		if (not last) or last.kind ~= 'aside' then
  1409   1787   			local aside = {
  1410   1788   				kind = 'aside';
  1411         -				lines = { ct.parse_span(txt, c) }
         1789  +				lines = { ct.parse_span(txt, c) };
         1790  +				origin = c:clone();
  1412   1791   			}
  1413         -			c:insert(aside)
         1792  +			c.doc.docjob:hook('meddle_span', aside.lines[1], aside)
         1793  +			table.insert(d,aside)
  1414   1794   			j:hook('block_aside_insert', c, aside, l)
  1415   1795   			j:hook('block_aside_line_insert', c, aside, aside.lines[1], l)
  1416   1796   			j:hook('block_insert', c, aside, l)
  1417   1797   		else
  1418   1798   			local sp = ct.parse_span(txt, c)
         1799  +			c.doc.docjob:hook('meddle_span', sp, last)
  1419   1800   			table.insert(last.lines, sp)
  1420   1801   			j:hook('block_aside_attach', c, last, sp, l)
  1421   1802   			j:hook('block_aside_line_insert', c, last, sp, l)
  1422   1803   		end
  1423   1804   	end};
  1424   1805   	{pred = function(s,c) return s:match'^[*:]' end, fn = blockwrap(function(l,c) -- list
  1425   1806   		local stars = l:match '^([*:]+)'
................................................................................
  1430   1811   		return {
  1431   1812   			kind = 'list-item';
  1432   1813   			depth = depth;
  1433   1814   			ordered = ordered;
  1434   1815   			spans = ct.parse_span(txt, c);
  1435   1816   		}
  1436   1817   	end)};
  1437         -	{seq = '\t', fn = function(l,c,j)
  1438         -		local ref, val = l:match '\t+([^:]+):%s*(.*)$'
  1439         -		c.sec.refs[ref] = val
  1440         -		j:hook('section_ref_attach', c, ref, val, l)
         1818  +	{seq = '\t\t', fn = function(l,c,j,d)
         1819  +		local last = d[#d]
         1820  +		if (not last) or (last.kind ~= 'reference') then
         1821  +			c:fail('reference continuations must immediately follow a reference')
         1822  +		end
         1823  +		local str = l:match '^\t\t(.-)%s*$'
         1824  +		last.val = last.val .. '\n' .. str
         1825  +		c.sec.refs[last.key] = last.val
  1441   1826   	end};
  1442         -	{seq = '%', fn = function(l,c,j) -- directive
         1827  +	{seq = '\t', fn = blockwrap(function(l,c,j,d)
         1828  +		local ref, val = l:match '\t+([^:]+):%s*(.*)$'
         1829  +		local last = d[#d]
         1830  +		local rsrc
         1831  +		if last and last.kind == 'resource' then
         1832  +			last.props[ref] = val
         1833  +			rsrc = last
         1834  +		elseif last and last.kind == 'reference' and last.rsrc then
         1835  +			last.rsrc.props[ref] = val
         1836  +			rsrc = last.rsrc
         1837  +		else
         1838  +			c.sec.refs[ref] = val
         1839  +		end
         1840  +		j:hook('section_ref_attach', c, ref, val, l)
         1841  +		return {
         1842  +			kind = 'reference';
         1843  +			rsrc = rsrc;
         1844  +			key = ref;
         1845  +			val = val;
         1846  +		}
         1847  +	end)};
         1848  +	{seq = '%', fn = function(l,c,j,d) -- directive
  1443   1849   		local crit, cmdline = l:match '^%%([!%%]?)%s*(.*)$'
  1444   1850   		local words = function(i)
  1445   1851   			local wds = {}
  1446   1852   			if i == 0 then return cmdline end
  1447   1853   			for w,pos in cmdline:gmatch '([^%s]+)()' do
  1448   1854   				table.insert(wds, w)
  1449   1855   				i = i - 1
  1450   1856   				if i == 0 then
  1451         -					table.insert(wds,cmdline:sub(pos))
         1857  +					table.insert(wds,(cmdline:sub(pos):match('^%s*(.-)%s*$')))
  1452   1858   					return table.unpack(wds)
  1453   1859   				end
  1454   1860   			end
  1455   1861   		end
  1456   1862   
  1457   1863   		local cmd, rest = words(1)
  1458   1864   		if ct.directives[cmd] then
................................................................................
  1459   1865   			ct.directives[cmd](words,c,j)
  1460   1866   		elseif cmd == c.doc.stage.mode['render:format'] then
  1461   1867   			-- this is a directive for the renderer; insert it into the tree as is
  1462   1868   			local dir = {
  1463   1869   				kind = 'directive';
  1464   1870   				critical = crit == '!';
  1465   1871   				words = words;
         1872  +				origin = c;
  1466   1873   			}
  1467         -			c:insert(dir)
         1874  +			table.insert(d, dir)
  1468   1875   			j:hook('block_directive_render', j, c, dir)
  1469   1876   		elseif c.doc:allow_ext(cmd) then -- extension directives begin with their id
  1470   1877   			local ext = ct.ext.loaded[cmd]
  1471   1878   			if ext.directives then
  1472   1879   				local _, topcmd = words(2)
  1473   1880   				if ext.directives[topcmd] then
  1474   1881   					ext.directives[topcmd](j:delegate(ext), c, words)
................................................................................
  1505   1912   			kind = 'code';
  1506   1913   			listing = {
  1507   1914   				kind = 'listing';
  1508   1915   				lang = lang, id = id, title = title and ct.parse_span(title,c);
  1509   1916   				lines = {};
  1510   1917   			}
  1511   1918   		}
         1919  +		if c.doc.stage.expand_next and c.doc.stage.expand_next > 0 then
         1920  +			c.doc.stage.expand_next = c.doc.stage.expand_next - 1
         1921  +			mode.expand = true
         1922  +		end
  1512   1923   		j:hook('mode_switch', c, mode)
  1513   1924   		c.mode = mode
  1514   1925   		if id then
  1515   1926   			if c.sec.refs[id] then c:fail('duplicate ID %s', id) end
  1516   1927   			c.sec.refs[id] = c.mode.listing
  1517   1928   		end
  1518   1929   		j:hook('block_insert', c, mode.listing, l)
  1519   1930   		return c.mode.listing;
  1520   1931   	end)};
  1521   1932   	{pred = function(s,c)
  1522   1933   		if s:match '^[%-_][*_%-%s]+' then return true end
  1523   1934   		if startswith(s, '—') then
  1524         -			for c, p in eachcode(s) do
         1935  +			for c, p in ss.str.each(c.doc.enc,s) do
  1525   1936   				if ({
  1526   1937   					['—'] = true, ['-'] = true, [' '] = true;
  1527   1938   					['*'] = true, ['_'] = true, ['\t'] = true;
  1528   1939   				})[c] ~= true then return false end
  1529   1940   			end
  1530   1941   			return true
  1531   1942   		end
  1532   1943   	end; fn = blockwrap(function()
  1533   1944   		return { kind = 'horiz-rule' }
         1945  +	end)};
         1946  +	{seq='@', fn=blockwrap(function(s,c)
         1947  +		local id = s:match '^@%s*(.-)%s*$'
         1948  +		local rsrc = {
         1949  +			kind = 'resource';
         1950  +			props = {};
         1951  +			id = id;
         1952  +		}
         1953  +		if c.sec.refs[id] then
         1954  +			c:fail('an object with id “%s” already exists in that section',id)
         1955  +		else
         1956  +			c.sec.refs[id] = rsrc
         1957  +		end
         1958  +		return rsrc
  1534   1959   	end)};
  1535   1960   	{fn = insert_paragraph};
  1536   1961   }
  1537   1962   
  1538         -function ct.parse(file, src, mode)
  1539         -	local function
  1540         -	is_whitespace(cp)
  1541         -		return cp == 0x20 or cp == 0xe390
         1963  +function ct.parse_line(l, ctx, dest)
         1964  +	local newspan
         1965  +	local job = ctx.doc.stage.job
         1966  +	job:hook('line_read',ctx,l)
         1967  +	if ctx.mode then
         1968  +		if ctx.mode.kind == 'code' then
         1969  +			if l and l:match '^~~~%s*$' then
         1970  +				job:hook('block_listing_end',ctx,ctx.mode.listing)
         1971  +				job:hook('mode_switch', c, nil)
         1972  +				ctx.mode = nil
         1973  +			else
         1974  +				-- TODO handle formatted code
         1975  +				local newline
         1976  +				if ctx.mode.expand
         1977  +					then newline = ct.parse_span(l, ctx)
         1978  +					else newline = {l}
         1979  +				end
         1980  +				table.insert(ctx.mode.listing.lines, newline)
         1981  +				job:hook('block_listing_newline',ctx,ctx.mode.listing,newline)
         1982  +			end
         1983  +	  else
         1984  +			local mf = job:proc('modes', ctx.mode.kind)
         1985  +			if not mf then
         1986  +				ctx:fail('unimplemented syntax mode %s', ctx.mode.kind)
         1987  +			end
         1988  +			mf(job, ctx, l, dest) --NOTE: you are responsible for triggering the appropriate hooks if you insert anything!
         1989  +		end
         1990  +	else
         1991  +		if l then
         1992  +			local function tryseqs(seqs, ...)
         1993  +				for _, i in pairs(seqs) do
         1994  +					if ((not i.seq ) or startswith(l, i.seq)) and
         1995  +					   ((not i.pred) or i.pred    (l, ctx  )) then
         1996  +						i.fn(l, ctx, job, dest, ...)
         1997  +						return true
         1998  +					end
         1999  +				end
         2000  +				return false
         2001  +			end
         2002  +
         2003  +			if not tryseqs(ct.ctlseqs) then
         2004  +				local found = false
         2005  +
         2006  +				for eb, ext, state in job:each('blocks') do
         2007  +					if tryseqs(eb, state) then found = true break end
         2008  +				end
         2009  +
         2010  +				if not found then
         2011  +					ctx:fail 'incomprehensible input line'
         2012  +				end
         2013  +			end
         2014  +		else
         2015  +			if next(dest) and dest[#dest].kind ~= 'break' then
         2016  +				local brk = {kind='break', origin = ctx:clone()}
         2017  +				job:hook('block_break', ctx, brk, l)
         2018  +				table.insert(dest, brk)
         2019  +			end
         2020  +		end
  1542   2021   	end
         2022  +	job:hook('line_end',ctx,l)
         2023  +end
         2024  +
         2025  +function ct.parse(file, src, mode, setup)
  1543   2026   
  1544   2027   	local ctx = ct.ctx.mk(src)
  1545   2028   	ctx.line = 0
  1546   2029   	ctx.doc = ct.doc.mk()
  1547   2030   	ctx.doc.src = src
  1548         -	ctx.doc.stage = {
  1549         -		kind = 'parse';
  1550         -		mode = mode;
  1551         -	}
  1552   2031   	ctx.sec = ctx.doc:mksec() -- toplevel section
  1553   2032   	ctx.sec.origin = ctx:clone()
         2033  +	ctx.lang = mode['meta:lang']
         2034  +	if mode['parse:enc'] then
         2035  +		local e = ss.str.enc[mode['parse:enc']]
         2036  +		if not e then
         2037  +			ct.exns.enc('requested encoding not supported',mode['parse:enc']):throw()
         2038  +		end
         2039  +		ctx.doc.enc = e
         2040  +	end
  1554   2041   
  1555   2042   	-- create states for extension hooks
  1556   2043   	local job = ctx.doc:job('parse',nil,ctx)
         2044  +	ctx.doc.stage = {
         2045  +		kind = 'parse';
         2046  +		mode = mode;
         2047  +		job = job;
         2048  +		langstack = {ctx.lang};
         2049  +		fontstack = {};
         2050  +	}
         2051  +
         2052  +	local function
         2053  +	is_whitespace(cp)
         2054  +		return ctx.doc.enc.iswhitespace(cp)
         2055  +	end
         2056  +
         2057  +	if setup then setup(ctx) end
         2058  +
  1557   2059   
  1558   2060   	for full_line in file:lines() do ctx.line = ctx.line + 1
  1559   2061   		local l
  1560   2062   		for p, c in utf8.codes(full_line) do
  1561   2063   			if not is_whitespace(c) then
  1562   2064   				l = full_line:sub(p)
  1563   2065   				break
  1564   2066   			end
  1565   2067   		end
  1566         -		job:hook('line_read',ctx,l)
         2068  +		ct.parse_line(l, ctx, ctx.sec.blocks)
         2069  +	end
  1567   2070   
  1568         -		if ctx.mode then
  1569         -			if ctx.mode.kind == 'code' then
  1570         -				if l and l:match '^~~~%s*$' then
  1571         -					job:hook('block_listing_end',ctx,ctx.mode.listing)
  1572         -					job:hook('mode_switch', c, nil)
  1573         -					ctx.mode = nil
  1574         -				else
  1575         -					-- TODO handle formatted code
  1576         -					local newline = {l}
  1577         -					table.insert(ctx.mode.listing.lines, newline)
  1578         -					job:hook('block_listing_newline',ctx,ctx.mode.listing,newline)
  1579         -				end
  1580         -			else
  1581         -				ctx:fail('unimplemented syntax mode %s', ctx.mode.kind)
  1582         -			end
  1583         -		else
  1584         -			if l then
  1585         -				local function tryseqs(seqs, ...)
  1586         -					for _, i in pairs(seqs) do
  1587         -						if  ((not i.seq ) or startswith(l, i.seq)) and
  1588         -							((not i.pred) or i.pred    (l, ctx  )) then
  1589         -							i.fn(l, ctx, job, ...)
  1590         -							return true
  1591         -						end
         2071  +	for i, sec in ipairs(ctx.doc.secorder) do
         2072  +		for refid, r in ipairs(sec.refs) do
         2073  +			if type(r) == 'table' and r.kind == 'resource' and r.props.src then
         2074  +				local lines = ss.str.breaklines(ctx.doc.enc, r.props.src)
         2075  +				local srcs = {}
         2076  +				for i,l in ipairs(lines) do
         2077  +					local args = ss.str.breakwords(ctx.doc.enc, l, 2, {escape=true})
         2078  +					if #args < 3 then
         2079  +						r.origin:fail('invalid syntax for resource %s', t.ref)
         2080  +					end
         2081  +					local mimebreak = function(s)
         2082  +						local wds = ss.str.split(ctx.doc.enc, s, '/', 1, {escape=true})
         2083  +						return wds
  1592   2084   					end
  1593         -					return false
         2085  +					local mime = mimebreak(args[2]);
         2086  +					local mimeclasses = {
         2087  +						['application/svg+xml'] = 'image';
         2088  +					}
         2089  +					local class = mimeclasses[mime]
         2090  +					table.insert(srcs, {
         2091  +						mode = args[1];
         2092  +						mime = mime;
         2093  +						uri = args[3];
         2094  +						class = class or mime[1];
         2095  +					})
  1594   2096   				end
  1595         -
  1596         -				if not tryseqs(ct.ctlseqs) then
  1597         -					local found = false
  1598         -					
  1599         -					for eb, ext, state in job:each('blocks') do
  1600         -						if tryseqs(eb, state) then found = true break end
  1601         -					end
  1602         -
  1603         -					if not found then
  1604         -						ctx:fail 'incomprehensible input line'
  1605         -					end
  1606         -				end
  1607         -			else
  1608         -				if next(ctx.sec.blocks) and ctx.sec.blocks[#ctx.sec.blocks].kind ~= 'break' then
  1609         -					local brk = {kind='break'}
  1610         -					job:hook('block_break', ctx, brk, l)
  1611         -					table.insert(ctx.sec.blocks, brk)
  1612         -				end
         2097  +				 --ideally move this into its own mimetype lib
         2098  +				local kind = r.props.as or srcs[1].class
         2099  +				r.class = kind
         2100  +				r.srcs = srcs
  1613   2101   			end
  1614   2102   		end
  1615         -		job:hook('line_end',ctx,l)
  1616   2103   	end
  1617         -
         2104  +	ctx.doc.stage = nil
         2105  +	ctx.doc.docjob:hook('meddle_ast')
  1618   2106   	return ctx.doc
  1619   2107   end

Modified desk/cortav.xml from [8189edad17] to [b82e1b14f3].

     8      8   -->
     9      9   <language name='Cortav' version='1' kateversion='2.4' section='Markup' extensions='*.ct'>
    10     10   	<highlighting>
    11     11   		<list name='extension-directives'>
    12     12   			<item>uses</item>
    13     13   			<item>needs</item>
    14     14   			<item>inhibits</item>
           15  +		</list>
           16  +		<list name='meta-directives'>
           17  +			<item>author</item>
           18  +			<item>lang</item>
           19  +			<item>pragma</item>
           20  +		</list>
           21  +		<list name='ctl-directives'>
           22  +			<item>when</item>
           23  +			<item>unless</item>
           24  +			<item>cols</item>
           25  +			<item>quote</item>
           26  +			<item>include</item>
           27  +			<item>embed</item>
    15     28   		</list>
    16     29   		<list name='renderer-directives'>
    17     30   			<item>html</item>
    18     31   			<item>groff</item>
    19     32   			<item>ps</item>
    20     33   			<item>tex</item>
    21     34   			<item>plaintext</item>
................................................................................
    24     37   		</list>
    25     38   		<contexts>
    26     39   			<context name='init' attribute='Normal Text' lineEndContext='#pop' fallthroughContext='text'>
    27     40   				<RegExpr String='\\.' attribute='Escaped Char'/>
    28     41   				<RegExpr attribute='Section Cue' context='sec-ident' String='(#|§)+' firstNonSpace='true' />
    29     42   				<StringDetect String='~~~' attribute='Literal Block Cue' firstNonSpace='true' context='literal-block-cue'/>
    30     43   				<RegExpr attribute='List' String='[\*:]+' firstNonSpace='true' context='text' />
           44  +				<Detect2Chars char='%' char1='%' attribute='Comment' context='comment'/>
    31     45   				<Detect2Chars char='%' char1='!' attribute='Critical Directive Cue' context='directive'/>
    32     46   				<DetectChar char='%' attribute='Directive Cue' context='directive'/>
           47  +				<DetectChar char='@' attribute='Resource Cue' context='resource'/>
    33     48   				<DetectChar char='&#9;' attribute='Normal Text' context='refdef-id'/>
    34     49   			</context>
           50  +
           51  +			<context name='comment' attribute='Comment' lineEndContext='#pop'>
           52  +			</context>
           53  +			<context name='error' attribute='Error' lineEndContext='#pop'>
           54  +			</context>
           55  +
           56  +			<context name='resource' attribute='Resource Identifier' lineEndContext='#pop'>
           57  +				<DetectSpaces context='#pop!error' attribute='Error'/>
           58  +			</context>
    35     59   
    36     60   			<context name='sec-ident' attribute='Identifier' lineEndContext='#pop'>
    37     61   				<DetectSpaces context='#pop!sec' attribute='Normal Text'/>
    38     62   			</context>
    39     63   
    40     64   			<context name='sec' attribute='Header' lineEndContext='#pop'>
    41     65   				<IncludeRules context='text'/>
................................................................................
    83    107   				<IncludeRules context='span'/>
    84    108   			</context>
    85    109   
    86    110   			<context name='span-del' attribute='Deleted Text' lineEndContext='#pop'>
    87    111   				<IncludeRules context='span'/>
    88    112   			</context>
    89    113   
    90         -			<context name='span-cue' attribute='Span Cue' lineEndContext='#pop'>
    91         -				<StringDetect attribute='Span Cue' String='$\' context='#pop!flat-span' />
          114  +			<context name='span-cue' attribute='Span Cue' lineEndContext='#pop' fallthroughContext="error">
          115  +				<StringDetect attribute='Span Cue' String='`\' context='#pop!flat-span' />
    92    116   
    93    117   				<DetectChar   attribute='Span Cue' char='!' context='#pop!span-emph' />
    94    118   				<DetectChar   attribute='Span Cue' char='*' context='#pop!span-strong' />
    95    119   				<DetectChar   attribute='Span Cue' char='~' context='#pop!span-del' />
    96    120   
    97         -				<AnyChar      attribute='Span Cue' String='$+🔒' context='#pop!span' />
          121  +				<AnyChar      attribute='Span Cue' String='`$+🔒' context='#pop!span' />
    98    122   				<StringDetect attribute='Span Cue' String='→' context='#pop!ref' />
    99    123   				<StringDetect attribute='Span Cue' String='🔗' context='#pop!ref' />
   100    124   				<DetectChar   attribute='Span Cue' char='>' context='#pop!ref' />
          125  +				<DetectChar   attribute='Span Cue' char='^' context='#pop!ref' />
   101    126   				<DetectChar   attribute='Span Cue' char='&amp;' context='#pop!ref' />
   102    127   				<DetectChar   attribute='Span Cue' char='#' context='#pop!var-ref' />
   103    128   				<DetectChar   attribute='Span Cue' char='\' context='#pop!flat-span' />
          129  +				<Detect2Chars attribute='Comment' char='%' char1='%' context='#pop!inline-comment' />
          130  +				<Detect2Chars attribute='Critical Directive Cue' char='%' char1='!' context='#pop!inline-directive' />
          131  +				<DetectChar   attribute='Directive Cue' char='%' context='#pop!inline-directive' />
   104    132   			</context>
   105    133   
   106    134   			<context name='flat-span' attribute='Unstyled Text' lineEndContext='#pop'>
   107    135   				<Detect2Chars attribute='Escaped Char' context='#stay' char='\' char1=']'/>
   108    136   				<DetectChar attribute='Span Delimiter' context='#pop' char=']'/>
   109    137   			</context>
          138  +
          139  +			<context name='inline-comment' attribute='Comment' lineEndContext='#pop'>
          140  +				<IncludeRules context='flat-span'/>
          141  +			</context>
          142  +
          143  +			<context name='inline-directive' attribute='Directive' lineEndContext='#pop'>
          144  +				<IncludeRules context='flat-span'/>
          145  +				<AnyChar String=".:!#$%@~'&quot;" attribute='Directive Cue'/>
          146  +				<DetectSpaces context='#pop!span'/>
          147  +			</context>
   110    148   
   111    149   			<context name='ref' attribute='Reference' lineEndContext='#pop'>
          150  +				<IncludeRules context='flat-span'/>
   112    151   				<DetectSpaces context='#pop!span'/>
   113    152   			</context>
   114    153   
   115    154   			<context name='var-ref' attribute='Reference' lineEndContext='#pop'>
   116    155   				<WordDetect String="cortav" attribute='Standard Namespace'/>
   117    156   				<WordDetect String="env" attribute='Standard Namespace'/>
   118    157   				<DetectChar attribute='Span Delimiter' context='#pop' char=']'/>
................................................................................
   129    168   			</context>
   130    169   		</contexts>
   131    170   		<itemDatas>
   132    171   			<itemData name='Normal Text' defStyleNum='dsNormal'/>
   133    172   			<itemData name='Styled Text' defStyleNum='dsNormal'/>
   134    173   			<itemData name='Emphatic Text' defStyleNum='dsNormal' italic='true'/>
   135    174   			<itemData name='Strong Text' defStyleNum='dsNormal' bold='true'/>
   136         -			<itemData name='Deleted Text' defStyleNum='dsNormal' strikeout='true'/>
          175  +			<itemData name='Deleted Text' defStyleNum='dsNormal' strikeOut='true'/>
   137    176   				
   138    177   			<itemData name='Section Cue' defStyleNum='dsKeyword' bold='true'/>
   139    178   			<itemData name='Header' defStyleNum='dsControlFlow' underline='true'/>
   140    179   			<itemData name='Identifier' defStyleNum='dsVariable'/>
   141    180   
   142    181   			<itemData name='Unstyled Text' defStyleNum='dsVerbatimString'/>
   143    182   			<itemData name='Escaped Char' defStyleNum='dsSpecialChar'/>
   144    183   			<itemData name='Reference' defStyleNum='dsControlFlow' underline='true'/>
   145    184   			<itemData name='Span Cue' defStyleNum='dsKeyword' bold='true'/>
          185  +			<itemData name='Resource Cue' defStyleNum='dsKeyword' bold='true'/>
          186  +			<itemData name='Resource Identifier' defStyleNum='dsVariable' bold='true'/>
   146    187   			<itemData name='Span Delimiter' defStyleNum='dsKeyword'/>
   147    188   			<itemData name='Directive' defStyleNum='dsAttribute' bold='true'/>
   148    189   			<itemData name='Directive Cue' defStyleNum='dsAttribute'/>
   149    190   			<itemData name='Critical Directive Cue' defStyleNum='dsImport' bold='true'/>
   150    191   			<itemData name='Extension Directive' defStyleNum='dsImport' bold='true'/>
   151    192   			<itemData name='Renderer Directive' defStyleNum='dsExtension' bold='true'/>
   152    193   			<itemData name='Standard Namespace' defStyleNum='dsBuiltIn' bold='true'/>
   153    194   			<itemData name='Comment' defStyleNum='dsComment'/>
          195  +			<itemData name='Error' defStyleNum='dsError'/>
   154    196   			<itemData name='Macro' defStyleNum='dsPreprocessor' bold='true'/>
   155    197   			<itemData name='Macro Delimiter' defStyleNum='dsPreprocessor'/>
   156    198   			<itemData name='Field Delimiter' defStyleNum='dsPreprocessor' bold='true'/>
   157    199   			<itemData name='List' defStyleNum='dsOperator'/>
   158    200   
   159    201   			<itemData name='Literal Block' defStyleNum='dsSpecialString'/>
   160    202   			<itemData name='Literal Block Cue' defStyleNum='dsPreprocessor' bold='true'/>

Modified desk/velartrill-cortav.xml from [356c2a8842] to [51a69a6dad].

    17     17   		<expanded-acronym>Cortav</expanded-acronym>
    18     18   
    19     19   		<generic-icon>x-office-document</generic-icon>
    20     20   		<glob pattern="*.ct"/> <glob pattern="*."/>
    21     21   		<glob pattern="*.cortav"/>
    22     22   		<magic>
    23     23   			<match value="%ct\n" offset="0" type="string"/>
    24         -			<match value="\x03\x07\x3E\x2D" offset="0" type="string"/>
           24  +			<match value="\x3E\x2E\x14\x0C\x01\x04\x00\x00\x00\x03\x07\x3E\x2D" offset="0" type="string"/>
    25     25   		</magic>
    26     26   	</mime-type>
    27     27   	<mime-type type="text/x-cortav-intent">
    28     28   		<comment xml:lang="en">Cortav rendering intent file</comment>
    29     29   		<comment xml:lang="x-ranuir-Latn">tav cunloci Cortavi</comment>
    30     30   		<comment xml:lang="x-ranuir-CR8">  </comment>
    31     31   

Added ext/transmogrify.lua version [ffa0ca0a64].

            1  +local ct = require 'cortav'
            2  +local ss = require 'sirsem'
            3  +
            4  +local patterns = {
            5  +	[ss.str.enc.utf8] = {
            6  +		{
            7  +	      ['<-->'] = '⟷';
            8  +			['--->'] = '⟶';
            9  +			['<---'] = '⟵';
           10  +			['----'] = '⸻';
           11  +      };
           12  +
           13  +		{
           14  +			['<==>'] = '⟺';
           15  +			['===>'] = '⇐';
           16  +			['<==='] = '⟸';
           17  +		};
           18  +
           19  +		{
           20  +			['<->'] = '↔';
           21  +			['-->'] = '→';
           22  +			['<--'] = '←';
           23  +			['==>'] = '⇒';
           24  +			['<=>'] = '⇔';
           25  +			['<=='] = '⇐';
           26  +			['=/='] = '≠';
           27  +			['---'] = '⸺';
           28  +		};
           29  +
           30  +		{
           31  +			['-:-'] = '÷';
           32  +			['--'] = '—';
           33  +			['(C)'] = '©';
           34  +			['(>)'] = '🄯';
           35  +			['(R)'] = '®';
           36  +			['(TM)'] = '™';
           37  +			['(SM)'] = '℠';
           38  +		};
           39  +   };
           40  +}
           41  +
           42  +local quotes = {
           43  +	[ss.str.enc.utf8] = {
           44  +		['en'] = {'“', '”'; '‘', '’'};
           45  +		['de'] = {'„', '“'; '‚', '‘'};
           46  +		['sp'] = {'«', '»'; '‹', '›'};
           47  +		['ja'] = {'「', '」'; '『', '』'};
           48  +		['fr'] = {'« ', ' »'; '‹ ', ' ›'};
           49  +		[true] = {'“', '”'; '‘', '’'};
           50  +	};
           51  +}
           52  +
           53  +local function meddle(ctx, t)
           54  +	local pts = patterns[ctx.doc.enc]
           55  +	if not pts then return t end
           56  +	local str = ''
           57  +	local lastchar
           58  +	local dquo = ctx.doc.enc.encodeUCS'"'
           59  +	local squo = ctx.doc.enc.encodeUCS"'"
           60  +	local forceRight = ctx.doc.enc.encodeUCS'`'
           61  +	local ptns = patterns[ctx.doc.enc]
           62  +	local function quo(c,p)
           63  +		if c == dquo then
           64  +			return 1
           65  +		elseif c == squo then
           66  +			return 2
           67  +		end
           68  +	end
           69  +	local qtbl if quotes[ctx.doc.enc] then
           70  +		if ctx.lang then
           71  +			qtbl = ss.str.langmatch(quotes[ctx.doc.enc], ctx.lang, ctx.doc.enc) or quotes[ctx.doc.enc][true]
           72  +		else
           73  +			qtbl = quotes[ctx.doc.enc][true]
           74  +		end
           75  +	end
           76  +	for c, p in ss.str.each(ctx.doc.enc,t) do
           77  +		local n = t:sub(p.byte)
           78  +		local ba, ca, nt = ctx.doc.enc.parse_escape(n)
           79  +		if ba then
           80  +			p.next.byte = p.next.byte + ba
           81  +			p.next.code = p.next.code + ca
           82  +			str = str .. nt
           83  +			lastchar = nt
           84  +		else
           85  +			local found = false
           86  +			local quote = quo(c,p)
           87  +			local force
           88  +			if not quote and c == forceRight and #t >= p.next.byte then
           89  +				quote = quo(ctx.doc.enc.char(ctx.doc.enc.codepoint(t,p.next.byte)))
           90  +				if quote then
           91  +		           force = 2
           92  +		           p.next.byte = p.next.byte + #forceRight
           93  +		           p.next.code = p.next.code + ctx.doc.enc.len(forceRight)
           94  +				end
           95  +			end
           96  +			if qtbl and quote then
           97  +				found = true
           98  +				if force then
           99  +					str = str .. qtbl[quote*force]
          100  +				elseif lastchar == nil or ctx.doc.enc.iswhitespace(lastchar) then
          101  +					str = str .. qtbl[quote]
          102  +				else
          103  +					str = str .. qtbl[quote*2]
          104  +				end
          105  +			elseif ptns then
          106  +				for _, order in ipairs(ptns) do
          107  +					for k,v in pairs(order) do
          108  +						if ss.str.begins(n, k) then
          109  +							found = true
          110  +							str = str .. v
          111  +							p.next.byte = p.next.byte + string.len(k) - 1
          112  +							p.next.code = p.next.code + utf8.len(k) - 1
          113  +							goto stopsearch
          114  +						end
          115  +					end
          116  +				end::stopsearch::
          117  +			end
          118  +			if not found then
          119  +				str = str .. c
          120  +			end
          121  +			lastchar = c
          122  +		end
          123  +	end
          124  +	return str
          125  +end
          126  +
          127  +local function enterspan(origin, spans)
          128  +	for i,v in pairs(spans) do
          129  +		if type(v) == 'string' then
          130  +			spans[i] = meddle(origin, v)
          131  +		elseif v.kind ~= 'raw' and v.spans then
          132  +			enterspan(v.origin, v.spans)
          133  +		end
          134  +	end
          135  +end
          136  +
          -- register the extension; its single hook rewrites the text of every
          -- block in ordinary, blockquote and footnote sections once the
          -- document AST is available
          ct.ext.install {
          	id = 'transmogrify';
          	version = ss.version {0,1; 'devel'};
          	contributors = {{name='lexi hale', handle='velartrill', mail='lexi@hale.su', homepage='https://hale.su'}};
          	default = true; -- on unless inhibited
          	slow = true;
          	hook = {
          		doc_meddle_ast = function(job)
          			-- section kinds whose blocks are eligible for rewriting
          			local textual = { ordinary = true, blockquote = true, footnote = true }
          			for _, sec in pairs(job.doc.secorder) do
          				if textual[sec.kind] then
          					for _, block in pairs(sec.blocks) do
          						local shape = type(block.spans)
          						if shape == 'table' then
          							-- span list: rewritten in place, recursively
          							enterspan(block.origin, block.spans)
          						elseif shape == 'string' then
          							-- bare string: replaced wholesale
          							block.spans = meddle(block.origin, block.spans)
          						end
          					end
          				end
          			end
          		end;
          	};
          }

Modified makefile from [42776f3212] to [4482353657].

     1      1   lua != which lua
     2      2   luac != which luac
     3      3   sh != which sh
     4      4   
     5      5   extens = $(wildcard ext/*.lua)
     6         -extens_names ?= $(basename $(notdir $(extens)))
            6  +extens-names ?= $(basename $(notdir $(extens)))
     7      7   build = build
     8      8   executable = cortav
     9      9   default-format-flags = -m html:width 40em
    10     10   
    11     11   prefix = $(HOME)/.local
    12         -bin_prefix = $(prefix)/bin
    13         -share_prefix = $(prefix)/share/$(executable)
           12  +bin-prefix = $(prefix)/bin
           13  +share-prefix = $(prefix)/share/$(executable)
    14     14   
    15         -$(build)/$(executable): sirsem.lua cortav.lua $(extens) cli.lua | $(build)/
    16         -	@echo ' » building with extensions $(extens_names)'
           15  +# by default, we fetch and parse information about encodings we
           16  +# support so that cortav can do fancy things like format math
           17  +# equations by character class (e.g. italicizing variables)
           18  +# this is not necessary for parsing the format, and can be
           19  +# disabled by blanking the encoding-data list when building
           20  +# ($ make encoding-data=)
           21  +encoding-data  = ucstbls
           22  +encoding-files = $(patsubst %,$(build)/%.lc,$(encoding-data))
           23  +encoding-data-ucs = https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
           24  +
           25  +$(build)/$(executable): sirsem.lua $(encoding-files) cortav.lua $(extens) cli.lua | $(build)/
           26  +	@echo ' » building with extensions $(extens-names)'
    17     27   	echo '#!$(lua)' > $@
    18     28   	luac -o - $^ >> $@
    19     29   	chmod +x $@
    20     30   
    21     31   $(build)/cortav.html: cortav.ct $(build)/$(executable) | $(build)/
    22     32   	$(build)/$(executable) $< -o $@ -m render:format html -y html:fossil-uv
    23     33   
................................................................................
    28     38   
    29     39   .PHONY: clean
    30     40   clean:
    31     41   	rm -f $(build)/cortav $(build)/cortav.html $(build)/velartrill-cortav-view.desktop $(build)/cortav-view.sh
    32     42   
    33     43   $(build)/%.sh: desk/%.sh
    34     44   	echo >$@ "#!$(sh)"
    35         -	echo >>$@ 'cortav_exec="$(bin_prefix)/$(executable)"'
           45  +	echo >>$@ 'cortav_exec="$(bin-prefix)/$(executable)"'
    36     46   	echo >>$@ 'cortav_flags="$${ct_format_flags-$(default-format-flags)}"'
    37     47   	cat $< >> $@
    38     48   	chmod +x $@
    39     49   
    40     50   $(build)/velartrill-cortav-view.desktop: desk/cortav-view.desktop
    41     51   	cp $< $@
    42         -	echo "Exec=$(bin_prefix)/cortav-view.sh" >>$@
           52  +	echo "Exec=$(bin-prefix)/cortav-view.sh" >>$@
    43     53   
    44     54   %/:
    45     55   	mkdir -p $@
    46     56   
           # fetch the unicode character database. the download goes to a
           # temporary name first and is only moved into place on success, so
           # a failed or interrupted transfer never leaves behind a truncated
           # file that make would treat as up to date. -f makes curl fail on
           # HTTP errors instead of saving the error page.
           $(build)/unicode.txt: | $(build)/
	curl -fsSL -o $@.part $(encoding-data-ucs)
	mv $@.part $@
           # the generator script is a prerequisite so the tables are rebuilt
           # whenever it changes; $< is still the database file
           $(build)/ucstbls.lc: $(build)/unicode.txt tools/ucs.lua | $(build)/
	$(lua) tools/ucs.lua $< | $(luac) -o $@ -
           61  +
    47     62   .PHONY: install
    48         -install: $(build)/cortav $(build)/cortav-view.sh $(build)/velartrill-cortav-view.desktop | $(bin_prefix)/
    49         -	install $(build)/$(executable)  $(bin_prefix)
    50         -	install $(build)/cortav-view.sh $(bin_prefix)
           63  +install: $(build)/cortav $(build)/cortav-view.sh $(build)/velartrill-cortav-view.desktop | $(bin-prefix)/
           64  +	install $(build)/$(executable)  $(bin-prefix)
           65  +	install $(build)/cortav-view.sh $(bin-prefix)
    51     66   	xdg-mime         install desk/velartrill-cortav.xml
    52     67   	xdg-desktop-menu install $(build)/velartrill-cortav-view.desktop
    53     68   	xdg-mime         default velartrill-cortav-view.desktop text/x-cortav
    54     69   
    55     70   .PHONY: excise
    56     71   excise: $(build)/velartrill-cortav-view.desktop
    57     72   	xdg-mime         uninstall desk/velartrill-cortav.xml
    58     73   	xdg-desktop-menu uninstall $(build)/velartrill-cortav-view.desktop
    59         -	rm $(bin_prefix)/$(executable)
    60         -	rm $(bin_prefix)/cortav-view.sh
           74  +	rm $(bin-prefix)/$(executable)
           75  +	rm $(bin-prefix)/cortav-view.sh
    61     76   
    62     77   .PHONY: wipe
    63     78   wipe: excise clean

Modified sirsem.lua from [1f16b393f5] to [581e1b0127].

    86     86   			end
    87     87   		else
    88     88   			new[k] = v
    89     89   		end
    90     90   	end
    91     91   	return new
    92     92   end
           93  +
           --- Append every extra argument to the end of `tbl`, in order.
           -- The first value lands at index #tbl+1. Returns `tbl` itself so
           -- calls can be chained.
           function ss.push(tbl, ...)
           	local extra = table.pack(...)
           	local base = #tbl
           	for i = 1, extra.n do
           		tbl[base + i] = extra[i]
           	end
           	return tbl
           end
    93    104   
    --- Return a table that looks up keys it lacks from `tbl` (a lightweight
    -- alternative to shallow copies).
    -- @param tbl  table consulted for any key missing from the result
    -- @param tpl  optional table to use as the result; its own fields take
    --             precedence over tbl's. Defaults to a fresh empty table.
    --             Any metatable already set on tpl is replaced.
    -- @return tpl (or the fresh table), with its __index pointing at tbl
    function ss.delegate(tbl,tpl)
    	tpl = tpl or {}
    	-- the template was previously defaulted and then ignored: the
    	-- metatable was always attached to a brand-new empty table
    	return setmetatable(tpl, {__index=tbl})
    end
    99    110   
   100    111   ss.str = {}
   101    112   
   --- Report whether `str` starts with the literal prefix `pfx`.
   function ss.str.begins(str, pfx)
   	-- a plain (pattern-free) find whose match position is compared
   	-- against 1. per the original author's measurements this is about
   	-- two-fifths faster than comparing string.sub(str, 1, #pfx) to pfx
   	-- or walking both strings byte by byte, and a hand-written C
   	-- scanner only improved on it by a tiny fraction.
   	local at = string.find(str, pfx, 1, true)
   	return at == 1
   end
   105    132   
          --- Build a two-way lookup from a list of symbols: every symbol maps
          -- to its key in `syms`, and every key maps back to its symbol.
          function ss.enum(syms)
          	local lookup = {}
          	for ordinal, sym in pairs(syms) do
          		lookup[ordinal] = sym
          		lookup[sym] = ordinal
          	end
          	return lookup
          end
          141  +
          --- Return the single-bit values 1<<ofs, 1<<(ofs+1), ... 1<<(ofs+n)
          -- as multiple results, n+1 values in all. `ofs` defaults to 0.
          function ss.bitmask_bytes(n,ofs)
          	ofs = ofs or 0
          	local bits = { 1 << ofs }
          	for i = 1, n do
          		bits[i + 1] = 1 << (i + ofs)
          	end
          	return table.unpack(bits)
          end
          150  +
          --- Build a two-way table between flag names and single-bit values.
          -- The i-th name in `tbl` is paired with the bit 1<<(ofs+i-1); names
          -- map to bits and bits map back to names. The entry under the key
          -- `true` holds {offset, number of names processed}.
          function ss.bitmask(tbl,ofs)
          	local bits = table.pack(ss.bitmask_bytes(#tbl, ofs))
          	local map = {}
          	local last
          	for pos, name in ipairs(tbl) do
          		local bit = bits[pos]
          		map[name] = bit
          		map[bit] = name
          		last = pos
          	end
          	map[true] = { ofs or 0, last }
          	return map
          end
          163  +
          -- basic character classes. ss.enum maps each name to its position
          -- in this list and back, so the seven classes take the values 1-7.
          ss.str.charclass = ss.enum {
          	'numeral';
          	'letter';
          	'symbol';
          	'punct';
          	'space';
          	'ctl';
          	'glyph'; -- hanji
          }
          -- character properties, one bit apiece. the offset of 3 places the
          -- first flag at 1<<3, above the three bits a charclass value needs.
          ss.str.charprop = ss.bitmask({
          	'hexnumeral'; -- character that can be used to write hexadecimal notation
          	'upper';
          	'lower';
          	'diac'; -- diacritic/modifier letter
          	'wordbreak'; -- char causes following characters to be treated as a separate word (e.g. punctuation)
          	'wordsep'; -- char causes previous and following characters to be treated as separate words; char constitutes a word of its own in between (e.g. interpunct)
          	'breakokay'; -- is it okay to break words at this character? (eg hyphen)
          	'mathop'; -- char is a mathematical operator
          	'disallow'; -- char is not allowed in narrative text
          	'brack'; -- brackets
          	'right';
          	'left';
          	'noprint'; -- character deposits no ink
          	'superimpose'; -- character is superimposed over previous
          }, 3)
          181  +
          ss.str.enc_generics = {
          	-- build an escape parser for the escape prefix `ch` in encoding
          	-- `enc`. the returned function takes a string `s` and yields
          	--   0, 0, ch  when s is nothing but the prefix itself;
          	--   (bytes in ch), (codepoints in ch), (the character right after
          	--   the prefix)  when s starts with the prefix;
          	--   whatever `chain(s)` yields otherwise, if a chain was given;
          	--   nothing at all in any other case.
          	pfxescape = function(ch, enc, chain)
          		local pfxbytes = #ch
          		local pfxcodes = enc.len(ch)
          		return function(s)
          			if s == ch then
          				return 0, 0, ch
          			end
          			if ss.str.begins(s, ch) then
          				local escaped = enc.char(enc.codepoint(s, pfxbytes + 1))
          				return pfxbytes, pfxcodes, escaped
          			end
          			if chain then
          				return chain(s)
          			end
          		end
          	end;
          };
          198  +
          local cc,cp = ss.str.charclass, ss.str.charprop
          -- table of supported encodings. each entry supplies the primitives
          -- (len, char, codepoint, iswhitespace, ...) that the generic string
          -- routines call. `ranges` rows have the form
          -- {first, last, class, property...}.
          ss.str.enc = {
          	utf8 = {
          		char = utf8.char;
          		codepoint = utf8.codepoint;
          		len = utf8.len;
          		encodeUCS = function(str) return str end;
          		iswhitespace = function(c)
          			return (c == ' ') or (c == '\t') or (c == '\n')
          				or (c == '\u{3000}')
          				or (c == '\u{200B}')
          		end;
          	};
          	ascii = {
          		len = string.len; char = string.char; codepoint = string.byte;
          		iswhitespace = function(c)
          			return (c == ' ') or (c == '\t') or (c == '\n')
          		end;
          		-- the property names must be the ones declared in
          		-- ss.str.charprop ('upper', 'lower', 'mathop'). a
          		-- misspelled name evaluates to nil, which either punches a
          		-- hole in the row or silently drops the flag.
          		ranges = {
          			{0x00,0x1a, cc.ctl};
          			{0x1b,0x1b, cc.ctl, cp.disallow};
          			{0x1c,0x1f, cc.ctl};
          			{0x20,0x20, cc.space};
          			{0x21,0x22, cc.punct};
          			{0x23,0x26, cc.symbol};
          			{0x27,0x29, cc.punct};
          			{0x2a,0x2b, cc.symbol};
          			{0x2c,0x2f, cc.punct};
          			{0x30,0x39, cc.numeral, cp.hexnumeral};
          			{0x3a,0x3b, cc.punct};
          			{0x3c,0x3e, cc.symbol, cp.mathop};
          			{0x3f,0x3f, cc.punct};
          			{0x40,0x40, cc.symbol};
          			{0x41,0x46, cc.letter, cp.upper, cp.hexnumeral};
          			{0x47,0x5a, cc.letter, cp.upper};
          			{0x5b,0x5d, cc.symbol, cp.mathop};
          			{0x5e,0x5e, cc.symbol, cp.mathop};
          			{0x5f,0x60, cc.symbol};
          			{0x61,0x66, cc.letter, cp.lower, cp.hexnumeral};
          			{0x67,0x7a, cc.letter, cp.lower};
          			{0x7b,0x7e, cc.symbol};
          			{0x7f,0x7f, cc.ctl, cp.disallow};
          		}
          	};
          	raw = {len = string.len; char = string.char; codepoint = string.byte;
          		encodeUCS = function(str) return str end;
          		iswhitespace = function(c)
          			return (c == ' ') or (c == '\t') or (c == '\n')
          		end;
          	};
          }
   114    250   
   115         -function ss.str.enc.utf8.each(str, ascode)
          251  +-- unicode ranges are optionally generated from consortium data
          252  +-- files and injected through a generated source file. if this
          253  +-- part of the build process is disabled (e.g. due to lack of
          254  +-- internet access, or to keep the size of the executable as
          255  +-- small as possible), we still at least can make the ascii
          256  +-- ranges available to UTF8 (UTF8 being a superset of ascii)
          257  +ss.str.enc.utf8.ranges = ss.delegate(ss.str.enc.ascii.ranges)
          258  +
          --- Convert a UTF-8 string to ASCII.
          -- Codepoints above 0x7F are replaced with '?'; everything else is
          -- copied through unchanged.
          -- @param str  UTF-8 encoded string
          -- @return the ASCII rendition (the result was previously built and
          --         then discarded, so callers always received nil)
          function ss.str.enc.ascii.encodeUCS(str)
          	local out = {}
          	for code in ss.str.each(ss.str.enc.utf8, str, true) do
          		if code > 0x7F then
          			out[#out + 1] = '?'
          		else
          			out[#out + 1] = string.char(code)
          		end
          	end
          	return table.concat(out)
          end
          269  +
          -- give each built-in encoding a backslash-prefixed escape parser
          for _, name in ipairs {'utf8','ascii','raw'} do
          	local enc = ss.str.enc[name]
          	enc.parse_escape = ss.str.enc_generics.pfxescape('\\', enc)
          end
          273  +
          --- Classify a character under an encoding (unfinished).
          -- Returns an empty table when the encoding has no `ranges`.
          -- Otherwise a string argument is converted to its codepoint and
          -- nothing is returned yet.
          function ss.str.classify(enc, ch)
          	if not enc.ranges then
          		return {}
          	end
          	if type(ch) == 'string' then
          		ch = enc.codepoint(ch)
          	end
          	-- TODO
          end
          279  +
          280  +
          --- Iterate over the characters of `str` under encoding `enc`.
          -- Each step yields the character (or its codepoint when `ascode`
          -- is true) and a position record:
          --   code, byte  1-based codepoint and byte index of this character
          --   next        the iterator's live cursor; assigning next.byte and
          --               next.code moves where iteration resumes
          --   esc()       tries to read an escape sequence starting at this
          --               character. on success it moves the cursor past the
          --               whole sequence and returns the escaped character,
          --               otherwise it returns nil.
          -- An encoding may replace the whole routine by providing enc.each.
          function ss.str.each(enc, str, ascode)
          	if enc.each then return enc.each(enc,str,ascode) end
          	local pm = {
          		__index = {
          			esc = function(self)
          				if not enc.parse_escape then return nil end
          				-- per enc_generics.pfxescape, the parser returns the
          				-- byte and codepoint length of the escape *prefix*
          				-- (0, 0 for a lone trailing prefix) plus the single
          				-- escaped character
          				local ba, bc, nc = enc.parse_escape(str:sub(self.byte))
          				if ba then
          					-- resume after prefix + escaped character,
          					-- measured from the start of this character.
          					-- the old `+ ba - 1` left the cursor on the
          					-- escaped character (so it was read twice) and
          					-- stepped backwards on a lone prefix.
          					self.next.byte = self.byte + ba + #nc
          					self.next.code = self.code + bc + 1
          					return nc
          				end
          			end;
          		};
          	}
          	local pos = {
          		code = 1;
          		byte = 1;
          	}
          	return function()
          		if pos.byte > #str then return nil end
          		local code = enc.codepoint(str, pos.byte)
          		local here = setmetatable({
          			code = pos.code;
          			byte = pos.byte;
          			next = pos;
          		},pm)
          		-- re-encode to learn how many bytes this character occupies
          		local char = enc.char(code)
          		pos.byte = pos.byte + #char
          		pos.code = pos.code + 1
          		if ascode then
          			return code, here
          		end
          		return char, here
          	end
          end
          317  +
          --- Split `str` into whitespace-separated words.
          -- @param enc   encoding table; enc.breakwords, if present, replaces
          --              this routine entirely
          -- @param str   string to split
          -- @param max   optional; once this many words have been collected,
          --              whatever follows the next whitespace character is
          --              appended as one final element and splitting stops
          -- @param opts  optional table; opts.escape enables escape sequences,
          --              whose escaped character is added to the current word
          --              even if it is whitespace
          -- @return list of words
          function ss.str.breakwords(enc, str, max, opts)
          	-- hand the override every argument, not just the string
          	if enc.breakwords then return enc.breakwords(str, max, opts) end
          	local words = {}
          	opts = opts or {}
          	local buf = ''
          	local flush = function()
          		if buf ~= '' then table.insert(words,buf) buf = '' end
          	end
          	for c, p in ss.str.each(enc,str) do
          		local nc
          		if opts.escape then
          			nc = p:esc()
          		end
          		if nc then
          			-- concatenate; `+` would attempt arithmetic on strings
          			buf = buf .. nc
          		elseif enc.iswhitespace(c) then
          			flush()
          			if max and #words == max then
          				local rs = str:sub(p.next.byte)
          				if rs ~= '' then
          					table.insert(words, rs)
          				end
          				break
          			end
          		else
          			buf = buf .. c
          		end
          	end
          	flush()
          	return words
          end
          --- Join a list of words into one string, separated by enc.wordsep
          -- (a single space when the encoding defines none). An encoding may
          -- take over by providing enc.mergewords.
          function ss.str.mergewords(enc, lst)
          	local custom = enc.mergewords
          	if custom then
          		return custom(lst)
          	end
          	local sep = enc.wordsep or ' '
          	return table.concat(lst, sep)
          end
          --- Split `str` into lines at the encoding's newline character.
          -- @param enc   encoding table; enc.breaklines, if present, replaces
          --              this routine
          -- @param str   string to split
          -- @param opts  options forwarded to ss.str.split (or the override)
          -- @return list of lines
          function ss.str.breaklines(enc, str, opts)
          	-- pass the string itself; the undefined global `lst` used to be
          	-- passed here, so overrides received nil
          	if enc.breaklines then return enc.breaklines(str,opts) end
          	return ss.str.split(enc, str, enc.encodeUCS'\n', opts)
          end
          357  +
          --- Split `str` at every occurrence of a delimiter.
          -- @param enc    encoding table; enc.split, if present, replaces this
          --               routine
          -- @param str    string to split
          -- @param delim  either a non-empty delimiter string, or a function
          --               that receives the remainder of the string starting
          --               at the current character and returns the number of
          --               bytes to consume as a delimiter (or nil/false for
          --               "no delimiter here")
          -- @param opts   optional table:
          --               keep_empties  also emit empty elements
          --               escape        honour escape sequences; an escaped
          --                             character is copied into the current
          --                             element and never acts as a delimiter
          -- @return list of elements
          function ss.str.split(enc, str, delim, opts)
          	if enc.split then return enc.split(str,delim,opts) end
          	opts = opts or {}
          	if delim == '' then
          		-- an empty delimiter matches everywhere without consuming
          		-- anything, so the scan below would never terminate
          		error('ss.str.split: delimiter must not be empty', 2)
          	end
          	local elts = {}
          	local buf = ''
          	local flush = function()
          		if buf ~= '' or opts.keep_empties then
          			table.insert(elts,buf)
          			buf = ''
          		end
          	end
          	local tryesc
          	if opts.escape then
          		tryesc = function(p)
          			-- per enc_generics.pfxescape: ba/ca are the byte and
          			-- codepoint length of the escape prefix (0 for a lone
          			-- trailing prefix), escd is the single escaped character
          			local ba, ca, escd = enc.parse_escape(str:sub(p.byte))
          			if ba then
          				-- resume after prefix + escaped character; using
          				-- #escd rather than ba for the second part keeps
          				-- multi-byte escaped characters intact
          				p.next.byte = p.byte + ba + #escd
          				p.next.code = p.code + ca + 1
          				buf = buf .. escd
          				return true
          			end
          		end
          	else
          		tryesc = function() end
          	end

          	if type(delim) == 'function' then
          		for c, p in ss.str.each(enc,str) do
          			if not tryesc(p) then
          				local skip = delim(str:sub(p.byte))
          				if skip then
          					flush()
          					-- measure from the start of this character and
          					-- never consume less than the character itself,
          					-- so the cursor cannot stall or land inside a
          					-- multi-byte sequence
          					local consumed = math.max(skip, #c)
          					local matched = str:sub(p.byte, p.byte + consumed - 1)
          					p.next.byte = p.byte + consumed
          					p.next.code = p.code + (enc.len(matched) or 1)
          				else
          					buf = buf .. c
          				end
          			end
          		end
          	elseif enc.len(delim) == 1 then
          		for c, p in ss.str.each(enc,str) do
          			if not tryesc(p) then
          				if c == delim then
          					flush()
          				else
          					buf = buf .. c
          				end
          			end
          		end
          	else
          		local dlbytes, dlcodes = #delim, enc.len(delim)
          		for c, p in ss.str.each(enc,str) do
          			if not tryesc(p) then
          				if str:sub(p.byte, p.byte + dlbytes - 1) == delim then
          					flush()
          					-- skip the whole delimiter, counted from the
          					-- start of this character in both units
          					p.next.byte = p.byte + dlbytes
          					p.next.code = p.code + dlcodes
          				else
          					buf = buf .. c
          				end
          			end
          		end
          	end
          	flush()
          	return elts
          end
          423  +
          --- Pick the entry of `tbl` whose language-tag key best matches `lang`.
          -- Returns the chosen value (falling back to tbl[true] when nothing
          -- matches) and the key that matched.
          function ss.str.langmatch(tbl, lang, enc)
          	-- primitive matching only -- NOT standards compliant. tags are
          	-- compared subtag by subtag in order, so 'en-US-Latn' and
          	-- 'en-Latn-US' do not match each other although they should; a
          	-- proper implementation has to understand the tag format.
          	-- ref: IETF BCP 47 (RFC 5646) https://www.ietf.org/rfc/bcp/bcp47.html
          	local dash = enc.encodeUCS'-'
          	local function subtags(tag)
          		return ss.str.split(enc, tag, dash, {escape=true})
          	end
          	local want = subtags(lang)
          	local bestkey
          	local bestlen = 0
          	for key in pairs(tbl) do
          		if key ~= true then
          			local have = subtags(key)
          			-- the key matches if the two tags agree on every
          			-- subtag position they both have
          			local agrees = true
          			for i = 1, math.min(#have, #want) do
          				if have[i] ~= want[i] then
          					agrees = false
          					break
          				end
          			end
          			-- prefer the most specific (longest) matching key
          			if agrees and #have > bestlen then
          				bestkey = key
          				bestlen = #have
          			end
          		end
          	end
          	return tbl[bestkey] or tbl[true], bestkey
          end
   138    450   
   139    451   ss.math = {}
   140    452   
   141    453   function ss.math.lerp(t, a, b)
   142    454   	return (1-t)*a + (t*b)
   143    455   end
   144    456   
................................................................................
   239    551   				elseif to == 'int' then return math.floor(tonumber(self))
   240    552   				elseif c.cast and c.cast[to] then
   241    553   					return c.cast[to](self, ...)
   242    554   				elseif type(to) == 'table' and getmetatable(to) and getmetatable(to).cvt and getmetatable(to).cvt[cls] then
   243    555   				else error((c.ident or 'class') .. ' is not convertible to ' .. (type(to) == 'string' and to or tostring(to))) end
   244    556   			end
   245    557   		end
   246         -		if c.fns then return c.fns[k] end
          558  +		if c.fns and c.fns[k] then return c.fns[k] end
          559  +		if c.index then return c.index(self,k) end
   247    560   	end
   248    561   
   249    562   	if c.cast then
   250    563   		if c.cast.string then
   251    564   			cls.__tostring = c.cast.string
   252    565   		end
   253    566   		if c.cast.number then
................................................................................
   265    578   		if c.construct then
   266    579   			c.construct(val, ...)
   267    580   		end
   268    581   		return val
   269    582   	end
   270    583   	getmetatable(cls).__call = function(_, ...) return cls.mk(...) end
   271    584   	cls.is = function(o) return getmetatable(o) == cls end
          585  +	cls.__metatable = cls -- lock metatable
   272    586   	return cls
   273    587   end
   274    588   
   275    589   -- tidy exceptions
   276    590   
   277    591   ss.exn = ss.declare {
   278    592   	ident = 'exn';
................................................................................
   302    616   		}
   303    617   	end;
   304    618   	call = function(me, ...)
   305    619   		return ss.exn(me, ...)
   306    620   	end;
   307    621   }
   308    622   ss.str.exn = ss.exnkind 'failure while string munging'
          623  +ss.bug = ss.exnkind 'tripped over bug'
   309    624   
   310    625   function ss.str.delimit(encoding, start, stop, s)
   311    626   	local depth = 0
   312    627   	encoding = encoding or ss.str.enc.utf8
   313    628   	if not ss.str.begins(s, start) then return nil end
   314         -	for c,p in encoding.each(s) do
          629  +	for c,p in ss.str.each(encoding,s) do
   315    630   		if c == (encoding.escape or '\\') then
   316    631   			p.next.byte = p.next.byte + #encoding.char(encoding.codepoint(s, p.next.byte))
   317    632   			p.next.code = p.next.code + 1
   318    633   		elseif c == start then
   319    634   			depth = depth + 1
   320    635   		elseif c == stop then
   321    636   			depth = depth - 1
................................................................................
   384    699   		return x
   385    700   	elseif select('#', ...) == 0 then
   386    701   		return nil
   387    702   	else
   388    703   		return ss.coalesce(...)
   389    704   	end
   390    705   end
          706  +
          -- helpers for working with argument lists ("tuples")
          ss.tuple = {}
          --- True when at least one argument was passed, nils included.
          function ss.tuple.any(...)
          	local count = select('#', ...)
          	return count > 0
          end
          711  +
          --- Capture a list of leading values and return a function that
          -- prepends them to whatever it is called with.
          -- `ss.tuple.cat(a, b)(c, d)` yields a, b, c, d.
          -- The captured values are never modified, so the returned function
          -- can be called repeatedly. It used to append to the captured table
          -- itself, so every call also returned the arguments of all earlier
          -- calls. nil values are preserved in both halves.
          function ss.tuple.cat(...)
          	local head = table.pack(...)
          	return function(...)
          		local tail = table.pack(...)
          		local all = {}
          		for i = 1, head.n do
          			all[i] = head[i]
          		end
          		for i = 1, tail.n do
          			all[head.n + i] = tail[i]
          		end
          		return table.unpack(all, 1, head.n + tail.n)
          	end
          end
          719  +
          --- Return the given values followed by `sfx`.
          -- `ss.tuple.suffix(s, a, b, c)` yields a, b, c, s. The list ends at
          -- the first nil value.
          function ss.tuple.suffix(sfx,n,...)
          	if n ~= nil then
          		-- carry the suffix along; the recursive call used to omit
          		-- it, which made the next value act as the suffix
          		return n, ss.tuple.suffix(sfx, ...)
          	else
          		return sfx
          	end
          end
          727  +
          --- Drop the first argument and return all the others.
          function ss.tuple.cdr(x, ...)
          	return select(2, x, ...)
          end
          729  +
          -- a simple stack. `top` is the number of stored values and `store`
          -- holds them, bottom first. indexing a stack with a positive
          -- number reads that slot counted from the bottom; 0 reads the top
          -- and negative numbers read below the top.
          ss.stack = ss.declare {
          	ident = 'stack';
          	mk = function() return {
          		top = 0;
          		store = {};
          	} end;
          	index = function(me, i)
          		-- this is consulted for every key that is neither a field
          		-- nor a method, so non-numeric keys must be answered with
          		-- nil rather than compared against 0
          		if type(i) ~= 'number' then return nil end
          		if i <= 0 then
          			return me.store[me.top + i]
          		else
          			return me.store[i]
          		end
          	end;
          	fns = {
          		-- push each argument in order, stopping at the first nil;
          		-- returns its arguments unchanged
          		push = function(me, val, ...)
          			if val~=nil then
          				me.top = me.top + 1
          				me.store[me.top] = val
          				me:push(...)
          			end
          			return val, ...
          		end;
          		-- remove up to n values (default 1) and return them, topmost
          		-- first. asking for more than the stack holds removes
          		-- everything. `top` is now kept in step in every case, and
          		-- the order no longer flips when the stack is emptied.
          		pop = function(me,n)
          			n = math.min(n or 1, me.top)
          			local r = {}
          			for i = 1, n do
          				r[i] = me.store[me.top]
          				me.store[me.top] = nil
          				me.top = me.top - 1
          			end
          			return table.unpack(r, 1, n)
          		end;
          		-- overwrite the top value, pushing it if the stack is empty
          		set = function(me,val)
          			if me.top == 0 then
          				me.top = me.top + 1 --autopush
          			end
          			me.store[me.top] = val
          		end;
          		-- every stored value, bottom first
          		all = function(me) return table.unpack(me.store, 1, me.top) end;
          		-- iterator over (value, index) pairs: bottom to top when
          		-- `forward` is truthy, top to bottom otherwise. the
          		-- receiver was missing from the parameter list and the
          		-- body read a global `top`, so this could never run.
          		each = function(me, forward)
          			if forward then
          				local idx = 0
          				return function()
          					idx = idx + 1
          					if idx > me.top then
          						return nil
          					end
          					return me.store[idx], idx
          				end
          			else
          				local idx = me.top + 1
          				return function()
          					idx = idx - 1
          					if idx < 1 then
          						return nil
          					end
          					return me.store[idx], idx
          				end
          			end
          		end;
          	};
          }
          796  +
          -- a pushdown automaton. `state` is a stack of frames {id=, mem=},
          -- `states` maps state ids to state definitions (optional handlers:
          -- enter, exit, activate, react) and `ttns` maps state ids to lists
          -- of transitions (fields used here: on, pred, to, pop).
          -- NOTE(review): the handler contracts are inferred from the call
          -- sites in this block only -- confirm against the state definitions
          -- once they exist.
          ss.automat = ss.declare {
          	ident = 'automat';
          	mk = function() return {
          		state = ss.stack();
          		states = {};
          		ttns = {};
          		mem = {};
          		-- default transition test: an optional predicate must
          		-- return exactly true, and the symbol must equal ttn.on
          		match = function(sym, ttn, mach)
          			if ttn.pred and ttn:pred(mach, sym)~=true then
          				return false
          			end
          			if ttn.on then
          				return sym == ttn.on
          			end
          			return false
          		end;
          	} end;

          	-- copy the recognised fields of the definition into the machine.
          	-- the loop used to test and assign by list position, which
          	-- stored the field *names* under numeric keys.
          	construct = function(me, def)
          		if not def then return end
          		for _, field in ipairs {'states','ttns','mem','syms'} do
          			if def[field] then me[field] = def[field] end
          		end
          	end;

          	fns = {
          		-- let the current state handle a symbol no transition took
          		react = function(me,sym)
          			local frame = me.state[0] -- top of the state stack
          			local s = frame and me.states[frame.id]
          			-- guard on the handler that is actually invoked
          			if s and s.react then
          				s:react(me, sym)
          			end
          		end;

          		-- leave the topmost n states: run their exit handlers, top
          		-- first, then the activate handler of the state that
          		-- becomes current, and finally pop the frames
          		drop = function(me,n)
          			for i = 0, math.min(n-1,me.state.top-1) do
          				local frame = me.state[-i]
          				local s = me.states[frame.id]
          				-- pass the frame's memory, as enter/activate do
          				if s and s.exit then s:exit(frame.mem, me) end
          			end
          			if n < me.state.top then
          				local frame = me.state[-n]
          				local newtop = me.states[frame.id]
          				if newtop and newtop.activate then
          					newtop:activate(frame.mem, me, n)
          				end
          			end
          			return me.state:pop(n)
          		end;
          		clear = function(me) return me:drop(me.state.top) end;

          		-- enter the target state of a transition. `sym` and
          		-- `oldstates` are accepted to match the way `input` calls
          		-- this; neither is used yet.
          		transition = function(me,ttn,sym,oldstates)
          			local s = me.state:push {id = ttn.to, mem = {}}
          			local to = me.states[ttn.to]
          			if to and to.enter then
          				to:enter(s.mem, me)
          			end
          		end;

          		-- feed one symbol: take the first matching transition of
          		-- the current state, or fall back to its react handler
          		input = function(me,sym)
          			local frame = me.state[0]
          			local ttns = frame and me.ttns[frame.id]
          			local ttn
          			if ttns then
          				local _
          				_, ttn = ss.find(ttns, function(t)
          					return me.match(sym, t, me)
          				end)
          			end
          			if ttn then
          				if ttn.pop then
          					-- drop is a method of the machine, not of
          					-- the state stack
          					local oldstates = {me:drop(ttn.pop)}
          					me:transition(ttn, sym, oldstates)
          				else
          					me:transition(ttn, sym)
          				end
          			else
          				me:react(sym)
          			end
          		end;
          	};
          }

Added tools/ucs.lua version [3976f4bc78].

            1  +-- [ʞ] tools/ucs.lua
            2  +--  ~ lexi hale <lexi@hale.su>
            3  +--  ? table generator for unicode character classes
            4  +--  🄯 AGPLv3
            5  +
            6  +
            7  +local tpl = [[
            8  +local ss = require 'sirsem'
            9  +ss.str.enc.utf8.ranges = {%s}
           10  +]]
           11  +
           -- two-way lookup: every symbol maps to its key in `syms` and every
           -- key maps back to its symbol
           local enum = function(syms)
           	local lookup = {}
           	for ordinal, sym in pairs(syms) do
           		lookup[ordinal] = sym
           		lookup[sym] = ordinal
           	end
           	return lookup
           end
           20  +
           -- read the database from the file named by the first argument,
           -- or from standard input when no argument is given
           local file = io.stdin
           local path
           if arg[1] then
           	path = arg[1]
           	local err
           	file, err = io.open(path, 'rb')
           	if not file then
           		-- stop with a clear message instead of failing later on
           		-- a nil file handle
           		io.stderr:write(string.format('ucs.lua: cannot open %s: %s\n', path, tostring(err)))
           		os.exit(1)
           	end
           end
           27  +
           -- return the single-bit values 1<<ofs ... 1<<(ofs+n) as multiple
           -- results (n+1 values; ofs defaults to 0)
           local bitmask_raw = function(n,ofs)
           	ofs = ofs or 0
           	local bits = { 1 << ofs }
           	for i = 1, n do
           		bits[i + 1] = 1 << (i + ofs)
           	end
           	return table.unpack(bits)
           end
           36  +
           -- two-way table between flag names and single-bit values: the
           -- i-th name is paired with 1<<(ofs+i-1). the entry under the key
           -- `true` records {offset, number of names processed}.
           local bitmask = function(tbl,ofs)
           	local bits = table.pack(bitmask_raw(#tbl, ofs))
           	local map = {}
           	local last
           	for pos, name in ipairs(tbl) do
           		local bit = bits[pos]
           		map[name] = bit
           		map[bit] = name
           		last = pos
           	end
           	map[true] = { ofs or 0, last }
           	return map
           end
           49  +
           -- basic character classes, in the same order as ss.str.charclass
           -- in sirsem.lua ('alpha' sits in the slot called 'letter' there)
           local basictype = enum {
           	'numeral';
           	'alpha';
           	'symbol';
           	'punct';
           	'space';
           	'ctl';
           	'glyph'; -- hanji
           }
           -- character property flags. the generated table is read at runtime
           -- through ss.str.charprop, so this list must have the same length
           -- and order as that one -- a flag's bit is determined purely by its
           -- position. 'breakokay' and 'mathop' were missing here, which
           -- shifted every flag from 'disallow' onwards two bits away from
           -- the value the runtime expects. ('hex' sits in the slot called
           -- 'hexnumeral' there.)
           local props = bitmask({
           	'hex',
           	'upper', 'lower', 'diac',
           	'wordbreak', 'wordsep',
           	'breakokay', 'mathop',
           	'disallow',
           	'brack', 'right', 'left',
           	'noprint', 'superimpose'
           }, 3)
           67  +
           68  +local overrides = {
           69  +	[0x200B] = basictype.space | props.wordsep; -- database entry is wrong
           70  +}
           71  +
           72  +local mask = ~0 -- mask out irrelevant properties to compactify database
           73  +
           -- translate one database record into a class/property bit pattern.
           -- reads tbl.codepoint, tbl.class (the two-letter general category)
           -- and tbl.kind. an entry in `overrides` wins over the database;
           -- categories not listed below yield 0. the result is filtered
           -- through `mask`.
           local function parsecat(tbl)
           	local b, p = basictype, props
           	local cls, kind = tbl.class, tbl.kind
           	local forced = overrides[tbl.codepoint]
           	local c = 0
           	if forced then
           		c = forced
           	elseif cls == 'Nd' then
           		c = b.numeral
           	elseif cls == 'No' then
           		c = b.numeral | p.diac
           	elseif cls == 'Cc' then
           		if kind == 'S' or kind == 'WS' or kind == 'B' then
           			c = b.space | p.wordsep
           		else
           			c = b.ctl | p.wordbreak | p.disallow
           		end
           	elseif cls == 'Lu' then
           		c = b.alpha | p.upper
           	elseif cls == 'Ll' then
           		c = b.alpha | p.lower
           	elseif cls == 'Lo' or cls == 'Lt' then
           		c = b.alpha
           	elseif cls == 'Po' then
           		c = b.punct | p.wordbreak
           	elseif cls == 'Sm' then
           		c = b.symbol | p.wordsep
           	elseif cls == 'Ps' then
           		c = b.punct | p.brack | p.left
           	elseif cls == 'Pe' then
           		c = b.punct | p.brack | p.right
           	elseif cls == 'Pc' or cls == 'Pd' or cls == 'Sk' or cls == 'Sc' then
           		c = b.symbol
           	elseif cls == 'Zs' then
           		c = b.space
           		if kind == 'WS' then
           			c = c | p.wordsep
           		end
           	elseif cls == 'So' then
           		c = b.glyph
           	elseif cls == 'Mn' then
           		c = b.symbol | p.diac | p.superimpose
           	end
           	return c & mask
           end
          104  +
          105  +local ranuirAlpha = {0xe39d, 0xe39f, 0xe3ad, 0xe3af, 0xe3b5, 0xe3b7, 0xe3b9, 0xe3bb, 0xe3bd, 0xe3be, 0xe3bf, 0xe3c5, 0xe3c7, 0xe3c9, 0xe3cb, 0xe3cc, 0xe3cd, 0xe3ce, 0xe3cf}
          106  +local ranuirSpecial = {
          107  +	[0xe390] = basictype.space | props.wordsep;
          108  +}
          109  +
          110  +local ranuir = {}
          111  +for _,v in pairs(ranuirAlpha) do ranuir[v] = basictype.alpha end
          112  +for k,v in pairs(ranuirSpecial) do ranuir[k] = v end
          113  +local ranuirKeys = {}
          114  +for k in pairs(ranuir) do table.insert(ranuirKeys, k) end
          115  +table.sort(ranuirKeys)
          116  +
          117  +local recs = {}
          118  +local ranuirok = false
          119  +for ln in file:lines() do
          120  +	local v = {}
          121  +	for s in ln:gmatch('[^;]*') do
          122  +		table.insert(v, s)
          123  +	end
          124  +	v[1] = tonumber(v[1],0x10)
          125  +	if v[1] > 0x7f then -- discard ASCII, we already have that
          126  +		local code = {
          127  +			codepoint = v[1];
          128  +			name = v[2];
          129  +			class = v[3];
          130  +			kind = v[5];
          131  +		}
          132  +		code.cat = parsecat(code)
          133  +
          134  +		if (not ranuirok) and code.codepoint > 0xe390 then
          135  +			for _,ri in pairs(ranuirKeys) do
          136  +				table.insert(recs, {
          137  +					codepoint = ri;
          138  +					cat = ranuir[ri];
          139  +				})
          140  +			end
          141  +			ranuirok = true
          142  +		end
          143  +
          144  +		if code.cat ~= 0 then
          145  +			table.insert(recs,code)
          146  +		end
          147  +	end
          148  +end
          149  +
          150  +
          151  +local ranges = {}
          152  +local last = recs[1]
          153  +local start = last
          154  +local altern = false
          155  +local flush = function(i)
          156  +	local new = {start.codepoint, last.codepoint, last.cat}
          157  +	if altern then
          158  +		new[3] = new[3] | props.upper | props.lower
          159  +	end
          160  +	table.insert(ranges, new)
          161  +	altern = false
          162  +end
          163  +for i, r in ipairs(recs) do
          164  +	if r.cat ~= last.cat then
          165  +	-- we can massively compactify this set with one weird trick:
          166  +	-- most non-ascii cased character sets are not in AAAAaaaa,
          167  +	-- but rather AaAaAa order. so we can look for this simple
          168  +	-- pattern and compress it, shaving c. 1/3rd off our dataset
          169  +		local ambi = props.upper | props.lower
          170  +		if (altern or (start == last and (last.cat & props.upper) ~= 0)) and
          171  +			((r.cat &~ ambi) == (last.cat &~ ambi)) then
          172  +			altern = true
          173  +			last = r
          174  +		else
          175  +			flush()
          176  +			start = r
          177  +		end
          178  +	elseif altern then
          179  +		flush()
          180  +		start = r
          181  +	end
          182  +	last = r
          183  +end
          184  +flush()
          185  +
          186  +-- expand bitmask
          187  +	-- for k,v in pairs(ranges) do
          188  +	-- 	local basic = v[3] & ((1<<3) - 1) -- first three bits
          189  +	-- 	if basic ~= 0 then
          190  +	-- 		v[4] = basictype[basic]
          191  +	-- 	end
          192  +	-- 	local bitrange = props[true]
          193  +	-- 	for j=bitrange[1], bitrange[2] do
          194  +	-- 		if (v[3] & (1<<j)) ~= 0 then
          195  +	-- 			table.insert(v, props[1<<j])
          196  +	-- 		end
          197  +	-- 	end
          198  +	-- end
          199  +
          200  +-- the data has been collected and formatted in the manner we
          201  +-- need; now we just need to emit it as a lua table
          202  +
          203  +local tab = {}
          204  +local top = 1
          205  +for k,v in pairs(ranges) do
          206  +	tab[top] = string.format('{0x%x,0x%x,%u}',table.unpack(v))
          207  +	top = top + 1
          208  +end
          209  +io.stdout:write(string.format(tpl, table.concat(tab,',')))