cortav  Check-in [52b9bce7dd]

Overview
Comment:all kindsa shit
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA3-256: 52b9bce7dd9317730dfccf2eefd17494b43d4f37936044c542ad0dce7a9d6b92
User & Date: lexi on 2021-12-26 04:08:02
Other Links: manifest | tags
Context
2021-12-26
17:49
get math parser working check-in: d1b7d2fd5f user: lexi tags: trunk
04:08
all kindsa shit check-in: 52b9bce7dd user: lexi tags: trunk
2021-12-22
10:23
fix bugged makefile check-in: 36024a43c5 user: lexi tags: trunk
Changes

Modified cli.lua from [a9857f9cb6] to [ad6ab18d31].

3
4
5
6
7
8
9
10
11


12
13
14
15
16
17
18
..
70
71
72
73
74
75
76





77
78
79
80
81
82
83
84
85
86
87
88
89



90


91
92
93
94
95
96
97
...
117
118
119
120
121
122
123
124





125
126
127
128
129
130
131
...
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
...
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215

-- baseline mode settings: documents are rendered as html, and stylesheet
-- generation for the html backend is switched on
local default_mode = {
	["render:format"]   = "html",
	["html:gen-styles"] = true,
}

local function
main(input, output, log, mode, suggestions, vars)
	local doc = ct.parse(input.stream, input.src, mode)


	input.stream:close()
	if mode['parse:show-tree'] then
		log:write(ss.dump(doc))
	end

	-- the document has now had a chance to give its say; if it hasn't specified
	-- any modes of its own, we now merge in the 'weak modes' (suggestions)
................................................................................
			['mode-set'] = 1;
			['mode-clear'] = 1;
			mode = 2;

			['mode-set-weak'] = 1;
			['mode-clear-weak'] = 1;
			['mode-weak'] = 2;





		}
		return param_opts[o] or 0
	end

	-- maps each single-character switch onto the long option it abbreviates;
	-- the argument loop looks switches up here and then dispatches the long
	-- option by name
	local optmap = {
		o = 'out', l = 'log', d = 'define',
		V = 'version', h = 'help',
		-- mode switches: the uppercase letter selects the "-weak" variant
		y = 'mode-set',   Y = 'mode-set-weak',
		n = 'mode-clear', N = 'mode-clear-weak',
		m = 'mode',       M = 'mode-weak',



	}



	-- validates a mode key and hands it back unchanged. a key is accepted when
	-- it contains one or more non-colon characters, then a colon, then at
	-- least one further character (the pattern is not anchored); anything else
	-- raises a cli exception.
	local function checkmodekey(key)
		local wellformed = key:match '[^:]+:.+'
		if wellformed then return key end
		ct.exns.cli('invalid mode key %s', key):throw()
		return key
	end
................................................................................
		mode = function(key,value) mode[checkmodekey(key)] = value end;
		['mode-set'] = function(key) mode[checkmodekey(key)] = true end;
		['mode-clear'] = function(key) mode[checkmodekey(key)] = false end;

		['mode-weak'] = function(key,value) suggestions[checkmodekey(key)] = value end;
		['mode-set-weak'] = function(key) suggestions[checkmodekey(key)] = true end;
		['mode-clear-weak'] = function(key) suggestions[checkmodekey(key)] = false end;






		['version'] = function()
			outp:write(ct.info:about())
			if next(ct.ext.loaded) then
				outp:write('\nactive extensions:\n')
				for k,v in pairs(ct.ext.loaded) do
					outp:write(string.format(' * %s', v.id ..
						(v.version and (' ' .. v.version:string()) or '')))
................................................................................
			keepParsing = false
		else
			local longopt = v:match '^%-%-(.+)$'
			if keepParsing and longopt then
				execLongOpt(longopt)
			else
				if keepParsing and v:sub(1,1) == '-' then
					for c,p in ss.str.enc.utf8.each(v:sub(2)) do
						if optmap[c] then
							execLongOpt(optmap[c])
						else
							ct.exns.cli('switch -%s unrecognized', c):throw()
						end
					end
				else
................................................................................
	if args[1] and args[1] ~= '' then
		local file = io.open(args[1], "rb")
		if not file then error('unable to load file ' .. args[1]) end
		input.stream = file
		input.src.file = args[1]
	end

	return main(input, outp, log, mode, suggestions, vars)
end

local ok, e = pcall(entry_cli)
-- local ok, e = true, entry_cli()
if not ok then
	local str = 'translation failure'
	if ss.exn.is(e) then
		str = e.kind.desc
	end
	local color = false
	if log:seek() == nil then







|
|
>
>







 







>
>
>
>
>













>
>
>

>
>







 







<
>
>
>
>
>







 







|







 







|


|
|







3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
..
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
...
129
130
131
132
133
134
135

136
137
138
139
140
141
142
143
144
145
146
147
...
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
...
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231

-- baseline mode settings: documents are rendered as html, and stylesheet
-- generation for the html backend is switched on
local default_mode = {
	["render:format"]   = "html",
	["html:gen-styles"] = true,
}

local function
main(input, output, log, mode, suggestions, vars, extrule)
	local doc = ct.parse(input.stream, input.src, mode, function(c)
		                     c.doc.ext = extrule
	                     end)
	input.stream:close()
	if mode['parse:show-tree'] then
		log:write(ss.dump(doc))
	end

	-- the document has now had a chance to give its say; if it hasn't specified
	-- any modes of its own, we now merge in the 'weak modes' (suggestions)
................................................................................
			['mode-set'] = 1;
			['mode-clear'] = 1;
			mode = 2;

			['mode-set-weak'] = 1;
			['mode-clear-weak'] = 1;
			['mode-weak'] = 2;
			['use'] = 1;
			['inhibit'] = 1;
			['need'] = 1;
			['load'] = 1;
			['enc'] = 1;
		}
		return param_opts[o] or 0
	end

	-- maps each single-character switch onto the long option it abbreviates;
	-- the argument loop looks switches up here and then dispatches the long
	-- option by name
	local optmap = {
		o = 'out', l = 'log', d = 'define',
		V = 'version', h = 'help',
		-- mode switches: the uppercase letter selects the "-weak" variant
		y = 'mode-set',   Y = 'mode-set-weak',
		n = 'mode-clear', N = 'mode-clear-weak',
		m = 'mode',       M = 'mode-weak',
		-- extension switches
		L = 'load',
		u = 'use', i = 'inhibit', r = 'require',
		e = 'enc',
	}

	local extrule = {use={},inhibit={},need={}}

	-- validates a mode key and hands it back unchanged. a key is accepted when
	-- it contains one or more non-colon characters, then a colon, then at
	-- least one further character (the pattern is not anchored); anything else
	-- raises a cli exception.
	local function checkmodekey(key)
		local wellformed = key:match '[^:]+:.+'
		if wellformed then return key end
		ct.exns.cli('invalid mode key %s', key):throw()
		return key
	end
................................................................................
		mode = function(key,value) mode[checkmodekey(key)] = value end;
		['mode-set'] = function(key) mode[checkmodekey(key)] = true end;
		['mode-clear'] = function(key) mode[checkmodekey(key)] = false end;

		['mode-weak'] = function(key,value) suggestions[checkmodekey(key)] = value end;
		['mode-set-weak'] = function(key) suggestions[checkmodekey(key)] = true end;
		['mode-clear-weak'] = function(key) suggestions[checkmodekey(key)] = false end;

		['use'    ] = function(ext) extrule.use    [ext] = true end;
		['inhibit'] = function(ext) extrule.inhibit[ext] = true end;
		['require'] = function(ext) extrule.need   [ext] = true end;
		['load'] = function(extpath) end;
		['enc'] = function(enc) end;
		['version'] = function()
			outp:write(ct.info:about())
			if next(ct.ext.loaded) then
				outp:write('\nactive extensions:\n')
				for k,v in pairs(ct.ext.loaded) do
					outp:write(string.format(' * %s', v.id ..
						(v.version and (' ' .. v.version:string()) or '')))
................................................................................
			keepParsing = false
		else
			local longopt = v:match '^%-%-(.+)$'
			if keepParsing and longopt then
				execLongOpt(longopt)
			else
				if keepParsing and v:sub(1,1) == '-' then
					for c,p in ss.str.each(ss.str.enc.utf8, v:sub(2)) do
						if optmap[c] then
							execLongOpt(optmap[c])
						else
							ct.exns.cli('switch -%s unrecognized', c):throw()
						end
					end
				else
................................................................................
	if args[1] and args[1] ~= '' then
		local file = io.open(args[1], "rb")
		if not file then error('unable to load file ' .. args[1]) end
		input.stream = file
		input.src.file = args[1]
	end

	return main(input, outp, log, mode, suggestions, vars, extrule)
end

-- run the CLI entry point in protected mode so that a failure lands in the
-- error-reporting branch below instead of aborting with a raw lua error.
-- when a raw stack trace is wanted while debugging, temporarily swap in the
-- commented-out line (it forces ok to true, so the branch below never runs).
local ok, e = pcall(entry_cli)
-- local ok, e = true, entry_cli()
if not ok then
	local str = 'translation failure'
	if ss.exn.is(e) then
		str = e.kind.desc
	end
	local color = false
	if log:seek() == nil then

Modified cortav.ct from [c71fe3a9e8] to [5df14cacc3].

4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61

62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79



80
81



82


83


84
85
86
87
88
89
90


91
92
93
94
95



96
97
98
99
100
101
102






103



104
105
106


































































107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123









































































































124
125
126
127
128

129
130
131






132
133
134
135
136
137
138
139
140




141
142
143
144

145
146
147
148
149
150
151
152
153
...
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202


203
204
205
206
207
208
209
210
211
212
213
214

215






216




217
218






219








220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
...
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
...
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338













339
340
341
342
343
344
345
346
347
348
349
350

351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368

369
370
371
372
373
374
375
376
377
378
379

380
381
382
383
384
385
386
387
388
389
390




































391
392
393
394
395
396
397
398
































































399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419


	dict: http://ʞ.cc/fic/spirals/glossary

the cortav [!format] can be called [!cortavgil], or [!gil cortavi], to differentiate it from the reference implementation [!cortavsir] or [!sir cortavi].

%toc

## cortav vs. markdown
the most important difference between cortav and markdown is that cortav is strictly line-oriented. this choice was made to ensure that cortav was relatively easy to parse. so while a simple [$.ct] file may look a bit like a [$.md] file, in reality it's a lot closer to gemtext than any flavor of markdown.

## encoding
a cortav document is made up of a sequence of codepoints. UTF-8 must be supported, but other encodings (such as UTF-32 or C6B) may be supported as well. lines will be derived by splitting the codepoints at the linefeed character or equivalent. note that unearthly encodings like C6B or EBCDIC will need to select their own control sequences.

## file type
a cortav source file is identified using a file extension, file type, and/or magic byte sequence.

three file extensions are defined as identifying a cortav source file. where relevant, all must be recognized as indicating a cortav source file.
* [$ct] is the shorthand extension
* [$cortav] is the canonical disambiguation extension, for use in circumstances where [$*.ct] is already defined to mean a different file format.
* [$] is the canonical Corran extension, a byte sequence comprising the unicode codepoints [$U+E3CE U+E3BD]. where the filesystem in question does not specify a filename encoding, the bytes should be expressed in UTF-8.

three more extensions are reserved for identifying a cortav intent file.
* [$ctc] is the shorthand extension
* [$cortavcun] is the canonical disambiguation extension
* [$] is the canonical Corran extension, a byte sequence comprising the unicode codepoints [$U+E3CE U+E3BD U+E3CE]. where the filesystem in question does not specify a filename encoding, the bytes should be expressed in UTF-8.

on systems which use metadata to encode filetype, two values are defined to identify cortav source files
* [$text/x-cortav] should be used when strings or arbitrary byte sequences are supported
* [$CTAV] (that is, the byte sequence [$0x43 0x54 0x41 0x56]) should be used on systems that support only 32-bit file types/4-character type codes like Classic Mac OS.

two more values are defined to identify cortav intent files.
* [$text/x-cortav-intent] 
* [$CTVC] (the byte sequence [$0x43 0x54 0x56 0x43])

on systems which do not define a canonical way of encoding the filetype but support extended attributes of some kind, such as linux, an attribute named [$mime] may be created and given the value [$text/x-cortav] or [$text/x-cortav-intent]; alternatively, extensions may be used.

it is also possible to indicate the nature of a cortav file without using filesystem metadata. this is done by prefixing the file with a magic byte sequence. the sequence used depends on the encoding.
* for UTF-8 and ASCII, [$%ct[!\\n]] (that is, the byte sequence [$0x25 0x63 0x74 0x0A]) should be used
* for C6B, the file should begin with the word [$] (that is, the byte sequence [$0x03 0x07 0x3E 0x2D]).
consequently, this sequence should be ignored by a cortav parser at the start of a file (except as an indication of file format).

for FreeDesktop-based systems, the [$velartrill-cortav.xml] file included in the repository supplies mappings for the extensions and magic byte sequences. a script is also included which can be registered with xdg-open so that double-clicking on a cortav file will render it out and open it in your default web browser.

## structure
cortav is based on an HTML-like block model, where a document consists of sections, which are made up of blocks, which may contain a sequence of spans. flows of text are automatically conjoined into spans, and blocks are separated by one or more newlines. this means that, unlike in markdown, a single logical paragraph [*cannot] span multiple ASCII lines. the primary purpose of this was to ensure ease of parsing, but also, both markdown and cortav are supposed to be readable from within a plain text editor. this is the 21st century. every reasonable text editor supports soft word wrap, and if yours doesn't, that's entirely your own damn fault.

the first character(s) of every line (the "control sequence") indicates the role of that line. if no control sequence is recognized, the sequence [$.] is implied instead. the standard line classes and their associated control sequences are listed below. some control sequences have alternate forms, in order to support modern, readable unicode characters as well as plain ascii text.

* paragraphs (. ¶ ❡): a paragraph is a simple block of text. the period control sequence is only necessary if the paragraph text begins with something that would otherwise be interpreted as a control sequence.
* newlines (\\): inserts a line break into previous paragraph and attaches the following text. mostly useful for poetry or lyrics.
* section starts (# §): starts a new section. all sections have an associated depth, determined by the number of sequence repetitions (e.g. "###" indicates depth three). sections may have headers and IDs; both are optional. IDs, if present, are a sequence of raw-text immediately following the hash marks. if the line has one or more space characters followed by styled-text, a header will be attached. the character immediately following the hashes can specify a particular type of section. e.g.:
** [$#] is a simple section break.
** [$#anchor] opens a new section with the ID [$anchor].
** [$# header] opens a new section with the title "header".
** [$#anchor header] opens a new section with both the ID [$anchor] and the title "header".
** [$#>conversation] opens a blockquote section named [$conversation] without a header.
** [$#^id] opens a footnote section for the multiline footnote [$id]. the ID must be specified.
** [$#$id] opens the multiline macro [$id]. the ID must be specified.
** [$#&id mime] opens a new inline object [$id] of type [$mime]. useful for embedding SVGs. the ID and mime type must be specified.

* lists (* :): these are like paragraph nodes, but list nodes that occur next to each other will be arranged so as to show they compose a sequence. depth is determined by the number of stars/colons. like headers, a list entry may have an ID that can be used to refer back to it; it is indicated in the same way. if colons are used, this indicates that the order of the items is significant. :-lists and *-lists may be intermixed; however, note that only the last character in the sequence actually controls the depth type.
* directives (%): a directive issues a hint to the renderer in the form of an arbitrary string. directives are normally ignored if they are not supported, but you may cause a warning to be emitted where the directive is not supported with [$%!] or mark a directive critical with [$%!!] so that rendering will entirely fail if it cannot be parsed.
* comments (%%): a comment is a line of text that is simply ignored by the renderer. 
* asides (!): indicates text that diverges from the narrative, and can be skipped without interrupting it. think of it like block-level parentheses. asides which follow one another are merged as paragraphs of the same aside, usually represented as a sort of box. if the first line of an aside contains a colon, the stretch of styled-text from the beginning of the aside to the colon will be treated as a "type heading," e.g. "Warning:"
* code (~~~): a line beginning with ~~~ begins or terminates a block of code. the opening line should look like one of the below
** [$~~~]
** [$~~~ language] (markdown-style shorthand syntax)
** [$~~~ \[language\] ~~~] (cortav syntax)
** [$~~~ \[language\] #id ~~~]
** [$~~~ title ~~~]
** [$~~~ title \[language\] ~~~]
** [$~~~ \[language\] title ~~~]
** [$~~~ title \[language\] #id ~~~]
* reference (tab): a line beginning with a tab is treated as a "reference." references hold out-of-line metadata for preceding text like links and footnotes. a reference consists of an identifier followed by a colon and an arbitrary number of spaces or tabs, followed by text. whether this text is interpreted as raw-text or styled-text depends on the context in which the reference is used.
* quotation (<): a line of the form [$<[!name]> [!quote]] denotes an utterance by [$name].
* blockquote (>): alternate blockquote syntax. can be nested by repeating the control sequence.
* subtitle (--): attaches a subtitle to the previous header
* embed (&): embeds a referenced object. can be used to show images or repeat previously defined objects like lists or tables, optionally with a caption.



** &myimg All that remained of the unfortunate blood magic pageant contestants and audience (police photo)
** &$mymacro arg 1|arg 2|arg 3



* break (---): inserts a horizontal rule or other context break; does not end the section. must be followed by newline.


* table cells (+ |): see [>ex.tab table examples].



## styled text
most blocks contain a sequence of spans. these spans are produced by interpreting a stream of [*styled-text] following the control sequence. styled-text is a sequence of codepoints potentially interspersed with escapes. an escape is formed by an open square bracket [$\[] followed by a [*span control sequence], and arguments for that sequence like more styled-text. escapes can be nested.

* strong \[*[!styled-text]\]: causes its text to stand out from the narrative, generally rendered as bold or a brighter color.
* emphatic \[![!styled-text]\]: indicates that its text should be spoken with emphasis, generally rendered as italics
* literal \[$[!styled-text]\]: indicates that its text is a reference to a literal sequence of characters, variable name, or other discrete token. generally rendered in monospace


* strikeout \[~[!styled-text]\]: indicates that its text should be struck through or otherwise indicated for deletion
* insertion \[+[!styled-text]\]: indicates that its text should be indicated as a new addition to the text body. 
** consider using a macro definition [$\edit: [~[#1]][+[#2]]] to save typing if you are doing editing work
* link \[>[!ref] [!styled-text]\]: produces a hyperlink or cross-reference denoted by [$ref], which may be either a URL specified with a reference or the name of an object like an image or section elsewhere in the document. the unicode characters [$→] and [$🔗] can also be used instead of [$>] to denote a link.
* footnote \[^[!ref] [!styled-text]\]: annotates the text with a defined footnote



* raw \[\\[!raw-text]\]: causes all characters within to be interpreted literally, without expansion. the only special characters are square brackets, which must have a matching closing bracket
* raw literal \[$\\[!raw-text]\]: shorthand for [\[$[\…]]]
* macro \{[!name] [!arguments]\}: invokes a [>ex.mac macro], specified with a reference
* argument \[#[!var]\]: in macros only, inserts the [$var]-th argument. otherwise, inserts a context variable provided by the renderer.
* raw argument \[##[!var]\]: like above, but does not evaluate [$var].
* term \[&[!name] ([!label])\]: quotes a defined term with a link to its definition
* inline image \[&@[!name]\]: shows a small image or other object inline. the unicode character [$🖼] can also be used instead of [$&@].










## identifiers
any identifier (including a reference) that is defined within a named section must be referred to from outside that section as [$[!sec].[!obj]], where [$sec] is the ID of the containing section and [$obj] is the ID of the object one wishes to reference.



































































## context variables
context variables are provided so that cortav renderers can process templates. certain context variables are provided for by the standard. you can test for the presence of a context variable with the directive [$%[*when] ctx [!var]].

* {def cortav.file} the name of the file currently being rendered
* {def cortav.path} the absolute path of the file currently being rendered
* {def cortav.time} the current system time in the form [$[#cortav.time]]
* {def cortav.date} the current system date in the form [$[#cortav.date]]
* {def cortav.datetime} the current system date and time represented in the locale or system-standard manner (e.g. [$[#cortav.datetime]])
* {def cortav.page} the number of the page currently being rendered
* {def cortav.id} the identifier of the renderer
* {def cortav.hash} the SHA3 hash of the source file being rendered
	def: [*[#1]]:

on systems with environment variables, these may be accessed as context variables by prefixing their name with [$env.].

different renderers may provide context in different ways, such as from command line options or a context file. any predefined variables should carry an appropriate prefix to prevent conflation. 










































































































## directives
	d: [$%[*[##1]]]
* {d author} encodes document authorship
* {d cols} specifies the number of columns the next object should be rendered with
* {d include} transcludes another file

* {d quote} transcludes another file, without expanding the text except for paragraphs 
* {d embed}, where possible, embeds another file as an object within the current one. in HTML this could be accomplished with e.g. an iframe.
* {d expand} causes the next object (usually a code block) to be fully expanded when it would otherwise not be






* {d pragma} supplies semantic data about author intent, the kind of information the document contains, and hints about how it should be displayed to the user. think of them like offhand remarks to the renderer -- there's no guarantee that it'll pay any attention, but if it does, your document will look better. pragmas have no scope; they affect the entire document. the pragma function exists primarily as a means to allow parameters that would normally need to be specified on e.g. the command line to be encoded in the document instead in a way that multiple implementations can understand. a few standard pragmas are defined.
** {d pragma layout} gives a hint on how the document should be laid out. the first hint that is understood will be applied; all others will be discarded. standard hints include:
*** essay
*** narrative
*** screenplay: uses asides to denote actions, quotes for dialogue
*** stageplay: uses asides to denote actions, quotes for dialogue
*** manual
*** glossary
*** news




** {d pragma accent} specifies an accent hue (in degrees around the color wheel) for renderers which support colorized output
** {d pragma accent-spread} is a factor that controls the "spread" of hues used in the document. if 0, only the accent color will be used; if larger, other hues will be used in addition to the primary accent color.
** {d pragma dark-on-light on|off} controls whether the color scheme used should be light-on-dark or dark-on-light
** {d pragma page-width} indicates how wide the pages should be


! note on pragmas: particularly when working with collections of documents, you should not keep formatting metadata in the documents themselves! the best thing to do is to have a makefile for compiling the documents using whatever tools you want to support, and encoding the rendering options in this file (for the reference implementation this currently means as command line arguments, but eventually it will support intent files as well) so they can all be changed in one place; pragmas should instead be used for per-document [*overrides] of default settings.
! a workaround for the lack of intent files in the reference implementation is to have a single pseudo-stylesheet that contains only {d pragma} statements, and then import this file from each individual source file using the {d include} directive. this is suboptimal and recommended only when you need to ensure compatibility between different implementations.
! when creating HTML files, an even better alternative may be to turn off style generation entirely and link in an external, hand-written CSS stylesheet. this is generally the way you should compile sources for existing websites if you aren't going to write your own extension.

##ex examples

~~~ blockquotes #bq [cortav] ~~~
the following excerpts of text were recovered from a partially erased hard drive found in the Hawthorne manor in the weeks after the Incident. context is unknown.
................................................................................

+:english  :| honor |
+:ranuir   :| tef   |
+:zia ţai  :| pang  |
+:thalishte:| mbecheve |
~~~

## extensions
the cortav specification also specifies a number of extensions that do not have to be supported for a renderer to be compliant. the extension mechanism supports the following directives.

* inhibits: prevents an extension from being used even where available
* uses: turns on an extension that is not specified by the user operating the renderer (e.g. on the command line)
* needs: causes rendering to fail with an error if the extensions are not available

where possible, instead of [$needs x y z], the directive [$when has-ext x y z] should be used instead. this causes the next section to be rendered only if the named extensions are available. [$unless has-ext x y z] can be used to provide an alternative format.

extensions are mainly interacted with through directives. all extension directives must be prefixed with the name of the extension.



### toc
sections that have a title will be included in the table of contents. the table of contents is by default inserted at the break between the first level-1 section and the section immediately following it. you may instead place the directive [$toc] where you wish the TOC to be inserted, or suppress it entirely with [$inhibits toc]. note that some renderers may not display the TOC as part of the document itself.

toc provides the directives:

* [$%[*toc]]: insert a table of contents in the specified position. this can be used more than once, but doing so may have confusing, incorrect, or nonsensical results under some renderers, and some may just ignore the directive entirely
* [$%[*toc] mark [!styled-text]]: inserts a TOC entry with the label [!styled-text]  pointing to the current location. this can be used to e.g. mark noteworthy images, instances of long quotes or literal blocks, or functions inside an expanded code block.
* [$%[*toc] name [!id styled-text]]: like [$%[*toc] mark] but allows an additional [!id] parameter which specifies the ID the renderer will assign to an anchor element. this is not meaningful for all renderers and when it is, it is up to the renderer to decide what it means.
** the [*html] render backend interprets [!id] as the [$id] element for the anchor tag
** the [*groff] render backend ignores [!id]

### smart-quotes

a cortav renderer may automatically translate punctuation marks to other punctuation marks depending on their context. 











### hilite
code can be highlighted according to the formal language it is written in.















### lua
renderers with a lua interpreter available can evaluate lua code:
* [$%lua use [!file]]: evaluates [$file] and makes its definitions available
* [$\[%lua raw [!script]\]]: evaluates [$script] and emits the string it returns (if any) in raw span context.
* [$\[%lua exp [!script]\]]: evaluates [$script] and emits the string it returns (if any) in expanded span context.
* [$%lua raw [!script]]: evaluates [$script] and emits the string array it returns (if any) in raw block context.
* [$%lua exp [!script]]: evaluates [$script] and emits the string array it returns (if any) in expanded block context.

the interpreter should provide a [$cortav] table with the objects:
* ctx: contains context variables

used files should return a table with the following members
* macros: an array of functions that return strings or arrays of strings when invoked. these will be injected into the global macro namespace.

### ts
the [*ts] extension allows documents to be marked up for basic classification constraints and automatically redacted. if you are seriously relying on ts for confidentiality, make damn sure you start the file with [$%[*requires] ts], so that rendering will fail with an error if the extension isn't supported.

ts enables the directives:
* [$ts class [!scope] [!level] (styled-text)]: indicates a classification level for either the whole document (scope [!doc]) or the next section (scope [!sec]). if the ts level is below [$level], the section will be redacted or rendering will fail with an error, as appropriate. if styled-text is included, this will be treated as the name of the classification level.
* [$ts word [!scope] [!word] (styled-text)]: indicates a codeword clearance that must be present for the text to render. if styled-text is present, this will be used to render the name of the codeword instead of [$word].
* [$when ts level [!level]]
* [$when ts word [!word]]

ts enables the spans:
* [$\[🔒#[!level] [!styled-text]\]]: redacts the span if the security level is below that specified.
* [$\[🔒.[!word] [!styled-text]\]]: redacts the span if the specified codeword clearance is not enabled.
(the padlock emoji is shorthand for [$%ts].)

ts redacts spans securely; that is, they are simply replaced with an indicator that they have been redacted, without visually leaking the length of the redacted text.

~~~#ts-example example [cortav] ~~~
%ts word doc sorrowful-pines SORROWFUL PINES

# intercept R1440 TCT S3
................................................................................
<B> Hyacinth, I told you not to contact me without—
<A, shouting> god DAMMIT woman I am trying to SAVE your worthless skin
<B> Hyacinth! your Godforsaken scrambler!
<A> …oh, [!fuck].
(signal lost)
~~~

# reference implementation
the cortav standard is implemented in [$cortav.lua], found in this repository. only the way [$cortav.lua] interprets the cortav language is defined as a reference implementation; other behaviors are simply how [$cortav.lua] implements the specification and may be copied, ignored, tweaked, violently assaulted, or used as inspiration by a compliant parser.

the reference implementation can be used both as a lua library and from the command line. [$cortav.lua] contains the parser and renderers, [$ext/*] contain various extensions, [$sirsem.lua] contains utility functions, and [$cli.lua] contains the CLI driver.

## lua library
there are various ways to use cortav from a lua script; the simplest however is probably to precompile your script with luac and link in the necessary components of the implementation. for instance, say we have the following program

~~~ stdin2html.lua [lua] ~~~
local ct = require 'cortav'
local mode = {}
local doc = ct.parse(io.stdin, {file = '(stdin)'}, mode)
doc.stage = {
................................................................................

and the only extension we need is the table-of-contents extension. our script can be translated into a self-contained lua bytecode blob with the following command

~~~
$ luac -s -o stdin2html.lc $cortav_repo/{sirsem,cortav,ext/toc}.lua stdin2html.lua
~~~

and can then be operated with the command [$lua stdin2html.lc], with no further need for the cortav repository files. note that the order of the [$luac] command is important! [$sirsem.lua] must come first, followed by [$cortav.lua], followed by any extensions. your driver script (i.e. the script with the entry point into the application) should always come last.

### building custom tools
generally, most existing file-format conversion tools (cmark, pandoc, and so on) have a crucial limitation: they hardcode specific assumptions like document structure. this means that the files they output are generally not suitable as-is for the users' purposes, and require further munging, usually by hateful shell or perl scripts. some tools do provide libraries for end users to use as a basis for designing their own tools, but these are often limited, and in any case the user ends up having to write their own (non-standard) driver. it's no surprise that very few people end up doing this.

[$cortav.lua]'s design lends itself to a more elegant solution. one can of course write their own driver using [$cortav] as a library, but most of the time when you're compiling document sources, you just want a binary you can run from the command line or a makefile. with [$cortav.lua], you can extend its capabilities easily while keeping the same driver.

in the [$cortav] spec, extensions are mostly intended to give different implementations the ability to offer extra capabilities, but the reference implementation uses an extension architecture that makes it easy to write and add your own. for each type of new behavior you want to implement, just create a new extension and list it on the make command line:

~~~
$ nvim ~/dev/my-cortav-exts/imperial-edict.lua
$ make cortav extens+=$HOME/dev/my-cortav-exts/*.lua
~~~

the cortav binary this produces will have all the extra capabilities you personally need, without any need to fork [$cortav.lua] itself or even touch the repository.

there's no reason [$cortav.lua] shouldn't be able to load extensions at runtime as well; i just haven't implemented this behavior yet. it probably would only take a few extra lines of code tho.

i will eventually document the extension API, but for now, look at [$ext/toc.lua] for a simple example of how to register an extension.

## command line driver
the [$cortav.lua] command line driver can be run from the repository directory with the command [$lua ./cli.lua], or by first compiling it into a bytecode form that links in all its dependencies. this is the preferred method for installation, as it produces a self-contained executable which loads more quickly, but running the driver in script form may be desirable for development or debugging.

the repository contains a GNU makefile to automate compilation of the reference implementation on unix-like OSes. simply run [$$ make cortav] or [$$ gmake cortav] from the repository root to produce a self-contained bytecode executable that can be installed anywhere on your filesystem, with no dependencies other than the lua interpreter.

! note that the makefile strips debugging symbols to save space, so running [$cli.lua] directly as a script may be helpful if you encounter errors and need stacktraces or other debugging information.

henceforth it will be assumed that you have produced the [$cortav] executable and placed it somewhere in your [$$PATH]; if you are instead running [$cortav.lua] directly as an interpreted script, you'll need to replace [$$ cortav] with [$$ lua ./cli.lua] in incantations.

when run without commands, [$cortav.lua] will read input from standard input and write to standard output. alternately, a source file can be given as an argument. to write to a specific file instead of the standard output stream, use the [$-o [!file]] flag.

~~~
$ cortav readme.ct -o readme.html
	# reads from readme.ct, writes to readme.html
$ cortav -o readme.html
	# reads from standard input, writes to readme.html
$ cortav readme.ct
	# reads from readme.ct, writes to standard output
~~~














### switches
[$cortav.lua] offers various switches to control its behavior.
+ long                      + short + function                                    +
| [$--out [!file]]              :|:[$-o]:| sets the output file (default stdout)       |
| [$--log [!file]]              :|:[$-l]:| sets the log file (default stderr)          |
| [$--define [!var] [!val]]     :|:[$-d]:| sets the context variable [$var] to [$val]  |
| [$--mode-set [!mode]]         :|:[$-y]:| activates the [>refimpl-mode mode] with ID [!mode]
| [$--mode-clear [!mode]]       :|:[$-n]:| disables the mode with ID [!mode]           |
| [$--mode [!id] [!val]]        :|:[$-m]:| configures mode [!id] with the value [!val] |
| [$--mode-set-weak [!mode]]    :|:[$-Y]:| activates the [>refimpl-mode mode] with ID [!mode] if the source file does not specify otherwise
| [$--mode-clear-weak [!mode]]  :|:[$-N]:| disables the mode with ID [!mode] if the source file does not specify otherwise
| [$--mode-weak [!id] [!val]]   :|:[$-M]:| configures mode [!id] with the value [!val] if the source file does not specify otherwise

| [$--help]                     :|:[$-h]:| display online help                         |
| [$--version]                  :|:[$-V]:| display the interpreter version             |

###refimpl-mode modes
most of [$cortav.lua]'s implementation-specific behavior is controlled by use of [!modes]. these are namespaced options which may have a boolean, string, or numeric value. boolean modes are set with the [$-y] [$-n] flags; other modes use the [$-m] flags.

most modes are defined by the renderer backend. the following modes affect the behavior of the frontend:

+ ID                 + type   + effect
|   [$render:format]:| string | selects the [>refimpl-rend renderer] (default [$html])
| [$parse:show-tree]:| flag   | dumps the parse tree to the log after parsing completes

##refimpl-rend renderers
[$cortav.lua] implements a frontend-backend architecture, separating the parsing stage from the rendering stage. this means new renderers can be added to [$cortav.lua] relatively easily. currently, only an [>refimpl-rend-html HTML renderer] is included; however, a [$groff] backend is planned at some point in the future, so that PDFs and manpages can be generated from cortav files.

###refimpl-rend-html html
the HTML renderer is activated with the incantation [$-m render:format html]. it is currently the default backend. it produces a single HTML file, optionally with CSS styling data, from a [$.ct] input file.


it supports the following modes:

* string (css length) [$html:width] sets a maximum width for the body content in order to make the page more readable on large displays
* number [$html:accent] applies an accent hue to the generated webpage. the hue is specified in degrees, e.g. [$-m html:accent 0] applies a red accent.
* flag [$html:dark-on-light] uses dark-on-light styling, instead of the default light-on-dark
* flag [$html:fossil-uv] outputs an HTML snippet suitable for use with the Fossil VCS webserver. this is intended to be used with the unversioned content mechanism to host rendered versions of documentation written in cortav that's stored in a Fossil repository.
* number [$html:hue-spread] generates a color palette based on the supplied accent hue. the larger the value, the more the other colors diverge from the accent hue.
* string [$html:link-css] generates a document linking to the named stylesheet
* flag [$html:gen-styles] embeds appropriate CSS styles in the document (default on)
* flag [$html:snippet] produces a snippet of html instead of an entire web page. note that proper CSS scoping is not yet implemented (and can't be implemented hygienically since [$scoped] was removed 😢)
* string [$html:title] specifies the webpage titlebar contents (normally autodetected from the document based on headings or directives)


~~~
$ cortav readme.ct --out readme.html \
	-m render:format html \
	-m html:width 40em \
	-m html:accent 80 \
	-m html:hue-spread 35 \
	-y html:dark-on-light # could also be written as:
$ cortav readme.ct -ommmmy readme.html render:format html html:width 40em html:accent 80 html:hue-spread 35 html:dark-on-light
~~~





































## further directions

### additional backends
it is eventually intended to support the following backends, if reasonably practicable.
* [*html]: emit HTML and CSS code to typeset the document. [!in progress]
* [*svg]: emit SVG, taking advantage of its precise layout features to produce a nicely formatted and paginated document. pagination can be accomplished through emitting multiple files or by assigning one layer to each page. [!long term]
* [*groff]: the most important output backend, rivalling [*html]. will allow the document to be typeset in a wide variety of formats, including PDF and manpage. [!short term]
* [*gemtext]: essentially a downrezzing of cortav to make it readable to Gemini clients

































































some formats may eventually warrant their own renderer, but are not a priority:
* [*text]: cortav source files are already plain text, but a certain amount of layout could be done using ascii art.
* [*ansi]: emit sequences of ANSI escape codes to lay out a document in a terminal-friendly way
* [*tex]: TeX is an unholy abomination and i neither like nor use it, but lots of people do and if cortav ever catches on, a TeX backend should probably be written eventually.

PDF is not on either list because it's a nightmarish mess of a format and groff, which is installed on most linux systems already, can easily generate PDFs

### LCH support
right now, the use of color in the HTML renderer is very unsatisfactory. the accent mechanism operates on the basis of the CSS HSL function, which is not perceptually uniform; different hues will present different mixes of brightness and some (yellows?) may be ugly or unreadable.

the ideal solution would be to simply switch to using LCH based colors. unfortunately, only Safari actually supports the LCH color function right now, and it's unlikely (unless Lea Verou and her husband manage to work a miracle) that Colors Level 4 is going to be implemented very widely any time soon.

this leaves us in an awkward position. we can of course do the math ourselves, working in LCH to implement the internal [$@tone] macro, and then "converting" these colors to HSL. unfortunately, you can't actually convert from LCH to HSL; it's like converting from pounds to kilograms. LCH can represent any color the human visual system can perceive; sRGB can't, and CSS HSL is implemented in sRGB. however, we could at least approximate something that would allow for perceptually uniform brightness, which would be an improvement, and this is probably the direction to go in, unless a miracle occurs and [$lch()] or [$color()] pop up in Blink.

it may be possible to do a more reasonable job of handling colors in the postscript and TeX outputs. unsure about SVG but i assume it suffers the same problems HTML/CSS do. does groff even support color??

### intent files
there's currently no standard way to describe the intent and desired formatting of a document besides placing pragmas in the source file itself. this is extremely suboptimal, as when generating collections of documents, it's ideal to be able to keep all formatting information in one place. users should also be able to specify their own styling overrides that describe the way they prefer to read [$cortav] files, especially for uses like gemini or gopher integration.

at some point soon [$cortav] needs to address this by adding intent files that can be activated from outside the source file, such as with a command line flag or a configuration file setting. these will probably consist of lines that are interpreted as pragmata. in addition to the standard intent format however, individual implementations should feel free to provide their own ways to provide intent metadata; e.g. the reference implementation, which has a lua interpreter available, should be able to take a lua script that runs after the parse stage and generates the pragmata to apply. this will be particularly useful for the end-user who wishes to specify a particular format she likes reading her files in without forcing that format on everyone she sends the compiled document to, as it will be able to interrogate the document and make intelligent decisions about what pragmata to apply.









|








|
|
|


|
|
|


|
|


|
|

|


|
|


|




|

|
|
|
|
|
|
|
|
|
<
|
>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
>
>
>
|
<
>
>
>
|
>
>
|
>
>


|

|
|
|
>
>
|
|
|
|
<
>
>
>
|

|
|
|
|
|
>
>
>
>
>
>

>
>
>
|
|

>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
|



|
|
|





|



>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
|
|


>



>
>
>
>
>
>
|

|
|
|
|
|
|
|
>
>
>
>


|

>

|







 







|






|



>
>
|
|



|
|
|
|
|

<
>
|
>
>
>
>
>
>

>
>
>
>
|
|
>
>
>
>
>
>

>
>
>
>
>
>
>
>
|

|
|
|
|
|

|





|



|
|
|
|


|
|
|







 







|
|

|

|







 







|

|


|

|






|

|

|

|
|

|

|

|

|










>
>
>
>
>
>
>
>
>
>
>
>
>
|
|

|
|
|
|
|
|
|
|
<
>
|
|


|




|
|


|


|

>
|

|
|
|
|
|
|
|
|
|
>











>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>








>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>













|




|

|
>
>
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59

60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83

84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105

106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
...
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420

421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
...
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
...
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593

594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
	dict: http://ʞ.cc/fic/spirals/glossary

the cortav [!format] can be called [!cortavgil], or [!gil cortavi], to differentiate it from the reference implementation [!cortavsir] or [!sir cortavi].

%toc

## cortav vs. markdown
the most important difference between cortav and markdown is that cortav is strictly line-oriented. this choice was made to ensure that cortav was relatively easy to parse. so while a simple [`.ct] file may look a bit like a [`.md] file, in reality it's a lot closer to gemtext than any flavor of markdown.

## encoding
a cortav document is made up of a sequence of codepoints. UTF-8 must be supported, but other encodings (such as UTF-32 or C6B) may be supported as well. lines will be derived by splitting the codepoints at the linefeed character or equivalent. note that unearthly encodings like C6B or EBCDIC will need to select their own control sequences.

## file type
a cortav source file is identified using a file extension, file type, and/or magic byte sequence.

three file extensions are defined as identifying a cortav source file. where relevant, all must be recognized as indicating a cortav source file.
* [`ct] is the shorthand extension
* [`cortav] is the canonical disambiguation extension, for use in circumstances where [`*.ct] is already defined to mean a different file format.
* [`] is the canonical Corran extension, a byte sequence comprising the unicode codepoints [`U+E3CE U+E3BD]. where the filesystem in question does not specify a filename encoding, the bytes should be expressed in UTF-8.

three more extensions are reserved for identifying a cortav intent file.
* [`ctc] is the shorthand extension
* [`cortavcun] is the canonical disambiguation extension
* [`] is the canonical Corran extension, a byte sequence comprising the unicode codepoints [`U+E3CE U+E3BD U+E3CE]. where the filesystem in question does not specify a filename encoding, the bytes should be expressed in UTF-8.

on systems which use metadata to encode filetype, two values are defined to identify cortav source files
* [`text/x-cortav] should be used when strings or arbitrary byte sequences are supported
* [`CTAV] (that is, the byte sequence [`0x43 54 41 56]) should be used on systems that support only 32-bit file types/4-character type codes like Classic Mac OS.

two more values are defined to identify cortav intent files.
* [`text/x-cortav-intent]
* [`CTVC] (the byte sequence [`0x43 54 56 43])

on systems which do not define a canonical way of encoding the filetype but support extended attributes of some kind, such as linux, an attribute named [$mime] may be created and given the value [`text/x-cortav] or [`text/x-cortav-intent]; alternatively, extensions may be used.

it is also possible to indicate the nature of a cortav file without using filesystem metadata. this is done by prefixing the file with a magic byte sequence. the sequence used depends on the encoding.
* for UTF-8 and ASCII plain text files, [`%ct[!\\n]] (that is, the byte sequence [`0x25 63 74 0A]) should be used
* for C6B+PS files (parastream), the file should begin with the paragraph [`], which equates to the byte sequence [` 0x3E 2E 14 0C 01 04 00 00 00 03 07 3E 2D], including the parastream header).
consequently, this sequence should be ignored by a cortav parser at the start of a file (except as an indication of file format).

for FreeDesktop-based systems, the [`build/velartrill-cortav.xml] file included in the repository supplies mappings for the extensions and magic byte sequences. a script is also included which can be registered with xdg-open so that double-clicking on a cortav file will render it out and open it in your default web browser. [`$ make install] will generate the necessary FreeDesktop XML files and register them, as well as install the script and the [`cortav] executable itself. for more information see [>refimpl-build building the reference implementation].

## structure
cortav is based on an HTML-like block model, where a document consists of sections, which are made up of blocks, which may contain a sequence of spans. flows of text are automatically conjoined into spans, and blocks are separated by one or more newlines. this means that, unlike in markdown, a single logical paragraph [*cannot] span multiple ASCII lines. the primary purpose of this was to ensure ease of parsing, but also, both markdown and cortav are supposed to be readable from within a plain text editor. this is the 21st century. every reasonable text editor supports soft word wrap, and if yours doesn't, that's entirely your own damn fault.

the first character(s) of every line (the "control sequence") indicates the role of that line. if no control sequence is recognized, the line is treated as a paragraph. the currently supported control sequences are listed below. some control sequences have alternate forms, in order to support modern, readable unicode characters as well as plain ascii text.

* [*paragraphs] ([`.] [` ¶] [`❡]): a paragraph is a simple block of text. the period control sequence is only necessary if the paragraph text starts with text that would be interpreted as a control sequence otherwise
* newlines [` \\]: inserts a line break into the previous paragraph and attaches the following text. mostly useful for poetry or lyrics
* [*section starts] [`#] [`§]: starts a new section. all sections have an associated depth, determined by the number of sequence repetitions (e.g. "###" indicates depth-three). sections may have headers and IDs; both are optional. IDs, if present, are a sequence of raw-text immediately following the hash marks. if the line has one or more space characters followed by styled-text, a header will be attached. the character immediately following the hashes can specify a particular type of section. e.g.:
** [`#] is a simple section break.
** [`#anchor] opens a new section with the ID [`anchor].
** [`# header] opens a new section with the title "header".
** [`#anchor header] opens a new section with both the ID [`anchor] and the title "header".
** [`#>conversation] opens a blockquote section named [`conversation] without a header.
* [*nonprinting sections] ([`^]): sometimes, you'll want to create a namespace without actually adding a visible new section to the document. you can achieve this by creating a [!nonprinting section] and defining resources within it. nonprinting sections can also be used to store comments, notes, or other information that is useful to have in the source file without it becoming a part of the output

** [`#&id mime] opens a new inline object [`id] of type [`mime]. useful for embedding SVGs. the ID and mime type must be specified.
* [*resource] ([`@]): defines a [!resource]. a resource is a file or object that exists outside of the document but which will be included in the document somehow. common examples of resources include images, videos, iframes, or headers/footers. see [>rsrc resources] for more information.
* [*lists] ([`*] [`:]): these are like paragraph nodes, but list nodes that occur next to each other will be arranged so as to show they compose a sequence. depth is determined by the number of stars/colons. like headers, a list entry may have an ID that can be used to refer back to it; it is indicated in the same way. if colons are used, this indicates that the order of the items is significant. :-lists and *-lists may be intermixed; however, note that only the last character in the sequence actually controls the depth type.
* [*directives] ([`%]): a directive issues a hint to the renderer in the form of an arbitrary string. directives are normally ignored if they are not supported, but you may cause a warning to be emitted where the directive is not supported with [`%!] or mark a directive critical with [`%!!] so that rendering will entirely fail if it cannot be parsed.
* [*comments] ([`%%]): a comment is a line of text that is simply ignored by the renderer.
* [*asides] ([`!]): indicates text that diverges from the narrative, and can be skipped without interrupting it. think of it like block-level parentheses. asides which follow one another are merged as paragraphs of the same aside, usually represented as a sort of box. if the first line of an aside contains a colon, the stretch of styled-text from the beginning of the aside to the colon will be treated as a "type heading," e.g. "Warning:"
* [*code] ([`~~~]): a line beginning with ~~~ begins or terminates a block of code. code blocks are by default not parsed, but parsing can be activated by preceding the code block with an [`%[*expand]] directive. the opening line should look like one of the below
** [`~~~]
** [`~~~ language] (markdown-style shorthand syntax)
** [`~~~ \[language\] ~~~] (cortav syntax)
** [`~~~ \[language\] #id ~~~]
** [`~~~ title ~~~]
** [`~~~ title \[language\] ~~~]
** [`~~~ \[language\] title ~~~]
** [`~~~ title \[language\] #id ~~~]
*[*reference] (tab): a line beginning with a tab is treated as a "reference." references hold out-of-line metadata for preceding text like links and footnotes. a reference consists of an identifier followed by a colon and an arbitrary number of spaces or tabs, followed by text. whether this text is interpreted as raw-text or styled-text depends on the context in which the reference is used. in encodings without tab characters, two preceding blanks can be used instead.
* [*quotation] ([`<]): a line of the form [`<[$name]> [$quote]] denotes an utterance by [$name].
* [*blockquote] ([`>]): alternate blockquote syntax. can be nested by repeating the [`>] character.
* [*subtitle] ([`--]): attaches a subtitle to the previous header
* [*embed] ([`&]): embeds a referenced object. can be used to show images or repeat previously defined objects like lists or tables, optionally with a caption.
** [`&$[$macro] [$arg1]|[$arg2]|[$argn]…] invokes a block-level macro with the supplied arguments
*** [`&$mymacro arg 1|arg 2|arg 3]
** [`&[$image]] embeds an image or other block-level object. [!image] can be a reference with a url or file path, or it can be an embed section (e.g. for SVG files)
***[`&myimg All that remained of the unfortunate blood magic pageant contestants and audience (police photo)]

** [`&-[$section]] embeds a closed disclosure element. in interactive outputs, this will display as a block [!section] which can be clicked on to view the full contents of the referenced section; in static outputs, it will display as an enclosed box with [$section] as the title text
*** [`&-ex-a Prosecution Exhibit A (GRAPHIC CONTENT)]
** [`&+[$section]] is like the above, but the disclosure element is open by default
* [*horizontal rule] ([`\---]): inserts a horizontal rule or other context break; does not end the section. must be followed by newline. underlines can also be used in place of dashes.
* [*page break] ([`\^^]): for formats that support pagination, like HTML (when printed), indicates that the rest of the current page should be blank. for formats that do not, extra margins will be inserted. does not create a new section
* [*page rule] ([`\^^-]): inserts a page break for formats that support them, and a horizontal rule for formats that do not. does not create a new section
* [*table cells] ([`+ |]): see [>ex.tab table examples].
* [*equations] ([`=]): block-level equations can be inserted with the [`=] control sequence.
* [*empty lines] (that is, lines consisting of nothing but whitespace) constitute a [!break], which terminates multiline objects that do not have a dedicated termination sequence, for example lists and asides.

## styled text
most blocks contain a sequence of spans. these spans are produced by interpreting a stream of [*styled-text] following the control sequence. styled-text is a sequence of codepoints potentially interspersed with escapes. an escape is formed by an open square bracket [`\[] followed by a [*span control sequence], and arguments for that sequence like more styled-text. escapes can be nested.

* strong {obj *|styled-text}: causes its text to stand out from the narrative, generally rendered as bold or a brighter color.
* emphatic {obj !|styled-text}: indicates that its text should be spoken with emphasis, generally rendered as italics
* literal {obj `|styled-text}: indicates that its text is a reference to a literal sequence of characters or other discrete token. generally rendered in monospace
* variable {obj $|styled-text}: indicates that its text is a stand-in that will be replaced with what it names. generally rendered in italic monospace, ideally of a different color
* underline {obj _|styled-text}: underlines the text. use sparingly on text intended for webpages -- underlined text  [!is] distinct from links, but underlining non-links is still a violation of convention.
* strikeout {obj ~|styled-text}: indicates that its text should be struck through or otherwise indicated for deletion
* insertion {obj +|styled-text}: indicates that its text should be indicated as a new addition to the text body.
** consider using a macro definition [`\edit: [~[#1]][+[#2]]] to save typing if you are doing editing work
* link \[>[!ref] [!styled-text]\]: produces a hyperlink or cross-reference denoted by [$ref], which may be either a URL specified with a reference or the name of an object like an image or section elsewhere in the document. the unicode characters [`→] and [`🔗] can also be used instead of [`>] to denote a link.

* footnote {span ^|ref|[$styled-text]}: annotates the text with a defined footnote. in interactive output media [`\[^citations.qtheo Quantum Theosophy: A Neophyte's Catechism]] will insert a link with the text [`Quantum Theosophy: A Neophyte's Catechism] that, when clicked, causes a footnote to pop up on the screen. for static output media, the text will simply have a superscript integer after it denoting where the footnote is to be found.
* superscript {obj '|[$styled-text]}:
* subscript {obj ,|[$styled-text]}:
* raw \[\\[`raw-text]\]: causes all characters within to be interpreted literally, without expansion. the only special characters are square brackets, which must have a matching closing bracket
* raw literal \[$\\[!raw-text]\]: shorthand for [\[$[\…]]]
* macro [`\{[!name] [!arguments]\}]: invokes a [>ex.mac macro], specified with a reference
* argument {obj #|var}: in macros only, inserts the [$var]-th argument. otherwise, inserts a context variable provided by the renderer.
* raw argument {obj ##|var}: like above, but does not evaluate [$var].
* term {obj &|name}, {span &|name|[$expansion]}: quotes a defined term with a link to its definition, optionally with a custom expansion of the term (for instance, to expand the first use of an acronym)
* inline image {obj &@|name}: shows a small image or other object inline. the unicode character [`🖼] can also be used instead of [`&@].
* unicode codepoint {obj U+|hex-integer}: inserts an arbitrary UCS codepoint in the output, specified by [$hex-integer]. lowercase [`u] is also legal.
* math mode {obj =|equation}: activates additional transformations on the span to format it as a mathematical equation; e.g. [`*] becomes [`×] and [`/] --> [`÷].
* extension {span %|ext|…}: invokes extension named in [$ext]. [$ext] will usually be an extension name followed by a symbol (often a period) and then an extension-specific directive, although for some simple extensions it may just be the plain extension name. further syntax and semantics depend on the extension. this syntax can also be used to apply formatting specific to certain renderers, such as assigning a CSS class in the [`html] renderer ([`\[%html.myclass my [!styled] text]]).
* critical extension {span %!|ext|…}: like [!extension], but will trigger an error if the requested extension is not available
* extension text {span %:|ext|styled-text}: like [!extension], but when the requested extension is not present, [$styled-text] will be emitted as-is. this is a better way to apply CSS classes, as the text will still be visible when rendered to formats other than HTML.
* inline comment {obj %%|...}: ignored. useful for editorial annotations not intended to be part of the rendered product.

	span: [` \[[*[#1]][$[#2]] [#3]\]]
	obj: [` \[[*[#1]][$[#2]]\]]

##ident identifiers
any identifier (including a reference) that is defined within a named section must be referred to from outside that section as [`[!sec].[!obj]], where [$sec] is the ID of the containing section and [$obj] is the ID of the object one wishes to reference.

##rsrc resources
a [!resource] represents content that is not encoded directly into the source file, but which is embedded by some means in the output. resources can either be [!embedded], in which case they are compiled into the final document itself, or they can be [!linked], in which case the final document only contains a URI or similar tag referencing the resource. not all render backends support both linking and embedding, nor do all backends support all object types (for instance, [`groff] does not support video embedding.)

a resource definition is begun by a line consisting of an [`@] sign and an [>ident identifier]. this line is followed by any number of parameters. a parameter is a line beginning with a single tab, a keyword, a colon, and then a value. additional lines can be added to a parameter by following it with a line that consists of two tabs followed by the text you wish to add. (this is the same syntax used by references.) a resource definition is terminated by a break, or any line that does not begin with a tab

a resource definition in use looks like this:

~~~
this is a demonstration of resources
@smiley
	src: link image/webp http://cdn.example.net/img/smile.webp
		  link image/png file:img/smile.png
		  embed image/gif file:img/smile.gif
	desc: the Smiling Man would like to see you in his office
here is the resource in span context [&smiley]
and here it is in block context:
&smiley
~~~

rendered as HTML, this might produce the following:

~~~
<style>
	.res-smiley {
		content: image-set(
			url(http://cdn.example.net/img/smile.webp) type(image/webp),
			url(img/smile.png) type(image/png),
			url(data:image/gif;base64,/* … */) type(image/gif)
		); /* this will actually be repeated with a -webkit- prefix */
	}
</style>
<p>this is a demonstration of resources</p>
<p>here is the resource in span context: <span class="res-smiley"></span></p>
<p>and here it is in block context:</p>
<div class="res-smiley"></div>
~~~

note that empty elements with CSS classes are used in the output, to avoid repeating long image definitions (especially base64 inline encoded ones!)

### supported parameters
* [`src] (all): specifies where to find the file, what it is, and how to embed it. each line of [`src] should consist of three whitespace-separated words: embed method, MIME type, and URI.
** embed methods
*** [`local]: loads the resource at build time and embeds it into the output file. not all implementations may allow loading remote network resources at build time.
*** [`remote]: only embeds a reference to the location of the resource. use this for e.g. live iframes, dynamic images, or images hosted by a CDN.
*** [`auto]: embeds the resource in file formats where that's practical, and uses a remote reference otherwise.
** MIME types: which file types are supported depends on the individual implementation and renderer backend; additionally, extensions can add support for extra types. MIME-types that have no available handler will, where possible, result in an attachment that can be extracted by the user, usually by clicking on a link. however, the following should be usable with all compliant implementations
*** [`image/*] (graphical outputs only)
*** [`video/*] (interactive outputs only)
*** [`image/svg+xml] is handled specially for HTML files, and may or may not be compatible with other renderer backends.
*** [`font/*] can be used with the HTML backend to reference a web font
*** [`font/woff2] can be used with the HTML backend to reference a web font
*** [`text/plain] (will be inserted as a preformatted text block)
*** [`text/css] (can be used when producing HTML files to link in an extra stylesheet, either by embedding it or referencing it from the header)
*** [`text/x-cortav] (will be parsed and inserted as a formatted text block; context variables can be passed to the file with [`ctx.[$var]] parameters)
*** any MIME-type that matches the type of file being generated by the renderer can be used to include a block of data that will be passed directly to the renderer.
** URI types: additional URI types can be added by extensions or different implementations, but every compliant implementation must support these URIs.
*** [`http], [`https]: accesses resources over HTTP. add a [`file] fallback if possible for the benefit of renderers/viewers that do not have internet access abilities.
*** [`file]: references local files. absolute paths should begin [`file:/]; the slash should be omitted for relative paths. note that this doesn't have quite the same meaning as in HTML -- [`file] can (and usually should) be used with HTML outputs to refer to resources that reside on the same server. a cortav URI of [`file:/etc/passwd] will actually result in the link [`/etc/passwd], not [`file:///etc/passwd] when converted to HTML. generally, you only should use [`http] when you're referring to a resource that exists on a different domain.
*** [`name]: a special URI used generally for referencing resources that are already installed on a target system and do not need to be embedded or linked; the name and type are enough for a renderer on another machine to locate the correct resource. this is useful mostly for [>fonts fonts], where it's more typical to refer to fonts that are installed on your system rather than providing paths to font files.
*** [`gemini]: accesses resources over the gemini protocol. currently you should really only use this for [`local] resources unless you're using the gemtext renderer backend, since nothing but gemini browsers are liable to support this protocol.
* [`desc]: supplies a narrative description of the resources, for use as an "alt-text" when the image cannot be loaded and for screenreaders.
* [`detail]: supplies extra narrative commentary that is displayed contextually, e.g. when the user hovers her mouse cursor over the embedded object.

note that in certain cases, full MIME types do not need to be used. say you're defining a font with the [`name] URI -- you can't necessarily know what file type the system fonts on another computer are going to be. in this case, you can just write [`font] instead of [`font/ttf] or [`font/woff2] or similar. all cortav needs to know in this case is what abstract kind of object you're referencing.


##ctxvar context variables
context variables are provided so that cortav renderers can process templates. certain context variables are provided for by the standard. you can test for the presence of a context variable with the directive [`%[*when] ctx [$var]]. context variables are accessed with the [` \[#[$name]\]] span.

* {def cortav.file} the name of the file currently being rendered
* {def cortav.path} the absolute path of the file currently being rendered
* {def cortav.time} the current system time in the form [`[#cortav.time]]
* {def cortav.date} the current system date in the form [`[#cortav.date]]
* {def cortav.datetime} the current system date and time represented in the locale or system-standard manner (e.g. [`[#cortav.datetime]])
* {def cortav.page} the number of the page currently being rendered
* {def cortav.id} the identifier of the renderer
* {def cortav.hash} the SHA3 hash of the source file being rendered
	def: [*[#1]]:

on systems with environment variables, these may be accessed as context variables by prefixing their name with [`env.].

different renderers may provide context in different ways, such as from command line options or a context file. any predefined variables should carry an appropriate prefix to prevent conflation. 

##fonts fonts
for output backends that support font specification, cortav provides a sophisticated font management system by means of the [!font stack].

when a document parse begins, the font stack is empty (unless a default font has already been loaded by an intent file).
when the font stack is empty, cortav does not include font specifications in its output, and thus will use whatever the default of the various rendering programs is.

to use fonts, we first have to define the fonts as [>rsrc resources].

~~~cortav
%% first, we create a new section to namespace the fonts
#^fonts
%% we then define each font as a resource
@serif
	src: auto font name:Alegreya
		embed  font/ttf file:project-fonts/alegreya.ttf
		link font/woff2 file:/assets/font/alegreya.woff2
		auto font name:Times New Roman
@sans
	src: link font name:Alegreya Sans
		link font name:Open Sans
		link font name:sans-serif
~~~

here we have defined two font families, [`fonts.serif] and [`fonts.sans]. each contains a list of references to fonts which will be tried in order. for example, this could be translated into the following CSS:

~~~css
@font-face {
	font-family: "fontdef-serif";
	src: local("Alegreya"),
		url("data:font/ttf;base64,…") format("font/ttf"),
		url("/assets/font/alegreya.woff2") format("font/woff2"),
		local("Times New Roman");
}
@font-face {
	font-family: "fontdef-sans";
	src: local("Alegreya Sans"),
		local("Open Sans"),
		local("sans-serif");
}
~~~

there are two things that aren't super clear from the CSS, however. notice how we used [`auto] on a couple of those specs? this means it's up to the renderer to decide whether to link or embed the font. for html, a font specified by name can't really be embedded, but for some file formats, it can be. [`auto] lets us produce valid HTML while still taking advantage of font embedding in other formats.

now that we have our font families defined, we can use their identifiers with the [`%[*font]] directive to control the font stack. the first thing we need to do is push a new font context. there's two ways we can do this:
	fnd: [`%[*font] [#1]]
* {fnd dup} will create a copy of the current font context, allowing us to make some changes and then revert later with the {fnd pop} command. this isn't useful in our case, however, because right now the stack is empty; there's nothing to duplicate.
* {fnd new} will create a brand new empty context for us to work with and push it to the stack. this can also be used to temporarily revert to the system default fonts, and then switch back with {fnd pop}.
* {fnd set} changes one or more entries in the current font context. it can take a space-separated list of arguments in the form [`[$entry]=[$font-id]]. the supported entries are:
** [`body]: the fallback font. if only this is set in a given font context, it will be used for everything
** [`paragraph]: the font used for normal paragraphs
** [`header]: the font used in headers
** [`subtitle]: the font used in subtitles
** [`list]: the font used in lists
** [`table]: the font used in tables
** [`caption]: the font used for captions
* {fnd pop} removes the top context from the font stack.

note that extensions may consult the font context for entries specific to them. for instance, [>toc toc] checks for [`toc] before falling back to [`body] and then the default font.

these commands are enough to give us a very flexible setup. consider the following:

~~~cortav
%% let's pretend we've also defined the fonts 'title', 'cursive', and 'thin'

%font new
%font set body=sans header=serif
%font dup
%font set header=title
# lorem ipsum dolor
%font pop

%% we've now set up a default font context, created a new context for the title of the
%% document, and then popped it back off after the title was inserted so that our
%% first font context is active again. everything after that last '%font pop' will
%% be printed in sans, except for headers, which will be printed in 'serif'

lorem ipsum dolor sit amet, sed consectetur adipiscing elit…

%font dup
%font set body=cursive
> sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.
> Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut
%font pop

%% above we created a blockquote whose text is printed in a cursive font; afterwards,
%% we simply remove this new context—

and everything is back the way it was at "lorem ipsum"

%% the font mechanism is at its most powerful when used with multiline macros:

	cursive-quote: %font dup
		%font set body=cursive
		> [#1]
		%font pop

%% now, whenever we want a block with a cursive body, we can simply invoke

&$cursive-quote Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident

%% without affecting the overall font context. in fact, since 'cursive-quote' creates
%% its context using 'dup', it would import all font specifications besides 'body'
%% from the environment it is invoked in
~~~

##dir directives
	d: [`%[*[##1]]]
* {d author} encodes document authorship. multiple author directives can be issued to add additional coauthors
* {d cols} specifies the number of columns the next object should be rendered with
* {d include} transcludes another file
* {d import} reads in the contents of another file as an embeddable section
* {d quote} transcludes another file, without expanding the text except for paragraphs 
* {d embed}, where possible, embeds another file as an object within the current one. in HTML this could be accomplished with e.g. an iframe.
* {d expand} causes the next object (usually a code block) to be fully expanded when it would otherwise not be
* {d font} controls the font stack, for outputs that support changing fonts. see [>fonts fonts] for more information.
* {d lang} changes the current language, which is used by extensions to e.g. control typographical conventions, and may be encoded into the output by certain renderers (e.g. HTML). note that quotes and blockquotes can be set to a separate language with a simpler syntax. the language should be notated using IETF language tags
** {d lang is x-ranuir-CR8} sets the current language to Ranuir as spoken in the Central Worlds, written in Corran and encoded using UTF-8. this might be used at the top of a document to set its primary language.
** {d lang push gsw-u-sd-chzh} temporarily switches to Zürich German, e.g. to quote a German passage in an otherwise Ranuir document
** {d lang sec en-US} switches to American English for the duration of a section. does not affect the language stack.
** {d lang pop} drops the current language off the language stack, returning to whatever was pushed or set before it. this would be used, for instance, at the end of a passage
* {d pragma} supplies semantic data about author intent, the kind of information the document contains and hints about how it should be displayed to the user. think of them like offhand remarks to the renderer -- there's no guarantee that it'll pay any attention, but if it does, your document will look better. pragmas have no scope; they affect the entire document. the pragma function exists primarily as a means to allow parameters that would normally need to be specified on e.g. the command line to be encoded in the document instead in a way that multiple implementations can understand. a few standard pragmas are defined.
** {d pragma layout} gives a hint on how the document should be laid out. the first hint that is understood will be applied; all others will be discarded. standard hints include:
*** [`essay]
*** [`narrative]
*** [`screenplay]: uses asides to denote actions, quotes for dialogue
*** [`stageplay]: uses asides to denote actions, quotes for dialogue
*** [`manual]
*** [`glossary]
*** [`news]
*** [`book]: section depths 1-3 gain additional semantics
***: [*part]: the section gets a page to itself to announce the beginning of a new part or appendix
***: [*chapter]: the section is preceded by a page break
***: [*heading]: the section can occur on the same page as text and  headings from other sections
** {d pragma accent} specifies an accent hue (in degrees around the color wheel) for renderers which support colorized output
** {d pragma accent-spread} is a factor that controls the "spread" of hues used in the document. if 0, only the accent color will be used; if larger, other hues will be used in addition to the primary accent color.
** {d pragma dark-on-light on\|off} controls whether the color scheme used should be light-on-dark or dark-on-light
** {d pragma page-width} indicates how wide the pages should be
** {d pragma title-page} specifies a section to use as a title page, for renderer backends that support pagination

! note on pragmata: particularly when working with collections of documents, you should not keep formatting metadata in the documents themselves! the best thing to do is to have a makefile for compiling the documents using whatever tools you want to support, and to encode the rendering options in this file (for the reference implementation this currently means as command line arguments, but eventually it will support intent files as well) so they can all be changed in one place; pragmas should instead be used for per-document [*overrides] of default settings.
! a workaround for the lack of intent files in the reference implementation is to have a single pseudo-stylesheet that contains only {d pragma} statements, and then import this file from each individual source file using the {d include} directive. this is suboptimal and recommended only when you need to ensure compatibility between different implementations.
! when creating HTML files, an even better alternative may be to turn off style generation entirely and link in an external, hand-written CSS stylesheet. this is generally the way you should compile sources for existing websites if you aren't going to write your own extension.

##ex examples

~~~ blockquotes #bq [cortav] ~~~
the following excerpts of text were recovered from a partially erased hard drive found in the Hawthorne manor in the weeks after the Incident. context is unknown.
................................................................................

+:english  :| honor |
+:ranuir   :| tef   |
+:zia ţai  :| pang  |
+:thalishte:| mbecheve |
~~~

##extns extensions
the cortav specification also specifies a number of extensions that do not have to be supported for a renderer to be compliant. the extension mechanism supports the following directives.

* inhibits: prevents an extension from being used even where available
* uses: turns on an extension that is not specified by the user operating the renderer (e.g. on the command line)
* needs: causes rendering to fail with an error if the extensions are not available

where possible, instead of [`needs [$x y z]], the directive [`when has-ext [$x y z]] should be used. this causes the next section to be rendered only if the named extensions are available. [`unless has-ext [$x y z]] can be used to provide an alternative format.

extensions are mainly interacted with through directives. all extension directives must be prefixed with the name of the extension.

the reference implementation seeks to support all standardized extensions. it's not quite there yet, however.

###toc toc
sections that have a title will be included in the table of contents. the table of contents is by default inserted at the break between the first level-1 section and the section immediately following it. you may instead place the directive [`toc] where you wish the TOC to be inserted, or suppress it entirely with [`inhibits toc]. note that some renderers may not display the TOC as part of the document itself.

toc provides the directives:

* [`%[*toc]]: insert a table of contents in the specified position. this can be used more than once, but doing so may have confusing, incorrect, or nonsensical results under some renderers, and some may just ignore the directive entirely
* [`%[*toc] mark [$styled-text]]: inserts a TOC entry with the label [$styled-text]  pointing to the current location. this can be used to e.g. mark noteworthy images, instances of long quotes or literal blocks, or functions inside an expanded code block.
* [`%[*toc] name [$id styled-text]]: like [`%[*toc] mark] but allows an additional [$id] parameter which specifies the ID the renderer will assign to an anchor element. this is not meaningful for all renderers and when it is, it is up to the renderer to decide what it means.
** the [*html] render backend interprets [$id] as the [`id] element for the anchor tag
** the [*groff] render backend ignores [$id]


###tsmog transmogrify
a cortav renderer may automatically translate punctuation marks or symbol sequences to superior representations depending on their context. to be compliant this extension should implement, at minimum:
* smart quotes (with consideration for the typographical conventions of languages like German or Spanish)
** {dir.d transmogrify|language [$lang]} can be used to explicitly set the language; otherwise, it must be determined from the value of {dir.d pragma|lang}. if this is not present, implementations may fall back on their own methods for determining the language in use, such as command-line flags.
* multigraph to glyph conversion, including at least:
** [`\--] --> "—"
** [`\-->] --> "→"
** [`\<--] -->  "←"

an escape character before any of the sequence characters should prevent the sequence from being rendered. raw nodes (that is, [`\[\…\]] and [`\[`\…\]]) should not be scanned for transmogrification, nor should the contents of code blocks unless marked with the [`%[*expand]] directive

transmogrification shall only take place after all other parsing steps are completed.

###hilite hilite
code can be highlighted according to the formal language it is written in. a compliant hilite implementation must implement basic keyword, symbol, comment, pragma, and literal highlighting for the following formal languages.
* C
* [>lua Lua]
* [>html HTML]
* [>scheme Scheme]
* [>terra Terra]
* [>libconfig libconfig]

	lua: https://lua.org
	scheme: https://call-cc.org
	terra: https://terralang.org
	html: https://dev.w3.org/html5/spec-LC/
	libconfig: http://hyperrealm.github.io/libconfig/

the highlighter should make use of semantic HTML tags like [`<var>] where possible.

###lua lua
renderers with a lua interpreter available can evaluate lua code:
* [`%lua use [!file]]: evaluates [$file] and makes its definitions available
* [`\[%lua raw [!script]\]]: evaluates [$script] and emits the string it returns (if any) in raw span context.
* [` \[%lua exp [!script]\]]: evaluates [$script] and emits the string it returns (if any) in expanded span context.
* [`%lua raw [!script]]: evaluates [$script] and emits the string array it returns (if any) in raw block context.
* [`%lua exp [!script]]: evaluates [$script] and emits the string array it returns (if any) in expanded block context.

the interpreter should provide a [`cortav] table with the objects:
* ctx: contains context variables

used files should return a table with the following members
* macros: an array of functions that return strings or arrays of strings when invoked. these will be injected into the global macro namespace.

###ts ts
the [*ts] extension allows documents to be marked up for basic classification constraints and automatically redacted. if you are seriously relying on ts for confidentiality, make damn sure you start the file with [$%[*needs] ts], so that rendering will fail with an error if the extension isn't supported.

ts enables the directives:
* [`%[*ts] class [$scope level] ([$styled-text])]: indicates a classification level for either the whole document (scope [$doc]) or the next section (scope [$sec]). if the ts level is below [$level], the section will be redacted or rendering will fail with an error, as appropriate. if styled-text is included, this will be treated as the name of the classification level.
* [`%[*ts] word [$scope word] ([$styled-text])]: indicates a codeword clearance that must be present for the text to render. if styled-text is present, this will be used to render the name of the codeword instead of [$word].
* [`%[*when] ts level [$level]]
* [`%[*when] ts word [$word]]

ts enables the spans:
* [`\[🔒#[!level] [$styled-text]\]]: redacts the span if the security level is below that specified.
* [`\[🔒.[!word] [$styled-text]\]]: redacts the span if the specified codeword clearance is not enabled.
(the padlock emoji is shorthand for [`%[*ts]].)

ts redacts spans securely; that is, they are simply replaced with an indicator that they have been redacted, without visually leaking the length of the redacted text.

~~~#ts-example example [cortav] ~~~
%ts word doc sorrowful-pines SORROWFUL PINES

# intercept R1440 TCT S3
................................................................................
<B> Hyacinth, I told you not to contact me without—
<A, shouting> god DAMMIT woman I am trying to SAVE your worthless skin
<B> Hyacinth! your Godforsaken scrambler!
<A> …oh, [!fuck].
(signal lost)
~~~

#refimpl reference implementation
the cortav standard is implemented in [`cortav.lua], found in this repository. only the way [`cortav.lua] interprets the cortav language is defined as a reference implementation; other behaviors are simply how [`cortav.lua] implements the specification and may be copied, ignored, tweaked, violently assaulted, or used as inspiration by a compliant parser.

the reference implementation can be used both as a lua library and from the command line. [`cortav.lua] contains the parser and renderers, [`ext/*] contain various extensions, [`sirsem.lua] contains utility functions, and [`cli.lua] contains the CLI driver.

##refimpl-lib lua library
there are various ways to use cortav from a lua script; the simplest however is probably to precompile your script with luac and link in the necessary components of the implementation. for instance, say we have the following program

~~~ stdin2html.lua [lua] ~~~
local ct = require 'cortav'
local mode = {}
local doc = ct.parse(io.stdin, {file = '(stdin)'}, mode)
doc.stage = {
................................................................................

and the only extension we need is the table-of-contents extension. our script can be translated into a self-contained lua bytecode blob with the following command

~~~
$ luac -s -o stdin2html.lc $cortav_repo/{sirsem,cortav,ext/toc}.lua stdin2html.lua
~~~

and can then be operated with the command [`lua stdin2html.lc], with no further need for the cortav repository files. note that the order of the [`luac] command is important! [`sirsem.lua] must come first, followed by [`cortav.lua], followed by any extensions. your driver script (i.e. the script with the entry point into the application) should always come last.

###refimpl-tools building custom tools
generally, most existing file-format conversion tools (cmark, pandoc, and so on) have a crucial limitation: they hardcode specific assumptions like document structure. this means that the files they output are generally not suitable as-is for the users' purposes, and require further munging, usually by hateful shell or perl scripts. some tools do provide libraries for end users to use as a basis for designing their own tools, but these are often limited, and in any case the user ends up having to write their own (non-standard) driver. it's no surprise that very few people end up doing this.

[`cortav.lua]'s design lends itself to a more elegant solution. one can of course write their own driver using [`cortav] as a library, but most of the time when you're compiling document sources, you just want a binary you can run from the command line or a makefile. with [`cortav.lua], you can extend its capabilities easily while keeping the same driver.

in the [`cortav] spec, extensions are mostly intended to give different implementations the ability to offer extra capabilities, but the reference implementation uses an extension architecture that makes it easy to write and add your own. for each type of new behavior you want to implement, just create a new extension and list it on the make command line:

~~~
$ nvim ~/dev/my-cortav-exts/imperial-edict.lua
$ make cortav extens+=$HOME/dev/my-cortav-exts/*.lua
~~~

the cortav binary this produces will have all the extra capabilities you personally need, without any need to fork [`cortav.lua] itself or even touch the repository.

there's no reason [`cortav.lua] shouldn't be able to load extensions at runtime as well; i just haven't implemented this behavior yet. it probably would only take a few extra lines of code tho.

i will eventually document the extension API, but for now, look at [`ext/toc.lua] for a simple example of how to register an extension.

##refimpl-cli command line driver
the [$cortav.lua] command line driver can be run from the repository directory with the command [`lua ./cli.lua], or by first compiling it into a bytecode form that links in all its dependencies. this is the preferred method for installation, as it produces a self-contained executable which loads more quickly, but running the driver in script form may be desirable for development or debugging.

the repository contains a GNU makefile to automate compilation of the reference implementation on unix-like OSes. simply run [`$ make cortav] or [`$ gmake cortav] from the repository root to produce a self-contained bytecode executable that can be installed anywhere on your filesystem, with no dependencies other than the lua interpreter.

! note that the makefile strips debugging symbols to save space, so running [`cli.lua] directly as a script may be helpful if you encounter errors and need stacktraces or other debugging information.

henceforth it will be assumed that you have produced the [`cortav] executable and placed it somewhere in your [$$PATH]; if you are instead running [`cortav.lua] directly as an interpreted script, you'll need to replace [`$ cortav] with [`$ lua ./cli.lua] in incantations.

when run without commands, [`cortav.lua] will read input from standard input and write to standard output. alternately, a source file can be given as an argument. to write to a specific file instead of the standard output stream, use the [`-o [!file]] flag.

~~~
$ cortav readme.ct -o readme.html
	# reads from readme.ct, writes to readme.html
$ cortav -o readme.html
	# reads from standard input, writes to readme.html
$ cortav readme.ct
	# reads from readme.ct, writes to standard output
~~~

###refimpl-build building
the command line driver is built and installed with a GNU [$make] script. this script accepts the variables shown below with their default values:
+ prefix | [`[$$HOME]/.local] | the path under which the package will be installed
+ build | [`build] |  the directory where generated objects will be placed; useful for out-of-tree builds
+ bin-prefix | [`[$$prefix]/bin] | directory to install the executables to
+ default-format-flags | [`-m html:width 35em] | a list of flags that will be passed by the viewer script to [`cortav] when generating a html file

the following targets are supplied to automate the build:
* [`install] builds everything, installs the executable and the viewer script to [$$bin_prefix], and registers the viewer script with XDG
* [`excise] deletes everything installed and deregisters the file handlers (note that the same variables must be passed to [`excise] as were passed to [`install]!)
* [`clean] deletes build artifacts from the [$$build] directory like it was never there
* [`wipe] is equivalent to [`$ make excise && make clean]

###refimpl-switches switches
[`cortav.lua] offers various switches to control its behavior.
+ long                      + short + function                                    +
| [`--out [!file]]              :|:[`-o]:| sets the output file (default stdout)       |
| [`--log [!file]]              :|:[`-l]:| sets the log file (default stderr)          |
| [`--define [!var] [!val]]     :|:[`-d]:| sets the context variable [$var] to [$val]  |
| [`--mode-set [!mode]]         :|:[`-y]:| activates the [>refimpl-mode mode] with ID [!mode]
| [`--mode-clear [!mode]]       :|:[`-n]:| disables the mode with ID [!mode]           |
| [`--mode [!id] [!val]]        :|:[`-m]:| configures mode [!id] with the value [!val] |
| [`--mode-set-weak [!mode]]    :|:[`-Y]:| activates the [>refimpl-mode mode] with ID [!mode] if the source file does not specify otherwise
| [`--mode-clear-weak [!mode]]  :|:[`-N]:| disables the mode with ID [$mode] if the source file does not specify otherwise

| [`--mode-weak [!id] [!val]]   :|:[`-M]:| configures mode [$id] with the value [$val] if the source file does not specify otherwise
| [`--help]                     :|:[`-h]:| display online help                         |
| [`--version]                  :|:[`-V]:| display the interpreter version             |

###refimpl-mode modes
most of [`cortav.lua]'s implementation-specific behavior is controlled by use of [!modes]. these are namespaced options which may have a boolean, string, or numeric value. boolean modes are set with the [`-y] [`-n] flags; other modes use the [`-m] flags.

most modes are defined by the renderer backend. the following modes affect the behavior of the frontend:

+ ID                 + type   + effect
|   [`render:format]:| string | selects the [>refimpl-rend renderer] (default [`html])
| [`parse:show-tree]:| flag   | dumps the parse tree to the log after parsing completes

##refimpl-rend renderers
[`cortav.lua] implements a frontend-backend architecture, separating the parsing stage from the rendering stage. this means new renderers can be added to [`cortav.lua] relatively easily. currently, only an [>refimpl-rend-html HTML renderer] is included; however, a [`groff] backend is planned at some point in the future, so that PDFs and manpages can be generated from cortav files.

###refimpl-rend-html html
the HTML renderer is activated with the incantation [`-m render:format html]. it is currently the default backend. it produces a single HTML file, optionally with CSS styling data, from a [`.ct] input file.

####refimpl-rend-html-modes modes
[`html] supports the following modes:

* string (css length) [`html:width] sets a maximum width for the body content in order to make the page more readable on large displays
* number [`html:accent] applies an accent hue to the generated webpage. the hue is specified in degrees, e.g. [$-m html:accent 0] applies a red accent.
* flag [`html:dark-on-light] uses dark-on-light styling, instead of the default light-on-dark
* flag [`html:fossil-uv] outputs an HTML snippet suitable for use with the Fossil VCS webserver. this is intended to be used with the unversioned content mechanism to host rendered versions of documentation written in cortav that's stored in a Fossil repository.
* number [`html:hue-spread] generates a color palette based on the supplied accent hue. the larger the value, the more the other colors diverge from the accent hue.
* string [`html:link-css] generates a document linking to the named stylesheet
* flag [`html:gen-styles] embeds appropriate CSS styles in the document (default on)
* flag [`html:snippet] produces a snippet of html instead of an entire web page. note that proper CSS scoping is not yet implemented (and can't be implemented hygienically since [$scoped] was removed 😢)
* string [`html:title] specifies the webpage titlebar contents (normally autodetected from the document based on headings or directives)
* string [`html:font] specifies the default font to use when rendering as a CSS font specification (e.g. [`-m html:font 'Alegreya, Junicode, Georgia, "Times New Roman"'])

~~~
$ cortav readme.ct --out readme.html \
	-m render:format html \
	-m html:width 40em \
	-m html:accent 80 \
	-m html:hue-spread 35 \
	-y html:dark-on-light # could also be written as:
$ cortav readme.ct -ommmmy readme.html render:format html html:width 40em html:accent 80 html:hue-spread 35 html:dark-on-light
~~~

#### directives
[`html] supplies the following render directives.

* [`%[*html] link [$rel] [$mime] [$href]]: inserts a [`<link>] tag in the header, for example, to link in an alternate stylesheet, or help feed readers find your atom or rss feed.
** [`%[*html] link alternate\\ stylesheet text/css /res/style2.css]
** [`%[*html] link alternate application/atom+xml /feed.atom]
* [`%[*html] style [$id]]: adds the stylesheet referenced by [$id] into the document stylesheet. the stylesheet is specified using a [>rsrc resource].

#### stylesheets
the [`html] backend offers some additional directives for external CSS files that are embedded into the document, in order to simplify integration with the accent mechanism. these are:

* [`@[*fg]]: resolves to a color expression denoting the selected foreground color. equivalent to [`[*tone](1)]
* [`@[*bg]]: resolves to a color expression denoting the selected background color. equivalent to [`[*tone](0)]
* [`@[*tone]\[/[$alpha]\]([$fac] \[[$shift] \[[$saturate]\]\] )]: resolves to a color expression. [$fac] is a floating-point value scaling from the background color to the foreground color. [$shift] is a value in degrees controlling how far the hue will shift relative to the accent. [$saturate] is a floating-point value controlling how saturated the color is.

###refimpl-rend-groff groff
the [`groff] backend produces a text file suitable for supplying to a [`groff] compiler. [`groff] is the GNU implementation of a venerable typesetting system from the early days of UNIX.

as a convenience, the groff backend supports two modes of operation: it can write a [`groff] file directly to disk, or it can automatically launch a [`groff] process with the appropriate command line options and environment variables. this second mode is recommended unless you're rendering very large files to multiple formats, as [`groff] invocation is nontrivial and it's best to let the renderer handle that for you.

####refimpl-rend-groff-modes modes
[`groff] supports the following modes:

* string [`groff:annotate] controls how footnotes will be handled.
** [`footnote] places footnotes at the end of the page they are referenced on. if the same footnote is used on multiple pages, it will be duplicated on each.
** [`secnote] places footnotes at the end of each section. footnotes used in multiple sections will be duplicated for each
** [`endnote] places all footnotes at the end of the rendered document.
* string [`groff:dev] names an output device (such as [`dvi] or [`pdf]). if this mode is present, [`groff] will be automatically invoked
* string [`groff:title-page] takes an identifier that names a section. this section will be treated as the title page for the document.

### directives
* [`%[*pragma] title-page [$id]] sets the title page to section [$id]. this causes it to be specially formatted, with a large, centered title and subtitle.

### quirks
if the [`toc] extension is active but no [`%[*toc]] directive is provided, the table of contents will be given its own section at the start of the document (after the title page, if any).

## further directions

### additional backends
it is eventually intended to support the following backends, if reasonably practicable.
* [*html]: emit HTML and CSS code to typeset the document. [!in progress]
* [*svg]: emit SVG, taking advantage of its precise layout features to produce a nicely formatted and paginated document. pagination can be accomplished through emitting multiple files or by assigning one layer to each page. [!long term]
* [*groff]: the most important output backend, rivalling [*html]. will allow the document to be typeset in a wide variety of formats, including PDF and manpage. [!short term]
* [*gemtext]: essentially a downrezzing of cortav to make it readable to Gemini clients
* [*ast]: produces a human- and/or machine-readable dump of the document's syntax tree, to aid in debugging or for interoperation with systems that do not support [`cortav] directly. mode [`ast:repr] will allow selecting formats for the dump. [`ast:rel] can be [`tree] (the default) to emit a hierarchical representation, or [`flat] to emit an array of nodes that convey hierarchy [^flatdoc by naming one another], rather than being placed inside one another. [`tree] is easier for humans to parse; [`flat] is easier for computers. origin information can be included for each node with the flag [`ast:debug-syms], but be aware this will greatly increase file size.
** [`tabtree] [!(default)]: a hierarchical tree view, with the number of tabs preceding an item showing its depth in the tree
** [`sexp]
** [`binary]: emit a raw binary format that is easier for programs to read. maybe an lmdb or cdb file?
** [`json]

	flatdoc: ~~~flat sexp example output [scheme]~~~
		(nodes
			(section (id . "section1")
				(anchor "introduction")
				(kind . "ordinary")
				(label . "section1-heading")
				(nodes
					"section1-heading"
					"para1"
					"para2"
					"hzrule"
					"para3"))
			(section (id . "section2")
				(kind . "ordinary")
				(label . "section2-heading")
				(nodes
					"para4"
					"hzrule"
					"para5"
					"list1"))
			(block list (id . "list1")
				(kind . "ordered")
				(nodes
					"para6"
					"list2"
					"para7"))
			(block list (id . "list2")
				(kind . "unordered")
				(nodes
					"para8"
					"para9"
					"para10"))
			(block para (id . "para1")
				(nodes "text1" "format1" "text3" "footnote1" "text4"))
			(block label (id . "section1-heading") (nodes "section1-heading-text"))
			(text (id . "section1-heading-text") "Contemplating the Anathema")
			(text (id . "text1")
				"Disquieting information has recently been disclosed to virtual journalists of the Giedi Prime infomatrix by sources close to the Hyperion Entity regarding the catastrophic Year of Schisms and the unidentified agents believed to be responsible for memetically engineering the near-collapse of the Church Galactic.")
			(span format (id . "format1")
				(style . "emph")
				(nodes . "text2"))
			(text (id . "text2") "Curiously,")
			(text (id . "text3") "his Cyber-Holiness")
			(text (id . "footnote1-caption-text") "Pope Chewbacca III")
			(span footnote (id . "footnote1")
				(note . "footnote1-text")
				(ref . "papal-disclaimer")
				(nodes
					"footnote1-caption-text"))
			(text (id . "text4") "has thus far had little to say on the matter, provoking rampant speculation among the faithful.")
			(footnote-def (id . "footnote1-def")
				(nodes "footnote1-text"))
			(text (id . "footnote1-text") "Currently recognized as legitimate successor to Peter of Terra by 2,756 sects, rejected by 678 of mostly Neo-Lutheran origin, and decried as an antipope by 73, most notably Pope Peter II of Centaurus Secundus, leader of the ongoing relativistic crusade against star systems owned by Microsoft.")
			;;; snip ;;;
			(document
				(nodes
					"section1" "section2")))
		~~~

some formats may eventually warrant their own renderer, but are not a priority:
* [*text]: cortav source files are already plain text, but a certain amount of layout could be done using ascii art.
* [*ansi]: emit sequences of ANSI escape codes to lay out a document in a terminal-friendly way
* [*tex]: TeX is an unholy abomination and i neither like nor use it, but lots of people do and if cortav ever catches on, a TeX backend should probably be written eventually.

PDF is not on either list because it's a nightmarish mess of a format and groff, which is installed on most linux systems already, can easily generate PDFs

### LCH support
right now, the use of color in the HTML renderer is very unsatisfactory. the accent mechanism operates on the basis of the CSS HSL function, which is not perceptually uniform; different hues will present different mixes of brightness and some (yellows?) may be ugly or unreadable.

the ideal solution would be to simply switch to using LCH based colors. unfortunately, only Safari actually supports the LCH color function right now, and it's unlikely (unless Lea Verou and her husband manage to work a miracle) that Colors Level 4 is going to be implemented very widely any time soon.

this leaves us in an awkward position. we can of course do the math ourselves, working in LCH to implement the internal [`@tone] macro, and then "converting" these colors to HSL. unfortunately, you can't actually convert from LCH to HSL; it's like converting from pounds to kilograms. LCH can represent any color the human visual system can perceive; sRGB can't, and CSS HSL is implemented in sRGB. however, we could at least approximate something that would allow for perceptually uniform brightness, which would be an improvement, and this is probably the direction to go in, unless a miracle occurs and [`lch()] or [`color()] pop up in Blink.

it may be possible to do a more reasonable job of handling colors in the postscript and TeX outputs. unsure about SVG but i assume it suffers the same problems HTML/CSS do. does groff even support color??

### intent files
there's currently no standard way to describe the intent and desired formatting of a document besides placing pragmata in the source file itself. this is extremely suboptimal, as when generating collections of documents, it's ideal to be able to keep all formatting information in one place. users should also be able to specify their own styling overrides that describe the way they prefer to read [`cortav] files, especially for uses like gemini or gopher integration.

at some point soon [`cortav] needs to address this by adding intent files that can be activated from outside the source file, such as with a command line flag or a configuration file setting. these will probably consist of lines that are interpreted as pragmata. in addition to the standard intent format however, individual implementations should feel free to provide their own ways to provide intent metadata; e.g. the reference implementation, which has a lua interpreter available, should be able to take a lua script that runs after the parse stage and generates the appropriate pragmata. this will be particularly useful for the end-user who wishes to specify a particular format she likes reading her files in without forcing that format on everyone she sends the compiled document to, as it will be able to interrogate the document and make intelligent decisions about what pragmata to apply.

intent files should also be able to define [>rsrc resources], [>ctxvar context variables], and macros.

Modified cortav.lua from [eb3cc08f95] to [028f351fed].

2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
..
81
82
83
84
85
86
87



88
89
90
91
92
93
94
...
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
...
217
218
219
220
221
222
223

224
225
226
227
228
229
230
...
397
398
399
400
401
402
403
404

405
406
407

408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427



428
429
430
431
432
433
434
435
436
437
438
439
440
441









































































442

443
444
445
446
447
448
449
...
490
491
492
493
494
495
496

497
498
499
500
501
502
503

504
505
506
507
508
509
510
511
512
513
514
515
516
517











518


















519
520

521
522
523
524
525
526



















527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544


545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
...
604
605
606
607
608
609
610

611
612
613
614
615








































616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635




636
637
638
639
640
641


642













643
644
645
646
647
648
649


650
651
652
653
654
655
656
657
658
659
660
661
662
663






664
665
666
667
668




669
670
671
672










673
674
675
676
677
678
679
...
686
687
688
689
690
691
692




693
694
695
696
697
698
699
...
701
702
703
704
705
706
707


708
709
710
711

712






713

































714


715
716
717
718
719
720
721
...
766
767
768
769
770
771
772
773
774
775
776
777
778
779




780
781

782
783
784
785



786
787
788
789
790
791

792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
...
853
854
855
856
857
858
859


































860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
















907
908
909
910
911
912
913
....
1033
1034
1035
1036
1037
1038
1039


1040
1041
1042
1043
1044
1045
1046
....
1124
1125
1126
1127
1128
1129
1130
1131










































1132
1133
1134
1135
1136
1137
1138




1139
1140











1141
1142
1143
1144


1145


1146
1147
1148

































1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
....
1165
1166
1167
1168
1169
1170
1171




1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184



1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195





1196
1197
1198
1199
1200
1201
1202
....
1214
1215
1216
1217
1218
1219
1220

1221
1222
1223
1224
1225
1226
1227
....
1236
1237
1238
1239
1240
1241
1242



1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257



1258
1259
1260
1261
1262
1263
1264
....
1282
1283
1284
1285
1286
1287
1288

1289
1290
1291
1292
1293
1294
1295
....
1299
1300
1301
1302
1303
1304
1305

1306

1307

1308
1309
1310
1311
1312
1313
1314
....
1315
1316
1317
1318
1319
1320
1321


















1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
....
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
....
1371
1372
1373
1374
1375
1376
1377

1378
1379
1380
1381
1382
1383
1384
....
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411

1412

1413
1414
1415
1416
1417
1418

1419
1420
1421
1422
1423
1424
1425
....
1430
1431
1432
1433
1434
1435
1436
1437









1438









1439

1440






1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
....
1459
1460
1461
1462
1463
1464
1465

1466
1467
1468
1469
1470
1471
1472
1473
1474
....
1505
1506
1507
1508
1509
1510
1511




1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534














1535
1536
1537
1538




























1539
1540
1541





1542



























1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553








1554
1555
1556















1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
--  ~ lexi hale <lexi@hale.su>
--  © AGPLv3
--  ? reference implementation of the cortav document language

local ss = require 'sirsem'
-- aliases for commonly used sirsem funcs
local startswith = ss.str.begins
local eachcode = ss.str.enc.utf8.each
local dump = ss.dump
local declare = ss.declare

-- make this module available to require() when linked into a lua bytecode program with luac
local ct = ss.namespace 'cortav'
ct.info = {
	version = ss.version {0,1; 'devel'};
................................................................................
	end);
	cli = ss.exnkind 'command line parse error';
	mode = ss.exnkind('bad mode', function(msg, ...)
		return string.format("mode “%s” "..msg, ...)
	end);
	unimpl = ss.exnkind 'feature not implemented';
	ext = ss.exnkind 'extension error';



}

ct.ctx = declare {
	mk = function(src) return {src = src} end;
	ident = 'context';
	cast = {
		string = function(me)
................................................................................
			table.insert(self.sec.blocks,block)
			return block
		end;
		ref = function(self,id)
			-- resolve a reference identifier to the value stored for it.
			-- an id containing a '.' is treated as "section.ref" and looked up
			-- in the named section's refs table; any other id is looked up in
			-- the refs table of the current section (self.sec). a failed
			-- lookup is reported through self:fail.
			if id:find'%.' then
				-- split on the first '.': everything before it names the
				-- section, everything after it names the ref
				local secname, refname = string.match(id, "(.-)%.(.+)")
				local target = self.doc.sections[secname]
				if not target then
					self:fail("no such section %s", secname)
				else
					local hit = target.refs[refname]
					if hit then return hit end
					self:fail("no such ref %s in section %s", refname, secname)
				end
			else
				local found = self.sec.refs[id]
				if found then return found end
				self:fail("no such ref %s in current section", id or '')
			end
		end
	};
}

................................................................................
		meta = {};
		vars = {};
		ext = {
			inhibit = {};
			need = {};
			use = {};
		};

	} end;
	construct = function(me)
		me.docjob = ct.ext.job('doc', me, nil)
	end;
}

-- FP helper functions
................................................................................

-- renderer engines
function ct.render.html(doc, opts)
	local doctitle = opts['title']
	local f = string.format
	local ids = {}
	local canonicalID = {}
	local function getSafeID(obj)
		-- Hand out a unique HTML id for obj and remember it in canonicalID, so
		-- asking again for the same object always yields the same id.
		local known = canonicalID[obj]
		if known then return known end

		local chosen
		if obj.id and ids[obj.id] then
			-- the requested id already belongs to something else: append
			-- -1, -2, … (hexadecimal) until an unused id turns up
			local n = 1
			repeat
				chosen = obj.id .. string.format('-%x', n)
				n = n + 1
			until not ids[chosen]
		else
			chosen = obj.id
			if not chosen then
				-- object carries no id of its own: synthesize x-1, x-2, …
				local n = 1
				repeat
					chosen = string.format('x-%x', n)
					n = n + 1
				until not ids[chosen]
			end
		end
		ids[chosen] = obj
		canonicalID[obj] = chosen
		return chosen
	end




	local langsused = {}
	-- per-language presentation data for code listings, keyed by the listing's
	-- language tag: an accent color and, optionally, a display name
	local langpairs = {
		lua = { color = 0x9377ff };
		terra = { color = 0xff77c8 };
		c = { name = 'C', color = 0x77ffe8 };
		html = { color = 0xfff877 };
		scheme = { color = 0x77ff88 };
		lisp = { color = 0x77ff88 };
		fortran = { color = 0xff779a };
		python = { color = 0xffd277 };
		-- this entry was keyed `python` a second time, which silently replaced
		-- the python color above and left ruby without an entry
		ruby = { color = 0xcdd6ff };
	}

	local stylesets = {









































































		header = [[

			h1,h2,h3,h4,h5,h6 { border-bottom: 1px solid @tone(0.7); }
			h1 { font-size: 200%; border-bottom-style: double !important; border-bottom-width: 3px !important; margin: 0em -1em; }
			h2 { font-size: 130%; margin: 0em -0.7em; }
			h3 { font-size: 110%; margin: 0em -0.5em; }
			h4 { font-size: 100%; font-weight: normal; margin: 0em -0.2em; }
			h5 { font-size: 90%; font-weight: normal; }
			h6 { font-size: 80%; font-weight: normal; }
................................................................................
			section:target > :is(h1,h2,h3,h4,h5,h6) {

			}
		]];
		paragraph = [[
			p {
				margin: 0.7em 0;

			}
			section {
				margin: 1.2em 0;
			}
			section:first-child { margin-top: 0; }
		]];
		accent = [[

			body { background: @bg; color: @fg }
			a[href] {
				color: @tone(0.7 30);
				text-decoration-color: @tone/0.4(0.7 30);
			}
			a[href]:hover {
				color: @tone(0.9 30);
				text-decoration-color: @tone/0.7(0.7 30);
			}
			h1 { color: @tone(2); }
			h2 { color: @tone(1.5); }
			h3 { color: @tone(1.2); }
			h4 { color: @tone(1); }
			h5,h6 { color: @tone(0.8); }











		]];


















		code = [[
			code {

				background: @fg;
				color: @bg;
				font-family: monospace;
				font-size: 90%;
				padding: 3px 5px;
			}



















		]];
		abbr = [[
			abbr[title] { cursor: help; }
		]];
		editors_markup = [[]];
		block_code_listing = [[
			section > figure.listing {
				font-family: monospace;
				background: @tone(0.05);
				color: @fg;
				padding: 0;
				margin: 0.3em 0;
				counter-reset: line-number;
				position: relative;
				border: 1px solid @fg;
			}
			section > figure.listing>div {
				white-space: pre-wrap;


				counter-increment: line-number;
				text-indent: -2.3em;
				margin-left: 2.3em;
			}
			section > figure.listing>:is(div,hr)::before {
				width: 1.0em;
				padding: 0.2em 0.4em;
				text-align: right;
				display: inline-block;
				background-color: @tone(0.2);
				border-right: 1px solid @fg;
				content: counter(line-number);
				margin-right: 0.3em;
			}
			section > figure.listing>hr::before {
				color: transparent;
				padding-top: 0;
				padding-bottom: 0;
			}
			section > figure.listing>div::before {
				color: @fg;
			}
			section > figure.listing>div:last-child::before {
				padding-bottom: 0.5em;
			}
			section > figure.listing>figcaption:first-child {
				border: none;
				border-bottom: 1px solid @fg;
			}
			section > figure.listing>figcaption::after {
				display: block;
				float: right;
				font-weight: normal;
				font-style: italic;
				font-size: 70%;
				padding-top: 0.3em;
			}
			section > figure.listing>figcaption {
				font-family: sans-serif;
				font-size: 120%;
				padding: 0.2em 0.4em;
				border: none;
				color: @tone(2);
			}
			section > figure.listing > hr {
				border: none;
				margin: 0;
				height: 0.7em;
				counter-increment: line-number;
			}
		]];
	}
................................................................................
		stylesets = stylesets;
		stylesets_active = stylesNeeded;
		obj_htmlid = getSafeID;
		-- remaining fields added later
	}

	local renderJob = doc:job('render_html', nil, render_state_handle)


	local runhook = function(h, ...)
		return renderJob:hook(h, render_state_handle, ...)
	end









































	local function getSpanRenderers(procs)
		local tag, elt, catenate = procs.tag, procs.elt, procs.catenate
		local htmlDoc = function(title, head, body)
			return [[<!doctype html>]] .. tag('html',nil,
				tag('head', nil,
					elt('meta',{charset = 'utf-8'}) ..
					(title and tag('title', nil, title) or '') ..
					(head or '')) ..
				tag('body', nil, body or ''))
		end

		local span_renderers = {}
		-- Render a list of spans to a single string. Plain strings have the
		-- characters < > & " replaced by numeric character references; every
		-- other span is dispatched to span_renderers by its `kind`.
		local function htmlSpan(spans, block, sec)
			local text = {}
			-- spans are an ordered sequence; ipairs guarantees they are emitted
			-- in document order, which pairs does not promise
			for _,v in ipairs(spans) do
				if type(v) == 'string' then
					table.insert(text,(v:gsub('[<>&"]',
						function(x)
							return string.format('&#%02u;', string.byte(x))
						end)))
				else
					table.insert(text, span_renderers[v.kind](v, block, sec))
				end
			end
			return table.concat(text)
		end
















		-- Render an inline formatting span as the matching HTML element and
		-- record which optional stylesets the output needs.
		function span_renderers.format(sp,...)
			local tags = { strong = 'strong', emph = 'em', strike = 'del', insert = 'ins', literal = 'code' }
			if sp.style == 'literal' and not opts['fossil-uv'] then
				stylesNeeded.code = true
			end
			-- sp.style holds the style name used as a key of `tags`, not the
			-- HTML tag it maps to, so the check has to be against strike/insert;
			-- comparing against del/ins could never match
			if sp.style == 'strike' or sp.style == 'insert' then
				stylesNeeded.editors_markup = true
			end
			return tag(tags[sp.style],nil,htmlSpan(sp.spans,...))
		end

		-- Render a term span as <abbr>, with the referenced text as its title.
		-- When the span has no content of its own, the reference name (minus any
		-- leading "section." part) is shown instead.
		function span_renderers.term(t,b,s)
			local definition = b.origin:ref(t.ref)
			local label = t.ref
			if label:find'%.' then label = label:match '^[^.]*%.(.+)$' end
			if type(definition) ~= 'string' then
				b.origin:fail('%s is an object, not a reference', t.ref)
			end
			stylesNeeded.abbr = true
			local body
			if next(t.spans) then body = htmlSpan(t.spans,b,s) end
			return tag('abbr',{title=definition},body or label)
		end







		-- Expand a macro span: look up the text stored under the macro's name,
		-- parse it as spans in a copy of the block's context that records the
		-- invocation, and render the result.
		function span_renderers.macro(m,b,s)
			local r = b.origin:ref(m.macro)
			if type(r) ~= 'string' then
				-- report the macro's own name; the earlier `t.ref` referred to a
				-- variable that does not exist in this function
				b.origin:fail('%s is an object, not a reference', m.macro)
			end
			local mctx = b.origin:clone()
			mctx.invocation = m
			return htmlSpan(ct.parse_span(r, mctx),b,s)
		end

		function span_renderers.var(v,b,s)
			local val
			if v.pos then
				if not v.origin.invocation then
					v.origin:fail 'positional arguments can only be used in a macro invocation'
................................................................................
			end
			if v.raw then
				return val
			else
				return htmlSpan(ct.parse_span(val, v.origin), b, s)
			end
		end





		function span_renderers.link(sp,b,s)
			local href
			if b.origin.doc.sections[sp.ref] then
				href = '#' .. sp.ref
			else
				if sp.addr then href = sp.addr else
................................................................................
					if type(r) == 'table' then
						href = '#' .. getSafeID(r)
					else href = r end
				end
			end
			return tag('a',{href=href},next(sp.spans) and htmlSpan(sp.spans,b,s) or href)
		end


		return {
			span_renderers = span_renderers;
			htmlSpan = htmlSpan;
			htmlDoc = htmlDoc;

		}






	end





































	local function getBlockRenderers(procs, sr)
		local tag, elt, catenate = procs.tag, procs.elt, procs.catenate
		local null = function() return catenate{} end

		local block_renderers = {
			anchor = function(b,s)
................................................................................
					if #l > 0 then
						return tag('div',nil,sr.htmlSpan(l, b, s))
					else
						return elt('hr')
					end
				end, b.lines)
				if b.title then
					table.insert(nodes,1,tag('figcaption',nil,sr.htmlSpan(b.title)))
				end
				if b.lang then langsused[b.lang] = true end
				return tag('figure', {class='listing', lang=b.lang, id=b.id and getSafeID(b)}, catenate(nodes))
			end;
			aside = function(b,s)
				-- one <p> per line of the aside, wrapped in a single <aside>
				local bn = {}
				-- lines are an ordered sequence, so walk them with ipairs
				for _,v in ipairs(b.lines) do
					table.insert(bn, tag('p', {}, sr.htmlSpan(v, b, s)))
				end
				-- join the children through the active tag processor, as the
				-- listing renderer does; passing the raw table only works for
				-- processors whose tag() accepts a node list
				return tag('aside', {}, catenate(bn))
			end;
			['break'] = function() --[[nop]] end;



		}
		return block_renderers;
	end

	-- Build the complete renderer set for one tag processor: the span
	-- renderers, plus block renderers constructed on top of them and stored
	-- under `block_renderers`.
	local function getRenderers(procs)
		local renderers = getSpanRenderers(procs)
		local blocks = getBlockRenderers(procs, renderers)
		renderers.block_renderers = blocks
		return renderers
	end

	-- tag processors: three interchangeable back ends (plain text, IR node
	-- tables, serialized HTML) that share the tag/elt/catenate interface
	local tagproc do
		-- serialize one attribute: `true` yields a bare attribute name, any
		-- other truthy value yields name="value", falsy values yield nothing
		local function attrText(val, key)
			if val == true then
				return ' ' .. key
			elseif val then
				return f(' %s="%s"', key, val)
			end
		end
		local function join(acc, piece) return acc .. piece end

		-- opening (or childless) tag together with its attributes
		local function elt(t, attrs)
			local attrstr = attrs and ss.reduce(join, '', ss.map(attrText, attrs)) or ''
			return f('<%s%s>', t, attrstr)
		end

		-- plain text: keep only the content and drop the markup
		local toTXT = {
			tag = function(t,a,v) return v end;
			elt = function(t,a) return '' end;
			catenate = table.concat;
		}

		-- intermediate representation: node tables instead of strings
		local toIR = {
			tag = function(t,a,v,o)
				local nodes = type(v) == 'string' and {v} or v
				return { tag = t, attrs = a, nodes = nodes, src = o }
			end;
			elt = function(t,a,o)
				return { tag = t, attrs = a, src = o }
			end;
			catenate = function(...) return ... end;
		}

		-- serialized HTML text
		local toHTML = {
			elt = elt;
			tag = function(t,attrs,body)
				return f('%s%s</%s>', elt(t,attrs), body, t)
			end;
			catenate = table.concat;
		}

		tagproc = { toTXT = toTXT, toIR = toIR, toHTML = toHTML }
	end

	local astproc = {
		toHTML = getRenderers(tagproc.toHTML);
		toTXT  = getRenderers(tagproc.toTXT);
		toIR   = { };
	}
................................................................................
	local ir = {}
	local dr = astproc.toHTML -- default renderers
	local plainr = astproc.toTXT
	local irBlockRdrs = astproc.toIR.block_renderers;

	render_state_handle.ir = ir;



































	-- IR assembly pass: walk the document's sections in order and build one
	-- IR node per non-empty ordinary section, appending it to `ir`.
	runhook('ir_assemble', ir)
	for i, sec in ipairs(doc.secorder) do
		-- if no title was supplied in opts, take it from the first depth-1
		-- section that has a heading, rendered with the plain-text renderers
		if doctitle == nil and sec.depth == 1 and sec.heading_node then
			doctitle = astproc.toTXT.htmlSpan(sec.heading_node.spans, sec.heading_node, sec)
		end
		local irs
		if sec.kind == 'ordinary' then
			if #(sec.blocks) > 0 then
				irs = {tag='section',attrs={id = getSafeID(sec)},nodes={}}

				runhook('ir_section_build', irs, sec)
				
				-- note: this loop's `i` shadows the section index above
				for i, block in ipairs(sec.blocks) do
					local rd
					-- built-in IR block renderers take precedence; otherwise ask
					-- the render job for a 'render' proc for this block kind
					if irBlockRdrs[block.kind] then
						rd = irBlockRdrs[block.kind](block,sec)
					else
						local rdr = renderJob:proc('render',block.kind,'html')
						if rdr then
							rd = rdr({
								state = render_state_handle;
								tagproc = tagproc.toIR;
								astproc = astproc.toIR;
							}, block, sec)
						end
					end
					-- blocks for which no renderer produced a node are dropped
					if rd then
						-- with the heading-anchors option set, append a link to the
						-- section's own id after the heading; a string option value
						-- is used as the link text, anything else falls back to &sect;
						if opts['heading-anchors'] and block == sec.heading_node then
							stylesNeeded.headingAnchors = true
							table.insert(rd.nodes, ' ')
							table.insert(rd.nodes, {
								tag = 'a';
								attrs = {href = '#' .. irs.attrs.id, class='anchor'};
								nodes = {type(opts['heading-anchors'])=='string' and opts['heading-anchors'] or '&sect;'};
							})
						end
						table.insert(irs.nodes, rd)
						runhook('ir_section_node_insert', rd, irs, sec)
					end
				end
			end
		-- the remaining section kinds are recognized but produce no output here
		elseif sec.kind == 'blockquote' then
		elseif sec.kind == 'listing' then
		elseif sec.kind == 'embed' then
		end
		if irs then table.insert(ir, irs) end
	end

















	-- restructure passes
	runhook('ir_restructure_pre', ir)
	
	---- list insertion pass
	local lists = {}
	for _, sec in pairs(ir) do
................................................................................
			local tonespan = opts.accent and .1 or 0
			local tbg = opts['dark-on-light'] and 1.0 - tonespan or tonespan
			local tfg = opts['dark-on-light'] and tonespan or 1.0 - tonespan
			if var == 'bg' then
				return tone(tbg,nil,nil,tonumber(alpha))
			elseif var == 'fg' then
				return tone(tfg,nil,nil,tonumber(alpha))


			elseif var == 'tone' then
				local l, sep, sat
				for i=1,3 do -- 🙄
					l,sep,sat = param:match('^%('..string.rep('([^%s]*)%s*',i)..'%)$')
					if l then break end
				end
				l = ss.math.lerp(tonumber(l), tbg, tfg)
................................................................................
				kind = 'var';
				pos = pos;
				raw = raw;
				var = not pos and s or nil;
				origin = c:clone();
			}
		end
	end










































	ct.spanctls = {
		{seq = '!', parse = formatter 'emph'};
		{seq = '*', parse = formatter 'strong'};
		{seq = '~', parse = formatter 'strike'};
		-- the style name must be one span_renderers.format has a tag for;
		-- 'inser' matched none of them
		{seq = '+', parse = formatter 'insert'};
		{seq = '\\', parse = function(s, c) -- raw
			return s




		end};
		{seq = '$\\', parse = function(s, c) -- raw











			return {
				kind = 'format';
				style = 'literal';
				spans = {s};


				origin = c:clone();


			}
		end};
		{seq = '$', parse = formatter 'literal'};

































		{seq = '&', parse = function(s, c)
			local r, t = s:match '^([^%s]+)%s*(.-)$'
			return {
				kind = 'term';
				spans = (t and t ~= "") and ct.parse_span(t, c) or {};
				ref = r; 
				origin = c:clone();
			}
		end};
		{seq = '^', parse = function(s, c)
			local fn, t = s:match '^([^%s]+)%s*(.-)$'
................................................................................
			}
		end};
		{seq = '>', parse = insert_link};
		{seq = '→', parse = insert_link};
		{seq = '🔗', parse = insert_link};
		{seq = '##', parse = insert_var_ref(true)};
		{seq = '#', parse = insert_var_ref(false)};




	}
end

function ct.parse_span(str,ctx)
	local function delimited(start, stop, s)
		local r = { pcall(ss.str.delimit, nil, start, stop, s) }
		if r[1] then return table.unpack(r, 2) end
		ctx:fail(tostring(r[2]))
	end
	local buf = ""
	local spans = {}
	local function flush()
		if buf ~= "" then



			table.insert(spans, buf)
			buf = ""
		end
	end
	local skip = false
	for c,p in eachcode(str) do
		if skip == true then
			skip = false
			buf = buf .. c
		elseif c == '\\' then
			skip = true





		elseif c == '{' then
			flush()
			local substr, following = delimited('{','}',str:sub(p.byte))
			local splitstart, splitstop = substr:find'%s+'
			local id, argstr
			if splitstart then
				id, argstr = substr:sub(1,splitstart-1), substr:sub(splitstop+1)
................................................................................
				local i = 1
				while i <= #argstr do
					while i<=#argstr and (argstr:sub(i,i) ~= '|' or argstr:sub(i-1,i) == '\\|') do
						i = i + 1
					end
					local arg = argstr:sub(start, i == #argstr and i or i-1)
					start = i+1

					table.insert(o.args, arg)
					i = i + 1
				end
			end

			p.next.byte = p.next.byte + following - 1
			table.insert(spans,o)
................................................................................
					table.insert(spans, i.parse(substr:sub(1+#i.seq), ctx))
					break
				end
			end
			if not found then
				ctx:fail('no recognized control sequence in [%s]', substr)
			end



		else
			buf = buf .. c
		end
	end
	flush()
	return spans
end

-- Wrap a block constructor so that the block it returns is stamped with a
-- copy of the current context as its origin, appended to the current
-- section, and announced through the 'block_insert' hook. The wrapped
-- function itself returns nothing.
local function blockwrap(fn)
	local function wrapped(l, c, j)
		local made = fn(l, c, j)
		made.origin = c:clone()
		local blocks = c.sec.blocks
		blocks[#blocks + 1] = made
		j:hook('block_insert', c, made, l)
	end
	return wrapped
end

local insert_paragraph = blockwrap(function(l,c)
	if l:sub(1,1) == '.' then l = l:sub(2) end
	return {
		kind = "paragraph";
................................................................................
	if t and t ~= "" then
		local heading = {
			kind = "label";
			spans = ct.parse_span(t,c);
			origin = s.origin;
			captions = s;
		}

		table.insert(s.blocks, heading)
		s.heading_node = heading
	end
	c.sec = s

	j:hook('section_attach', c, s)
end
................................................................................
	c.doc.meta[key] = val
	j:hook('metadata_set', key, val)
end
-- Directive handler for extension control. The first word selects the verb
-- and the remainder is a whitespace-separated list of extension names. None
-- of the three verbs has any effect yet; the handlers are empty placeholders.
local function dextctl(w,c)
	local verb, names = w(1)
	local actions = {
		uses = function(ext) end;
		needs = function(ext) end;
		inhibits = function(ext) end;
	}
	local act = actions[verb]
	for ext in names:gmatch '([^%s]+)' do
		if act then act(ext) end
	end
end
-- Directive handler shared by `when` and `unless`: sets c.hide_next to true
-- for `unless` and false otherwise. The condition words that follow the
-- keyword are not evaluated here.
local function dcond(w,c)
	local keyword = w(2)
	c.hide_next = (keyword == 'unless')
end;
................................................................................
ct.directives = {
	author = dsetmeta;
	license = dsetmeta;
	keywords = dsetmeta;
	desc = dsetmeta;
	when = dcond;
	unless = dcond;


















	expand = function(w,c)
		local _, m = w(1)
		if m ~= 'off' then
			c.expand_next = 1
		else
			c.expand_next = 0
		end
	end;
}

local function insert_table_row(l,c,j)
	local row = {}
	local buf
................................................................................
	local flush = function()
		if buf then
			buf.str = buf.str:gsub('%s+$','')
			table.insert(row, buf)
		end
		buf = { str = '' }
	end
	for c,p in eachcode(l) do
		if c == '|' or c == '+' and (p.code == 1 or l:sub(p.byte-1,p.byte-1)~='\\') then
			flush()
			buf.header = c == '+'
		elseif c == ':' then
			local lst = l:sub(p.byte-#c,p.byte-#c)
			local nxt = l:sub(p.next.byte,p.next.byte)
			if lst == '|' or lst == '+' and l:sub(p.byte-2,p.byte-2) ~= '\\' then
................................................................................
		else
			buf.str = buf.str .. c
		end
	end
	if buf.str ~= '' then flush() end 
	for _,v in pairs(row) do
		v.spans = ct.parse_span(v.str, c)

	end
	if #c.sec.blocks > 1 and c.sec.blocks[#c.sec.blocks].kind == 'table' then
		local tbl = c.sec.blocks[#c.sec.blocks]
		table.insert(tbl.rows, row)
		j:hook('block_table_attach', c, tbl, row, l)
		j:hook('block_table_row_insert', c, tbl, row, l)
	else
................................................................................
	{seq = '¶', fn = insert_paragraph};
	{seq = '❡', fn = insert_paragraph};
	{seq = '#', fn = insert_section};
	{seq = '§', fn = insert_section};
	{seq = '+', fn = insert_table_row};
	{seq = '|', fn = insert_table_row};
	{seq = '│', fn = insert_table_row};
	{seq = '!', fn = function(l,c,j) 
		local last = c.sec.blocks[#c.sec.blocks]
		local txt = l:match '^%s*!%s*(.-)$'
		if (not last) or last.kind ~= 'aside' then
			local aside = {
				kind = 'aside';
				lines = { ct.parse_span(txt, c) }

			}

			c:insert(aside)
			j:hook('block_aside_insert', c, aside, l)
			j:hook('block_aside_line_insert', c, aside, aside.lines[1], l)
			j:hook('block_insert', c, aside, l)
		else
			local sp = ct.parse_span(txt, c)

			table.insert(last.lines, sp)
			j:hook('block_aside_attach', c, last, sp, l)
			j:hook('block_aside_line_insert', c, last, sp, l)
		end
	end};
	{pred = function(s,c) return s:match'^[*:]' end, fn = blockwrap(function(l,c) -- list
		local stars = l:match '^([*:]+)'
................................................................................
		return {
			kind = 'list-item';
			depth = depth;
			ordered = ordered;
			spans = ct.parse_span(txt, c);
		}
	end)};
	{seq = '\t', fn = function(l,c,j)
		-- reference definition: a tab-indented line of the form `name: value`,
		-- stored in the refs table of the current section
		local ref, val = l:match '\t+([^:]+):%s*(.*)$'
		if not ref then
			-- without a `name:` part there is no key to store the value under;
			-- report a parse failure instead of letting the assignment below
			-- raise a raw Lua error on a nil table index
			c:fail('malformed reference definition')
			return
		end
		c.sec.refs[ref] = val
		j:hook('section_ref_attach', c, ref, val, l)
	end};
	{seq = '%', fn = function(l,c,j) -- directive
		local crit, cmdline = l:match '^%%([!%%]?)%s*(.*)$'
		local words = function(i)
			local wds = {}
			if i == 0 then return cmdline end
			for w,pos in cmdline:gmatch '([^%s]+)()' do
				table.insert(wds, w)
				i = i - 1
				if i == 0 then
					table.insert(wds,cmdline:sub(pos))
					return table.unpack(wds)
				end
			end
		end

		local cmd, rest = words(1)
		if ct.directives[cmd] then
................................................................................
			ct.directives[cmd](words,c,j)
		elseif cmd == c.doc.stage.mode['render:format'] then
			-- this is a directive for the renderer; insert it into the tree as is
			local dir = {
				kind = 'directive';
				critical = crit == '!';
				words = words;

			}
			c:insert(dir)
			j:hook('block_directive_render', j, c, dir)
		elseif c.doc:allow_ext(cmd) then -- extension directives begin with their id
			local ext = ct.ext.loaded[cmd]
			if ext.directives then
				local _, topcmd = words(2)
				if ext.directives[topcmd] then
					ext.directives[topcmd](j:delegate(ext), c, words)
................................................................................
			kind = 'code';
			listing = {
				kind = 'listing';
				lang = lang, id = id, title = title and ct.parse_span(title,c);
				lines = {};
			}
		}




		j:hook('mode_switch', c, mode)
		c.mode = mode
		if id then
			if c.sec.refs[id] then c:fail('duplicate ID %s', id) end
			c.sec.refs[id] = c.mode.listing
		end
		j:hook('block_insert', c, mode.listing, l)
		return c.mode.listing;
	end)};
	{pred = function(s,c)
		if s:match '^[%-_][*_%-%s]+' then return true end
		if startswith(s, '—') then
			for c, p in eachcode(s) do
				if ({
					['—'] = true, ['-'] = true, [' '] = true;
					['*'] = true, ['_'] = true, ['\t'] = true;
				})[c] ~= true then return false end
			end
			return true
		end
	end; fn = blockwrap(function()
		return { kind = 'horiz-rule' }
	end)};














	{fn = insert_paragraph};
}

-- Parse a cortav document.
--   file: object whose :lines() iterator yields the source lines
--   src:  source name, stored on the context and on the document
--   mode: mode table, stored as the parse stage's `mode`
-- Returns the populated document (ctx.doc). Problems are reported through
-- ctx:fail.
function ct.parse(file, src, mode)
	local function
	is_whitespace(cp)
		-- cp is a code point as produced by utf8.codes.
		-- NOTE(review): 0xe390 is compared as a code point here; possibly
		-- U+3000 (ideographic space) was intended. Confirm before changing.
		return cp == 0x20 or cp == 0xe390
	end

	local ctx = ct.ctx.mk(src)
	ctx.line = 0
	ctx.doc = ct.doc.mk()
	ctx.doc.src = src
	ctx.doc.stage = {
		kind = 'parse';
		mode = mode;
	}
	ctx.sec = ctx.doc:mksec() -- toplevel section
	ctx.sec.origin = ctx:clone()

	-- create states for extension hooks
	local job = ctx.doc:job('parse',nil,ctx)

	for full_line in file:lines() do ctx.line = ctx.line + 1
		-- l is the line with leading whitespace removed, or nil when the line
		-- is empty or consists of whitespace only
		local l
		for p, c in utf8.codes(full_line) do
			if not is_whitespace(c) then
				l = full_line:sub(p)
				break
			end
		end
		job:hook('line_read',ctx,l)

		if ctx.mode then
			if ctx.mode.kind == 'code' then
				if l and l:match '^~~~%s*$' then
					-- closing fence: leave listing mode
					job:hook('block_listing_end',ctx,ctx.mode.listing)
					-- pass the parse context; the `c` used here before was
					-- not in scope at this point and evaluated to nil
					job:hook('mode_switch', ctx, nil)
					ctx.mode = nil
				else
					-- TODO handle formatted code
					local newline = {l}
					table.insert(ctx.mode.listing.lines, newline)
					job:hook('block_listing_newline',ctx,ctx.mode.listing,newline)
				end
			else
				ctx:fail('unimplemented syntax mode %s', ctx.mode.kind)
			end
		else
			if l then
				-- run the first entry of seqs whose seq prefix and/or predicate
				-- accepts the line; returns whether one was found
				local function tryseqs(seqs, ...)
					for _, i in pairs(seqs) do
						if  ((not i.seq ) or startswith(l, i.seq)) and
							((not i.pred) or i.pred    (l, ctx  )) then
							i.fn(l, ctx, job, ...)
							return true
						end
					end
					return false
				end

				-- built-in control sequences first, then those contributed by
				-- extensions through the job's 'blocks' entries
				if not tryseqs(ct.ctlseqs) then
					local found = false

					for eb, ext, state in job:each('blocks') do
						if tryseqs(eb, state) then found = true break end
					end

					if not found then
						ctx:fail 'incomprehensible input line'
					end
				end
			else
				-- blank line: add a break block unless the section is empty or
				-- already ends in one
				if next(ctx.sec.blocks) and ctx.sec.blocks[#ctx.sec.blocks].kind ~= 'break' then
					local brk = {kind='break'}
					job:hook('block_break', ctx, brk, l)
					table.insert(ctx.sec.blocks, brk)
				end
			end
		end
		job:hook('line_end',ctx,l)
	end

	return ctx.doc
end







<







 







>
>
>







 







|






|







 







>







 







|
>


|
>


|








|








>
>
>










|



>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

>







 







>







>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
>
>
>
>
>
>
>
>
>
>
>

>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>


>
|





>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>






|









|

>
>




|









|




|


|


|



|







|






|







 







>





>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|










<




<
|

|
>
>
>
>

|




>
>

>
>
>
>
>
>
>
>
>
>
>
>
>

|


<
|

>
>




|



|
<
<
|
|
|
>
>
>
>
>
>
|
<
<
<
<
>
>
>
>
|
<
<
<
>
>
>
>
>
>
>
>
>
>







 







>
>
>
>







 







>
>
|
<
<
<
>
|
>
>
>
>
>
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

>
>







 







|






>
>
>
>
|
|
>



|
>
>
>





|
>


<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>









<

<
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







 







>
>







 








>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>




|

|
>
>
>
>

|
>
>
>
>
>
>
>
>
>
>
>



|
>
>
|
>
>


|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>



|







 







>
>
>
>













>
>
>





|
|
|
|
|
|
>
>
>
>
>







 







>







 







>
>
>










|
|

|

>
>
>







 







>







 







>

>

>







 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>



|

|







 







|







 







>







 







|
|




|
>

>
|





>







 







|
>
>
>
>
>
>
>
>
>

>
>
>
>
>
>
>
>
>
|
>

>
>
>
>
>
>
|
|








|







 







>

|







 







>
>
>
>












|










>
>
>
>
>
>
>
>
>
>
>
>
>
>



|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
<
<
>
>
>
>
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>





<
<
<
<


>
>
>
>
>
>
>
>



>
>
>
>
>
>
>
>
>
>
>
>
>
>
>









|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
2
3
4
5
6
7
8

9
10
11
12
13
14
15
..
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
...
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
...
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
...
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
...
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
...
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802

803
804
805
806

807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839

840
841
842
843
844
845
846
847
848
849
850
851
852


853
854
855
856
857
858
859
860
861
862




863
864
865
866
867



868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
...
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
...
910
911
912
913
914
915
916
917
918
919



920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
....
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052








































1053
1054
1055
1056
1057
1058
1059
....
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121

1122

1123



























1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
....
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
....
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
....
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
....
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
....
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
....
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
....
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
....
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
....
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
....
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
....
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
....
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
....
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
....
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992


1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030




2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107














--  ~ lexi hale <lexi@hale.su>
--  © AGPLv3
--  ? reference implementation of the cortav document language

local ss = require 'sirsem'
-- aliases for commonly used sirsem funcs
local startswith = ss.str.begins

local dump = ss.dump
local declare = ss.declare

-- make this module available to require() when linked into a lua bytecode program with luac
local ct = ss.namespace 'cortav'
ct.info = {
	version = ss.version {0,1; 'devel'};
................................................................................
	end);
	cli = ss.exnkind 'command line parse error';
	mode = ss.exnkind('bad mode', function(msg, ...)
		return string.format("mode “%s” "..msg, ...)
	end);
	unimpl = ss.exnkind 'feature not implemented';
	ext = ss.exnkind 'extension error';
	enc = ss.exnkind('encoding error', function(msg, ...)
		return string.format('[%s]' .. msg, ...)
	end);
}

ct.ctx = declare {
	mk = function(src) return {src = src} end;
	ident = 'context';
	cast = {
		string = function(me)
................................................................................
			table.insert(self.sec.blocks,block)
			return block
		end;
		ref = function(self,id)
			-- Resolve a reference name. A bare name is looked up in the current
			-- section; "section.name" addresses the refs of another section.
			-- Returns the stored value, the bare reference name, and the section.
			-- NOTE(review): the third value is the section object for a bare
			-- name but the section's *name* for a dotted one; confirm callers
			-- expect that difference.
			if id:find'%.' then
				local secname, refname = string.match(id, "(.-)%.(.+)")
				local target = self.doc.sections[secname]
				if not target then
					self:fail("no such section %s", secname)
					return
				end
				local found = target.refs[refname]
				if found then return found, refname, secname end
				self:fail("no such ref %s in section %s", refname, secname)
				return
			end
			local found = self.sec.refs[id]
			if found then return found, id, self.sec end
			self:fail("no such ref %s in current section", id or '')
		end
	};
}

................................................................................
		meta = {};
		vars = {};
		ext = {
			inhibit = {};
			need = {};
			use = {};
		};
		enc = ss.str.enc.utf8;
	} end;
	construct = function(me)
		me.docjob = ct.ext.job('doc', me, nil)
	end;
}

-- FP helper functions
................................................................................

-- renderer engines
function ct.render.html(doc, opts)
	local doctitle = opts['title']
	local f = string.format
	local ids = {}
	local canonicalID = {}
	-- Return an HTML id for `obj` that is unique within this render,
	-- allocating one on first use and returning the same id on every later
	-- call for the same object.
	--   obj: any table; obj.id, when present, is used as the preferred id
	--   pfx: optional string prepended to the id (defaults to '')
	-- When the preferred id is already taken, a hexadecimal suffix
	-- (-1, -2, ...) is appended; objects without an id get
	-- "<pfx>x-<n>".
	local function getSafeID(obj,pfx)
		pfx = pfx or ''
		local known = canonicalID[obj]
		if known then return known end

		local cid
		if obj.id then
			-- The prefixed form is both the key checked for collisions and
			-- the key registered; registering the bare obj.id (as before)
			-- let two prefixed objects with the same id share one HTML id.
			local base = pfx .. obj.id
			cid = base
			local i = 1
			while ids[cid] do
				cid = base .. string.format('-%x', i)
				i = i + 1
			end
		else
			local i = 1
			repeat cid = string.format('%sx-%x', pfx, i)
				i = i + 1 until not ids[cid]
		end
		ids[cid] = obj
		canonicalID[obj] = cid
		return cid
	end

	local footnotes = {}
	local footnotecount = 0

	local langsused = {}
	local langpairs = {
		lua = { color = 0x9377ff };
		terra = { color = 0xff77c8 };
		c = { name = 'C', color = 0x77ffe8 };
		html = { color = 0xfff877 };
		scheme = { color = 0x77ff88 };
		lisp = { color = 0x77ff88 };
		fortran = { color = 0xff779a };
		python = { color = 0xffd277 };
		ruby = { color = 0xcdd6ff };
	}

	local stylesets = {
		-- stylesheet for footnote boxes: hidden by default and shown as a
		-- grid when the footnote's anchor is the current :target
		footnote = [[
			div.footnote {
				font-size: 90%;
				display: none;
				grid-template-columns: 1em 1fr min-content;
				grid-template-rows: 1fr min-content;
				position: fixed;
				padding: 1em;
				background: @tone(0.05);
				border: black;
				margin:auto;
			}
			div.footnote:target { display:grid; }
			@media screen {
				div.footnote {
					left: 10em;
					right: 10em;
					max-width: calc(@width + 2em);
					max-height: 30vw;
					bottom: 1em;
				}
			}
			@media print {
				div.footnote {
					position: relative;
				}
				div.footnote:first-of-type {
					border-top: 1px solid black;
				}
			}

			div.footnote > a[href="#0"]{
				grid-row: 2/3;
				grid-column: 3/4;
				display: block;
				padding: 0.2em 0.7em;
				text-align: center;
				text-decoration: none;
				background: @tone(0.2);
				color: @tone(1);
				border: 1px solid black;
				margin-top: 0.6em;
				-webkit-user-select: none;
				-ms-user-select: none;
				user-select: none;
				-webkit-user-drag: none;
				user-drag: none;
			}
			div.footnote > a[href="#0"]:hover {
				background: @tone(0.3);
				color: @tone(2);
			}
			div.footnote > a[href="#0"]:active {
				background: @tone(0.05);
				color: @tone(0.4);
			}
			@media print {
				div.footnote > a[href="#0"]{
					display:none;
				}
			}
			div.footnote > div.number {
				text-align:right;
				grid-row: 1/2;
				grid-column: 1/2;
			}
			div.footnote > div.text {
				grid-row: 1/2;
				grid-column: 2/4;
				padding-left: 1em;
				overflow-y: scroll;
			}
		]];
		header = [[
			body { padding: 0 2.5em !important }
			h1,h2,h3,h4,h5,h6 { border-bottom: 1px solid @tone(0.7); }
			h1 { font-size: 200%; border-bottom-style: double !important; border-bottom-width: 3px !important; margin: 0em -1em; }
			h2 { font-size: 130%; margin: 0em -0.7em; }
			h3 { font-size: 110%; margin: 0em -0.5em; }
			h4 { font-size: 100%; font-weight: normal; margin: 0em -0.2em; }
			h5 { font-size: 90%; font-weight: normal; }
			h6 { font-size: 80%; font-weight: normal; }
................................................................................
			section:target > :is(h1,h2,h3,h4,h5,h6) {

			}
		]];
		paragraph = [[
			p {
				margin: 0.7em 0;
				text-align: justify;
			}
			section {
				margin: 1.2em 0;
			}
			section:first-child { margin-top: 0; }
		]];
		accent = [[
			@media screen {
				body { background: @bg; color: @fg }
				a[href] {
					color: @tone(0.7 30);
					text-decoration-color: @tone/0.4(0.7 30);
				}
				a[href]:hover {
					color: @tone(0.9 30);
					text-decoration-color: @tone/0.7(0.7 30);
				}
				h1 { color: @tone(2); }
				h2 { color: @tone(1.5); }
				h3 { color: @tone(1.2); }
				h4 { color: @tone(1); }
				h5,h6 { color: @tone(0.8); }
			}
			@media print {
				a[href] {
					text-decoration: none;
					color: black;
					font-weight: bold;
				}
				h1,h2,h3,h4,h5,h6 {
					border-bottom: 1px black;
				}
			}
		]];
		aside = [[
			section > aside {
				text-align: justify;
				margin: 0 1.5em;
				padding: 0.5em 0.8em;
				background: @tone(0.05);
				font-size: 90%;
				border-left: 5px solid @tone(0.2 15);
				border-right: 5px solid @tone(0.2 15);
			}
			section > aside p {
				margin: 0;
				margin-top: 0.6em;
			}
			section > aside p:first-child {
				margin: 0;
			}
      ]];
		code = [[
			code {
				display: inline-block;
				background: @tone(0.9);
				color: @bg;
				font-family: monospace;
				font-size: 90%;
				padding: 3px 5px;
			}
		]];
		var = [[
			var {
				font-style: italic;
				font-family: monospace;
				color: @tone(0.7);
			}
			code var {
				color: @tone(0.25);
			}
		]];
		math = [[
			span.equation {
				display: inline-block;
				background: @tone(0.08);
				color: @tone(2);
				padding: 0.1em 0.3em;
				border: 1px solid @tone(0.5);
			}
		]];
		abbr = [[
			abbr[title] { cursor: help; }
		]];
		editors_markup = [[]];
		block_code_listing = [[
			figure.listing {
				font-family: monospace;
				background: @tone(0.05);
				color: @fg;
				padding: 0;
				margin: 0.3em 0;
				counter-reset: line-number;
				position: relative;
				border: 1px solid @fg;
			}
			figure.listing>div {
				white-space: pre-wrap;
				tab-size: 3;
				-moz-tab-size: 3;
				counter-increment: line-number;
				text-indent: -2.3em;
				margin-left: 2.3em;
			}
			figure.listing>:is(div,hr)::before {
				width: 1.0em;
				padding: 0.2em 0.4em;
				text-align: right;
				display: inline-block;
				background-color: @tone(0.2);
				border-right: 1px solid @fg;
				content: counter(line-number);
				margin-right: 0.3em;
			}
			figure.listing>hr::before {
				color: transparent;
				padding-top: 0;
				padding-bottom: 0;
			}
			figure.listing>div::before {
				color: @fg;
			}
			figure.listing>div:last-child::before {
				padding-bottom: 0.5em;
			}
			figure.listing>figcaption:first-child {
				border: none;
				border-bottom: 1px solid @fg;
			}
			figure.listing>figcaption::after {
				display: block;
				float: right;
				font-weight: normal;
				font-style: italic;
				font-size: 70%;
				padding-top: 0.3em;
			}
			figure.listing>figcaption {
				font-family: sans-serif;
				font-size: 120%;
				padding: 0.2em 0.4em;
				border: none;
				color: @tone(2);
			}
			figure.listing > hr {
				border: none;
				margin: 0;
				height: 0.7em;
				counter-increment: line-number;
			}
		]];
	}
................................................................................
		stylesets = stylesets;
		stylesets_active = stylesNeeded;
		obj_htmlid = getSafeID;
		-- remaining fields added later
	}

	local renderJob = doc:job('render_html', nil, render_state_handle)
	doc.stage.job = renderJob;

	local runhook = function(h, ...)
		return renderJob:hook(h, render_state_handle, ...)
	end

	-- tagproc holds three interchangeable "tag processors". Each exposes
	--   tag(name, attrs, body[, src])  -- element with content
	--   elt(name, attrs[, src])        -- element without content
	--   catenate(list)                 -- combine a list of rendered nodes
	-- so the same renderer code can produce plain text (toTXT), an
	-- intermediate tree (toIR) or HTML source (toHTML).
	local tagproc do
		-- text for a single attribute: bare name for `true`, name="value"
		-- for any other truthy value, nothing for false/nil
		local function attr_text(v,k)
			if v == true then
				return ' '..k
			elseif v then
				return f(' %s="%s"', k, v)
			end
		end
		local function join(a,b) return a..b end

		-- opening tag `<t attrs...>`
		local function elt(t,attrs)
			local attrstr = ''
			if attrs then
				attrstr = ss.reduce(join, '', ss.map(attr_text, attrs)) or ''
			end
			return f('<%s%s>', t, attrstr)
		end

		-- plain text: markup is dropped, only content survives
		local toTXT = { catenate = table.concat }
		function toTXT.tag(t,a,v) return v end
		function toTXT.elt(t,a) return '' end

		-- intermediate representation: nodes are tables
		local toIR = {}
		function toIR.tag(t,a,v,o)
			local nodes = v
			if type(v) == 'string' then nodes = {v} end
			return { tag = t, attrs = a, nodes = nodes, src = o }
		end
		function toIR.elt(t,a,o)
			return { tag = t, attrs = a, src = o }
		end
		function toIR.catenate(...) return ... end

		-- HTML source text
		local toHTML = { elt = elt, catenate = table.concat }
		function toHTML.tag(t,attrs,body)
			return f('%s%s</%s>', elt(t,attrs), body, t)
		end

		tagproc = { toTXT = toTXT, toIR = toIR, toHTML = toHTML }
	end

	-- Build the two renderer primitives shared by every output flavour.
	--   procs:          a tag processor (one of the tagproc tables)
	--   span_renderers: table mapping span kind -> renderer function; it is
	--                   consulted lazily, so it may be filled in after this
	--                   call returns
	-- Returns a table with
	--   htmlDoc(title, head, body): wraps content in a full HTML document
	--   htmlSpan(spans, block, sec): renders a span list to one string
	local function getBaseRenderers(procs, span_renderers)
		local tag, elt = procs.tag, procs.elt
		local htmlDoc = function(title, head, body)
			return [[<!doctype html>]] .. tag('html',nil,
				tag('head', nil,
					elt('meta',{charset = 'utf-8'}) ..
					(title and tag('title', nil, title) or '') ..
					(head or '')) ..
				tag('body', nil, body or ''))
		end


		local function htmlSpan(spans, block, sec)
			local text = {}
			-- spans is an ordered list and the output must follow that
			-- order; ipairs guarantees it, pairs does not
			for _,v in ipairs(spans) do
				if type(v) == 'string' then
					-- literal text: replace HTML-significant characters with
					-- numeric character references
					v=v:gsub('[<>&"]', function(x)
							return string.format('&#%02u;', string.byte(x))
						end)
					-- then let extensions post-process the escaped text
					for fn, ext in renderJob:each('hook','render_html_sanitize') do
						v = fn(renderJob:delegate(ext), v)
					end
					table.insert(text,v)
				else
					-- structured span: dispatch on its kind; the parentheses
					-- keep only the renderer's first return value
					table.insert(text, (span_renderers[v.kind](v, block, sec)))
				end
			end
			return table.concat(text)
		end
		return {htmlDoc=htmlDoc, htmlSpan=htmlSpan}
	end

	-- Parse span text with ct.parse_span (arguments are forwarded as-is),
	-- pass the result to the document job's 'meddle_span' hook, and return it.
	local function spanparse(...)
		local parsed = ct.parse_span(...)
		doc.docjob:hook('meddle_span', parsed)
		return parsed
	end

	local cssRulesFor = {}
	local function getSpanRenderers(procs)
		local tag, elt, catenate = procs.tag, procs.elt, procs.catenate
		local span_renderers = {}
		local plainrdr = getBaseRenderers(tagproc.toTXT, span_renderers)
		local htmlSpan = getBaseRenderers(procs, span_renderers).htmlSpan

		-- Render an inline formatting span (sp.style selects the element)
		-- and record which optional stylesheet the style requires.
		-- Extra arguments are forwarded to htmlSpan for the nested spans.
		function span_renderers.format(sp,...)
			local style = sp.style
			if style == 'literal' then
				-- the code stylesheet is left out when 'fossil-uv' is set
				if not opts['fossil-uv'] then
					stylesNeeded.code = true
				end
			elseif style == 'strike' or style == 'insert' then
				stylesNeeded.editors_markup = true
			elseif style == 'variable' then
				stylesNeeded.var = true
			end

			local element = ({
				strong = 'strong', emph = 'em', strike = 'del',
				insert = 'ins', literal = 'code', variable = 'var',
			})[style]
			return tag(element,nil,htmlSpan(sp.spans,...))
		end

		-- Render a dereference span: t.ref is resolved through the block's
		-- origin context (b.origin:ref) and rendered according to what it
		-- resolves to.
		--   string value  -> <abbr> whose title is the value
		--   resource      -> embedded according to the resource's class
		--   anything else -> b.origin:fail
		function span_renderers.deref(t,b,s)
			local r = b.origin:ref(t.ref)
			local name = t.ref
			-- for "section.name" references, display only the part after the dot
			if name:find'%.' then name = name:match '^[^.]*%.(.+)$' end
			if type(r) == 'string' then


				stylesNeeded.abbr = true
				-- the span's own text is the abbreviation body; when it has
				-- none, the reference name is shown instead
				return tag('abbr',{title=r},next(t.spans) and htmlSpan(t.spans,b,s) or name)
			end
			if r.kind == 'resource' then
				local rid = getSafeID(r, 'res-')
				if r.class == 'image' then
					-- register a per-resource stylesheet the first time this
					-- resource is embedded
					-- NOTE(review): the rule body is empty and prepcss is
					-- defined outside this view; this looks unfinished
					if not cssRulesFor[r] then
						local css = prepcss(string.format([[
							section p > .%s {
							}




						]], rid))
						stylesets[r] = css
						cssRulesFor[r] = css
						stylesNeeded[r] = true
					end



					-- NOTE(review): 'blaah' is placeholder content
					return tag('div',{class=rid},catenate{'blaah'})
				elseif r.class == 'video' then
					-- NOTE(review): vid is always empty here, so the video
					-- element gets no content
					local vid = {}
					return tag('video',nil,vid)
				elseif r.class == 'font' then
					b.origin:fail('fonts cannot be instantiated, use %font directive instead')
				end
				-- resources of any other class fall through and return nothing
			else
				b.origin:fail('%s is not an object that can be embedded', t.ref)
			end
		end

		function span_renderers.var(v,b,s)
			local val
			if v.pos then
				if not v.origin.invocation then
					v.origin:fail 'positional arguments can only be used in a macro invocation'
................................................................................
			end
			if v.raw then
				return val
			else
				return htmlSpan(ct.parse_span(val, v.origin), b, s)
			end
		end

		function span_renderers.raw(v,b,s)
			return htmlSpan(v.spans, b, s)
		end

		function span_renderers.link(sp,b,s)
			local href
			if b.origin.doc.sections[sp.ref] then
				href = '#' .. sp.ref
			else
				if sp.addr then href = sp.addr else
................................................................................
					if type(r) == 'table' then
						href = '#' .. getSafeID(r)
					else href = r end
				end
			end
			return tag('a',{href=href},next(sp.spans) and htmlSpan(sp.spans,b,s) or href)
		end

		span_renderers['line-break'] = function(sp,b,s)
			return elt('br')



		end

		-- Expand a macro invocation span.
		-- m.macro is itself span text: it is parsed and rendered to plain
		-- text to obtain the macro name, which must resolve (through the
		-- block's origin context) to a string. That string is then parsed
		-- and rendered in a cloned context whose `invocation` field is the
		-- invoking span.
		function span_renderers.macro(m,b,s)
			local macroname = plainrdr.htmlSpan(
				ct.parse_span(m.macro, b.origin), b,s)
			local r = b.origin:ref(macroname)
			if type(r) ~= 'string' then
				-- report the name that was looked up; the previous code
				-- indexed an undefined variable here, so the diagnostic
				-- itself raised an unrelated error
				b.origin:fail('%s is an object, not a reference', macroname)
			end
			local mctx = b.origin:clone()
			mctx.invocation = m
			return htmlSpan(ct.parse_span(r, mctx),b,s)
		end
		function span_renderers.math(m,b,s)
			stylesNeeded.math = true
			return tag('span',{class='equation'},htmlSpan(m.spans, b, s))
		end;
		-- Render an inline directive span.
		-- Directives addressed to 'html' or to an extension the document
		-- allows currently produce no output here. For any other extension:
		-- a critical directive fails, a fail-through directive renders its
		-- fallback spans, and everything else is dropped silently.
		function span_renderers.directive(d,b,s)
			if d.ext == 'html' then return end
			if b.origin.doc:allow_ext(d.ext) then return end

			if d.crit then
				b.origin:fail('critical extension %s unavailable', d.ext)
				return
			end
			if d.failthru then
				return htmlSpan(d.spans, b, s)
			end
		end
		-- Render a footnote reference span as a link followed by the
		-- footnote number in superscript.
		-- Footnotes are de-duplicated by "<section id> <ref name>": the
		-- first use allocates the next number and an HTML id and records the
		-- note in `footnotes`; later uses link to the same record.
		-- Note: the parameter `f` shadows the enclosing string.format alias
		-- inside this function.
		function span_renderers.footnote(f,b,s)
			stylesNeeded.footnote = true
			local source, sid, ssec = b.origin:ref(f.ref)
			local cnc = getSafeID(ssec) .. ' ' .. sid
			local fn
			if footnotes[cnc] then
				fn = footnotes[cnc]
			else
				footnotecount = footnotecount + 1
				fn = {num = footnotecount, origin = b.origin, fnid=cnc, source = source}
				fn.id = getSafeID(fn)
				footnotes[cnc] = fn
			end
			-- pass the block and section through so span renderers invoked
			-- for the link text (which read b.origin) receive them
			return tag('a', {href='#'..fn.id}, htmlSpan(f.spans, b, s) ..
						tag('sup',nil, fn.num))
		end

		return span_renderers
	end

	local function getBlockRenderers(procs, sr)
		local tag, elt, catenate = procs.tag, procs.elt, procs.catenate
		local null = function() return catenate{} end

		local block_renderers = {
			anchor = function(b,s)
................................................................................
					if #l > 0 then
						return tag('div',nil,sr.htmlSpan(l, b, s))
					else
						return elt('hr')
					end
				end, b.lines)
				if b.title then
					table.insert(nodes,1, tag('figcaption',nil,sr.htmlSpan(b.title)))
				end
				if b.lang then langsused[b.lang] = true end
				return tag('figure', {class='listing', lang=b.lang, id=b.id and getSafeID(b)}, catenate(nodes))
			end;
			-- Render an aside block. A single-line aside is rendered as bare
			-- span content; a multi-line aside wraps every line in its own
			-- <p>. Marks the 'aside' stylesheet as needed.
			aside = function(b,s)
				local bn = {}
				stylesNeeded.aside = true
				if #b.lines == 1 then
					bn[1] = sr.htmlSpan(b.lines[1], b, s)
				else
					-- ipairs keeps the lines in source order
					for _,v in ipairs(b.lines) do
						table.insert(bn, tag('p', {}, sr.htmlSpan(v, b, s)))
					end
				end
				-- combine the children with the processor's own catenate so
				-- the body has the form `tag` expects (a string for the
				-- text/HTML processors) instead of a raw Lua table
				return tag('aside', {}, catenate(bn))
			end;
			['break'] = function() -- HACK
				-- lists need to be rewritten to work like asides
				return '';
			end;
		}
		return block_renderers;
	end

	-- Assemble the full renderer set for one tag processor: the span
	-- renderers are built first, then the base renderers (htmlDoc/htmlSpan)
	-- on top of them, and finally the block renderers, which are stored in
	-- the returned table under `block_renderers`.
	local function getRenderers(procs)
		local renderers = getBaseRenderers(procs, getSpanRenderers(procs))
		renderers.block_renderers = getBlockRenderers(procs, renderers)
		return renderers
	end

	local astproc = {
		toHTML = getRenderers(tagproc.toHTML);
		toTXT  = getRenderers(tagproc.toTXT);
		toIR   = { };
	}
................................................................................
	local ir = {}
	local dr = astproc.toHTML -- default renderers
	local plainr = astproc.toTXT
	local irBlockRdrs = astproc.toIR.block_renderers;

	render_state_handle.ir = ir;

	-- Render a list of AST blocks and append the resulting IR nodes to
	-- irs.nodes.
	--   blocks: ordered list of block tables (dispatched on block.kind)
	--   irs:    IR node receiving the rendered children
	--   sec:    optional; the section the blocks belong to. It is handed to
	--           the block renderers and hooks, and is needed for heading
	--           anchors. Previously this function read `sec` as a free
	--           variable, which was never in scope at this point.
	-- A block is rendered by the built-in IR renderer for its kind when one
	-- exists, otherwise by a 'render' proc supplied through the render job.
	-- Blocks with no renderer are skipped.
	local function renderBlocks(blocks, irs, sec)
		for _, block in ipairs(blocks) do
			local rd
			if irBlockRdrs[block.kind] then
				rd = irBlockRdrs[block.kind](block,sec)
			else
				local rdr = renderJob:proc('render',block.kind,'html')
				if rdr then
					rd = rdr({
						state = render_state_handle;
						tagproc = tagproc.toIR;
						astproc = astproc.toIR;
					}, block, sec)
				end
			end
			if rd then
				-- only a section's own heading gets a self-link; callers
				-- that render blocks outside any section pass no `sec`
				if opts['heading-anchors'] and sec and block == sec.heading_node then
					stylesNeeded.headingAnchors = true
					table.insert(rd.nodes, ' ')
					table.insert(rd.nodes, {
						tag = 'a';
						attrs = {href = '#' .. irs.attrs.id, class='anchor'};
						-- a string option value is used as the anchor text
						nodes = {type(opts['heading-anchors'])=='string' and opts['heading-anchors'] or '&sect;'};
					})
				end
				-- carry the source language over as a lang attribute
				if rd.src and rd.src.origin.lang then
					if not rd.attrs then rd.attrs = {} end
					rd.attrs.lang = rd.src.origin.lang
				end
				table.insert(irs.nodes, rd)
				runhook('ir_section_node_insert', rd, irs, sec)
			end
		end
	end
	runhook('ir_assemble', ir)
	-- build one IR <section> per non-empty ordinary section, in document order
	for i, sec in ipairs(doc.secorder) do
		-- unless a title was given in opts, the first depth-1 heading
		-- (rendered as plain text) becomes the document title
		if doctitle == nil and sec.depth == 1 and sec.heading_node then
			doctitle = astproc.toTXT.htmlSpan(sec.heading_node.spans, sec.heading_node, sec)
		end
		local irs
		if sec.kind == 'ordinary' then
			if #(sec.blocks) > 0 then
				irs = {tag='section',attrs={id = getSafeID(sec)},nodes={}}

				runhook('ir_section_build', irs, sec)

				renderBlocks(sec.blocks, irs, sec)
			end
		elseif sec.kind == 'blockquote' then
		elseif sec.kind == 'listing' then
		elseif sec.kind == 'embed' then
		end
		if irs then table.insert(ir, irs) end
	end

	-- Emit one <div class="footnote"> IR node per footnote collected while
	-- rendering spans.
	-- NOTE(review): `footnotes` is keyed by string, so pairs() visits it in
	-- unspecified order and the divs are not guaranteed to appear in
	-- footnote-number order — confirm whether that matters.
	for _, fn in pairs(footnotes) do
		local tag = tagproc.toIR.tag
		local body = {nodes={}}
		local ftir = {}
		-- the footnote source is document markup: parse it line by line
		-- into a scratch block list, in the context the footnote was used in
		for l in fn.source:gmatch('([^\n]*)') do
			ct.parse_line(l, fn.origin, ftir)
		end
		-- render the parsed blocks into body.nodes
		renderBlocks(ftir,body)
		-- number, text and a close link that points at "#0"
		local note = tag('div',{class='footnote',id=fn.id}, {
			tag('div',{class='number'}, tostring(fn.num)),
			tag('div',{class='text'}, body.nodes),
			tag('a',{href='#0'},'close')
		})
		table.insert(ir, note)
	end

	-- restructure passes
	runhook('ir_restructure_pre', ir)
	
	---- list insertion pass
	local lists = {}
	for _, sec in pairs(ir) do
................................................................................
			local tonespan = opts.accent and .1 or 0
			local tbg = opts['dark-on-light'] and 1.0 - tonespan or tonespan
			local tfg = opts['dark-on-light'] and tonespan or 1.0 - tonespan
			if var == 'bg' then
				return tone(tbg,nil,nil,tonumber(alpha))
			elseif var == 'fg' then
				return tone(tfg,nil,nil,tonumber(alpha))
			elseif var == 'width' then
				return opts['width'] or '100vw'
			elseif var == 'tone' then
				local l, sep, sat
				for i=1,3 do -- 🙄
					l,sep,sat = param:match('^%('..string.rep('([^%s]*)%s*',i)..'%)$')
					if l then break end
				end
				l = ss.math.lerp(tonumber(l), tbg, tfg)
................................................................................
				kind = 'var';
				pos = pos;
				raw = raw;
				var = not pos and s or nil;
				origin = c:clone();
			}
		end
	end
	-- Build the parser for an inline directive span.
	--   crit:     value stored in the span's `crit` field
	--   failthru: value stored in `failthru`; when truthy the text after the
	--             first word is parsed as fallback spans
	-- The returned parser takes the span text `s` and the context `c`.
	-- The first word of `s` is split into an extension name (everything up
	-- to the first break symbol), the run of break symbols, and a command
	-- (all remaining characters). The result is a 'directive' span table.
	local function insert_span_directive(crit, failthru)
		return function(s,c)
			-- was `d.doc.enc`; the only context in scope here is `c`
			local args = ss.str.breakwords(c.doc.enc, s, 1)
			-- NOTE(review): the original read `enc.encodeUCS` from a name
			-- not defined in the visible scope; the document encoding is
			-- presumably what was meant — confirm
			local brksyms = map(c.doc.enc.encodeUCS, {
				'.', ',', ':', ';', '!', '$', '&', '^',
				'/', '?', '@', '='
			})
			local brkhash = {} for _,bs in pairs(brksyms) do
				brkhash[bs] = true
			end

			local extname = ''
			local sym
			local cmd = ''
			for ch,p in ss.str.each(c.doc.enc, args[1]) do
				if sym == nil then
					if brkhash[ch] then
						sym = ch
					else
						extname = extname .. ch
					end
				elseif brkhash[ch] then
					-- strings are joined with `..`; `+` is arithmetic
					sym = sym .. ch
				else
					cmd = cmd .. ch
				end
			end
			if cmd == '' then cmd = nil end
			local spans if failthru then
				spans = ct.parse_span(args[2], c)
			end
			return {
				kind = 'directive';
				ext = extname;
				cmd = cmd;
				args = args;
				crit = crit;
				failthru = failthru;
				spans = spans;
			}
		end
	end
	ct.spanctls = {
		{seq = '!', parse = formatter 'emph'};
		{seq = '*', parse = formatter 'strong'};
		{seq = '~', parse = formatter 'strike'};
		{seq = '+', parse = formatter 'insert'};
		{seq = '\\', parse = function(s, c) -- raw
			return {
				kind = 'raw';
				spans = {s};
				origin = c:clone();
			}
		end};
		{seq = '`\\', parse = function(s, c) -- raw
			local o = c:clone();
			local str = ''
			for c, p in ss.str.each(c.doc.enc, s) do
				local q = p:esc()
				if q then
					str = str ..  q
					p.next.byte = p.next.byte + #q
				else
					str = str .. c
				end
			end
			return {
				kind = 'format';
				style = 'literal';
				spans = {{
					kind = 'raw';
					spans = {str};
					origin = o;
				}};
				origin = o;
			}
		end};
		{seq = '`', parse = formatter 'literal'};
		{seq = '$', parse = formatter 'variable'};
		{seq = '^', parse = function(s,c) --footnotes
			local r, t = s:match '^([^%s]+)%s*(.-)$'
			return {
				kind = 'footnote';
				ref = r;
				spans = ct.parse_span(t, c);
				origin = c:clone();
			}
		-- TODO support for footnote sections
		end};
		{seq = '=', parse = function(s,c) --math mode
			local tx = {
				['%*'] = '×';
				['/'] = '÷';
			}
			for k,v in pairs(tx) do s = s:gsub(k,v) end
			s=s:gsub('%^([0-9]+)', function(num)
				local sup = {'⁰','¹','²','³','⁴','⁵','⁶','⁷','⁸','⁹'};
				local r = ''
				for i=1,#num do
					r = r .. sup[1 + (num:byte(i) - 0x30)]
				end
				return r
			end)
			local m = {s} --TODO
			return {
				kind = 'math';
				original = s;
				spans = m;
				origin = c:clone();
			};
		end};
		{seq = '&', parse = function(s, c)
			local r, t = s:match '^([^%s]+)%s*(.-)$'
			return {
				kind = 'deref';
				spans = (t and t ~= "") and ct.parse_span(t, c) or {};
				ref = r; 
				origin = c:clone();
			}
		end};
		{seq = '^', parse = function(s, c)
			local fn, t = s:match '^([^%s]+)%s*(.-)$'
................................................................................
			}
		end};
		{seq = '>', parse = insert_link};
		{seq = '→', parse = insert_link};
		{seq = '🔗', parse = insert_link};
		{seq = '##', parse = insert_var_ref(true)};
		{seq = '#', parse = insert_var_ref(false)};
		{seq = '%%', parse = function() --[[NOP]] end};
		{seq = '%!', parse = insert_span_directive(true,false)};
		{seq = '%:', parse = insert_span_directive(false,true)};
		{seq = '%', parse = insert_span_directive(false,false)};
	}
end

function ct.parse_span(str,ctx)
	local function delimited(start, stop, s)
		local r = { pcall(ss.str.delimit, nil, start, stop, s) }
		if r[1] then return table.unpack(r, 2) end
		ctx:fail(tostring(r[2]))
	end
	local buf = ""
	local spans = {}
	local function flush()
		if buf ~= "" then
	-- 			for fn, ext in ctx.doc.docjob:each('hook','doc_meddle_string') do
	-- 				buf = fn(ctx.doc.docjob:delegate(ext), ctx, buf)
	-- 			end
			table.insert(spans, buf)
			buf = ""
		end
	end
	local skip = false
	for c,p in ss.str.each(ctx.doc.enc,str) do
		local ba, ca, es = ctx.doc.enc.parse_escape(str:sub(p.byte))
		if es then
			flush()
			table.insert(spans, {
				kind = 'raw';
				spans = {es};
				origin = ctx:clone()
			})
			p.next.byte = p.next.byte + ba;
			p.next.code = p.next.code + ca;
		elseif c == '{' then
			flush()
			local substr, following = delimited('{','}',str:sub(p.byte))
			local splitstart, splitstop = substr:find'%s+'
			local id, argstr
			if splitstart then
				id, argstr = substr:sub(1,splitstart-1), substr:sub(splitstop+1)
................................................................................
				local i = 1
				while i <= #argstr do
					while i<=#argstr and (argstr:sub(i,i) ~= '|' or argstr:sub(i-1,i) == '\\|') do
						i = i + 1
					end
					local arg = argstr:sub(start, i == #argstr and i or i-1)
					start = i+1
					arg=arg:gsub('\\|','|')
					table.insert(o.args, arg)
					i = i + 1
				end
			end

			p.next.byte = p.next.byte + following - 1
			table.insert(spans,o)
................................................................................
					table.insert(spans, i.parse(substr:sub(1+#i.seq), ctx))
					break
				end
			end
			if not found then
				ctx:fail('no recognized control sequence in [%s]', substr)
			end
		elseif c == '\n' then
			flush()
			table.insert(spans,{kind='line-break',origin=ctx:clone()})
		else
			buf = buf .. c
		end
	end
	flush()
	return spans
end

-- Wrap a block constructor so that the block it returns is stamped with a
-- clone of the current context as its origin, appended to the destination
-- list, and announced through the job's 'block_insert' hook. Blocks that
-- carry spans are additionally passed to the document job's 'meddle_span'
-- hook. The wrapped function itself returns nothing.
local function blockwrap(fn)
	return function(line, ctx, job, dest)
		local blk = fn(line, ctx, job, dest)
		blk.origin = ctx:clone()
		dest[#dest + 1] = blk
		job:hook('block_insert', ctx, blk, line)

		local spans = blk.spans
		if spans then
			ctx.doc.docjob:hook('meddle_span', spans, blk)
		end
	end
end

local insert_paragraph = blockwrap(function(l,c)
	if l:sub(1,1) == '.' then l = l:sub(2) end
	return {
		kind = "paragraph";
................................................................................
	if t and t ~= "" then
		local heading = {
			kind = "label";
			spans = ct.parse_span(t,c);
			origin = s.origin;
			captions = s;
		}
		c.doc.docjob:hook('meddle_span', heading.spans, heading)
		table.insert(s.blocks, heading)
		s.heading_node = heading
	end
	c.sec = s

	j:hook('section_attach', c, s)
end
................................................................................
	c.doc.meta[key] = val
	j:hook('metadata_set', key, val)
end
-- Directive handler for extension control. `w(1)` yields the directive name
-- and the rest of the line; every whitespace-separated word in the rest is
-- flagged in the matching document table: 'uses' -> doc.ext.use,
-- 'needs' -> doc.ext.need, 'inhibits' -> doc.ext.inhibit. Any other
-- directive name changes nothing.
local dextctl = function(w,c)
	local mode, exts = w(1)
	local slot = ({
		uses = 'use';
		needs = 'need';
		inhibits = 'inhibit';
	})[mode]
	for e in exts:gmatch '([^%s]+)' do
		if slot then
			c.doc.ext[slot][e] = true
		end
	end
end
-- Directive handler shared by 'when' and 'unless'. Only the directive name
-- (first word of w(2)) is examined: c.hide_next becomes true for 'unless'
-- and false otherwise. The condition words are not evaluated here.
local dcond = function(w,c)
	local mode = w(2)
	if mode == 'unless' then
		c.hide_next = true
	else
		c.hide_next = false
	end
end;
................................................................................
-- Built-in block directives, keyed by directive name. Each handler is called
-- with `w`, a function returning the directive line split into words, and
-- `c`, the parse context.
ct.directives = {
	-- document metadata
	author = dsetmeta;
	license = dsetmeta;
	keywords = dsetmeta;
	desc = dsetmeta;

	-- conditionals
	when = dcond;
	unless = dcond;

	-- accepted, currently does nothing
	pragma = function(w,c)
	end;

	-- language control: "lang <op> <code>"
	--   is   -> replace the top of the language stack (or fill slot 1)
	--   push -> push <code>
	--   pop  -> drop the top entry, if the stack has any
	--   sec  -> set the language of the current section only
	-- afterwards the context language is the top of the stack
	lang = function(w,c)
		local _, op, code = w(2)
		local stack = c.doc.stage.langstack
		if op == 'is' then
			local top = #stack
			if top < 1 then top = 1 end
			stack[top] = code
		elseif op == 'push' then
			stack[#stack + 1] = code
		elseif op == 'pop' then
			if next(stack) then
				stack[#stack] = nil
			end
		elseif op == 'sec' then
			c.sec.lang = code
		else
			c:fail('bad language directive “%s”', op)
		end
		c.lang = stack[#stack]
	end;

	-- "expand off" clears the pending expansion counter; any other
	-- argument sets it to 1
	expand = function(w,c)
		local _, m = w(1)
		c.doc.stage.expand_next = (m ~= 'off') and 1 or 0
	end;
}

local function insert_table_row(l,c,j)
	local row = {}
	local buf
................................................................................
	local flush = function()
		if buf then
			buf.str = buf.str:gsub('%s+$','')
			table.insert(row, buf)
		end
		buf = { str = '' }
	end
	for c,p in ss.str.each(c.doc.enc,l) do
		if c == '|' or c == '+' and (p.code == 1 or l:sub(p.byte-1,p.byte-1)~='\\') then
			flush()
			buf.header = c == '+'
		elseif c == ':' then
			local lst = l:sub(p.byte-#c,p.byte-#c)
			local nxt = l:sub(p.next.byte,p.next.byte)
			if lst == '|' or lst == '+' and l:sub(p.byte-2,p.byte-2) ~= '\\' then
................................................................................
		else
			buf.str = buf.str .. c
		end
	end
	if buf.str ~= '' then flush() end 
	for _,v in pairs(row) do
		v.spans = ct.parse_span(v.str, c)
		c.doc.docjob:hook('meddle_span', v.spans, v)
	end
	if #c.sec.blocks > 1 and c.sec.blocks[#c.sec.blocks].kind == 'table' then
		local tbl = c.sec.blocks[#c.sec.blocks]
		table.insert(tbl.rows, row)
		j:hook('block_table_attach', c, tbl, row, l)
		j:hook('block_table_row_insert', c, tbl, row, l)
	else
................................................................................
	{seq = '¶', fn = insert_paragraph};
	{seq = '❡', fn = insert_paragraph};
	{seq = '#', fn = insert_section};
	{seq = '§', fn = insert_section};
	{seq = '+', fn = insert_table_row};
	{seq = '|', fn = insert_table_row};
	{seq = '│', fn = insert_table_row};
	-- '!' lines form asides. A '!' line directly after an aside block adds
	-- another line to it; otherwise a new aside block is started. Either
	-- way the line's spans go through the 'meddle_span' hook before being
	-- stored, and the matching job hooks are fired.
	{seq = '!', fn = function(l,c,j,d)
		local prev = d[#d]
		local body = l:match '^%s*!%s*(.-)$'
		local spans = ct.parse_span(body, c)

		if prev and prev.kind == 'aside' then
			-- continuation of the preceding aside
			c.doc.docjob:hook('meddle_span', spans, prev)
			table.insert(prev.lines, spans)
			j:hook('block_aside_attach', c, prev, spans, l)
			j:hook('block_aside_line_insert', c, prev, spans, l)
			return
		end

		-- first line of a new aside
		local aside = {
			kind = 'aside';
			lines = { spans };
			origin = c:clone();
		}
		c.doc.docjob:hook('meddle_span', aside.lines[1], aside)
		table.insert(d,aside)
		j:hook('block_aside_insert', c, aside, l)
		j:hook('block_aside_line_insert', c, aside, aside.lines[1], l)
		j:hook('block_insert', c, aside, l)
	end};
	{pred = function(s,c) return s:match'^[*:]' end, fn = blockwrap(function(l,c) -- list
		local stars = l:match '^([*:]+)'
................................................................................
		return {
			kind = 'list-item';
			depth = depth;
			ordered = ordered;
			spans = ct.parse_span(txt, c);
		}
	end)};
	-- A line starting with two tabs continues the value of the reference
	-- block immediately before it: the text is appended to that value on a
	-- new line. Fails when the previous block is not a reference.
	{seq = '\t\t', fn = function(l,c,j,d)
		local last = d[#d]
		if (not last) or (last.kind ~= 'reference') then
			c:fail('reference continuations must immediately follow a reference')
		end
		local str = l:match '^\t\t(.-)%s*$'
		last.val = last.val .. '\n' .. str
		if last.rsrc then
			-- the reference is a property of a resource: its first line
			-- was stored in the resource's props, so the continued value
			-- belongs there too rather than in the section's refs
			last.rsrc.props[last.key] = last.val
		else
			c.sec.refs[last.key] = last.val
		end
	end};
	-- A tab-indented "key: value" line defines a reference. Where the value
	-- is stored depends on the block just before it:
	--   * directly after a resource block -> a property of that resource
	--   * after a reference that itself belongs to a resource -> a further
	--     property of the same resource
	--   * otherwise -> a reference in the current section
	-- In every case a 'reference' block is returned (and inserted by
	-- blockwrap); its `rsrc` field records the owning resource, if any.
	{seq = '\t', fn = blockwrap(function(l,c,j,d)
		-- key is everything up to the first colon, value is the remainder
		local ref, val = l:match '\t+([^:]+):%s*(.*)$'
		local last = d[#d]
		local rsrc
		if last and last.kind == 'resource' then
			last.props[ref] = val
			rsrc = last
		elseif last and last.kind == 'reference' and last.rsrc then
			last.rsrc.props[ref] = val
			rsrc = last.rsrc
		else
			c.sec.refs[ref] = val
		end
		-- the hook fires for resource properties as well as section refs
		j:hook('section_ref_attach', c, ref, val, l)
		return {
			kind = 'reference';
			rsrc = rsrc;
			key = ref;
			val = val;
		}
	end)};
	{seq = '%', fn = function(l,c,j,d) -- directive
		local crit, cmdline = l:match '^%%([!%%]?)%s*(.*)$'
		local words = function(i)
			local wds = {}
			if i == 0 then return cmdline end
			for w,pos in cmdline:gmatch '([^%s]+)()' do
				table.insert(wds, w)
				i = i - 1
				if i == 0 then
					table.insert(wds,(cmdline:sub(pos):match('^%s*(.-)%s*$')))
					return table.unpack(wds)
				end
			end
		end

		local cmd, rest = words(1)
		if ct.directives[cmd] then
................................................................................
			ct.directives[cmd](words,c,j)
		elseif cmd == c.doc.stage.mode['render:format'] then
			-- this is a directive for the renderer; insert it into the tree as is
			local dir = {
				kind = 'directive';
				critical = crit == '!';
				words = words;
				origin = c;
			}
			table.insert(d, dir)
			j:hook('block_directive_render', j, c, dir)
		elseif c.doc:allow_ext(cmd) then -- extension directives begin with their id
			local ext = ct.ext.loaded[cmd]
			if ext.directives then
				local _, topcmd = words(2)
				if ext.directives[topcmd] then
					ext.directives[topcmd](j:delegate(ext), c, words)
................................................................................
			kind = 'code';
			listing = {
				kind = 'listing';
				lang = lang, id = id, title = title and ct.parse_span(title,c);
				lines = {};
			}
		}
		if c.doc.stage.expand_next and c.doc.stage.expand_next > 0 then
			c.doc.stage.expand_next = c.doc.stage.expand_next - 1
			mode.expand = true
		end
		j:hook('mode_switch', c, mode)
		c.mode = mode
		if id then
			if c.sec.refs[id] then c:fail('duplicate ID %s', id) end
			c.sec.refs[id] = c.mode.listing
		end
		j:hook('block_insert', c, mode.listing, l)
		return c.mode.listing;
	end)};
	-- Horizontal rule. A line qualifies when it starts with '-' or '_'
	-- followed by at least one of '*', '_', '-' or whitespace, or when it
	-- starts with an em dash and consists solely of rule characters.
	{pred = function(s,c)
		if s:match '^[%-_][*_%-%s]+' then return true end
		if startswith(s, '—') then
			local rulechars = {
				['—'] = true, ['-'] = true, [' '] = true;
				['*'] = true, ['_'] = true, ['\t'] = true;
			}
			for ch in ss.str.each(c.doc.enc,s) do
				if rulechars[ch] ~= true then return false end
			end
			return true
		end
	end; fn = blockwrap(function()
		return { kind = 'horiz-rule' }
	end)};
	-- '@id' declares a resource. The new resource block is registered in
	-- the current section's refs under its id; declaring an id that is
	-- already present in the section is an error.
	{seq='@', fn=blockwrap(function(s,c)
		local id = s:match '^@%s*(.-)%s*$'
		local rsrc = { kind = 'resource', props = {}, id = id }
		if not c.sec.refs[id] then
			c.sec.refs[id] = rsrc
		else
			c:fail('an object with id “%s” already exists in that section',id)
		end
		return rsrc
	end)};
	{fn = insert_paragraph};
}

-- Parse one input line.
--   l:    the line text, or nil for a blank line
--   ctx:  parse context; ctx.mode, when set, selects a special syntax mode
--   dest: list that receives any blocks produced
-- The current stage job's 'line_read' and 'line_end' hooks bracket the work.
--   * In 'code' mode, a line consisting of '~~~' ends the listing; any
--     other line is appended to it (parsed as spans when the mode has
--     `expand` set, verbatim otherwise).
--   * In any other mode the line is handed to the mode proc registered
--     under 'modes'; a missing proc is an error.
--   * Outside a mode, the line goes to the first matching entry of
--     ct.ctlseqs, then of the extension-provided block tables; no match is
--     an error. A blank line inserts a 'break' block unless dest is empty
--     or already ends in one.
function ct.parse_line(l, ctx, dest)
	local job = ctx.doc.stage.job
	job:hook('line_read',ctx,l)
	if ctx.mode then
		if ctx.mode.kind == 'code' then
			if l and l:match '^~~~%s*$' then
				job:hook('block_listing_end',ctx,ctx.mode.listing)
				-- pass the context; `c` is not defined in this function,
				-- so the hook used to receive nil
				job:hook('mode_switch', ctx, nil)
				ctx.mode = nil
			else
				-- TODO handle formatted code
				local newline
				if ctx.mode.expand
					then newline = ct.parse_span(l, ctx)
					else newline = {l}
				end
				table.insert(ctx.mode.listing.lines, newline)
				job:hook('block_listing_newline',ctx,ctx.mode.listing,newline)
			end
		else
			local mf = job:proc('modes', ctx.mode.kind)
			if not mf then
				ctx:fail('unimplemented syntax mode %s', ctx.mode.kind)
			end
			mf(job, ctx, l, dest) --NOTE: you are responsible for triggering the appropriate hooks if you insert anything!
		end
	else
		if l then
			-- Run the first entry whose `seq` prefix and `pred` predicate
			-- (each optional) both accept the line; returns whether one ran.
			-- NOTE(review): entries are visited with pairs(); ct.ctlseqs
			-- depends on its entries being tried in list order — confirm
			-- whether extension block tables are lists before moving to ipairs.
			local function tryseqs(seqs, ...)
				for _, i in pairs(seqs) do
					if ((not i.seq ) or startswith(l, i.seq)) and
					   ((not i.pred) or i.pred    (l, ctx  )) then
						i.fn(l, ctx, job, dest, ...)
						return true
					end
				end
				return false
			end

			if not tryseqs(ct.ctlseqs) then
				local found = false

				for eb, ext, state in job:each('blocks') do
					if tryseqs(eb, state) then found = true break end
				end

				if not found then
					ctx:fail 'incomprehensible input line'
				end
			end
		else
			if next(dest) and dest[#dest].kind ~= 'break' then
				local brk = {kind='break', origin = ctx:clone()}
				job:hook('block_break', ctx, brk, l)
				table.insert(dest, brk)
			end
		end
	end
	job:hook('line_end',ctx,l)
end

-- Parse a complete document.
--   file  : object providing a :lines() iterator over the input
--   src   : source descriptor, stored on both the context and the document
--   mode  : table of mode flags; 'meta:lang' and 'parse:enc' are read here
--   setup : optional callback, invoked with the new context before any
--           input is read
-- Returns the populated document (ctx.doc). An unsupported 'parse:enc'
-- value throws a ct.exns.enc exception.
function ct.parse(file, src, mode, setup)
	local ctx = ct.ctx.mk(src)
	ctx.line = 0
	ctx.doc = ct.doc.mk()
	ctx.doc.src = src
	ctx.sec = ctx.doc:mksec() -- toplevel section
	ctx.sec.origin = ctx:clone()
	ctx.lang = mode['meta:lang']
	-- NOTE(review): ctx.doc.enc is only assigned here when 'parse:enc' is
	-- set; presumably ct.doc.mk() supplies a default encoding -- confirm
	if mode['parse:enc'] then
		local e = ss.str.enc[mode['parse:enc']]
		if not e then
			ct.exns.enc('requested encoding not supported',mode['parse:enc']):throw()
		end
		ctx.doc.enc = e
	end

	-- create states for extension hooks
	local job = ctx.doc:job('parse',nil,ctx)
	ctx.doc.stage = {
		kind = 'parse';
		mode = mode;
		job = job;
		langstack = {ctx.lang};
		fontstack = {};
	}

	local function
	is_whitespace(cp)
		return ctx.doc.enc.iswhitespace(cp)
	end

	if setup then setup(ctx) end

	-- feed the parser one line at a time with leading whitespace removed;
	-- a line consisting solely of whitespace is passed on as nil
	for full_line in file:lines() do ctx.line = ctx.line + 1
		local l
		for p, c in utf8.codes(full_line) do
			if not is_whitespace(c) then
				l = full_line:sub(p)
				break
			end
		end
		ct.parse_line(l, ctx, ctx.sec.blocks)
	end

	-- mime types whose class differs from their top-level type
	-- (ideally move this into its own mimetype lib)
	local mimeclasses = {
		['application/svg+xml'] = 'image';
	}

	-- expand the `src` property of every declared resource into a list of
	-- {mode, mime, uri, class} records. sec.refs is keyed by reference id,
	-- so it must be walked with pairs(); ipairs() never visits string keys
	for _, sec in ipairs(ctx.doc.secorder) do
		for refid, r in pairs(sec.refs) do
			if type(r) == 'table' and r.kind == 'resource' and r.props.src then
				local lines = ss.str.breaklines(ctx.doc.enc, r.props.src)
				local srcs = {}
				for _, l in ipairs(lines) do
					-- each line reads: <mode> <mime-type> <uri>
					local args = ss.str.breakwords(ctx.doc.enc, l, 2, {escape=true})
					if #args < 3 then
						r.origin:fail('invalid syntax for resource %s', refid)
					end
					-- split "type/subtype" into its two halves
					local mime = ss.str.split(ctx.doc.enc, args[2], '/', 1, {escape=true})
					-- the override table is keyed by the full mime string,
					-- not by the split table
					local class = mimeclasses[args[2]]
					table.insert(srcs, {
						mode = args[1];
						mime = mime;
						uri = args[3];
						class = class or mime[1];
					})
				end
				-- an explicit `as` property wins; otherwise fall back to the
				-- class of the first source, if there is one
				local kind = r.props.as or (srcs[1] and srcs[1].class)
				r.class = kind
				r.srcs = srcs
			end
		end
	end
	ctx.doc.stage = nil
	ctx.doc.docjob:hook('meddle_ast')
	return ctx.doc
end














Modified desk/cortav.xml from [8189edad17] to [b82e1b14f3].

8
9
10
11
12
13
14













15
16
17
18
19
20
21
..
24
25
26
27
28
29
30

31
32

33
34









35
36
37
38
39
40
41
..
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100

101
102
103



104
105
106
107
108
109
110










111

112
113
114
115
116
117
118
...
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145


146
147
148
149
150
151
152
153

154
155
156
157
158
159
160
-->
<language name='Cortav' version='1' kateversion='2.4' section='Markup' extensions='*.ct'>
	<highlighting>
		<list name='extension-directives'>
			<item>uses</item>
			<item>needs</item>
			<item>inhibits</item>













		</list>
		<list name='renderer-directives'>
			<item>html</item>
			<item>groff</item>
			<item>ps</item>
			<item>tex</item>
			<item>plaintext</item>
................................................................................
		</list>
		<contexts>
			<context name='init' attribute='Normal Text' lineEndContext='#pop' fallthroughContext='text'>
				<RegExpr String='\\.' attribute='Escaped Char'/>
				<RegExpr attribute='Section Cue' context='sec-ident' String='(#|§)+' firstNonSpace='true' />
				<StringDetect String='~~~' attribute='Literal Block Cue' firstNonSpace='true' context='literal-block-cue'/>
				<RegExpr attribute='List' String='[\*:]+' firstNonSpace='true' context='text' />

				<Detect2Chars char='%' char1='!' attribute='Critical Directive Cue' context='directive'/>
				<DetectChar char='%' attribute='Directive Cue' context='directive'/>

				<DetectChar char='&#9;' attribute='Normal Text' context='refdef-id'/>
			</context>










			<context name='sec-ident' attribute='Identifier' lineEndContext='#pop'>
				<DetectSpaces context='#pop!sec' attribute='Normal Text'/>
			</context>

			<context name='sec' attribute='Header' lineEndContext='#pop'>
				<IncludeRules context='text'/>
................................................................................
				<IncludeRules context='span'/>
			</context>

			<context name='span-del' attribute='Deleted Text' lineEndContext='#pop'>
				<IncludeRules context='span'/>
			</context>

			<context name='span-cue' attribute='Span Cue' lineEndContext='#pop'>
				<StringDetect attribute='Span Cue' String='$\' context='#pop!flat-span' />

				<DetectChar   attribute='Span Cue' char='!' context='#pop!span-emph' />
				<DetectChar   attribute='Span Cue' char='*' context='#pop!span-strong' />
				<DetectChar   attribute='Span Cue' char='~' context='#pop!span-del' />

				<AnyChar      attribute='Span Cue' String='$+🔒' context='#pop!span' />
				<StringDetect attribute='Span Cue' String='→' context='#pop!ref' />
				<StringDetect attribute='Span Cue' String='🔗' context='#pop!ref' />
				<DetectChar   attribute='Span Cue' char='>' context='#pop!ref' />

				<DetectChar   attribute='Span Cue' char='&amp;' context='#pop!ref' />
				<DetectChar   attribute='Span Cue' char='#' context='#pop!var-ref' />
				<DetectChar   attribute='Span Cue' char='\' context='#pop!flat-span' />



			</context>

			<context name='flat-span' attribute='Unstyled Text' lineEndContext='#pop'>
				<Detect2Chars attribute='Escaped Char' context='#stay' char='\' char1=']'/>
				<DetectChar attribute='Span Delimiter' context='#pop' char=']'/>
			</context>











			<context name='ref' attribute='Reference' lineEndContext='#pop'>

				<DetectSpaces context='#pop!span'/>
			</context>

			<context name='var-ref' attribute='Reference' lineEndContext='#pop'>
				<WordDetect String="cortav" attribute='Standard Namespace'/>
				<WordDetect String="env" attribute='Standard Namespace'/>
				<DetectChar attribute='Span Delimiter' context='#pop' char=']'/>
................................................................................
			</context>
		</contexts>
		<itemDatas>
			<itemData name='Normal Text' defStyleNum='dsNormal'/>
			<itemData name='Styled Text' defStyleNum='dsNormal'/>
			<itemData name='Emphatic Text' defStyleNum='dsNormal' italic='true'/>
			<itemData name='Strong Text' defStyleNum='dsNormal' bold='true'/>
			<itemData name='Deleted Text' defStyleNum='dsNormal' strikeout='true'/>
				
			<itemData name='Section Cue' defStyleNum='dsKeyword' bold='true'/>
			<itemData name='Header' defStyleNum='dsControlFlow' underline='true'/>
			<itemData name='Identifier' defStyleNum='dsVariable'/>

			<itemData name='Unstyled Text' defStyleNum='dsVerbatimString'/>
			<itemData name='Escaped Char' defStyleNum='dsSpecialChar'/>
			<itemData name='Reference' defStyleNum='dsControlFlow' underline='true'/>
			<itemData name='Span Cue' defStyleNum='dsKeyword' bold='true'/>


			<itemData name='Span Delimiter' defStyleNum='dsKeyword'/>
			<itemData name='Directive' defStyleNum='dsAttribute' bold='true'/>
			<itemData name='Directive Cue' defStyleNum='dsAttribute'/>
			<itemData name='Critical Directive Cue' defStyleNum='dsImport' bold='true'/>
			<itemData name='Extension Directive' defStyleNum='dsImport' bold='true'/>
			<itemData name='Renderer Directive' defStyleNum='dsExtension' bold='true'/>
			<itemData name='Standard Namespace' defStyleNum='dsBuiltIn' bold='true'/>
			<itemData name='Comment' defStyleNum='dsComment'/>

			<itemData name='Macro' defStyleNum='dsPreprocessor' bold='true'/>
			<itemData name='Macro Delimiter' defStyleNum='dsPreprocessor'/>
			<itemData name='Field Delimiter' defStyleNum='dsPreprocessor' bold='true'/>
			<itemData name='List' defStyleNum='dsOperator'/>

			<itemData name='Literal Block' defStyleNum='dsSpecialString'/>
			<itemData name='Literal Block Cue' defStyleNum='dsPreprocessor' bold='true'/>







>
>
>
>
>
>
>
>
>
>
>
>
>







 







>


>


>
>
>
>
>
>
>
>
>







 







|
|





|



>



>
>
>







>
>
>
>
>
>
>
>
>
>

>







 







|









>
>








>







8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
..
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
...
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
...
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
-->
<language name='Cortav' version='1' kateversion='2.4' section='Markup' extensions='*.ct'>
	<highlighting>
		<list name='extension-directives'>
			<item>uses</item>
			<item>needs</item>
			<item>inhibits</item>
		</list>
		<list name='meta-directives'>
			<item>author</item>
			<item>lang</item>
			<item>pragma</item>
		</list>
		<list name='ctl-directives'>
			<item>when</item>
			<item>unless</item>
			<item>cols</item>
			<item>quote</item>
			<item>include</item>
			<item>embed</item>
		</list>
		<list name='renderer-directives'>
			<item>html</item>
			<item>groff</item>
			<item>ps</item>
			<item>tex</item>
			<item>plaintext</item>
................................................................................
		</list>
		<contexts>
			<context name='init' attribute='Normal Text' lineEndContext='#pop' fallthroughContext='text'>
				<RegExpr String='\\.' attribute='Escaped Char'/>
				<RegExpr attribute='Section Cue' context='sec-ident' String='(#|§)+' firstNonSpace='true' />
				<StringDetect String='~~~' attribute='Literal Block Cue' firstNonSpace='true' context='literal-block-cue'/>
				<RegExpr attribute='List' String='[\*:]+' firstNonSpace='true' context='text' />
				<Detect2Chars char='%' char1='%' attribute='Comment' context='comment'/>
				<Detect2Chars char='%' char1='!' attribute='Critical Directive Cue' context='directive'/>
				<DetectChar char='%' attribute='Directive Cue' context='directive'/>
				<DetectChar char='@' attribute='Resource Cue' context='resource'/>
				<DetectChar char='&#9;' attribute='Normal Text' context='refdef-id'/>
			</context>

			<context name='comment' attribute='Comment' lineEndContext='#pop'>
			</context>
			<context name='error' attribute='Error' lineEndContext='#pop'>
			</context>

			<context name='resource' attribute='Resource Identifier' lineEndContext='#pop'>
				<DetectSpaces context='#pop!error' attribute='Error'/>
			</context>

			<context name='sec-ident' attribute='Identifier' lineEndContext='#pop'>
				<DetectSpaces context='#pop!sec' attribute='Normal Text'/>
			</context>

			<context name='sec' attribute='Header' lineEndContext='#pop'>
				<IncludeRules context='text'/>
................................................................................
				<IncludeRules context='span'/>
			</context>

			<context name='span-del' attribute='Deleted Text' lineEndContext='#pop'>
				<IncludeRules context='span'/>
			</context>

			<context name='span-cue' attribute='Span Cue' lineEndContext='#pop' fallthroughContext="error">
				<StringDetect attribute='Span Cue' String='`\' context='#pop!flat-span' />

				<DetectChar   attribute='Span Cue' char='!' context='#pop!span-emph' />
				<DetectChar   attribute='Span Cue' char='*' context='#pop!span-strong' />
				<DetectChar   attribute='Span Cue' char='~' context='#pop!span-del' />

				<AnyChar      attribute='Span Cue' String='`$+🔒' context='#pop!span' />
				<StringDetect attribute='Span Cue' String='→' context='#pop!ref' />
				<StringDetect attribute='Span Cue' String='🔗' context='#pop!ref' />
				<DetectChar   attribute='Span Cue' char='>' context='#pop!ref' />
				<DetectChar   attribute='Span Cue' char='^' context='#pop!ref' />
				<DetectChar   attribute='Span Cue' char='&amp;' context='#pop!ref' />
				<DetectChar   attribute='Span Cue' char='#' context='#pop!var-ref' />
				<DetectChar   attribute='Span Cue' char='\' context='#pop!flat-span' />
				<Detect2Chars attribute='Comment' char='%' char1='%' context='#pop!inline-comment' />
				<Detect2Chars attribute='Critical Directive Cue' char='%' char1='!' context='#pop!inline-directive' />
				<DetectChar   attribute='Directive Cue' char='%' context='#pop!inline-directive' />
			</context>

			<context name='flat-span' attribute='Unstyled Text' lineEndContext='#pop'>
				<Detect2Chars attribute='Escaped Char' context='#stay' char='\' char1=']'/>
				<DetectChar attribute='Span Delimiter' context='#pop' char=']'/>
			</context>

			<context name='inline-comment' attribute='Comment' lineEndContext='#pop'>
				<IncludeRules context='flat-span'/>
			</context>

			<context name='inline-directive' attribute='Directive' lineEndContext='#pop'>
				<IncludeRules context='flat-span'/>
				<AnyChar String=".:!#$%@~'&quot;" attribute='Directive Cue'/>
				<DetectSpaces context='#pop!span'/>
			</context>

			<context name='ref' attribute='Reference' lineEndContext='#pop'>
				<IncludeRules context='flat-span'/>
				<DetectSpaces context='#pop!span'/>
			</context>

			<context name='var-ref' attribute='Reference' lineEndContext='#pop'>
				<WordDetect String="cortav" attribute='Standard Namespace'/>
				<WordDetect String="env" attribute='Standard Namespace'/>
				<DetectChar attribute='Span Delimiter' context='#pop' char=']'/>
................................................................................
			</context>
		</contexts>
		<itemDatas>
			<itemData name='Normal Text' defStyleNum='dsNormal'/>
			<itemData name='Styled Text' defStyleNum='dsNormal'/>
			<itemData name='Emphatic Text' defStyleNum='dsNormal' italic='true'/>
			<itemData name='Strong Text' defStyleNum='dsNormal' bold='true'/>
			<itemData name='Deleted Text' defStyleNum='dsNormal' strikeOut='true'/>
				
			<itemData name='Section Cue' defStyleNum='dsKeyword' bold='true'/>
			<itemData name='Header' defStyleNum='dsControlFlow' underline='true'/>
			<itemData name='Identifier' defStyleNum='dsVariable'/>

			<itemData name='Unstyled Text' defStyleNum='dsVerbatimString'/>
			<itemData name='Escaped Char' defStyleNum='dsSpecialChar'/>
			<itemData name='Reference' defStyleNum='dsControlFlow' underline='true'/>
			<itemData name='Span Cue' defStyleNum='dsKeyword' bold='true'/>
			<itemData name='Resource Cue' defStyleNum='dsKeyword' bold='true'/>
			<itemData name='Resource Identifier' defStyleNum='dsVariable' bold='true'/>
			<itemData name='Span Delimiter' defStyleNum='dsKeyword'/>
			<itemData name='Directive' defStyleNum='dsAttribute' bold='true'/>
			<itemData name='Directive Cue' defStyleNum='dsAttribute'/>
			<itemData name='Critical Directive Cue' defStyleNum='dsImport' bold='true'/>
			<itemData name='Extension Directive' defStyleNum='dsImport' bold='true'/>
			<itemData name='Renderer Directive' defStyleNum='dsExtension' bold='true'/>
			<itemData name='Standard Namespace' defStyleNum='dsBuiltIn' bold='true'/>
			<itemData name='Comment' defStyleNum='dsComment'/>
			<itemData name='Error' defStyleNum='dsError'/>
			<itemData name='Macro' defStyleNum='dsPreprocessor' bold='true'/>
			<itemData name='Macro Delimiter' defStyleNum='dsPreprocessor'/>
			<itemData name='Field Delimiter' defStyleNum='dsPreprocessor' bold='true'/>
			<itemData name='List' defStyleNum='dsOperator'/>

			<itemData name='Literal Block' defStyleNum='dsSpecialString'/>
			<itemData name='Literal Block Cue' defStyleNum='dsPreprocessor' bold='true'/>

Modified desk/velartrill-cortav.xml from [356c2a8842] to [51a69a6dad].

17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
		<expanded-acronym>Cortav</expanded-acronym>

		<generic-icon>x-office-document</generic-icon>
		<glob pattern="*.ct"/> <glob pattern="*."/>
		<glob pattern="*.cortav"/>
		<magic>
			<match value="%ct\n" offset="0" type="string"/>
			<match value="\x03\x07\x3E\x2D" offset="0" type="string"/>
		</magic>
	</mime-type>
	<mime-type type="text/x-cortav-intent">
		<comment xml:lang="en">Cortav rendering intent file</comment>
		<comment xml:lang="x-ranuir-Latn">tav cunloci Cortavi</comment>
		<comment xml:lang="x-ranuir-CR8">  </comment>








|







17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
		<expanded-acronym>Cortav</expanded-acronym>

		<generic-icon>x-office-document</generic-icon>
		<glob pattern="*.ct"/> <glob pattern="*."/>
		<glob pattern="*.cortav"/>
		<magic>
			<match value="%ct\n" offset="0" type="string"/>
			<match value="\x3E\x2E\x14\x0C\x01\x04\x00\x00\x00\x03\x07\x3E\x2D" offset="0" type="string"/>
		</magic>
	</mime-type>
	<mime-type type="text/x-cortav-intent">
		<comment xml:lang="en">Cortav rendering intent file</comment>
		<comment xml:lang="x-ranuir-Latn">tav cunloci Cortavi</comment>
		<comment xml:lang="x-ranuir-CR8">  </comment>

Added ext/transmogrify.lua version [ffa0ca0a64].































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
local ct = require 'cortav'
local ss = require 'sirsem'

-- Typographic replacements, keyed by encoding. Each encoding holds an
-- ordered list of groups mapping an ASCII sequence to its replacement.
-- Groups are tried in list order, so a sequence must appear in an earlier
-- group than any shorter sequence that is a prefix of it (e.g. '--->'
-- before '-->' before '--').
local patterns = {
	[ss.str.enc.utf8] = {
		{
			['<-->'] = '⟷';
			['--->'] = '⟶';
			['<---'] = '⟵';
			['----'] = '⸻';
		};

		{
			['<==>'] = '⟺';
			['===>'] = '⟹'; -- long rightwards double arrow (was a leftwards arrow)
			['<==='] = '⟸';
		};

		{
			['<->'] = '↔';
			['-->'] = '→';
			['<--'] = '←';
			['==>'] = '⇒';
			['<=>'] = '⇔';
			['<=='] = '⇐';
			['=/='] = '≠';
			['---'] = '⸺';
		};

		{
			['-:-'] = '÷';
			['--'] = '—';
			['(C)'] = '©';
			['(>)'] = '🄯';
			['(R)'] = '®';
			['(TM)'] = '™';
			['(SM)'] = '℠';
		};
	};
}

-- Quotation marks, keyed by encoding and then by language tag. Every entry
-- lists {opening double, closing double; opening single, closing single}.
-- The [true] entry is the fallback used when the document has no language
-- or none of the tags match.
local quotes = {
	[ss.str.enc.utf8] = {
		['en'] = {'“', '”'; '‘', '’'};
		['de'] = {'„', '“'; '‚', '‘'};
		['es'] = {'«', '»'; '‹', '›'}; -- ISO 639-1 tag for Spanish
		['sp'] = {'«', '»'; '‹', '›'}; -- non-standard tag, kept for existing documents
		['ja'] = {'「', '」'; '『', '』'};
		['fr'] = {'« ', ' »'; '‹ ', ' ›'};
		[true] = {'“', '”'; '‘', '’'};
	};
}

-- Apply typographic substitutions to one run of text.
--   ctx : parse context of the text; ctx.doc.enc selects the pattern and
--         quote tables, ctx.lang selects the quotation-mark style
--   t   : the string to transform
-- Returns the transformed string, or `t` itself when no patterns are
-- defined for the document's encoding.
local function meddle(ctx, t)
	local enc = ctx.doc.enc
	local ptns = patterns[enc]
	if not ptns then return t end
	local str = ''
	local lastchar
	local dquo = enc.encodeUCS'"'
	local squo = enc.encodeUCS"'"
	local forceRight = enc.encodeUCS'`'
	-- classify a character: 1 for a double quote, 2 for a single quote,
	-- nil for anything else
	local function quo(c)
		if c == dquo then
			return 1
		elseif c == squo then
			return 2
		end
	end
	-- pick the quote set for the current language, else the fallback set
	local qtbl
	if quotes[enc] then
		if ctx.lang then
			qtbl = ss.str.langmatch(quotes[enc], ctx.lang, enc) or quotes[enc][true]
		else
			qtbl = quotes[enc][true]
		end
	end
	for c, p in ss.str.each(enc,t) do
		local n = t:sub(p.byte)
		local ba, ca, nt = enc.parse_escape(n)
		if ba then
			-- escape sequence: emit its expansion and skip what it consumed
			p.next.byte = p.next.byte + ba
			p.next.code = p.next.code + ca
			str = str .. nt
			lastchar = nt
		else
			local found = false
			local quote = quo(c)
			local force = false
			-- a backtick directly before a quote forces the closing form;
			-- the quote character itself is skipped over here
			if not quote and c == forceRight and #t >= p.next.byte then
				quote = quo(enc.char(enc.codepoint(t,p.next.byte)))
				if quote then
					force = true
					p.next.byte = p.next.byte + #forceRight
					p.next.code = p.next.code + enc.len(forceRight)
				end
			end
			if qtbl and quote then
				found = true
				-- qtbl is laid out {open dbl, close dbl, open sgl, close sgl},
				-- so the closing mark sits at quote*2 and the opening mark
				-- directly before it
				if force then
					str = str .. qtbl[quote*2]
				elseif lastchar == nil or enc.iswhitespace(lastchar) then
					str = str .. qtbl[quote*2 - 1]
				else
					str = str .. qtbl[quote*2]
				end
			else
				for _, order in ipairs(ptns) do
					for k,v in pairs(order) do
						if ss.str.begins(n, k) then
							found = true
							str = str .. v
							-- the iterator has already stepped past the first
							-- character of the sequence; skip the remainder
							p.next.byte = p.next.byte + string.len(k) - 1
							p.next.code = p.next.code + utf8.len(k) - 1
							goto stopsearch
						end
					end
				end
				::stopsearch::
			end
			if not found then
				str = str .. c
			end
			lastchar = c
		end
	end
	return str
end

-- Walk a span list, rewriting every plain string through meddle() in place
-- and recursing into nested spans. Spans whose kind is 'raw' are left alone.
--   origin : context handed to meddle() for the strings at this level
--   spans  : the span list to process (modified in place)
local function enterspan(origin, spans)
	for idx, span in pairs(spans) do
		if type(span) ~= 'string' then
			-- nested span: descend using the span's own origin
			local descend = span.kind ~= 'raw' and span.spans
			if descend then
				enterspan(span.origin, span.spans)
			end
		else
			spans[idx] = meddle(origin, span)
		end
	end
end

-- register the extension; its only hook rewrites the text of the finished
-- document tree
ct.ext.install {
	id = 'transmogrify';
	version = ss.version {0,1; 'devel'};
	contributors = {{name='lexi hale', handle='velartrill', mail='lexi@hale.su', homepage='https://hale.su'}};
	default = true; -- on unless inhibited
	slow = true;
	hook = {
		doc_meddle_ast = function(job)
			-- section kinds whose blocks get transformed
			local textual = {
				ordinary = true;
				blockquote = true;
				footnote = true;
			}
			for _, sec in pairs(job.doc.secorder) do
				if textual[sec.kind] then
					for _, block in pairs(sec.blocks) do
						local shape = type(block.spans)
						if shape == 'string' then
							-- a block holding a single bare string
							block.spans = meddle(block.origin, block.spans)
						elseif shape == 'table' then
							enterspan(block.origin, block.spans)
						end
					end
				end
			end
		end;
	};
}

Modified makefile from [42776f3212] to [4482353657].

1
2
3
4
5
6
7
8
9
10
11
12
13
14










15
16
17
18
19
20
21
22
23
..
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46





47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
lua != which lua
luac != which luac
sh != which sh

extens = $(wildcard ext/*.lua)
extens_names ?= $(basename $(notdir $(extens)))
build = build
executable = cortav
default-format-flags = -m html:width 40em

prefix = $(HOME)/.local
bin_prefix = $(prefix)/bin
share_prefix = $(prefix)/share/$(executable)











$(build)/$(executable): sirsem.lua cortav.lua $(extens) cli.lua | $(build)/
	@echo ' » building with extensions $(extens_names)'
	echo '#!$(lua)' > $@
	luac -o - $^ >> $@
	chmod +x $@

$(build)/cortav.html: cortav.ct $(build)/$(executable) | $(build)/
	$(build)/$(executable) $< -o $@ -m render:format html -y html:fossil-uv

................................................................................

.PHONY: clean
clean:
	rm -f $(build)/cortav $(build)/cortav.html $(build)/velartrill-cortav-view.desktop $(build)/cortav-view.sh

$(build)/%.sh: desk/%.sh
	echo >$@ "#!$(sh)"
	echo >>$@ 'cortav_exec="$(bin_prefix)/$(executable)"'
	echo >>$@ 'cortav_flags="$${ct_format_flags-$(default-format-flags)}"'
	cat $< >> $@
	chmod +x $@

$(build)/velartrill-cortav-view.desktop: desk/cortav-view.desktop
	cp $< $@
	echo "Exec=$(bin_prefix)/cortav-view.sh" >>$@

%/:
	mkdir -p $@






.PHONY: install
install: $(build)/cortav $(build)/cortav-view.sh $(build)/velartrill-cortav-view.desktop | $(bin_prefix)/
	install $(build)/$(executable)  $(bin_prefix)
	install $(build)/cortav-view.sh $(bin_prefix)
	xdg-mime         install desk/velartrill-cortav.xml
	xdg-desktop-menu install $(build)/velartrill-cortav-view.desktop
	xdg-mime         default velartrill-cortav-view.desktop text/x-cortav

.PHONY: excise
excise: $(build)/velartrill-cortav-view.desktop
	xdg-mime         uninstall desk/velartrill-cortav.xml
	xdg-desktop-menu uninstall $(build)/velartrill-cortav-view.desktop
	rm $(bin_prefix)/$(executable)
	rm $(bin_prefix)/cortav-view.sh

.PHONY: wipe
wipe: excise clean





|





|
|

>
>
>
>
>
>
>
>
>
>
|
|







 







|






|




>
>
>
>
>

|
|
|








|
|



1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
..
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
lua != which lua
luac != which luac
sh != which sh

extens = $(wildcard ext/*.lua)
extens-names ?= $(basename $(notdir $(extens)))
build = build
executable = cortav
default-format-flags = -m html:width 40em

prefix = $(HOME)/.local
bin-prefix = $(prefix)/bin
share-prefix = $(prefix)/share/$(executable)

# by default, we fetch and parse information about encodings we
# support so that cortav can do fancy things like format math
# equations by character class (e.g. italicizing variables)
# this is not necessary for parsing the format, and can be
# disabled by blanking the encoding-data list when building
# ($ make encoding-data=)
encoding-data  = ucstbls
encoding-files = $(patsubst %,$(build)/%.lc,$(encoding-data))
encoding-data-ucs = https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt

# assemble the executable: a shebang line followed by the bytecode that luac
# produces from all prerequisites, in the order listed. uses the $(luac)
# variable located at the top of the file, like the ucstbls rule does
$(build)/$(executable): sirsem.lua $(encoding-files) cortav.lua $(extens) cli.lua | $(build)/
	@echo ' » building with extensions $(extens-names)'
	echo '#!$(lua)' > $@
	$(luac) -o - $^ >> $@
	chmod +x $@

$(build)/cortav.html: cortav.ct $(build)/$(executable) | $(build)/
	$(build)/$(executable) $< -o $@ -m render:format html -y html:fossil-uv

................................................................................

.PHONY: clean
clean:
	rm -f $(build)/cortav $(build)/cortav.html $(build)/velartrill-cortav-view.desktop $(build)/cortav-view.sh

$(build)/%.sh: desk/%.sh
	echo >$@ "#!$(sh)"
	echo >>$@ 'cortav_exec="$(bin-prefix)/$(executable)"'
	echo >>$@ 'cortav_flags="$${ct_format_flags-$(default-format-flags)}"'
	cat $< >> $@
	chmod +x $@

$(build)/velartrill-cortav-view.desktop: desk/cortav-view.desktop
	cp $< $@
	echo "Exec=$(bin-prefix)/cortav-view.sh" >>$@

%/:
	mkdir -p $@

$(build)/unicode.txt: | $(build)/
	curl $(encoding-data-ucs) > $@
$(build)/ucstbls.lc: $(build)/unicode.txt | $(build)/
	$(lua) tools/ucs.lua $< | $(luac) -o $@ -

.PHONY: install
install: $(build)/cortav $(build)/cortav-view.sh $(build)/velartrill-cortav-view.desktop | $(bin-prefix)/
	install $(build)/$(executable)  $(bin-prefix)
	install $(build)/cortav-view.sh $(bin-prefix)
	xdg-mime         install desk/velartrill-cortav.xml
	xdg-desktop-menu install $(build)/velartrill-cortav-view.desktop
	xdg-mime         default velartrill-cortav-view.desktop text/x-cortav

.PHONY: excise
excise: $(build)/velartrill-cortav-view.desktop
	xdg-mime         uninstall desk/velartrill-cortav.xml
	xdg-desktop-menu uninstall $(build)/velartrill-cortav-view.desktop
	rm $(bin-prefix)/$(executable)
	rm $(bin-prefix)/cortav-view.sh

.PHONY: wipe
wipe: excise clean

Modified sirsem.lua from [1f16b393f5] to [581e1b0127].

86
87
88
89
90
91
92











93
94
95
96
97
98
99
100
101
102






103










104
105



































































106
107
108
109







110
111
112




























113







114









115



































116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137





































































































































138
139
140
141
142
143
144
...
239
240
241
242
243
244
245
246

247
248
249
250
251
252
253
...
265
266
267
268
269
270
271

272
273
274
275
276
277
278
...
302
303
304
305
306
307
308

309
310
311
312
313
314
315
316
317
318
319
320
321
...
384
385
386
387
388
389
390


































































































































































			end
		else
			new[k] = v
		end
	end
	return new
end












-- Returns a table that looks up the keys it lacks in `tbl` (a lightweight
-- alternative to a shallow copy).
--   tbl : table that missing keys are read from
--   tpl : optional table to use as the delegating table; its own fields
--         shadow those of `tbl`. A fresh empty table is used when omitted.
--         Previously this argument was accepted but silently discarded.
function ss.delegate(tbl,tpl)
	tpl = tpl or {}
	return setmetatable(tpl, {__index=tbl})
end

ss.str = {}

-- Reports whether `str` starts with the prefix `pfx`.
function ss.str.begins(str, pfx)
	local head = string.sub(str, 1, #pfx)
	return head == pfx
end




































































ss.str.enc = {
	utf8 = {
		char = utf8.char;
		codepoint = utf8.codepoint;







	};
	c6b = {};
	ascii = {};




























}

















-- Iterator factory over the UTF-8 characters of `str`.
-- Each step yields the current character -- as a string, or as a numeric
-- codepoint when `ascode` is truthy -- followed by a position record
-- {code=, byte=, next=}. `code` and `byte` locate the yielded character;
-- `next` is the iterator's own live cursor, so a caller can advance
-- next.byte / next.code to make the iteration skip input.
function ss.str.enc.utf8.each(str, ascode)
	local cursor = { code = 1; byte = 1 }
	return function()
		if cursor.byte > #str then return nil end
		local cp = utf8.codepoint(str, cursor.byte)
		local here = {
			code = cursor.code;
			byte = cursor.byte;
			next = cursor;
		}
		-- re-encode the codepoint to learn how many bytes it occupies
		local encoded = utf8.char(cp)
		cursor.byte = cursor.byte + #encoded
		cursor.code = cursor.code + 1
		if ascode then
			return cp, here
		end
		return encoded, here
	end
end






































































































































ss.math = {}

-- Linear interpolation between `a` and `b`: the result is weighted (1-t)
-- towards `a` and t towards `b`.
function ss.math.lerp(t, a, b)
	local from_a = (1-t)*a
	local from_b = t*b
	return from_a + from_b
end

................................................................................
				elseif to == 'int' then return math.floor(tonumber(self))
				elseif c.cast and c.cast[to] then
					return c.cast[to](self, ...)
				elseif type(to) == 'table' and getmetatable(to) and getmetatable(to).cvt and getmetatable(to).cvt[cls] then
				else error((c.ident or 'class') .. ' is not convertible to ' .. (type(to) == 'string' and to or tostring(to))) end
			end
		end
		if c.fns then return c.fns[k] end

	end

	if c.cast then
		if c.cast.string then
			cls.__tostring = c.cast.string
		end
		if c.cast.number then
................................................................................
		if c.construct then
			c.construct(val, ...)
		end
		return val
	end
	getmetatable(cls).__call = function(_, ...) return cls.mk(...) end
	cls.is = function(o) return getmetatable(o) == cls end

	return cls
end

-- tidy exceptions

ss.exn = ss.declare {
	ident = 'exn';
................................................................................
		}
	end;
	call = function(me, ...)
		return ss.exn(me, ...)
	end;
}
ss.str.exn = ss.exnkind 'failure while string munging'


function ss.str.delimit(encoding, start, stop, s)
	local depth = 0
	encoding = encoding or ss.str.enc.utf8
	if not ss.str.begins(s, start) then return nil end
	for c,p in encoding.each(s) do
		if c == (encoding.escape or '\\') then
			p.next.byte = p.next.byte + #encoding.char(encoding.codepoint(s, p.next.byte))
			p.next.code = p.next.code + 1
		elseif c == start then
			depth = depth + 1
		elseif c == stop then
			depth = depth - 1
................................................................................
		return x
	elseif select('#', ...) == 0 then
		return nil
	else
		return ss.coalesce(...)
	end
end









































































































































































>
>
>
>
>
>
>
>
>
>
>










>
>
>
>
>
>
|
>
>
>
>
>
>
>
>
>
>


>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>




>
>
>
>
>
>
>

<
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
>
>
>
>
>
>
>
|
>
>
>
>
>
>
>
>
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>






|
|



|

|


|





>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







 







|
>







 







>







 







>





|







 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211

212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
...
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
...
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
...
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
...
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
			end
		else
			new[k] = v
		end
	end
	return new
end

function ss.push(tbl, ...)
	-- appends every extra argument to the end of tbl, in order, and
	-- returns tbl. the arguments are packed with an explicit count so
	-- that nil values occupy a slot instead of ending the list early
	local args = table.pack(...)
	local base = #tbl
	-- the first slot is written unconditionally (a nil store when no
	-- values were supplied); remaining values follow it
	tbl[base + 1] = args[1]
	for i = 2, args.n do
		tbl[base + i] = args[i]
	end
	return tbl
end

function ss.delegate(tbl,tpl) -- returns a table that looks up keys it lacks from
                              -- tbl (lightweight alternative to shallow copies).
                              -- tpl, if supplied, is the table that gets returned,
                              -- so callers can pre-seed it with overriding keys;
                              -- when omitted a fresh empty table is used
	tpl = tpl or {}
	-- previously tpl was defaulted and then discarded in favor of a new
	-- empty table, so any keys the caller supplied were silently lost.
	-- note this replaces whatever metatable tpl already had
	return setmetatable(tpl, {__index=tbl})
end

ss.str = {}

-- returns true when str starts with pfx, false otherwise. pfx is
-- matched literally (plain find, no pattern magic); the test passes
-- only if the first occurrence found begins at position 1
function ss.str.begins(str, pfx)
	-- appallingly, this is actually ~2/5ths faster than either
	-- of the below. i hate scripting languages so much
	return string.find(str, pfx, 1, true) == 1
	-- to my shock, disgust, and horror, even writing my own
	-- string scanning library for lua IN C only sped this up by
	-- a tiny fraction. i am just speechless.
-- 	return string.sub(str, 1, #pfx) == pfx

-- 	local pl = string.len(pfx)
-- 	local sl = string.len(str)
-- 	if sl < pl then return false end
-- 	for i=1,pl do
-- 		if string.byte(str,i) ~= string.byte(pfx,i) then
-- 			return false
-- 		end
-- 	end
-- 	return true
end

function ss.enum(syms)
	-- builds a two-way lookup table from syms: every value maps to
	-- its key and every key maps back to its value
	local lut = {}
	for key, name in pairs(syms) do
		lut[name] = key
		lut[key] = name
	end
	return lut
end

function ss.bitmask_bytes(n,ofs)
	-- returns n+1 single-bit masks as multiple values: bit ofs first,
	-- then bits ofs+1 through ofs+n. ofs defaults to 0
	local shift = ofs or 0
	local masks = { 1 << shift }
	for i = 1, n do
		masks[#masks + 1] = 1 << (i + shift)
	end
	return table.unpack(masks)
end

function ss.bitmask(tbl,ofs)
	-- assigns one bit to each name in the sequence tbl, starting at
	-- bit ofs, and returns a table mapping name -> bit and bit -> name.
	-- the key `true` holds {first bit offset, number of names} so that
	-- consumers can tell which bit range is in use
	local bits = {ss.bitmask_bytes(#tbl,ofs)}
	local map = {}
	local count
	for idx, name in ipairs(tbl) do
		local bit = bits[idx]
		map[name] = bit
		map[bit] = name
		count = idx
	end
	map[true] = {ofs or 0, count}
	return map
end

-- basic character classes. ss.enum numbers these 1..7 in listing
-- order and makes the table searchable by both name and number
ss.str.charclass = ss.enum {
	'numeral'; 'letter'; 'symbol'; 'punct';
	'space'; 'ctl'; 'glyph'; -- hanji
}
-- character property flags, one bit per name. the offset of 3 starts
-- the flags at bit 3, leaving the three low bits free (enough to hold
-- a charclass number, 1..7)
ss.str.charprop = ss.bitmask({
	'hexnumeral', -- character that can be used to write hexadecimal notation
	'upper', 'lower';
	'diac'; -- diacritic/modifier letter
	'wordbreak'; -- char causes following characters to be treated as a separate word (e.g. punctuation)
	'wordsep'; -- char causes previous and following characters to be treated as separate words; char constitutes a word of its own in between (e.g. interpunct)
	'breakokay'; -- is it okay to break words at this character? (eg hyphen)
	'mathop'; -- char is a mathematical operator
	'disallow', -- char is not allowed in narrative text
	'brack', 'right', 'left', -- brackets
	'noprint', -- character deposits no ink
	'superimpose' -- character is superimposed over previous
}, 3)

ss.str.enc_generics = {
	-- builds an escape parser for the prefix-style escape sequence ch
	-- in encoding enc. the parser takes a string s and returns
	--   0, 0, ch               when s is exactly the escape prefix
	--   #ch, enc.len(ch), nc   when s begins with the prefix, nc being
	--                          the character immediately following it
	--   chain(s)               otherwise, if a fallback parser was given
	-- and nothing at all when no rule applies
	pfxescape = function(ch, enc, chain)
		local pfxbytes, pfxcodes = #ch, enc.len(ch)
		local function parse(s)
			if s == ch then
				return 0, 0, ch
			end
			if ss.str.begins(s, ch) then
				local escaped = enc.char(enc.codepoint(s, pfxbytes + 1))
				return pfxbytes, pfxcodes, escaped
			end
			if chain then
				return chain(s)
			end
		end
		return parse
	end;
};

local cc,cp = ss.str.charclass, ss.str.charprop
-- table of supported text encodings. each encoding supplies the
-- primitives the generic string routines call: char (code -> string),
-- codepoint (string, byte offset -> code), len, iswhitespace and,
-- where defined here, encodeUCS (converts a UTF-8 literal into the
-- encoding). `ranges` rows have the form {first, last, class, props...}
ss.str.enc = {
	utf8 = {
		char = utf8.char;
		codepoint = utf8.codepoint;
		len = utf8.len;
		encodeUCS = function(str) return str end;
		iswhitespace = function(c)
			return (c == ' ') or (c == '\t') or (c == '\n')
				or (c == '\u{3000}')
				or (c == '\u{200B}')
		end;
	};

	ascii = {
		len = string.len; char = string.char; codepoint = string.byte;
		iswhitespace = function(c)
			return (c == ' ') or (c == '\t') or (c == '\n')
		end;
		-- the property names must match those declared in
		-- ss.str.charprop ('upper', 'lower', 'mathop'); a misspelled
		-- name evaluates to nil and punches a hole in the row
		ranges = {
			{0x00,0x1a, cc.ctl};
			{0x1b,0x1b, cc.ctl, cp.disallow};
			{0x1c,0x1f, cc.ctl};
			{0x20,0x20, cc.space};
			{0x21,0x22, cc.punct};
			{0x23,0x26, cc.symbol};
			{0x27,0x29, cc.punct};
			{0x2a,0x2b, cc.symbol};
			{0x2c,0x2f, cc.punct};
			{0x30,0x39, cc.numeral, cp.hexnumeral};
			{0x3a,0x3b, cc.punct};
			{0x3c,0x3e, cc.symbol, cp.mathop};
			{0x3f,0x3f, cc.punct};
			{0x40,0x40, cc.symbol};
			{0x41,0x46, cc.letter, cp.upper, cp.hexnumeral};
			{0x47,0x5a, cc.letter, cp.upper};
			{0x5b,0x5d, cc.symbol, cp.mathop};
			{0x5e,0x5e, cc.symbol, cp.mathop};
			{0x5f,0x60, cc.symbol};
			{0x61,0x66, cc.letter, cp.lower, cp.hexnumeral};
			{0x67,0x7a, cc.letter, cp.lower};
			{0x7b,0x7e, cc.symbol};
			{0x7f,0x7f, cc.ctl, cp.disallow};
		}
	};
	raw = {len = string.len; char = string.char; codepoint = string.byte;
		encodeUCS = function(str) return str end;
		iswhitespace = function(c)
			return (c == ' ') or (c == '\t') or (c == '\n')
		end;
	};
}

-- unicode ranges are optionally generated from consortium data
-- files and injected through a generated source file. if this
-- part of the build process is disabled (e.g. due to lack of
-- internet access, or to keep the size of the executable as
-- small as possible), we still at least can make the ascii
-- ranges available to UTF8 (UTF8 being a superset of ascii).
-- the fallback is an empty proxy table that reads through to the
-- ascii range list via __index rather than a copy of it
ss.str.enc.utf8.ranges = ss.delegate(ss.str.enc.ascii.ranges)

function ss.str.enc.ascii.encodeUCS(str)
	-- converts the UTF-8 string str to ASCII and returns the result.
	-- codepoints above 0x7F have no ASCII form and become '?'.
	-- (the converted string used to be built and then dropped, so every
	-- caller received nil.) pieces are gathered in a table and joined
	-- once to avoid quadratic string concatenation
	local out = {}
	for c in ss.str.each(ss.str.enc.utf8, str, true) do
		if c > 0x7F then
			out[#out + 1] = '?'
		else
			out[#out + 1] = string.char(c)
		end
	end
	return table.concat(out)
end

-- give every built-in encoding a backslash-prefix escape parser
for _, name in ipairs{'utf8','ascii','raw'} do
	local enc = ss.str.enc[name]
	enc.parse_escape = ss.str.enc_generics.pfxescape('\\', enc)
end

-- intended to look up the class/properties of character ch in the
-- range table of encoding enc. unfinished: it currently returns an
-- empty table when the encoding has no range table and otherwise
-- returns nothing after normalizing ch to a codepoint
function ss.str.classify(enc, ch)
	if not enc.ranges then return {} end
	-- accept either a character string or a numeric codepoint
	if type(ch)=='string' then ch = enc.codepoint(ch) end
	-- TODO
end


-- returns an iterator over the characters of str in encoding enc.
-- each step yields (char, position): char is a one-character string,
-- or its numeric codepoint when ascode is truthy; position is a table
-- with the `code` (character index) and `byte` (byte offset) of that
-- character plus `next`, the iterator's live cursor. writing to
-- position.next.byte / position.next.code changes where iteration
-- resumes, which is how callers skip over input.
-- an encoding may replace the whole routine by defining enc.each
function ss.str.each(enc, str, ascode)
	if enc.each then return enc.each(enc,str,ascode) end
	-- metatable shared by all position tables; supplies the esc() helper
	local pm = {
		__index = {
			-- try to parse an escape sequence beginning at this position;
			-- on success, adjust the live cursor and return the escaped
			-- character, otherwise return nothing.
			-- NOTE(review): the cursor has already moved past the current
			-- character when this runs, and enc.parse_escape reports 0,0
			-- for a lone trailing escape, which makes the `- 1` step the
			-- cursor backwards. ss.str.split applies the same return
			-- values without the `- 1` -- confirm which is intended
			esc = function(self)
				local ba, bc, nc = enc.parse_escape(str:sub(self.byte))
				if ba then
					self.next.byte = self.next.byte + ba - 1
					self.next.code = self.next.code + bc - 1
					return nc
				end
			end;
		};
	}
	-- the live cursor: position of the character to be read next
	local pos = {
		code = 1;
		byte = 1;
	}
	return function()
		if pos.byte > #str then return nil end
		local thischar = enc.codepoint(str, pos.byte)
		-- snapshot of where this character sits; `next` aliases the
		-- live cursor rather than copying it
		local lastpos = setmetatable({
			code = pos.code;
			byte = pos.byte;
			next = pos;
		},pm)
		-- advance by the encoded byte length of the character just read
		if not ascode then
			thischar = enc.char(thischar)
			pos.byte = pos.byte + #thischar
		else
			pos.byte = pos.byte + #enc.char(thischar)
		end
		pos.code = pos.code + 1
		return thischar, lastpos
	end
end

-- splits str into a list of whitespace-separated words using the
-- whitespace test of encoding enc.
--   max   optional; once this many words have been collected, the
--         remainder of the string is appended as one final element
--   opts  optional table; opts.escape enables escape sequences, so
--         that an escaped character is added to the current word
--         instead of being tested as whitespace
-- an encoding may replace the routine by defining enc.breakwords
function ss.str.breakwords(enc, str, max, opts)
	-- forward every argument so an override can honor max and opts
	-- (extra arguments are harmless to overrides that ignore them)
	if enc.breakwords then return enc.breakwords(str, max, opts) end
	local words = {}
	opts = opts or {}
	local buf = ''
	local flush = function()
		if buf ~= '' then table.insert(words,buf) buf = '' end
	end
	for c, p in ss.str.each(enc,str) do
		local nc
		if opts.escape then
			nc = p:esc()
		end
		if nc then
			-- string concatenation; this was `+`, which attempts
			-- arithmetic on the two strings and raises an error
			buf = buf .. nc
		elseif enc.iswhitespace(c) then
			flush()
			if max and #words == max then
				local rs = str:sub(p.next.byte)
				if rs ~= '' then
					table.insert(words, rs)
				end
				break
			end
		else
			buf = buf .. c
		end
	end
	flush()
	return words
end
-- joins the word list lst into a single string, separated by the
-- encoding's wordsep (a plain space when none is set). an encoding
-- may replace the routine by defining enc.mergewords
function ss.str.mergewords(enc, lst)
	local custom = enc.mergewords
	if custom then
		return custom(lst)
	end
	local sep = enc.wordsep
	if not sep then sep = ' ' end
	return table.concat(lst, sep)
end
-- splits str into a list of lines at the encoding's newline
-- character; opts is handed to ss.str.split unchanged. an encoding
-- may replace the routine by defining enc.breaklines
function ss.str.breaklines(enc, str, opts)
	-- pass the string itself; this used to pass `lst`, an undefined
	-- global, so overrides always received nil
	if enc.breaklines then return enc.breaklines(str,opts) end
	return ss.str.split(enc, str, enc.encodeUCS'\n', opts)
end

-- splits str at every occurrence of delim and returns the list of
-- pieces. delim may be
--   * a function, called with the rest of the string from the current
--     character onward; a truthy result marks a delimiter and is used
--     as the number of bytes to step over
--   * a string of exactly one character (per enc.len)
--   * a longer string, compared bytewise
-- opts is an optional table: keep_empties keeps empty pieces (they
-- are dropped otherwise), escape enables the encoding's escape
-- sequences so that escaped characters are never taken as delimiters.
-- an encoding may replace the routine by defining enc.split
function ss.str.split(enc, str, delim, opts)
	if enc.split then return enc.split(str,delim,opts) end
	opts = opts or {}
	local elts = {}
	local buf = ''
	-- move the pending piece into the result list
	local flush = function()
		if buf ~= '' or opts.keep_empties then
			table.insert(elts,buf)
			buf = ''
		end
	end
	-- NOTE(review): `esc` is never read below
	local esc = enc.parse_escape
	-- tryesc(str, p) consumes an escape sequence at position p, if
	-- there is one, appending the escaped character to the pending
	-- piece and returning true; with escapes disabled it is a no-op
	local tryesc if opts.escape then
		tryesc = function(str, p)
			local ba, ca, escd = enc.parse_escape(str:sub(p.byte))
			if ba then
				-- NOTE(review): ba/ca describe the escape prefix, but the
				-- cursor is already past the prefix here, so this steps
				-- over the escaped character by the prefix's width --
				-- confirm this holds for multi-byte escaped characters
				p.next.byte = p.next.byte + ba
				p.next.code = p.next.code + ca
				buf = buf .. escd
				return true
			end
		end
	else
		tryesc = function(...)  end
	end

	if type(delim) == 'function' then
		for c, p in ss.str.each(enc,str) do
			if not tryesc(str,p) then
				local skip = delim(str:sub(p.byte))
				if skip then
					flush()
					-- the iterator has already advanced past the first
					-- character of the delimiter. only the byte cursor is
					-- adjusted in this branch, not the character index
					p.next.byte = p.next.byte + skip - 1
				else
					buf = buf .. c
				end
			end
		end
	elseif enc.len(delim) == 1 then
		for c, p in ss.str.each(enc,str) do
			if not tryesc(str,p) then
				if c == delim then
					flush()
				else
					buf = buf .. c
				end
			end
		end
	else
		local dlcode = enc.len(delim)
		for c, p in ss.str.each(enc,str) do
			if not tryesc(str,p) then
				if str:sub(p.byte, p.byte+#delim-1) == delim then
					flush()
					-- step over the rest of the delimiter.
					-- NOTE(review): the byte cursor moves by #delim - 1 but
					-- the character index by the full dlcode -- confirm
					p.next.byte = p.next.byte + #delim - 1
					p.next.code = p.next.code + dlcode
				else
					buf = buf .. c
				end
			end
		end
	end
	-- emit whatever followed the final delimiter
	flush()
	return elts
end

function ss.str.langmatch(tbl, lang, enc)
	-- primitive language-tag matching: picks the entry of tbl whose key
	-- best matches the tag `lang`, falling back to tbl[true]. returns
	-- the chosen value and the key that matched (nil on fallback).
	-- NOTE: THIS IS NOT STANDARDS COMPLIANT. it is "good enough" for
	-- now, but it compares subtags strictly by position, so e.g.
	-- 'en-US-Latn' and 'en-Latn-US' do not match each other although
	-- they should; a proper rewrite needs to understand the format.
	-- ref: IETF BCP 47 (RFC 5646) https://www.ietf.org/rfc/bcp/bcp47.html
	local dash = enc.encodeUCS'-'
	local wanted = ss.str.split(enc, lang, dash, {escape=true})
	local bestlen, bestmatch = 0, nil
	for key in pairs(tbl) do
		if key ~= true then
			local have = ss.str.split(enc, key, dash, {escape=true})
			-- a key is a candidate when every subtag it shares a
			-- position with in the requested tag is identical
			local agrees = true
			for i = 1, math.min(#have, #wanted) do
				if have[i] ~= wanted[i] then
					agrees = false
					break
				end
			end
			-- among candidates, prefer the most specific (longest) key
			if agrees and #have > bestlen then
				bestmatch, bestlen = key, #have
			end
		end
	end
	return tbl[bestmatch] or tbl[true], bestmatch
end

ss.math = {}

function ss.math.lerp(t, a, b)
	-- linear interpolation: yields a at t=0 and b at t=1
	local from_a = (1 - t) * a
	local from_b = t * b
	return from_a + from_b
end

................................................................................
				elseif to == 'int' then return math.floor(tonumber(self))
				elseif c.cast and c.cast[to] then
					return c.cast[to](self, ...)
				elseif type(to) == 'table' and getmetatable(to) and getmetatable(to).cvt and getmetatable(to).cvt[cls] then
				else error((c.ident or 'class') .. ' is not convertible to ' .. (type(to) == 'string' and to or tostring(to))) end
			end
		end
		if c.fns and c.fns[k] then return c.fns[k] end
		if c.index then return c.index(self,k) end
	end

	if c.cast then
		if c.cast.string then
			cls.__tostring = c.cast.string
		end
		if c.cast.number then
................................................................................
		if c.construct then
			c.construct(val, ...)
		end
		return val
	end
	getmetatable(cls).__call = function(_, ...) return cls.mk(...) end
	cls.is = function(o) return getmetatable(o) == cls end
	cls.__metatable = cls -- lock metatable
	return cls
end

-- tidy exceptions

ss.exn = ss.declare {
	ident = 'exn';
................................................................................
		}
	end;
	call = function(me, ...)
		return ss.exn(me, ...)
	end;
}
-- exception kinds created through ss.exnkind, each given a short
-- description string: one for string-handling failures and one for
-- internal bugs
ss.str.exn = ss.exnkind 'failure while string munging'
ss.bug = ss.exnkind 'tripped over bug'

function ss.str.delimit(encoding, start, stop, s)
	local depth = 0
	encoding = encoding or ss.str.enc.utf8
	if not ss.str.begins(s, start) then return nil end
	for c,p in ss.str.each(encoding,s) do
		if c == (encoding.escape or '\\') then
			p.next.byte = p.next.byte + #encoding.char(encoding.codepoint(s, p.next.byte))
			p.next.code = p.next.code + 1
		elseif c == start then
			depth = depth + 1
		elseif c == stop then
			depth = depth - 1
................................................................................
		return x
	elseif select('#', ...) == 0 then
		return nil
	else
		return ss.coalesce(...)
	end
end

ss.tuple = {}
function ss.tuple.any(...)
	-- true when at least one argument was passed; nils count
	local count = select('#', ...)
	return count ~= 0
end

function ss.tuple.cat(...)
	-- captures the given values and returns a function that appends
	-- its own arguments to them and returns the combined values.
	-- the captured list is extended in place, so values appended by
	-- one call are still present on the next call
	local held = {...}
	local function extend(...)
		ss.push(held, ...)
		return table.unpack(held)
	end
	return extend
end

function ss.tuple.suffix(sfx,n,...)
	-- returns the values after sfx with sfx moved to the end, i.e.
	-- suffix(s, a, b, c) yields a, b, c, s. the walk stops at the
	-- first nil value, which is treated as the end of the list
	if n ~= nil then
		-- sfx has to be threaded through the recursion; without it the
		-- next value was mistaken for the suffix, so alternate values
		-- were dropped and the real suffix was never returned
		return n, ss.tuple.suffix(sfx, ...)
	else
		return sfx
	end
end

-- returns every argument except the first
function ss.tuple.cdr(x, ...)
	return select(2, x, ...)
end

-- simple stack class. `store` holds the elements, `top` the index of
-- the topmost one (0 when empty). indexing a stack with a number
-- reads an element: positive indices count from the bottom (1 is the
-- oldest element), zero and negative indices count down from the top
-- (0 is the top, -1 the element beneath it)
ss.stack = ss.declare {
	ident = 'stack';
	mk = function() return {
		top = 0;
		store = {};
	} end;
	index = function(me, i)
		-- this handler sees every key that is neither a field nor a
		-- method; only numbers address elements. anything else reads as
		-- nil rather than raising on the numeric comparison below
		if type(i) ~= 'number' then return nil end
		if i <= 0 then
			return me.store[me.top + i]
		else
			return me.store[i]
		end
	end;
	fns = {
		-- push one or more values, stopping at the first nil; returns
		-- its arguments unchanged
		push = function(me, val, ...)
			if val~=nil then
				me.top = me.top + 1
				me.store[me.top] = val
				me:push(...)
			end
			return val, ...
		end;
		-- remove n elements (default 1) and return them. when fewer than
		-- the whole stack are removed they come back topmost first; when
		-- the stack is drained the entire store is returned in storage
		-- (bottom-first) order
		pop = function(me,n) n = n or 1
			local r = {}
			if n < me.top then
				for i = 0,n-1 do
					r[i+1] = me.store[me.top - i]
					me.store[me.top - i] = nil
				end
				me.top = me.top - n
			else
				r = me.store
				me.store = {}
				-- the stack is empty now; leaving top at its old value
				-- made later pushes and reads use stale indices
				me.top = 0
			end
			return table.unpack(r)
		end;
		-- overwrite the top element, pushing if the stack is empty
		set = function(me,val)
			if me.top == 0 then
				me.top = me.top + 1 --autopush
			end
			me.store[me.top] = val
		end;
		-- return every element, bottom first
		all = function(me) return table.unpack(me.store) end;
		-- iterator over (element, index) pairs: bottom-to-top when
		-- forward is truthy, top-to-bottom otherwise. takes the stack as
		-- its receiver like the other methods; it previously had no `me`
		-- parameter and read an undefined global `top`
		each = function(me, forward)
			if forward then
				local idx = 0
				return function()
					idx = idx + 1
					if idx > me.top
						then return nil
						else return me.store[idx], idx
					end
				end
			else
				local idx = me.top + 1
				return function()
					idx = idx - 1
					if idx == 0
						then return nil
						else return me.store[idx], idx
					end
				end
			end
		end;
	};
}

-- pushdown state machine.
--   state   stack of active states; every entry is {id=, mem=}
--   states  state definitions keyed by id, with optional handlers
--           (enter, exit, activate, react)
--   ttns    transition lists keyed by state id; a transition names
--           its target in `to` and may carry `on`, `pred` and `pop`
--   mem     machine-wide memory
--   match   predicate deciding whether a transition accepts a symbol
ss.automat = ss.declare {
	ident = 'automat';
	mk = function() return {
		state = ss.stack();
		states = {};
		ttns = {};
		mem = {};
		-- a transition matches when its predicate (if any) returns
		-- exactly true and its `on` symbol equals the input; a
		-- transition without `on` never matches
		match = function(sym, ttn, mach)
			if ttn.pred and ttn:pred(mach, sym)~=true then
				return false
			end
			if ttn.on then
				return sym == ttn.on
			end
			return false
		end;
	} end;

	-- copy the recognized fields of the definition table into the new
	-- machine. (this used to store the field *name* under a numeric
	-- key instead of copying the definition's value)
	construct = function(me, def)
		if not def then return end
		for _, field in ipairs{'states','ttns','mem','syms'} do
			if def[field] then me[field] = def[field] end
		end
	end;

	fns = {
		-- let the current (topmost) state respond to a symbol that no
		-- transition accepted
		react = function(me,sym)
			local cur = me.state[0] -- top stack entry, nil when empty
			local s = cur and me.states[cur.id]
			-- NOTE(review): the guard tests `input` but the handler
			-- invoked is `react` -- confirm which name is intended
			if s and s.input then
				s:react(me, sym)
			end
		end;

		-- leave the n topmost states: run their exit handlers (topmost
		-- first), notify the state that becomes the new top, then pop
		-- them off the stack and return the popped entries
		drop = function(me,n)
			for i = 0, math.min(n-1,me.state.top-1) do
				local ent = me.state[-i]
				local s = me.states[ent.id]
				-- hand the handler the memory of the stack entry being
				-- left, matching what enter and activate receive
				if s and s.exit then s:exit(ent.mem, me) end
			end
			if n < me.state.top then
				local newtop = me.states[me.state[-n].id]
				if newtop.activate then newtop:activate(me.state[-n].mem, me, n) end
			end
			return me.state:pop(n)
		end;
		clear = function(me) return me:drop(me.state.top) end;

		-- enter the target state of transition ttn with fresh memory
		transition = function(me,ttn,oldstates)
			local s = me.state:push {id = ttn.to, mem = {}}
			local to = me.states[ttn.to]
			if to.enter then
				to:enter(s.mem, me)
			end
		end;

		-- feed one symbol to the machine: take the first matching
		-- transition of the current state, or else let the state react
		input = function(me,sym)
			-- the current state is the top stack entry; the stack has
			-- no `id` field of its own
			local cur = me.state[0]
			local ttns = me.ttns[cur and cur.id]
			local ttn
			if ttns then
				local _
				_, ttn = ss.find(ttns, function(ttn)
				                        return me.match(sym, ttn, me)
				                       end)
			end
			if ttn then
				if ttn.pop then
					-- drop is a method of the machine, not of the stack.
					-- arguments now line up with transition's declared
					-- parameters (ttn, oldstates)
					local oldstates = {me:drop(ttn.pop)}
					me:transition(ttn, oldstates)
				else
					me:transition(ttn)
				end
			else
				me:react(sym)
			end
		end;
	};
}

Added tools/ucs.lua version [3976f4bc78].



































































































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
-- [ʞ] tools/ucs.lua
--  ~ lexi hale <lexi@hale.su>
--  ? table generator for unicode character classes
--  🄯 AGPLv3


-- template for the generated lua source file: it loads sirsem and
-- replaces the utf8 range table; %s receives the comma-separated
-- list of range entries
local tpl = [[
local ss = require 'sirsem'
ss.str.enc.utf8.ranges = {%s}
]]

-- builds a two-way lookup table from syms: every value maps to its
-- key and every key maps back to its value
local enum = function(syms)
	local lut = {}
	for key, name in pairs(syms) do
		lut[name] = key
		lut[key] = name
	end
	return lut
end

-- input comes from the file named by the first command line
-- argument, or from stdin when no argument is given
local file = io.stdin
local path
if arg[1] then
	path = arg[1]
	-- stop with the system's error message if the file cannot be
	-- opened, rather than failing later on a nil file handle
	file = assert(io.open(path, 'rb'))
end

-- returns n+1 single-bit masks as multiple values: bit ofs first,
-- then bits ofs+1 through ofs+n. ofs defaults to 0
local bitmask_raw = function(n,ofs)
	local shift = ofs or 0
	local masks = { 1 << shift }
	for i = 1, n do
		masks[#masks + 1] = 1 << (i + shift)
	end
	return table.unpack(masks)
end

-- assigns one bit to each name in the sequence tbl, starting at bit
-- ofs, and returns a table mapping name -> bit and bit -> name. the
-- key `true` holds {first bit offset, number of names}
local bitmask = function(tbl,ofs)
	local bits = {bitmask_raw(#tbl,ofs)}
	local map = {}
	local count
	for idx, name in ipairs(tbl) do
		local bit = bits[idx]
		map[name] = bit
		map[bit] = name
		count = idx
	end
	map[true] = {ofs or 0, count}
	return map
end

-- basic character types, numbered 1..7 in listing order by enum
local basictype = enum {
	'numeral';
	'alpha';
	'symbol';
	'punct';
	'space';
	'ctl';
	'glyph'; -- hanji
}
-- character property flags, one bit each. the offset of 3 starts the
-- flags at bit 3, so a basictype number (1..7) and any set of flags
-- can be OR'd together into a single integer
local props = bitmask({
	'hex',
	'upper', 'lower', 'diac',
	'wordbreak', 'wordsep',
	'disallow',
	'brack', 'right', 'left',
	'noprint', 'superimpose'
}, 3)

-- categories forced for specific codepoints; parsecat consults this
-- table before looking at the database fields
local overrides = {
	[0x200B] = basictype.space | props.wordsep; -- database entry is wrong
}

-- applied to every computed category. ~0 has all bits set, so nothing
-- is masked out at present
local mask = ~0 -- mask out irrelevant properties to compactify database

-- computes the category integer (basic type OR'd with property
-- flags, then masked) for one database record. tbl.class holds the
-- record's two-letter class code and tbl.kind a secondary code that
-- refines the 'Cc' and 'Zs' classes. records whose class is not
-- handled get category 0
local function parsecat(tbl)
	local p, b = props, basictype
	local class, kind = tbl.class, tbl.kind
	local c

	local forced = overrides[tbl.codepoint]
	if forced then
		-- a hard-coded override takes precedence over the database
		c = forced
	elseif class == 'Cc' then
		-- control characters that act as separators or whitespace count
		-- as spaces; every other control character is disallowed
		if kind == 'S' or kind == 'WS' or kind == 'B' then
			c = b.space | p.wordsep
		else
			c = b.ctl | p.wordbreak | p.disallow
		end
	elseif class == 'Zs' then
		c = b.space
		if kind == 'WS' then c = c | p.wordsep end
	else
		-- the remaining classes map straight onto a category
		local simple = {
			Nd = b.numeral;
			No = b.numeral | p.diac;
			Lu = b.alpha | p.upper;
			Ll = b.alpha | p.lower;
			Lo = b.alpha;
			Lt = b.alpha;
			Po = b.punct | p.wordbreak;
			Sm = b.symbol | p.wordsep;
			Ps = b.punct | p.brack | p.left;
			Pe = b.punct | p.brack | p.right;
			Pc = b.symbol;
			Pd = b.symbol;
			Sk = b.symbol;
			Sc = b.symbol;
			So = b.glyph;
			Mn = b.symbol | p.diac | p.superimpose;
		}
		c = simple[class] or 0
	end
	return c & mask
end

-- extra codepoints that are given categories by hand and spliced
-- into the record list by the reading loop below (the names suggest
-- a script called ranuir). ranuirAlpha lists alphabetic characters;
-- ranuirSpecial gives explicit categories for the rest
local ranuirAlpha = {0xe39d, 0xe39f, 0xe3ad, 0xe3af, 0xe3b5, 0xe3b7, 0xe3b9, 0xe3bb, 0xe3bd, 0xe3be, 0xe3bf, 0xe3c5, 0xe3c7, 0xe3c9, 0xe3cb, 0xe3cc, 0xe3cd, 0xe3ce, 0xe3cf}
local ranuirSpecial = {
	[0xe390] = basictype.space | props.wordsep;
}

-- ranuir: codepoint -> category, merged from both tables above
local ranuir = {}
for _, code in ipairs(ranuirAlpha) do
	ranuir[code] = basictype.alpha
end
for code, cat in pairs(ranuirSpecial) do
	ranuir[code] = cat
end
-- ranuirKeys: the same codepoints in ascending order
local ranuirKeys = {}
for code in pairs(ranuir) do
	ranuirKeys[#ranuirKeys + 1] = code
end
table.sort(ranuirKeys)

-- read the database one line at a time. every line is a list of
-- ';'-separated fields; field 1 is the codepoint in hexadecimal,
-- field 2 the name, field 3 the class and field 5 the secondary kind
-- code. records that end up with a nonzero category are collected in
-- recs, in input order
local recs = {}
local ranuirok = false
for ln in file:lines() do
	local v = {}
	for s in ln:gmatch('[^;]*') do
		table.insert(v, s)
	end
	-- blank lines and lines that do not start with a hex number yield
	-- nil here; skip them instead of failing on the comparison below
	local codepoint = v[1] and tonumber(v[1],0x10)
	if codepoint and codepoint > 0x7f then -- discard ASCII, we already have that
		local code = {
			codepoint = codepoint;
			name = v[2];
			class = v[3];
			kind = v[5];
		}
		code.cat = parsecat(code)

		-- splice the hand-classified ranuir codepoints in, once, just
		-- before the first record that lies beyond the start of their
		-- block
		if (not ranuirok) and code.codepoint > 0xe390 then
			for _,ri in pairs(ranuirKeys) do
				table.insert(recs, {
					codepoint = ri;
					cat = ranuir[ri];
				})
			end
			ranuirok = true
		end

		if code.cat ~= 0 then
			table.insert(recs,code)
		end
	end
end


-- collapse the record list into ranges of the form
-- {first codepoint, last codepoint, category}.
--   start   first record of the range being built
--   last    most recent record added to it
--   altern  true while the range is an alternating-case run
-- NOTE(review): the codepoints of neighbouring records are never
-- compared, so a range can span codepoints that have no record
-- (including those dropped earlier for having category 0) -- confirm
-- this is intended.
-- NOTE(review): with no records at all, start and last are nil and
-- flush() fails when it indexes them
local ranges = {}
local last = recs[1]
local start = last
local altern = false
-- emit the range being built; an alternating-case run is tagged as
-- both upper and lower. (the parameter i is unused)
local flush = function(i)
	local new = {start.codepoint, last.codepoint, last.cat}
	if altern then
		new[3] = new[3] | props.upper | props.lower
	end
	table.insert(ranges, new)
	altern = false
end
for i, r in ipairs(recs) do
	if r.cat ~= last.cat then
	-- we can massively compactify this set with one weird trick:
	-- most non-ascii cased character sets are not in AAAAaaaa,
	-- but rather AaAaAa order. so we can look for this simple
	-- pattern and compress it, shaving c. 1/3rd off our dataset
		local ambi = props.upper | props.lower
		-- continue (or begin) an alternating run when the categories
		-- differ only in their case bits. a run can only begin right
		-- after a lone uppercase record
		if (altern or (start == last and (last.cat & props.upper) ~= 0)) and
			((r.cat &~ ambi) == (last.cat &~ ambi)) then
			altern = true
			last = r
		else
			flush()
			start = r
		end
	elseif altern then
		-- two records in a row with the same category break the
		-- alternation, so the run ends here
		flush()
		start = r
	end
	last = r
end
-- emit the final, still-open range
flush()

-- expand bitmask
	-- for k,v in pairs(ranges) do
	-- 	local basic = v[3] & ((1<<3) - 1) -- first three bits
	-- 	if basic ~= 0 then
	-- 		v[4] = basictype[basic]
	-- 	end
	-- 	local bitrange = props[true]
	-- 	for j=bitrange[1], bitrange[2] do
	-- 		if (v[3] & (1<<j)) ~= 0 then
	-- 			table.insert(v, props[1<<j])
	-- 		end
	-- 	end
	-- end

-- the data has been collected and formatted in the manner we
-- need; now we just need to emit it as a lua table

local tab = {}
-- ipairs walks the list in index order. pairs gives no ordering
-- guarantee, and the emitted entries should stay in the ascending
-- codepoint order in which the ranges were built
for i, v in ipairs(ranges) do
	tab[i] = string.format('{0x%x,0x%x,%u}',table.unpack(v))
end
io.stdout:write(string.format(tpl, table.concat(tab,',')))