Index: cli.lua ================================================================== --- cli.lua +++ cli.lua @@ -1,11 +1,16 @@ +-- [ʞ] cli.lua +-- ~ lexi hale +-- 🄯 AGPLv3 +-- ? simple command line driver for the cortav library local ct = require 'cortav' local ss = require 'sirsem' local default_mode = { ['render:format'] = 'html'; ['html:gen-styles'] = true; + ['groff:color'] = true; } local function main(input, output, log, mode, suggestions, vars, extrule) local doc = ct.parse(input.stream, input.src, mode, function(c) @@ -35,21 +40,15 @@ end, ss.kfilter(mode, function(m) return ss.str.begins(m, mode['render:format']..':') end)) doc.vars = vars - - -- this is kind of gross but the context object belongs to the parser, - -- not the renderer, so that's not a suitable place for this information - doc.stage = { - kind = 'render'; - format = mode['render:format']; - mode = mode; - suggestions = suggestions; - } - output:write(ct.render[mode['render:format']](doc, render_opts)) + output:write(ct.render[mode['render:format']]( + doc, render_opts, function(stage) + stage.mode = mode + end)) return 0 end local inp,outp,log = io.stdin, io.stdout, io.stderr Index: cortav.ct ================================================================== --- cortav.ct +++ cortav.ct @@ -76,12 +76,10 @@ * [*section starts] [`#] [`§]: starts a new section. all sections have an associated depth, determined by the number of sequence repetitions (e.g. "###" indicates depth-three"). sections may have headers and IDs; both are optional. IDs, if present, are a sequence of raw-text immediately following the hash marks. if the line has one or more space character followed by styled-text, a header will be attached. the character immediately following the hashes can specify a particular type of section. e.g.: ** [`#] is a simple section break. ** [`#anchor] opens a new section with the ID [`anchor]. ** [`# header] opens a new section with the title "header". 
** [`#anchor header] opens a new section with both the ID [`anchor] and the title "header". -** [`#>conversation] opens a blockquote section named [`conversation] without a header. -** [`#&id mime] opens a new inline object [`id] of type [`mime]. useful for embedding SVGs. the ID and mime type must be specified. * [*nonprinting sections] ([`^]): sometimes, you'll want to create a namespace without actually adding a visible new section to the document. you can achieve this by creating a [!nonprinting section] and defining resources within it. nonprinting sections can also be used to store comments, notes, or other information that is useful to have in the source file without it becoming a part of the output * [*resource] ([`@]): defines a [!resource]. a resource is an file or object that exists outside of the document but which will be included in the document somehow. common examples of resources include images, videos, iframes, or headers/footers. see [>rsrc resources] for more information. * [*lists] ([`*] [`:]): these are like paragraph nodes, but list nodes that occur next to each other will be arranged so as to show they compose a sequence. depth is determined by the number of stars/colons. like headers, a list entry may have an ID that can be used to refer back to it; it is indicated in the same way. if colons are used, this indicates that the order of the items is signifiant. :-lists and *-lists may be intermixed; however, note than only the last character in the sequence actually controls the depth type. * [*directives] ([`%]): a directive issues a hint to the renderer in the form of an arbitrary string. directives are normally ignored if they are not supported, but you may cause a warning to be emitted where the directive is not supported with [`%!] or mark a directive critical with [`%!!] so that rendering will entirely fail if it cannot be parsed. * [*comments] ([`%%]): a comment is a line of text that is simply ignored by the renderer. 
@@ -128,11 +126,11 @@ ** consider using a macro definition [`\edit: [~[#1]][+[#2]]] to save typing if you are doing editing work * link \[>[!ref] [!styled-text]\]: produces a hyperlink or cross-reference denoted by [$ref], which may be either a URL specified with a reference or the name of an object like an image or section elsewhere in the document. the unicode characters [`→] and [`🔗] can also be used instead of [`>] to denote a link. * footnote {span ^|ref|[$styled-text]}: annotates the text with a defined footnote. in interactive output media [`\[^citations.qtheo Quantum Theosophy: A Neophyte's Catechism]] will insert a link with the next [`Quantum Theosophy: A Neophyte's Catechism] that, when clicked, causes a footnote to pop up on the screen. for static output media, the text will simply have a superscript integer after it denoting where the footnote is to be found. * superscript {obj '|[$styled-text]} * subscript {obj ,|[$styled-text]} -* raw {obj \\ |[$raw-text]}: causes all characters within to be interpreted literally, without expansion. the only special characters are square brackets, which must have a matching closing bracket +* raw {obj \\ |[$raw-text]}: causes all characters within to be interpreted literally, without expansion. the only special characters are square brackets, which must have a matching closing bracket, and backslashes. * raw literal \[$\\[!raw-text]\]: shorthand for [\[$[\…]]] * macro [`\{[!name] [!arguments]\}]: invokes a [>ex.mac macro], specified with a reference * argument {obj #|var}: in macros only, inserts the [$var]-th argument. otherwise, inserts a context variable provided by the renderer. * raw argument {obj ##|var}: like above, but does not evaluate [$var]. * term {obj &|name}, {span &|name|[$expansion]}: quotes a defined term with a link to its definition, optionally with a custom expansion of the term (for instance, to expand the first use of an acronym) @@ -198,10 +196,68 @@
~~~ note that empty elements with CSS classes are used in the output, to avoid repeating long image definitions (especially base64 inline encoded ones!) +inline resources are defined a bit differently: + +~~~cortav +@smiling-man-business-card text/plain { + THE SMILING MAN | tel. 0-Ω00-666█ + if you can read this | email: nameless@smiles.gov + it is already too late | address: right behind you +} +@smiling-man-business-card image/png;base64 { + %% incomprehensible gibbering redacted +} +~~~ + +for an inline resource, the identifier is followed by a MIME type and an opening bracket. the opening bracket may be any of the characters [`\{][`\[][`(][`<], and can optionally be followed by additional characters to help disambiguate the closing bracket. the closing bracket is determined by "flipping" the opening bracket, producing bracket pairs like the following: +* [`\{:][`:}] +* [`] +* [`(*<][`>*)] +* [`<>][`<>] [!(disables nesting!)] +if the open and closing brackets are distinguishable, they will nest appropriately, meaning that [`{][`}] alone is very likely to be a safe choice to escape a syntactically correct C program (that doesn't abuse macros too badly). brackets are searched for during parsing; encoded resources are not decoded until a later stage, so a closing bracket character in a base64-encoded text file cannot break out of its escaping. + +as a convenience, if the first line of the resource definition begins with a single tab, one tab will be dropped from every following line in order to allow legible indentation. similarly, if an opening bracket is followed immediately by a newline, this newline is discarded. + +text within a resource definition body is not expanded unless the resource definition is preceded with an [`%[*expand]] directive. if an expand directive is found, the MIME type will be used to try and determine an appropriate type of formatting, potentially invoking a separate renderer. 
for example, [`text/html] will invoke the [`html] backend, and [`application/x-troff] will invoke the [`groff] backend. if no suitable renderer is available, expansions will generate only plain text. + +two suffixes are accepted: [`;base64] and [`;hex]. the former will decode the presented strings using the base64 algorithm to obtain the resource's data; the second will ignore all characters but ASCII hexadecimal digits and derive the resource data byte-by-byte by reading in hexadecimal pairs. for instance, the following sections are equivalent: + +~~~ +@propaganda text/plain { + WORLDGOV SAYS + “don't waste time with unproductive thoughts + your wages will be docked accordingly” +} +~~~ +~~~ +@propaganda text/plain;hex { + 574f 524c 4447 4f56 2053 4159 530a e280 9c64 6f6e 2774 2077 6173 + 7465 2074 696d 6520 7769 7468 2075 6e70 726f 6475 6374 6976 6520 + 7468 6f75 6768 7473 0a20 796f 7572 2077 6167 6573 2077 696c 6c20 + 6265 2064 6f63 6b65 6420 6163 636f 7264 696e 676c 79e2 809d 0a +} +~~~ +~~~ +@propaganda text/plain;base64 { + V09STERHT1YgU0FZUwrigJxkb24ndCB3YXN0ZSB0aW1lIHdpdGggdW5wcm9kdWN0aXZlIHRob3Vn + aHRzCiB5b3VyIHdhZ2VzIHdpbGwgYmUgZG9ja2VkIGFjY29yZGluZ2x54oCdCg== +} +~~~ + +inline resources can also be (ab)used for multiline macros: +~~~ +@def text/x-cortav { + * [*[#1]] [!([#2]) + *: [#3] +} +&def nuclear bunker|n|that which will not protect you from the Smiling Man +~~~ +to make this usage simpler, resources with a type of [`text/x-cortav] can omit the MIME type field. + ### supported parameters * [`src] (all): specifies where to find the file, what it is, and how to embed it. each line of [`src] should consist of three whitespace-separated words: embed method, MIME type, and URI. ** embed methods *** [`local]: loads the resource at build time and embeds it into the output file. not all implementations may allow loading remote network resources at build time. *** [`remote]: only embeds a reference to the location of the resource. use this for e.g. 
live iframes, dynamic images, or images hosted by a CDN. @@ -213,20 +269,22 @@ *** [`font/*] can be used with the HTML backend to reference a web font *** [`font/woff2] can be used with the HTML backend to reference a web font *** [`text/plain] (will be inserted as a preformatted text block) *** [`text/css] (can be used when producing HTML files to link in an extra stylesheet, either by embedding it or referencing it from the header) *** [`text/x-cortav] (will be parsed and inserted as a formatted text block; context variables can be passed to the file with [`ctx.[$var]] parameters) +*** [`application/x-troff] can be used to supply sections of text written in raw [`groff] syntax. these are ignored by other renderers. +*** [`text/html] can be used to supply sections of text written in raw HTML. *** any MIME-type that matches the type of file being generated by the renderer can be used to include a block of data that will be passed directly to the renderer. ** URI types: additional URI types can be added by extensions or different implementations, but every compliant implementation must support these URIs. *** [`http], [`https]: accesses resources over HTTP. add a [`file] fallback if possible for the benefit of renderers/viewers that do not have internet access abilities. *** [`file]: references local files. absolute paths should begin [`file:/]; the slash should be omitted for relative paths. note that this doesn't have quite the same meaning as in HTML -- [`file] can (and usually should be) used with HTML outputs to refer to resources that reside on the same server. a cortav URI of [`file:/etc/passwd] will actually result in the link [`/etc/passwd], not [`file:///etc/passwd] when converted to HTML. generally, you only should use [`http] when you're referring to a resource that exists on a different domain. 
*** [`name]: a special URI used generally for referencing resources that are already installed on a target system and do not need to be embedded or linked, the name and type are enough for a renderer on another machine to locate the correct resource. this is useful mostly for [>fonts fonts], where it's more typical to refer to fonts that are installed on your system rather than providing paths to font files. *** [`gemini]: accesses resources over the gemini protocol. currently you should really only use this for [`local] resources unless you're using the gemtext renderer backend, since nothing but gemini browsers are liable to support this protocol. * [`desc]: supplies a narrative description of the resources, for use as an "alt-text" when the image cannot be loaded and for screenreaders. -* [`detail]: supplies extra narrative commentary that is displayed contextually, e.g. when the user hovers her mouse cursor over the embedded object. +* [`detail]: supplies extra narrative commentary that is displayed contextually, e.g. when the user hovers her mouse cursor over the embedded object. also used for [`desc] if [`desc] is not supplied. -note that in certain cases, full MIME types do not need to be used. say you're defining a font with the [`name] URI -- you can't necessary know what file type the system fonts on another computer are going to be. in this case, you can just write [`font] instead of [`font/ttf] or [`font/woff2] or similar. all cortav needs to know in this case is what abstract kind of object you're referencing. +note that in certain cases, full MIME types do not need to be used. say you're defining a font with the [`name] URI -- you can't necessarily know what file type the system fonts on another computer are going to be. in this case, you can just write [`font] instead of [`font/ttf] or [`font/woff2] or similar. all cortav needs to know in this case is what abstract kind of object you're referencing. 
[`groff] fonts (referenced with the [`dit] URI) don't have a specific MIME type either. ##ctxvar context variables context variables are provided so that cortav renderers can process templates. certain context variables are provided for by the standard. you can test for the presence of a context variable with the directive [`%[*when] ctx [$var]]. context variables are accessed with the [` \[#[$name]\]] span. @@ -284,11 +342,11 @@ local("Open Sans"), local("sans-serif"); } ~~~ -there are two things that aren't super clear from the CSS, however. notice how we used [`auto] on a couple of those specs? this means it's up to the renderer to decide whether to link or embed the font. for html, a font specified by name can't really be embedded, but for some file formats, it can be. [`auto] lets us produce valid HTML while still taking advantage of font embedding in other formats. +there are two things that aren't super clear from the CSS, however. notice how we used [`auto] on a couple of those specs? this means it's up to the renderer to decide whether to link or embed the font. in HTML, a font specified by name can't really be embedded, but for some file formats, it can be. [`auto] lets us produce valid HTML while still taking advantage of font embedding in other formats. now that we have our font families defined, we can use their identifiers with the [`%[*font]] directive to control the font stack. the first thing we need to do is push a new font context. there's two ways we can do this: fnd: [`%[*font] [#1]] * {fnd dup} will create a copy of the current font context, allowing us to make some changes and then revert later with the {fnd pop} command. this isn't useful in our case, however, because right now the stack is empty; there's nothing to duplicate. * {fnd new} will create a brand new empty context for us to work with and push it to the stack. this can also be used to temporarily revert to the system default fonts, and then switch back with {fnd pop}. 
@@ -311,30 +369,30 @@ %font new %font set body=sans header=serif %font dup %font header=title -# lorem ipsum dolor +# WorldGov announcement %font pop %% we've now set up a default font context, created a new context for the title of the %% document, and then popped it back off after the title was inserted so that our %% first font context is active again. everything after that last '%font pop' will %% be printed in sans, except for headers, which will be printed in 'serif' -lorem ipsum dolor sit amet, sed consectetur apiscing elit… +WorldGov would like to congratulate 2274's Employee of the Year, [*The Smiling Man]! The Smiling Man had a few words of encouragement for the weary proles of the world when he graciously accepted his award at this year's ceremonial bloodletting: %font dup %font set body=cursive -> sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. -> Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut +> It is very important for you to understand that your dreams are the intellectual property of the WorldGov organization. +> Laborers who fail more than one duplicity check per workcycle will receive extra Pit Time. %font pop %% above we created a blockquote whose text is printed in a cursive font; afterwards, -%% we simply remove this new context— +%% we simply remove this new context, and everything is back the way it was at "WorldGov would like" -and everything is back the way it was at "lorem ipsum" +In addition to his 227th consecutive Employee of the Year Award, The Smiling Man has been nominated for a WorldGov Lifetime Achievement Award by the Hyperion Entity in recognition of his exceptional leadership in the Department Which Has No Name. Chief Ritual Officer Mr. 
Winthrop had this to say: %% the font mechanism is at its most powerful when used with multiline macros: cursive-quote: %font dup %font set body=cursive @@ -341,11 +399,11 @@ > [#1] %font pop %% now, whenever we want a block with a cursive body, we can simply invoke -&$cursive-quote Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident +&$cursive-quote A sea of blood yet lies between us and the Destination. It won't impede me. And I'm so very proud to say that, apparently, it won't impede the Smiling Man either, if the Svalbard contract was any indication! [pause for laughter] %% without affecting the overall font context. in fact, since 'cursive-quote' creates %% its context using 'dup', it would import all font specifications besides 'body' %% from the environment it is invoked in ~~~ @@ -688,11 +746,11 @@ #### deterministic builds some operating systems, like NixOS, require packages that can be built in reproducible ways. this implies that all data, all [!state] that goes into producing a package needs to be accounted for before the build proper begins. the [`cortav] build process needs to be slightly altered to support such a build process. while the cortav specification itself does not concern itself with matters like whether a particular character is a numeral or a letter, optimal typesetting in some cases requires such information. this is the case for the equation span- and block-types, which need to be able to distinguish between literals, variables, and mathematical symbols in [^alas-math the equations they format]. the ASCII charset is small enough that exhaustive character class information can be manually hardcoded into a cortav implementation, the various encodings of Unicode most certainly are not. 
- alas-math: sadly, i was not at any point consulted by any of the generations of mathematicians stretching back into antiquity who devised their notations without any regard for machine-readability. [!for shame!] + alas-math: sadly, i was not at any point consulted by any of the generations of mathematicians stretching back into antiquity, who as a direct consequence devised their notations without [*any] regard for machine-readability. [!for shame!] for this reason, the reference implementation of cortav embeds the file [`UnicodeData.txt], a database maintained by the Unicode Consortium. this is a rather large file that updates for each new Unicode version, so it is downloaded as part of the build process. to build on NixOS, you'll need to either disable the features that rely on this database (not recommended), or download the database yourself and tell the build script where to find it. this is the approach the official nix expression will take when i can be bothered to write it. see the examples below for how to conduct a deterministic build ~~~ deterministic build with unicode database [sh] ~~~ /src $ mkdir cortav && cd cortav @@ -705,11 +763,11 @@ /src/cortav $ fossil clone https://c.hale.su/cortav .fossil && fossil open .fossil /src/cortav $ make build/cortav encoding-data= ~~~ ! while most of the data used is taken directly from UnicodeData.txt, the database generated by [`tools/ucs.lua] splices in some extra character information before generating a database. this is partly because certain characters may not be classified in a useful way and need to be manually overwritten. however, the reference implementation also seeks to provide accurate data for certain character sets that are not part of unicode proper and can be expressed in UTF only through its private use areas. -! currently, only the [>corran Corran] script is currently supported in this fashion, but i intend to add [>tengwar Tengwar] as well. 
if there is a con-script or any other informally encoded script you would like supported by the reference implementation, please open an issue. +! currently, only the [>corran Corran] script is supported in this fashion, but i intend to add [>tengwar Tengwar] as well. if there is a con-script or any other informally encoded script you would like supported by the reference implementation, please open an issue. [*do note] that no cortav implementation needs to concern itself with character class data. this functionality is provided in the reference implementation strictly as an (optional) extension to the spec to improve usability, not as a normative requirement. corran: http://ʞ.cc/fic/spirals/society tengwar: https://en.wikipedia.org/wiki/Tengwar @@ -784,21 +842,27 @@ * [`@[*tone]\[/[$alpha]\]([$fac] \[[$shift] \[[$saturate]\]\] )]: resolves to a color expression. [$fac] is a floating-point value scaling from the background color to the foreground color. [$shift] is a value in degrees controlling how far the hue will shift relative to the accent. [$saturate] is a floating-point value controlling how satured the color is. ###refimpl-rend-groff groff the [`groff] backend produces a text file suitable for supplying to a [`groff] compiler. [`groff] is the GNU implementation of a venerable typesetting system from the early days of UNIX -as a convenience, the groff backend supports two modes of operation: it can write a [`groff] file directly to disk, or it can automatically launch a [`groff] process with the appropriate command line options and environment variables. this second mode is recommended unless you're rendering very large files to multiple formats, as [`groff] invocation is nontrivial and it's best to let the renderer handle that for you. +you can produce a final output directly by piping from the [`cortav] driver into [`groff]. if your document uses an encoding other than ASCII, you'll need to notify [`groff] of this with the [`-K] flag. 
for example, to render a UTF8 cortav file to PDF: + +~~~ +$ cortav input.ct -m render:format groff | groff -Tpdf -Kutf8 > output.pdf +~~~ + +in the future, it is planned to enable the driver to operate groff automatically and directly produce the desired output format when the binary wrapper is in use. doing so securely and hygienically is not possible in pure lua, however. ####refimpl-rend-groff-modes modes [`groff] supports the following modes: * string [`groff:annotate] controls how footnotes will be handled. ** [`footnote] places footnotes at the end of the page they are referenced on. if the same footnote is used on multiple pages, it will be duplicated on each. ** [`secnote] places footnotes at the end of each section. footnotes used in multiple sections will be duplicated for each ** [`endnote] places all footnotes at the end of the rendered document. -* string [`groff:dev] names an output device (such as [`dvi] or [`pdf]). if this mode is present, [`groff] will be automatically invoked * string [`groff:title-page] takes an identifier that names a section. this section will be treated as the title page for the document. +* string [`groff:title] sets a specific title to be used in headers instead of relying on header heuristics ### directives * [`%[*pragma] title-page [$id]] sets the title page to section [$id]. this causes it to be specially formatted, with a large, centered title and subtitle. ### quirks Index: cortav.lua ================================================================== --- cortav.lua +++ cortav.lua @@ -85,10 +85,13 @@ unimpl = ss.exnkind 'feature not implemented'; ext = ss.exnkind 'extension error'; enc = ss.exnkind('encoding error', function(msg, ...) return string.format('[%s]' .. msg, ...) end); + rdr = ss.exnkind('could not render', function(msg, ...) + return string.format('(backend %s)'..msg, ...) 
+ end); } ct.ctx = declare { mk = function(src) return {src = src} end; ident = 'context'; @@ -143,10 +146,27 @@ } end; construct = function(self, id, depth) self.id = id self.depth = depth end; + fns = { + visible = function(self) + if self.kind == 'nonprinting' then return false end + local invisibles = { + ['break'] = true; + reference = true; + resource = true; + directive = true; + } + for k,b in pairs(self.blocks) do + if not (invisibles[b.kind] or b.invisible) then return true end + -- extensions that add invisible nodes to the AST must + -- mark them as such for rendering to work properly! + end + return false + end; + } } ct.doc = declare { ident = 'doc'; fns = { @@ -751,11 +771,11 @@ table.insert(spans, i.parse(substr:sub(1+#i.seq), ctx)) break end end if not found then - ctx:fail('no recognized control sequence in [%s]', substr) + buf = buf .. c end elseif c == '\n' then flush() table.insert(spans,{kind='line-break',origin=ctx:clone()}) else @@ -988,11 +1008,13 @@ end local str = l:match '^\t\t(.-)%s*$' last.val = last.val .. '\n' .. 
str c.sec.refs[last.key] = last.val end}; - {seq = '\t', fn = blockwrap(function(l,c,j,d) + {seq = '\t', pred = function(l) + return (l:match '\t+([^:]+):%s*(.*)$') + end; fn = blockwrap(function(l,c,j,d) local ref, val = l:match '\t+([^:]+):%s*(.*)$' local last = d[#d] local rsrc if last and last.kind == 'resource' then last.props[ref] = val @@ -1187,11 +1209,13 @@ end job:hook('line_end',ctx,l) end function ct.parse(file, src, mode, setup) - + -- this object is threaded down through the parse tree + -- and copied to store information like the origin of the + -- element in the source code local ctx = ct.ctx.mk(src) ctx.line = 0 ctx.doc = ct.doc.mk() ctx.doc.src = src ctx.sec = ctx.doc:mksec() -- toplevel section @@ -1269,5 +1293,24 @@ end ctx.doc.stage = nil ctx.doc.docjob:hook('meddle_ast') return ctx.doc end + +function ct.expand_var(v) + local val + if v.pos then + if not v.origin.invocation then + v.origin:fail 'positional arguments can only be used in a macro invocation' + elseif not v.origin.invocation.args[v.pos] then + v.origin.invocation.origin:fail('macro invocation %s missing positional argument #%u', v.origin.invocation.macro, v.pos) + end + val = v.origin.invocation.args[v.pos] + else + val = v.origin.doc:context_var(v.var, v.origin) + end + if v.raw then + return val, true + else + return ct.parse_span(val, v.origin), false + end +end Index: render/groff.lua ================================================================== --- render/groff.lua +++ render/groff.lua @@ -20,39 +20,292 @@ local s = ss.strac() for _, v in pairs{...} do s(v) end return s end -function ct.render.groff(doc, opts) +local function gsan(str) + local tocodepoint = function(ch) + return string.format('\\[u%04X]', utf8.codepoint(ch)) + end + str = str:gsub('(["\'\\])',tocodepoint) + return str +end + +local gtxt = ss.declare { + ident = 'groff-text'; + mk = function() return { + lines = {}; + } end; + fns = { + raw = function(me, text) + if me.linbuf == nil then + me.linbuf = 
ss.strac() + end + me.linbuf(text) + end; + txt = function(me, str, ...) + if str == nil then return end + me:raw(gsan(str)) + -- WARN this will cause problems if str is ever allowed to + -- include a line break. we can sanitize by converting + -- every line break into a new entry in the table, but i + -- don't think it should be possible for a \n to reach us + -- at this point, so i'm omitting the safety check as it + -- would involve an excessive hit to performance + me:txt(...) + end; + brk = function(me) + me:flush() + table.insert(me.lines, '') + end; + line = function(me, ...) + me:flush() + me:txt(...) + end; + req = function(me, r) + me:flush() + table.insert(me.lines, '.'..r) + end; + esc = function(me, e) + me:raw('\\' .. e) + end; + flush = function(me) + if me.linbuf ~= nil then + local line = me.linbuf:compile() + local first = line:sub(1,1) + -- make sure our lines aren't accidentally interpreted + -- as groff requests. groff is kinda hostile to script + -- generation, huh? + if first == '.' or first == "'" then + line = '\\&' ..line + end + table.insert(me.lines, line) + me.linbuf = nil + end + end; + compile = function(me) + me:flush() + return table.concat(me.lines, '\n') + end; + } +} + +local function mkColorDef(name, color) + return '.defcolor '..name..' rgb ' .. 
+ table.concat({color:rgb_t()}, ' ', 1, 3) +end + +local function addAccentTones(rs,hue,spread) + local base = ss.color(hue, 1, .5) + local right = spread > 0 and ss.color(hue + spread, 1, .5) + or ss.color(hue, 0.4, 0.6) + local left = spread > 0 and ss.color(hue - spread, 1, .5) + or ss.color(hue, 1, 0.3) + + local steps = 6 + for i=-3,3 do + local nc, nm + local o if i > 0 + then o = right nm = 'R' + else o = left nm = 'L' + end + nc = base + o:alt('alpha', math.abs(i) / 3) + rs.addColor('accent'..nm..tostring(math.abs(i)),nc) + end +end +local function mkrc() + return { + clone = function(self, origin) + return { + origin = origin; + clone = self.clone; + prop = ss.clone(self.prop); + mk = self.mk; + add = self.add; + block = self.block; + blocks = self.blocks; + span = self.span; + spans = self.spans; + } + end; + blocks = {}; + prop = {}; + block = function(self) + local sub = self:clone() + sub.spans = {} + sub.blocks = nil + sub.span = function(me, ln) + local p = ss.clone(me.prop) + p.txt = ln + p.block = sub + p.origin = me.origin + table.insert(me.spans, p) + return p + end; + table.insert(self.blocks, sub) + return sub + end; + } +end + +function ct.render.groff(doc, opts, setup) -- rs contains state specific to this render job -- that modules will need access to + local fail = function(msg, ...) + ct.exns.rdr(msg, 'groff', ...):throw() + end local rs = {}; rs.macsets = { strike = { '.de ST'; [[.nr ww \w'\\$1']]; - [[\Z@\v'-.25m'\l'\\n[ww]u'@\\$1']]; + [[\Z@\v'-.25m'\l'\\n[ww]u'@\\$1]]; + '..'; + }; + color = {'.color'}; + insert = {}; + footnote = { + '.de footnote-blank'; + '. sp 0.25m'; + '..'; + '.ev footnote-env'; + '. ps 8p'; + '. in 0.5c'; + '. blm footnote-blank'; + '.ev'; + '.de footnote-print'; +-- '. sp |\\\\n[.p]u-\\\\n[footnote-pos]u'; + '. sp 0.5c'; + '. ev footnote-env'; + '. fn'; + '. ev'; + '. rm fn'; + '. 
nr footnote-pos 0'; + -- move the trap past the bottom of the page so it's not + -- invoked again until more footnotes have been assembled + '. ch footnote-print |\\\\n[.p]u+10'; + '. bp'; + '..'; + '.wh |\\n[.p]u footnote-print'; + }; + root = { + -- these are macros included in all documents + -- page offset is hideously broken and unusable; we + -- zero it out so we can use .in to control indents + -- instead. note that the upshot of this is we need + -- to manually specify the indent in every other + -- environment from now on, .evc doesn't seem to cut it + -- set up the page title environment & trap + "'in 2c"; + "'ll 18c"; + "'po 0"; + "'ps 13p"; + "'vs 15p"; + ".ev pgti"; + ". evc 0"; + ". fam H"; + ". ps 10pt"; + ".ev"; + '.de ph'; + '. sp 0.6c'; + '. ev pgti'; + '. po 1c'; + '. lt 19c'; + ". tl '\\\\*[doctitle]'\\fB\\\\*[title]\\f[]'%'"; + '. po 0'; + ". br"; + '. ev'; + '. sp 1.2c'; + '..'; + '.wh 0 ph'; + '.de np'; + '. sp 0.2c'; '..'; + '.blm np' + }; } rs.macsNeeded = { order = {}; + map = {}; count = 0; + deps = { + insert = {'color'}; + strike = {'color'}; + }; } + rs.linkctr = 0 + function rs.macAdd(id) - if rs.macsets[id] then - rs.macsNeeded.count = macsNeeded.count + 1 + if rs.macsets[id] and not rs.macsNeeded.map[id] then + rs.macsNeeded.count = rs.macsNeeded.count + 1 rs.macsNeeded.order[rs.macsNeeded.count] = id + rs.macsNeeded.map[id] = true + if not rs.macsNeeded.deps[id] then + return true + end + + for k,v in pairs(rs.macsNeeded.deps[id]) do + if not rs.macsNeeded.map[v] then + rs.macAdd(v) + end + end + return true else return false end end + + rs.macAdd 'root' + + rs.colors = {} + rs.addColor = function(name,color) + if not ss.color.is(color) then + ss.bug('%s is not a color value', color):throw() + end + rs.colors[name] = color + end + + if opts.accent then + addAccentTones(rs, tonumber(opts.accent), tonumber(opts['hue-spread']) or 0) + rs.addColor('new', rs.colors.accentR3) + rs.addColor('del', rs.colors.accentL3) + else + 
rs.addColor('new', ss.color(80, 1, .3)) + rs.addColor('del', ss.color(0, 1, .3)) + end + + doc.stage = { + type = 'render'; + format = 'groff'; + groff_render_state = rs; + } + + setup(doc.stage) local job = doc:job('render_groff',nil,rs) + + local function collect(rc, spans, b, s) + local rcc = rc:clone() + rcc.spans = {} + rs.renderSpans(rcc, spans, b, s) + return rcc.spans + end + local function collectText(...) + local text = collect(...) + local s = ss.strac() + for i, l in ipairs(text) do + s(l.txt) + end + return s + end + -- the way this module works is we build up a table for each block -- of individual strings paired with attributes that say how they -- should be rendered. we then iterate over the table, applying -- formats as need be, and inserting blanks after each block + + local spanRenderers = {} function spanRenderers.format(rc, s, b, sec) local rcc = rc:clone() if s.style == 'strong' then @@ -60,111 +313,278 @@ elseif s.style == 'emph' then rcc.prop.emph = true elseif s.style == 'strike' then rcc.prop.strike = true rs.macAdd 'strike' + rcc.prop.color = 'del' elseif s.style == 'insert' then + rs.macAdd 'insert' + rcc.prop.color = 'new' end rs.renderSpans(rcc, s.spans, b, sec) end; + + function spanRenderers.link(rc, l, b, sec) + rs.renderSpans(rc, l.spans, b, sec) + rs.linkctr = rs.linkctr + 1 + rs.macAdd 'footnote' + local p = rc:span(string.format('[%u]', rs.linkctr)) + if type(l.ref) == 'string' then + local t = '' + if b.origin.doc.sections[l.ref] then + local hn = b.origin.doc.sections[l.ref].heading_node + if hn then + t = collectText(rc, hn.spans, b, sec):compile() + end + else + local obj = l.origin:ref(l.ref) + if type(obj) == 'string' then + t = l.origin:ref(l.ref) + end + end + p.div = { fn = tostring(rs.linkctr) .. ') ' .. 
t } + end + end; + + function spanRenderers.raw(rc, s, b, sec) + rs.renderSpans(rc, s.spans, b, sec) + end; + + function spanRenderers.var(rc,v,b,s) + local t, raw = ct.expand_var(v) + if raw then rc:span(t) else + rs.renderSpans(rc,t,b,s) + end + end + function spanRenderers.macro(rc, m,b,s) + local macroname = collectText(rc, + ct.parse_span(m.macro, b.origin), + b, s):compile() + + local r = b.origin:ref(macroname) + if type(r) ~= 'string' then + b.origin:fail('%s is an object, not a reference', t.ref) + end + local mctx = b.origin:clone() + mctx.invocation = m + rs.renderSpans(rc, ct.parse_span(r, mctx)) + end function rs.renderSpans(rc, sp, b, sec) + rc = rc or mkrc(b.origin) for i, v in ipairs(sp) do if type(v) == 'string' then - rc:add(v) + rc:span(v) elseif spanRenderers[v.kind] then spanRenderers[v.kind](rc, v, b, sec) end end end local blockRenderers = {} + function blockRenderers.label(rc, b, sec) + if ct.sec.is(b.captions) then + local sizes = {36,24,12,8,4,2} + local margins = {0,5,2,1,0.5} + local dedents = {2.5,1.3,0.8,0.4} + rc.prop.dsz = sizes[b.captions.depth] or 10 + rc.prop.underline = b.captions.depth < 4 + rc.prop.bold = b.captions.depth > 3 + rc.prop.margin = { + top = margins[b.captions.depth] or 0; + bottom = 0.1; + } + rc.prop.indent = -(dedents[b.captions.depth] or 0) + rc.prop.underline = true + rc.prop.chtitle = collectText(rc, b.spans, b.spec):compile() + if b.captions.depth == 1 then + rc.prop.breakBefore = true + end + rs.renderSpans(rc, b.spans, b, sec) + else + ss.bug 'tried to render label for an unknown object type':throw() + end + end function blockRenderers.paragraph(rc, b, sec) rs.renderSpans(rc, b.spans, b, sec) end - function rs.renderBlock(b, sec) - local rc = { - clone = function(self) - return { - clone = self.clone; - lines = self.lines; - prop = ss.clone(self.prop); - mk = self.mk; - add = self.add; - } - end; - lines = {}; - prop = {}; - mk = function(self, ln) - local p = ss.clone(self.prop) - p.txt = ln - return p + 
function rs.renderBlock(rc, b, sec, outerBlockRenderContext) + if blockRenderers[b.kind] then + local rcc = rc:block() + blockRenderers[b.kind](rcc, b, sec) + end + end + + rs.sanitize = gsan + + local skippedFirstPagebreak = doc.secorder[1]:visible() + local deferrer = ss.declare { + ident = 'groff-deferrer'; + mk = function(buf) return {ops={}, tgt=buf} end; + fns = { + esc = function(me, str) table.insert(me.ops, {0, str}) end; + req = function(me, str) table.insert(me.ops, {1, str}) end; + flush = function(me) + for i=#me.ops,1,-1 do + local d = me.ops[i] + if d[1] == 0 then + me.tgt:esc(d[2]) + elseif d[1] == 1 then + me.tgt:req(d[2]) + end + end + me.ops = {} end; - add = function(self, ln) - table.insert(self.lines, self:mk(ln)) - end; - } - if blockRenderers[b.kind] then - blockRenderers[b.kind](rc, b, sec) + }; + } + function rs.emitSpan(gtxt, s) + local defer = deferrer(gtxt) + if s.bold or s.emph then + if s.bold and s.emph then + gtxt:esc 'f(BI' + elseif s.bold then + gtxt:esc 'fB' + elseif s.emph then + gtxt:esc 'fI' + end + defer:esc'f[]' end - return rc.lines + + if s.color and opts.color then + gtxt:esc('m[' .. s.color .. 
']') + defer:esc('m[]') + end + if s.strike then + gtxt:req('ST "'..s.txt..'"') + else + gtxt:txt(s.txt) + end + defer:flush() + if s.div then + for div, body in pairs(s.div) do + if div == 'fn' then + gtxt:req 'ev footnote-env' + end + gtxt:req('boxa '..div) + gtxt:txt(body) + gtxt:raw '\n' + gtxt:req 'boxa' + if div == 'fn' then + gtxt:req 'ev' + gtxt:req 'nr footnote-pos (\\n[footnote-pos]u+\\n[dn]u)' + gtxt:req 'ch footnote-print -(\\n[footnote-pos]u+1c)' + end + end + end end + function rs.emitBlock(gtxt, b) + local didfinalbreak = false + local defer = deferrer(gtxt) + local ln = b.prop + if ln.chtitle then + gtxt:req('ds title '..ln.chtitle) + end + if ln.breakBefore then + if skippedFirstPagebreak then + gtxt:req 'bp' + else + skippedFirstPagebreak = true + end + end + if ln.indent then + if ln.indent < 0 then + gtxt:req('in '..tostring(ln.indent)..'m') + defer:req 'in' + gtxt:req('ll +'..tostring(-ln.indent)..'m') + defer:req 'll' + else + gtxt:req('in +'..tostring(ln.indent)..'m') + defer:req 'in' + end + defer:req 'br' + end + if ln.margin then + if ln.margin.top then + gtxt:req(string.format('sp %sm', ln.margin.top)) + end + end + + if ln.underline then + defer:esc("D'l \\n[.ll]u-\\n[.in]u 0'") + defer:esc"v'-0.5'" + defer:req'br' + end + + if ln.dsz and ln.dsz > 0 then + gtxt:req('ps +' .. tostring(ln.dsz) .. 'p') + defer:req('ps -' .. tostring(ln.dsz) .. 'p') + elseif ln.sz or ln.dsz then + if ln.sz and ln.sz <= 0 then + ln.origin:fail 'font sizes must be greater than 0' + end + gtxt:req('ps ' .. tostring(ln.sz or ln.dsz) ..'p') + if ln.dsz then + defer:req('ps +' .. tostring(0 - ln.dsz) .. 'p') + else + defer:req'ps' + end + end + + for i,s in pairs(b.spans) do + rs.emitSpan(gtxt, s) + end + - function rs.emitLine(ln) - local q = ss.strac() - if ln.dsz then - q('\\ps +' .. tostring(ln.dsz)) - elseif ln.sz then - q('\\ps ' .. 
tostring(ln.dsz)) + if ln.margin then + if ln.margin.bottom then + gtxt:req(string.format('sp %sm', ln.margin.bottom)) + end end - if ln.bold and ln.emph then - q '\\f(BI' - elseif ln.bold then - q '\\fB' - elseif ln.emph then - q '\\fI' - end + defer:flush() - - q(ln.txt) - - if ln.bold or ln.emph then - q'\\f[]' - end - - if ln.dsz then - q('.ps -' .. tostring(ln.dsz)) - elseif ln.sz then - q '.ps' - end - return q + if not ln.margin then gtxt:brk() end end local ir = {} for i, sec in ipairs(doc.secorder) do if sec.kind == 'ordinary' then - local blks = {} + local rc = mkrc() for j, b in ipairs(sec.blocks) do - local r = rs.renderBlock(b, sec) - if r then table.insert(blks, r) end + rs.renderBlock(rc, b, sec) end - table.insert(ir, blks) + table.insert(ir, {blocks = rc.blocks, src = sec}) end end - local rd = ss.strac() + local gd = gtxt() for i, s in ipairs(ir) do - for j, b in ipairs(s) do - for z, l in ipairs(b) do - rd(rs.emitLine(l)) - end - rd'\n' + for j, b in ipairs(s.blocks) do + rs.emitBlock(gd,b) end end local macs = ss.strac() for _, m in pairs(rs.macsNeeded.order) do - for _, ln in pairs(m) do macs(ln) end + for _,ln in pairs(rs.macsets[m]) do macs(ln) end + end + if rs.macsNeeded.map.color and opts.color then + for k,v in pairs(rs.colors) do + macs(mkColorDef(k,v)) + end + end + + local doctitle = '' if opts.title then + doctitle = opts.title + else + local top = math.huge + for i,s in ipairs(doc.secorder) do + if s.heading_node and s.depth < top then + top = s.depth + doctitle = collectText(mkrc():block(), s.heading_node.spans, s.heading_node, s):compile() + end + end end - return macs:compile'\n' .. rd:compile'' + macs('.ds doctitle '..doctitle) + + return macs:compile'\n' .. '\n' .. 
gd:compile() end Index: render/html.lua ================================================================== --- render/html.lua +++ render/html.lua @@ -8,11 +8,11 @@ local ct = require 'cortav' local ss = require 'sirsem' -- install rendering function for html -function ct.render.html(doc, opts) +function ct.render.html(doc, opts, setup) local doctitle = opts['title'] local f = string.format local getSafeID = ct.tool.namespace() local footnotes = {} @@ -419,10 +419,20 @@ stylesets = stylesets; stylesets_active = stylesNeeded; obj_htmlid = getSafeID; -- remaining fields added later } + + -- this is kind of gross but the context object belongs to the parser, + -- not the renderer, so that's not a suitable place for this information + doc.stage = { + kind = 'render'; + format = 'html'; + html_render_state = render_state_handle; + } + + setup(doc.stage) local renderJob = doc:job('render_html', nil, render_state_handle) doc.stage.job = renderJob; local runhook = function(h, ...) @@ -561,25 +571,13 @@ b.origin:fail('%s is not an object that can be embedded', t.ref) end end function span_renderers.var(v,b,s) - local val - if v.pos then - if not v.origin.invocation then - v.origin:fail 'positional arguments can only be used in a macro invocation' - elseif not v.origin.invocation.args[v.pos] then - v.origin.invocation.origin:fail('macro invocation %s missing positional argument #%u', v.origin.invocation.macro, v.pos) - end - val = v.origin.invocation.args[v.pos] - else - val = v.origin.doc:context_var(v.var, v.origin) - end - if v.raw then - return val - else - return htmlSpan(ct.parse_span(val, v.origin), b, s) + local r, raw = ct.expand_var(v) + if raw then return r else + return htmlSpan(r , b, s) end end function span_renderers.raw(v,b,s) return htmlSpan(v.spans, b, s) Index: sirsem.lua ================================================================== --- sirsem.lua +++ sirsem.lua @@ -1,7 +1,8 @@ -- [ʞ] sirsem.lua --- ~ lexu hale +-- ~ lexi hale +-- glowpelt (hsl 
conversion) -- ? utility library with functionality common to -- cortav.lua and its extensions -- from Ranuir "software utility" -- > local ss = require 'sirsem.lua' @@ -45,10 +46,14 @@ local nk,nv = fn(k,v) new[nk or k] = nv or v end return new end +function ss.tmap(fn, a, ...) + if a == nil then return end + return fn(a), ss.tmap(fn, ...) +end function ss.kfilter(list, fn) local new = {} for k, v in pairs(list) do if fn(k,v) then new[k] = v end @@ -548,10 +553,11 @@ local cls = setmetatable({ __name = c.ident; }, { __name = 'class'; __tostring = function() return c.ident or '(class)' end; + __index = c.cfns; }) cls.__call = c.call cls.__index = function(self, k) if c.default and c.default[k] then @@ -589,10 +595,18 @@ end if c.cast.number then cls.__tonumber = c.cast.number end end + + if c.op then + cls.__add = c.op.sum + cls.__sub = c.op.sub + cls.__div = c.op.div + cls.__mul = c.op.mul + cls.__concat = c.op.cat + end cls.mk = function(...) local val = setmetatable(c.mk and c.mk(...) or {}, cls) if c.init then for k,v in pairs(c.init) do @@ -887,10 +901,14 @@ me:react(sym) end end; }; } + +function ss.math.clamp(v, l, h) + return math.max(math.min(v, h or 1), l or 0) +end -- convenience buffer for holding strings under -- construction, accumulating and compiling then in -- as quick a way as lua permits ss.strac = ss.declare { @@ -937,5 +955,159 @@ table.insert(self.strs, 1, a) table.insert(self.strs, b) end; }; } + +-- color class based on c.hale.su/sorcery's, hsl conversion +-- code written by glowpelt. TODO switch to LCH +local function clip(v, ...) + if v == nil then return end + return math.max(0,math.min(0xFF,math.floor(v))), clip(...) +end; +local function bytefrac(f, ...) + if f == nil then return end + return clip(f*0xFF), bytefrac(...) 
+end +ss.color = ss.declare { + ident = 'color'; + mk = function(h,s,l,a) return { + hue = h or 0.0; + sat = s or 0.0; + lum = l or 0.0; + alpha = a or 1.0; + } end; + cfns = { + byteclip = clip; + bytefrac = bytefrac; + }; + cast = { + string = function(self) return self:hex() end; + number = function(self) return self:u32() end; + }; + op = { + sum = function(self, other) + if ss.color.is(other) then + local fac = ss.math.lerp(self.alpha, 1, other.alpha) + return self:blend(other, fac):warp(function(c) + c.alpha = ss.math.clamp(self.alpha+other.alpha) + end) + else -- color + number = brighter color + return self:warp(function(c) + c.lum = c.lum + other + end) + end + end; + mul = function(self, other) + if ss.color.is(other) then + ss.color.exn 'how the heck do you multiply in hsl anyway':throw() + else + return self:warp(function(c) + c.lum = c.lum * other + end) + end + end; + }; + fns = { + tuple = function(self) + return self.hue, self.sat, self.lum, self.alpha + end; + warp = function(self, func) + local n = self:clone() + func(n) + return n + end; + blend = function(self, other, fac) + return ss.color( + ss.math.lerp(fac, self.hue, other.hue), + ss.math.lerp(fac, self.sat, other.sat), + ss.math.lerp(fac, self.lum, other.lum), + ss.math.lerp(fac, self.alpha, other.alpha)) + end; + hex = function(self) + local r,g,b,a = bytefrac(self:rgb_t()) + if self.alpha == 1 then a = nil end + return string.format('#'..string.rep('%02x',a and 4 or 3), + r,g,b,a) + end; + u32 = function(self) + local r,g,b,a = bytefrac(self:rgb_t()) + return r<<24 | g << 16 | b << 8 | a + end; + bytes = function(self) + return { bytefrac(self:rgb_t()) } + end; + alt = function(self, fld, new) + if self[fld] then + return self:warp(function(c) c[fld]=new end) + else + ss.color.exn('no such field %s in color', fld):throw() + end + end; + rgb = function(self) + -- convenience function to get a standardized struct + local r,g,b,a = self:rgb_t() + return { + red = r; + green = g; + blue = b; 
+ alpha = a; + } + end; + rgb_t = function(self) + -- returns rgba values as a tuple + local value = function(n1, n2, hue) + if hue > 360 then + hue = hue - 360 + elseif hue < 0 then + hue = hue + 360 + end + if hue < 60 then + return n1 + (n2 - n1) * hue/60 + elseif hue < 180 then + return n2 + elseif hue < 240 then + return n1 + (n2 - n1) * (240 - hue)/60 + else + return n1 + end + end + local h,s,l,alpha = self:tuple() + local m2 + if l < 0.5 then + m2 = l * (1 + s) + else + m2 = l + s - l * s + end + local m1 = 2 * l - m2 + if s == 0 then + -- Achromatic, there is no hue + -- In book this errors if hue is not undefined, but we set hue to 0 in this case, not nil or something, so + return l, l, l, alpha + else + -- Chromatic case, so there is a hue + return + value(m1, m2, h + 120), + value(m1, m2, h), + value(m1, m2, h - 120), + alpha + end + end; + }; +}; +ss.color.exn = ss.exnkind 'color error' + +ss.cmdfmt = function(cmd, ...) + return string.format(cmd, ss.tmap(function(s) + if typeof(s) == 'string' then + return string.format("%q", s) + -- FIXME this is incredibly lazy and uses lua quoting, not + -- bourne shell quoting. it *will* cause problems if anything + -- exotic finds its way in and needs to be fixed. + -- TODO provide a proper popen in the C wrapper so wrapped + -- versions at least can launch programs in a sane and secure + -- way. + else + return s + end + end, ...)) +end