-- [ʞ] cortav.lua
-- ~ lexi hale <lexi@hale.su>
-- © EUPL v1.2
-- ? reference implementation of the cortav document language
-- \ from Ranuir "written document"
--
-- ! TODO refactor encoding logic. it's a complete
-- mess and i seem to have repeatedly gotten
-- confused about how it's supposed to work.
-- the whole shitshow needs to be replaced
-- with a clean, simple paradigm: documents
-- are translated to UTF8 on the way in, and
-- translate back out on the way out. trying
-- to cope with multiple simultaneous
-- encodings in memory is a disaster zone.
local ss = require 'sirsem'
-- aliases for commonly used sirsem funcs
local startswith = ss.str.begins
local dump = ss.dump
local declare = ss.declare
-- make this module available to require() when linked into a lua bytecode program with luac
local ct = ss.namespace 'cortav'
-- static package metadata, plus helpers that render version and
-- credit strings for display to the user
ct.info = {
	version = ss.version {0,1; 'devel'};
	package_name = 'cortav';
	contributors = {
		{ name = 'lexi hale', handle = 'velartrill';
		  mail = 'lexi@hale.su', homepage = 'https://hale.su' };
	};
	-- returns "<package_name> <version>"
	ident_string = function(i)
		return string.format('%s %s', i.package_name, i.version)
	end;
	-- returns the list of contributor records, each with a `role` field.
	-- core contributors default to the role 'core functionality'; every
	-- contributor named by a loaded extension is either merged into an
	-- existing record with the same handle (the role gains a
	-- "; <ext> extension" suffix) or appended as a new record
	credits = function(i)
		local all = ss.copy(i.contributors)
		for _,who in pairs(all) do
			who.role = who.role or 'core functionality'
		end
		for name,ext in pairs(ct.ext.loaded) do
			if ext.contributors then
				for _,c in pairs(ext.contributors) do
					local ofs, ref = ss.find(all, function(a)
						return a.handle == c.handle
					end)
					if ofs then
						ref.role = string.format('%s; %s extension', ref.role, name)
					else
						-- previously this cloned `ext.author` and discarded the
						-- result, so extension-only contributors never showed
						-- up; clone the contributor itself and record it
						local added = ss.clone(c)
						added.role = name .. ' extension'
						table.insert(all, added)
					end
				end
			end
		end
		return all
	end;
	-- formats a list of contributor records (as produced by `credits`)
	-- into one " ~ name “handle” <mail> (homepage): role" line each.
	-- note this takes the list directly and is not called as a method
	credits_ascii = function(contributors)
		local lines = {}
		for _, c in pairs(contributors) do
			local str
			if c.handle then
				str = string.format('%s “%s” <%s>', c.name, c.handle, c.mail)
			else
				str = string.format('%s <%s>', c.name, c.mail)
			end
			if c.homepage then
				str = string.format('%s (%s)', str, c.homepage)
			end
			if c.role then
				str = string.format('%s: %s', str, c.role)
			end
			lines[#lines + 1] = string.format(' ~ %s\n', str)
		end
		return table.concat(lines)
	end;
	-- returns the ident string followed by the formatted credits
	about = function(i)
		return i:ident_string() .. '\n' ..
			i.credits_ascii(i:credits())
	end;
}
-- table reserved for renderer backends; it is created empty here and
-- nothing in this part of the file populates it (presumably the
-- individual renderers register themselves further on -- TODO confirm)
ct.render = {}
do
	-- builds a message formatter for ss.exnkind: the returned function
	-- glues `prefix` in front of the caller's message and runs the
	-- combined string through string.format, so the leading format
	-- arguments fill the prefix and the rest fill the message
	local function prefixed(prefix)
		return function(msg, ...)
			return string.format(prefix .. msg, ...)
		end
	end

	-- exception kinds thrown throughout cortav
	ct.exns = {
		-- leading args: source file, line number
		tx = ss.exnkind('translation error', prefixed "(%s:%u) ");
		io = ss.exnkind('IO error', prefixed "<%s %s> ");
		cli = ss.exnkind 'command line parse error';
		-- leading arg: the offending mode name
		mode = ss.exnkind('bad mode', prefixed "mode “%s” ");
		unimpl = ss.exnkind 'feature not implemented';
		ext = ss.exnkind 'extension error';
		enc = ss.exnkind('encoding error', prefixed '[%s]');
		-- leading arg: the backend name
		rdr = ss.exnkind('could not render', prefixed '(backend %s)');
	}
end
-- parse context: tracks the source, current line, document and section
-- while a document is being read, and resolves IDs to objects/sections
ct.ctx = declare {
	mk = function(src) return {src = src} end;
	ident = 'context';
	cast = {
		-- renders as "file:line [generation]"
		string = function(me)
			return string.format("%s:%s [%u]", me.src.file, me.line, me.generation or 0)
		end;
	};
	-- copies every field of the old context into the clone and bumps
	-- the generation counter (a first clone gets generation 1)
	clonesetup = function(new, old)
		for k,v in pairs(old) do new[k] = v end
		if old.generation then
			new.generation = old.generation + 1
		else
			new.generation = 1
		end
	end;
	fns = {
		-- throws a translation error tagged with this context's file and
		-- line; `msg` is a format string consuming the trailing arguments
		fail = function(self, msg, ...)
			ct.exns.tx(msg, self.src.file, self.line or 0, ...):throw()
		end;
		-- appends `block` to the current section, stamping it with a
		-- clone of this context as its origin; returns the block
		insert = function(self, block)
			block.origin = self:clone()
			table.insert(self.sec.blocks,block)
			return block
		end;
		-- binds the context to `doc`, records `src` on the document and
		-- opens the anonymous toplevel section
		init = function(ctx, doc, src)
			ctx.line = 0
			ctx.doc = doc
			ctx.doc.src = src
			ctx.sec = doc:mksec() -- toplevel section
			ctx.sec.origin = ctx:clone()
		end;
		-- resolves `id` to (object, id, section). for a bare section
		-- reference the object is nil. lookup order: current section and
		-- its imports, the document's sections and globals, the site of
		-- macro invocation (if any), then parent documents. fails the
		-- translation if nothing matches
		ref = function(self,id)
			if self.invocation then
				-- allow IDs to contain template substitutions by mimicking the [#n] syntax
				id = id:gsub('%b[]', function(sp)
					-- should indirection be allowed here? TODO
					if sp:sub(2,2) == '#' then
						local n = tonumber(sp:sub(3,-2))
						if n == nil then
							self:fail('invalid template substitution “%s” in ID “%s”', sp, id)
						end
						local arg = self.invocation.args[n]
						if arg == nil then
							self:fail('template instantiation requires at least %u arguments (in ID “%s”)',n,id)
						end
						return arg
					else return sp end
				end)
			end
			-- looks `id` up starting from section `sec` (may be nil) of `doc`
			local function checkFromSec(sec,doc)
				if not id:find'%.' then
					-- unqualified ID. everything that needs a section is
					-- kept under the nil guard: the original only guarded
					-- the refs lookup and then indexed sec.imports anyway
					if sec then
						local rid = sec.refs[id]
						if rid then
							return rid, id, sec
						end
						local imported = sec.imports.objs[id]
						if imported then
							return imported.obj, id, imported.sec
						end
						for _, scope in ipairs(sec.imports.scope) do
							local rr, ri, rs = checkFromSec(scope, doc)
							if ri then return rr, ri, rs end
						end
					end
					if doc.sections[id] then
						return nil, id, doc.sections[id]
					end
					if doc.globals.objs[id] then
						local ol = doc.globals.objs[id]
						return ol.obj, id, ol.sec
					end
					for _, scope in ipairs(doc.globals.scope) do
						local rr, ri, rs = checkFromSec(scope, doc)
						if ri then return rr, ri, rs end
					end
				else
					-- qualified "section.ref" ID
					local secid, ref = string.match(id, "(.-)%.(.+)")
					local s = doc.sections[secid]
					if s then
						if s.refs[ref] then
							return s.refs[ref], ref, s
						end
					end
				end
			end
			local function scanParents(doc)
				for _, p in ipairs(doc.parents) do
					-- TODO figure out a way to ref the embedding section
					local o,i,s = checkFromSec(doc.defined_in_parent_section, p)
					if o or s then return o,i,s end
				end
				-- breadth-first search
				for _, p in ipairs(doc.parents) do
					local o,i,s = scanParents(p)
					if o or s then return o,i,s end
				end
			end
			local o,i,s = checkFromSec(self.sec, self.doc)
			if o or s then return o,i,s end
			-- nothing in the current section, but this ID could be looked
			-- up in the context of a macro expansion. if so, check the
			-- section of the site of invocation as well
			if self.invocation then
				local dp = id:find'%.'
				if dp == 1 then
					-- a leading dot names a ref in the invoking section
					local isec = self.invocation.origin.sec
					local ref = id:sub(2)
					if isec and isec.refs[ref] then
						return isec.refs[ref], ref, isec
					end
				elseif not dp then
					-- `rid` used to be assigned without `local`, leaking a
					-- global. NOTE(review): origin:ref() itself ends in
					-- fail(), so a miss there presumably throws before the
					-- parent scan below is reached -- confirm intent
					local rid = self.invocation.origin:ref(id)
					if rid then
						return rid, id, self.invocation.origin.sec
					end
				end
			end
			o,i,s = scanParents(self.doc)
			if o or s then return o,i,s end
			self:fail("ID “%s” does not name an object or section %s", id, self.invocation or "NIL")
		end
	};
}
-- a document section: an ordered list of blocks plus the references
-- and imports that are visible from inside it
ct.sec = declare {
	ident = 'section';
	mk = function()
		return {
			kind = 'ordinary';
			depth = 0;
			blocks = {};
			refs = {};
			imports = {objs={}, scope={}};
		}
	end;
	-- records the section ID; the depth is only overridden when given
	construct = function(self, id, depth)
		self.id = id
		self.depth = depth or self.depth
	end;
	fns = {
		-- true when the section holds at least one block that produces
		-- output. nonprinting sections are never visible
		visible = function(self)
			if self.kind == 'nonprinting' then return false end
			local hidden_kinds = {
				directive = true;
				resource = true;
				reference = true;
				['break'] = true;
			}
			for _,blk in pairs(self.blocks) do
				-- extensions that add invisible nodes to the AST must
				-- mark them as such for rendering to work properly!
				local hidden = hidden_kinds[blk.kind] or blk.invisible
				if not hidden then return true end
			end
			return false
		end;
	}
}
-- a document: its sections, metadata, variables, extension settings
-- and links to any parent documents it is embedded in
ct.doc = declare {
	ident = 'doc';
	fns = {
		-- creates a section, registers it by ID (when it has one) and
		-- appends it to the section order; returns the section
		mksec = function(self, id, depth)
			local o = ct.sec(id, depth)
			if id then self.sections[id] = o end
			table.insert(self.secorder, o)
			return o
		end;
		-- decides whether extension `name` may act on this document:
		-- it must be loaded and not inhibited; if the document needs or
		-- uses it the answer is yes, otherwise the extension's own
		-- `default` flag decides
		allow_ext = function(self,name)
			if not ct.ext.loaded[name] then return false end
			if self.ext.inhibit[name] then return false end
			if self.ext.need[name] or self.ext.use[name] then
				return true
			end
			return ct.ext.loaded[name].default
		end;
		-- looks up the context variable `var`. with `test` set, a miss
		-- returns false instead of failing the translation (or, for
		-- plain variables, instead of returning the empty string)
		context_var = function(self, var, ctx, test)
			local fail = function(...)
				if test then return false end
				ctx:fail(...)
			end
			-- asks each parent document for `name` in test mode.
			-- the loop key used to shadow the parameter, so parents were
			-- queried for their own index rather than the variable name
			local scanParents = function(name)
				for _,p in pairs(self.parents) do
					local v = p:context_var(name, ctx, true)
					if v ~= false then return v end
				end
			end
			if startswith(var, 'cortav.') then
				local v = var:sub(8)
				if v == 'page' then
					if ctx.page then return tostring(ctx.page)
					else return '(unpaged)' end
				elseif v == 'renderer' then
					if not self.stage then
						return fail 'document is not being rendererd'
					end
					return self.stage.format
				elseif v == 'datetime' then
					return os.date()
				elseif v == 'time' then
					return os.date '%H:%M:%S'
				elseif v == 'date' then
					return os.date '%A %d %B %Y'
				elseif v == 'id' then
					return 'cortav.lua (reference implementation)'
				elseif v == 'file' then
					return self.src.file
				else
					return fail('unimplemented predefined variable %s', var)
				end
			elseif startswith(var, 'env.') then
				local v = var:sub(5)
				local val = os.getenv(v)
				if not val then
					return fail('undefined environment variable %s', v)
				end
				-- the value was looked up but never returned before
				return val
			elseif self.stage and self.stage.kind == 'render'
			   and startswith(var, self.stage.format..'.') then
				-- TODO query the renderer somehow
				return fail('renderer %s does not implement variable %s', self.stage.format, var)
			elseif startswith(var, 'super.') then
				-- 'super.' is six characters long, so the name starts at 7
				-- (this used to reuse the offset of the 'cortav.' branch)
				local sp = scanParents(var:sub(7))
				if sp == nil then
					if test then return false else return '' end
				else
					return sp
				end
			elseif self.vars[var] then
				return self.vars[var]
			elseif ctx.invocation
			   and ctx.invocation.props
			   and ctx.invocation.props['.' .. var] then
				return ctx.invocation.props['.' .. var]
			elseif ctx.declaration
			   and ctx.declaration.props
			   and ctx.declaration.props['.' .. var] then
				return ctx.declaration.props['.' .. var]
			else
				local sp = scanParents(var)
				if sp then return sp end
				if test then return false end
				return '' -- is this desirable behavior?
			end
		end;
		job = function(self, name, pred, ...) -- convenience func
			return self.docjob:fork(name, pred, ...)
		end;
		sub = function(self, ctx)
			-- convenience function for single-inheritance structure
			-- sets up a doc/ctx pair for a subdocument embedded in the source
			-- of a greater document, pointing subdoc props to global tables/values
			local newdoc = ct.doc.mk(self)
			newdoc.meta = self.meta
			newdoc.ext = self.ext
			newdoc.enc = self.enc
			newdoc.stage = self.stage
			newdoc.defined_in_parent_section = ctx.sec
			-- vars are handled through proper recursion across all parents and
			-- are intentionally excluded here; subdocs can have their own vars
			-- without losing access to parent vars
			local nctx = ctx:clone()
			nctx:init(newdoc, ctx.src)
			nctx.line = ctx.line
			nctx.docDepth = (ctx.docDepth or 0) + ctx.sec.depth - 1
			return newdoc, nctx
		end;
	};
	-- the arguments become the document's parents
	mk = function(...) return {
		sections = {};
		globals = {objs={},scope={}};
		secorder = {};
		embed = {};
		meta = {};
		vars = {};
		parents = {...};
		ext = {
			inhibit = {};
			need = {};
			use = {};
		};
		enc = ss.str.enc.utf8;
	} end;
	-- every document owns a root extension job named 'doc'
	construct = function(me)
		me.docjob = ct.ext.job('doc', me, nil)
	end;
}
-- FP helper functions
-- curries string.format: returns a function that formats its
-- arguments with the format string `str`
local function fmtfn(str)
	local function apply(...)
		return string.format(str, ...)
	end
	return apply
end
-- extension registry; `loaded` maps an extension's ID to the extension table
ct.ext = { loaded = {} }
-- registers `ext` under its `id` in ct.ext.loaded. throws an extension
-- error if the ID is missing or has already been taken
function ct.ext.install(ext)
	local id = ext.id
	if not id then
		ct.exns.ext 'extension missing “id” field':throw()
	end
	local registry = ct.ext.loaded
	if registry[id] then
		ct.exns.ext('there is already an extension with ID “%s” loaded', id):throw()
	end
	registry[id] = ext
end
-- builds extension-query functions restricted to what `doc` allows.
-- with `doc` nil, every loaded extension is considered.
-- returns a table with:
--   each(...) iterator over loaded extensions that have a non-nil value
--             at the given key path; yields (value, extension)
--   hook(h, ...) calls hook `h` on every such extension and returns
--             the list of first return values
function ct.ext.bind(doc)
	local fns = {}
	function fns.each(...)
		-- ct.ext.loaded is keyed by extension ID (see ct.ext.install), so
		-- next() yields (id, extension). the previous code kept only the
		-- key and then indexed it as if it were the extension table, which
		-- meant no extension could ever be matched
		local id, ext
		local path = {...}
		return function()
			while true do
				id, ext = next(ct.ext.loaded, id)
				if id == nil then return nil end
				if doc == nil or doc:allow_ext(id) then
					local v = ss.walk(ext, table.unpack(path))
					if v ~= nil then
						return v, ext
					end
				end
			end
		end
	end
	function fns.hook(h, ...)
		-- this is the raw hook invocation function, used when hooks won't need
		-- private state to hold onto between invocation. if private state is
		-- necessary, construct a job instead
		local ret = {} -- for hooks that compile lists of responses from extensions
		for hook in fns.each('hook', h) do table.insert(ret,(hook(...))) end
		return ret
	end
	return fns
end
do
	-- document-independent variants of each/hook: use these functions
	-- when document restrictions don't matter
	local unrestricted = ct.ext.bind()
	ct.ext.each = unrestricted.each
	ct.ext.hook = unrestricted.hook
end
-- an extension job: a named unit of work that gives each participating
-- extension its own private state table and dispatches hooks to them
ct.ext.job = declare {
	ident = 'ext-job';
	init = {
		states = {};
	};
	-- name: job name, used as the prefix of hook names (see `hook`)
	-- doc:  document whose extension settings limit participation
	-- pred: optional filter deciding which extensions get a state
	-- ...:  forwarded to the job's 'init' hook
	construct = function(me,name,doc,pred,...)
		-- prepare contexts for relevant extensions
		me.name = name
		me.doc = doc -- for reqs + limiting
		for _, ext in pairs(ct.ext.loaded) do
			if pred == nil or pred(ext) then
				me.states[ext] = {}
			end
		end
		me:hook('init', ...)
	end;
	fns = {
		fork = function(me, name, pred, ...)
			-- generate a branch job linked to this job
			local branch = getmetatable(me)(name, me.doc, pred, ...)
			branch.parent = me
			return branch
		end;
		-- creates a delegate for hierarchical state access: a proxy for
		-- the job through which `ext` sees its own state as `.state`
		delegate = function(me, ext)
			local submethods = {
				-- returns a delegate for the same extension on the job
				-- `n` levels up the parent chain
				unwind = function(self, n)
					local function climb(dlg, job, n)
						if n == 0 then
							return job:delegate(dlg.extension)
						else
							return climb(dlg, job.parent, n-1)
						end
					end
					return climb(self._delegate_state, self._delegate_state.target, n)
				end;
			}
			local d = setmetatable({
				_delegate_state = {
					-- delegating from a delegate targets the underlying job
					target = (me._delegate_state and me._delegate_state.target) or me;
					extension = ext;
				};
			}, {
				__name = 'job:delegate';
				__index = function(self, key)
					local D = self._delegate_state
					if key == 'state' then
						return D.target.states[D.extension]
					elseif submethods[key] then
						return submethods[key]
					end
					return D.target[key]
				end;
				__newindex = function(self, key, value)
					local D = self._delegate_state
					if key == 'state' then
						D.target.states[D.extension] = value
					else
						-- mirror __index: any other field lives on the job
						-- under its own key. this used to store the value
						-- under the extension table, losing `key` entirely
						D.target[key] = value
					end
				end;
			})
			return d
		end;
		-- iterates the extensions participating in this job that the
		-- document allows and that have a truthy value at the given key
		-- path; yields (value, extension, state)
		each = function(me, ...)
			local ek
			local path = {...}
			return function()
				while true do
					ek = next(me.states, ek)
					if not ek then return nil end
					if me.doc:allow_ext(ek.id) then
						local v = ss.walk(ek, table.unpack(path))
						if v then
							return v, ek, me.states[ek]
						end
					end
				end
			end
		end;
		-- finds the single extension value at the given key path.
		-- returns nil if no extension defines it; a non-function value
		-- as is; a function wrapped so it receives the owner's delegate
		-- as its first argument (plus the owner and its state). throws
		-- if two extensions define the same path
		proc = function(me, ...)
			local p
			local owner
			local state
			for func, ext, s in me:each(...) do
				if p == nil then
					p = func
					owner = ext
					state = s
				else
					-- the exception table is ct.exns; `ct.exn` does not exist
					ct.exns.ext('extensions %s and %s define conflicting procedures for %s', owner.id, ext.id, table.concat({...},'.')):throw()
				end
			end
			if p == nil then return nil end
			if type(p) ~= 'function' then return p end
			return function(...)
				return p(me:delegate(owner), ...)
			end, owner, state
		end;
		hook = function(me, hook, ...)
			-- used when extensions may need to persist state across
			-- multiple functions or invocations. the hook looked up on
			-- each extension is named "<job name>_<hook>"; returns the
			-- list of first return values
			local ret = {}
			local hook_id = me.name ..'_'.. hook
			for hookfn, ext, state in me:each('hook', hook_id) do
				table.insert(ret, (hookfn(me:delegate(ext),...)))
			end
			return ret
		end;
	};
}
-- common renderer utility functions; helpers are attached to this
-- table as ct.tool.<name>
ct.tool = {}
-- splits the equation string `eqn` into a list of spans, grouping
-- consecutive characters of the same class. numerals become 'literal'
-- format spans, letters 'variable' and operators/symbols 'strong';
-- runs of anything else are emitted as plain strings. whitespace is
-- dropped and does not by itself end a run
function ct.tool.mathfmt(ctx, eqn)
	local class = ss.enum {'num','var','op'}
	local style_of = {
		[class.num] = 'literal';
		[class.var] = 'variable';
		[class.op]  = 'strong';
	}
	local spans = {}
	local run = ''      -- characters collected for the current run
	local run_class = 0 -- class of the current run; 0 = unclassified

	-- emits the current run (if any) and starts a fresh one
	local function flush()
		if run ~= '' then
			local span
			if run_class == 0 then
				span = run
			else
				local sty = style_of[run_class]
				if sty then
					span = {
						kind = 'format';
						style = sty;
						spans = {run};
					}
				end
			end
			if span then
				table.insert(spans, span)
			end
		end
		run = ''
		run_class = 0
	end

	local enc = ctx.doc.enc
	for ch in ss.str.each(enc, eqn) do
		local info = ss.str.classify(enc, ch)
		if not info.space then
			local this = 0
			if info.numeral then
				this = class.num
			elseif info.mathop or info.symbol then
				this = class.op
			elseif info.letter then
				this = class.var
			end
			if this ~= run_class then
				flush()
				run_class = this
			end
			run = run .. ch
		end
	end
	flush()
	return spans
end
-- some renderers need to be able to generate unique IDs for
-- objects, including ones that users have not assigned IDs
-- to, and objects with the same name in different unlabeled
-- sections. to handle this, we provide a "namespace" mechanism,
-- where some lua table (really its address in memory) is used
-- as a handle for the object and a unique ID is attached to it.
--
-- returns a function (obj, pfx) -> id:
--  * an object that has been seen before gets the same ID again
--  * an object whose own `id` is still free gets that ID back
--  * an object whose `id` is taken gets `pfx .. id .. '-%x'`
--  * an object without an `id` gets `pfx .. 'x-%x'`
-- where %x is the lowest hex counter that yields an unused ID
function ct.tool.namespace()
	local ids = {}         -- issued ID -> object
	local canonicalID = {} -- object -> issued ID
	return function(obj,pfx)
		pfx = pfx or ''
		if canonicalID[obj] then
			return canonicalID[obj]
		end
		local cid
		-- own IDs are registered bare (without the prefix), so the clash
		-- test has to look at the bare ID too. checking only pfx..id let
		-- two prefixed objects with the same ID both receive that ID
		if obj.id and (ids[pfx .. obj.id] or ids[obj.id]) then
			local base = pfx .. obj.id
			local i = 1
			repeat cid = base .. string.format('-%x', i)
				i = i + 1 until not ids[cid]
		else
			cid = obj.id
			if not cid then
				local i = 1
				repeat cid = string.format('%sx-%x', pfx, i)
					i = i + 1 until not ids[cid]
			end
		end
		ids[cid] = obj
		canonicalID[obj] = cid
		return cid
	end
end
-- renderer engines
do -- define span control sequences
	-- each parser below is called as parse(s, c): `s` is the text of a
	-- bracketed span with the control sequence already removed, `c` is
	-- the parse context. it returns the span node to insert

	-- parser producing a 'format' span of style `sty` around the
	-- recursively parsed text
	local function formatter(sty)
		return function(s,c)
			return {
				kind = 'format';
				style = sty;
				spans = ct.parse_span(s, c);
				origin = c:clone();
			}
		end
	end
	-- "<target> [text]": a link to `target` with optional link text
	local function insert_link(s, c)
		local to, t = s:match '^([^%s]+)%s*(.-)$'
		if not to then c:fail('invalid link syntax >%s', s) end
		if t == "" then t = nil end
		return {
			kind = 'link';
			spans = (t and t~='') and ct.parse_span(t, c) or {};
			ref = to;
			origin = c:clone();
		}
	end
	-- variable reference: numeric text is stored as `pos`, anything
	-- else as the variable name `var`; `raw` is carried through as given
	local function insert_var_ref(raw)
		return function(s, c)
			local pos = tonumber(s)
			return {
				kind = 'var';
				pos = pos;
				raw = raw;
				var = not pos and s or nil;
				origin = c:clone();
			}
		end
	end
	-- inline directive. the first word is split at its first break
	-- symbol into an extension name and a command; with `failthru` the
	-- remainder is also parsed as the spans to fall back on
	local function insert_span_directive(crit, failthru)
		return function(s,c)
			-- the original referred to undefined names here (`d`, `map`,
			-- `enc`) and concatenated with `+`, so it raised an error
			-- whenever a span directive was parsed
			local enc = c.doc.enc
			local args = ss.str.breakwords(enc, s, 1)
			local brkhash = {}
			for _,brk in ipairs {
				'.', ',', ':', ';', '!', '$', '&', '^',
				'/', '?', '@', '='
			} do
				brkhash[enc.encodeUCS(brk)] = true
			end
			local extname = ''
			local sym
			local cmd = ''
			for ch in ss.str.each(enc, args[1] or '') do
				if sym == nil then
					if brkhash[ch] then
						sym = ch
					else
						extname = extname .. ch
					end
				elseif brkhash[ch] then
					sym = sym .. ch
				else
					cmd = cmd .. ch
				end
			end
			if cmd == '' then cmd = nil end
			local spans
			if failthru then
				spans = ct.parse_span(args[2] or '', c)
			end
			return {
				kind = 'directive';
				ext = extname;
				cmd = cmd;
				args = args;
				crit = crit;
				failthru = failthru;
				spans = spans;
			}
		end
	end
	-- literal text with escapes resolved, wrapped as raw inside a
	-- 'literal' format span
	local function rawcode(s, c) -- raw
		local o = c:clone();
		local str = ''
		for ch, p in ss.str.each(c.doc.enc, s) do
			local q = p:esc()
			if q then
				str = str .. q
				p.next.byte = p.next.byte + #q
			else
				str = str .. ch
			end
		end
		return {
			kind = 'format';
			style = 'literal';
			spans = {{
				kind = 'raw';
				spans = {str};
				origin = o;
			}};
			origin = o;
		}
	end
	-- codepoint given in hexadecimal; `code` is nil if `s` is not hex
	local function unicodepoint(s,c)
		local cp = tonumber(s, 16)
		return {
			kind = 'codepoint';
			code = cp;
		}
	end
	-- the list is ordered, and longer sequences must precede their own
	-- prefixes (e.g. '##' before '#', '%!' before '%')
	ct.spanctls = {
		{seq = '!', parse = formatter 'emph'};
		{seq = '*', parse = formatter 'strong'};
		{seq = '~', parse = formatter 'strike'};
		{seq = '_', parse = formatter 'underline'};
		{seq = '+', parse = formatter 'insert'};
		{seq = '\\', parse = function(s, c) -- raw
			return {
				kind = 'raw';
				spans = {s};
				origin = c:clone();
			}
		end};
		{seq = '`', parse = formatter 'literal'};
		{seq = '"', parse = rawcode};
		{seq = '$', parse = formatter 'variable'};
		{seq = "'", parse = formatter 'super'};
		{seq = ',', parse = formatter 'sub'};
		{seq = '^', parse = function(s, c)
			-- TODO support for footnote sections
			local fn, t = s:match '^([^%s]+)%s*(.-)$'
			return {
				kind = 'footnote';
				spans = (t and t~='') and ct.parse_span(t, c) or {};
				ref = fn;
				origin = c:clone();
			}
		end};
		{seq = '=', parse = function(s,c) --math mode
			if c.doc.enc ~= ss.str.enc.ascii then
				-- swap the ASCII compose sequences for their symbols
				for _,v in pairs(ss.compseq.math) do
					local seq, utf8, html, cp = table.unpack(v)
					seq = seq:gsub('[-+.*?[%]%%]', '%%%0') -- >_<
					s = s:gsub(seq,c.doc.enc.encodeUCS(utf8))
				end
			end
			-- s=s:gsub('%^([0-9]+)', function(num)
			-- 	local sup = {'⁰','¹','²','³','⁴','⁵','⁶','⁷','⁸','⁹'};
			-- 	local r = ''
			-- 	for i=1,#num do
			-- 		r = r .. sup[1 + (num:byte(i) - 0x30)]
			-- 	end
			-- 	return r
			-- end)
			return {
				kind = 'math';
				original = s;
				spans = {s};
				origin = c:clone();
			};
		end};
		{seq = '&', parse = function(s, c)
			local r, t = s:match '^([^%s]+)%s*(.-)$'
			return {
				kind = 'deref';
				spans = (t and t ~= "") and ct.parse_span(t, c) or {};
				ref = r;
				origin = c:clone();
			}
		end};
		{seq = '>', parse = insert_link};
		{seq = '→', parse = insert_link};
		{seq = '🔗', parse = insert_link};
		{seq = '##', parse = insert_var_ref(true)};
		{seq = '#', parse = insert_var_ref(false)};
		{seq = 'U+', parse = unicodepoint};
		{seq = 'u+', parse = unicodepoint};
		{seq = 'U', parse = unicodepoint};
		{seq = 'u', parse = unicodepoint};
		{seq = '%%', parse = function (s,c)
			-- the '%%' has already been cut off by the caller, so the old
			-- pattern (which demanded it again) left `comment` nil. the
			-- old match is tried first so doubled markers act as before
			local com = s:match '^%%%%%s*(.*)$' or s:match '^%s*(.*)$'
			return {
				kind = 'comment';
				comment = com;
			}
		end};
		{seq = '%!', parse = insert_span_directive(true,false)};
		{seq = '%:', parse = insert_span_directive(false,true)};
		{seq = '%',  parse = insert_span_directive(false,false)};
	}
end
-- parses the inline text `str` into a list of spans: plain strings
-- interleaved with nodes for escapes, {macro} invocations, [bracketed]
-- control sequences and line breaks. `ctx` supplies the document
-- encoding and the origin stamped on each node. after parsing, the
-- document job's 'doc_macro_expand_span' hooks are run over the list
function ct.parse_span(str,ctx)
	-- extracts a start/stop-delimited substring from the head of `s`,
	-- turning a delimiter error into a translation failure
	local function delimited(start, stop, s)
		local r = { pcall(ss.str.delimit, nil, start, stop, s) }
		if r[1] then return table.unpack(r, 2) end
		-- fail() treats its first argument as a format string, so the
		-- error text is passed as data rather than as the format
		ctx:fail('%s', tostring(r[2]))
	end
	local buf = ""
	local spans = {}
	-- pushes the pending plain text (if any) as a string span
	local function flush()
		if buf ~= "" then
			-- for fn, ext in ctx.doc.docjob:each('hook','doc_meddle_string') do
			-- 	buf = fn(ctx.doc.docjob:delegate(ext), ctx, buf)
			-- end
			table.insert(spans, buf)
			buf = ""
		end
	end
	for c,p in ss.str.each(ctx.doc.enc,str) do
		local ba, ca, es = ctx.doc.enc.parse_escape(str:sub(p.byte))
		if es then
			-- escape sequence: emit the escaped text raw and skip past it
			flush()
			table.insert(spans, {
				kind = 'raw';
				spans = {es};
				origin = ctx:clone()
			})
			p.next.byte = p.next.byte + ba;
			p.next.code = p.next.code + ca;
		elseif c == '{' then
			-- macro invocation: {name arg|arg|...}
			flush()
			local substr, following = delimited('{','}',str:sub(p.byte))
			local splitstart, splitstop = substr:find'%s+'
			local id, argstr
			if splitstart then
				id, argstr = substr:sub(1,splitstart-1), substr:sub(splitstop+1)
			else
				id, argstr = substr, ''
			end
			local o = {
				kind = 'macro';
				macro = id;
				args = {};
				origin = ctx:clone();
			}
			-- split the arguments on '|'; a '\|' is a literal pipe
			do local start = 1
				local i = 1
				while i <= #argstr do
					while i<=#argstr and (argstr:sub(i,i) ~= '|' or argstr:sub(i-1,i) == '\\|') do
						i = i + 1
					end
					local arg = argstr:sub(start, i == #argstr and i or i-1)
					start = i+1
					arg=arg:gsub('\\|','|')
					table.insert(o.args, arg)
					i = i + 1
				end
			end
			p.next.byte = p.next.byte + following - 1
			table.insert(spans,o)
		elseif c == '[' then
			-- bracketed span: dispatch on its leading control sequence
			flush()
			local substr, following = delimited('[',']',str:sub(p.byte))
			p.next.byte = following + p.byte
			local found = false
			-- ct.spanctls is an ordered list in which longer sequences
			-- come before their prefixes, so it must be walked in order;
			-- pairs() does not promise any order
			for _,i in ipairs(ct.spanctls) do
				if ss.str.begins(substr, i.seq) then
					found = true
					table.insert(spans, i.parse(substr:sub(1+#i.seq), ctx))
					break
				end
			end
			if not found then
				buf = buf .. c
			end
		elseif c == '\n' then
			flush()
			table.insert(spans,{kind='line-break',origin=ctx:clone()})
		else
			buf = buf .. c
		end
	end
	flush()
	for hk, ext in ctx.doc.docjob:each('hook', 'doc_macro_expand_span') do
		hk(ctx.doc.docjob:delegate(ext), spans, {origin=ctx})
	end
	return spans
end
-- wraps a block constructor `fn(l,c,j,d)` into a line handler. when
-- the constructor returns a block, the handler stamps it with a clone
-- of the context `c` as origin, appends it to the block list `d`,
-- fires the job's 'block_insert' hook and, if the block carries spans,
-- the document job's 'meddle_span' hook. the handler returns nothing
local function blockwrap(fn)
	local function handler(l,c,j,d)
		local block = fn(l,c,j,d)
		if not block then return end
		block.origin = c:clone()
		table.insert(d, block)
		j:hook('block_insert', c, block, l)
		if block.spans then
			c.doc.docjob:hook('meddle_span', block.spans, block)
		end
	end
	return handler
end
-- line handler for paragraphs. a leading paragraph marker is removed
-- before the remaining text is parsed into spans. the control sequence
-- table also routes lines starting with '¶' and '❡' here, so those
-- markers are stripped as well as '.'; previously only '.' was, which
-- left the pilcrow in the paragraph text
local insert_paragraph = blockwrap(function(l,c)
	for _, marker in ipairs {'.', '¶', '❡'} do
		if l:sub(1, #marker) == marker then
			l = l:sub(#marker + 1)
			break
		end
	end
	return {
		kind = "paragraph";
		spans = ct.parse_span(l, c);
	}
end)
-- line handler for '--' subtitle/caption lines. what the line means
-- depends on the block before it:
--  * after a label: a subtitle attached to that label
--  * after a subtitle: another subtitle on the same label
--  * after a captionable block: a label captioning that block
-- anything else is a translation failure
local insert_caption = blockwrap(function(l,c,j,d)
	if next(d) == nil then
		c:fail 'subtitle in an unlabeled section is meaningless'
	end
	local last = d[#d]
	-- make the syntax a bit friendlier in edge cases: a caption that
	-- follows a property line applies to the resource owning that
	-- property. a reference without a resource is left alone so that it
	-- is rejected below; blindly taking `rsrc` left `last` nil and
	-- crashed on the next field access
	if last.kind == 'reference' and last.rsrc then last = last.rsrc end
	local text = l:sub(3):gsub("^%s+","")
	local me = {
		kind = 'subtitle';
		spans = ct.parse_span(text, c);
	}
	local captionable = {
		quote=true, aside=true,
		table=true, code=true,
		embed=true, link=true,
	}
	if last.kind == 'label' then
		me.attach = last;
	elseif last.kind == 'subtitle' then
		me.attach = last.attach;
	elseif captionable[last.kind] then
		me.kind = 'label'
		me.captions = last
		last.label_node = me
	else
		c:fail 'subtitle/attribution syntax in improper context'
	end
	return me
end)
-- returns a line handler that opens a new section. the depth is the
-- number of leading marker characters (#, § or ^); an ID may follow
-- them directly and the rest of the line is the title. `skind`, when
-- given, becomes the section kind; 'namespace' sections get no
-- heading block
local function insert_section(skind)
	return function(l,c,j)
		local marks, id, title = l:match '^([#§^]+)([^%s]*)%s*(.-)$'
		if id == nil or id == "" then
			id = nil
		elseif c.doc.sections[id] then
			c:fail('duplicate section name “%s”', id)
		end
		local level = utf8.len(marks)
		local sec = c.doc:mksec(id, level)
		sec.depth = level
		sec.origin = c:clone()
		sec.blocks = {}
		if skind then sec.kind = skind end
		local titled = title and title ~= ""
		if skind ~= "namespace" and titled then
			local heading = {
				kind = "label";
				spans = ct.parse_span(title,c);
				origin = sec.origin;
				captions = sec;
			}
			c.doc.docjob:hook('meddle_span', heading.spans, heading)
			table.insert(sec.blocks, heading)
			sec.heading_node = heading
		end
		-- later blocks go into the new section
		c.sec = sec
		j:hook('section_attach', c, sec)
	end
end
-- directive handler: stores a metadata field on the document. `w(1)`
-- yields the key and the value; the job's 'metadata_set' hook is then
-- fired with both
local function dsetmeta(w,c,j)
	local key, val = w(1)
	local meta = c.doc.meta
	meta[key] = val
	j:hook('metadata_set', key, val)
end
-- directive handler for extension control. `w(1)` yields the mode and
-- a whitespace-separated list of extension names; each name is
-- flagged in the document's use/need/inhibit set according to the
-- mode. any other mode changes nothing
local function dextctl(w,c)
	local set_for = {
		uses = 'use';
		needs = 'need';
		inhibits = 'inhibit';
	}
	local mode, exts = w(1)
	local set = set_for[mode]
	for name in exts:gmatch '([^%s]+)' do
		if set then
			c.doc.ext[set][name] = true
		end
	end
end
-- directive handler for `when`/`unless`. only the directive name is
-- looked at: `unless` sets c.hide_next, anything else clears it. the
-- condition and expression words are not evaluated here
local function dcond(w,c)
	local mode = w(2)
	if mode == 'unless' then
		c.hide_next = true
	else
		c.hide_next = false
	end
end
-- handlers for the built-in block directives, keyed by directive name.
-- each is called as handler(w, c, j): `w(n)` returns the first n words
-- of the directive line followed by the remainder, `c` is the parse
-- context and `j` the current job
ct.directives = {
	author = dsetmeta;
	license = dsetmeta;
	keywords = dsetmeta;
	desc = dsetmeta;
	when = dcond;
	unless = dcond;
	-- imports an object, or a whole section's scope, into the current
	-- section. "alias=name" imports the object under another name
	with = function(w,c)
		local _,str = w(2)
		local aka, name = str:match '^([^=]+)=(.+)$'
		if aka == nil then name=str end
		local o,id,s = c:ref(name)
		if o then -- import object
			c.sec.imports.objs[aka or name] = {obj=o, sec=s}
		else -- import scope
			if aka ~= nil then c:fail'alias is meaningless for scope import' end
			table.insert(c.sec.imports.scope, s)
		end
	end;
	-- like `with`, but document-wide. without an argument the current
	-- section's scope is made global
	global = function(w,c)
		local _,str = w(2)
		if str ~= nil and str ~= '' then
			-- the alias may be any length, as in `with`; the pattern
			-- used to accept single-character aliases only
			local aka, name = str:match '^([^=]+)=(.*)$'
			if aka == nil then name=str end
			local o,id,s = c:ref(name)
			if o then
				c.doc.globals.objs[aka or name] = {obj=o, sec=s}
			else
				if aka ~= nil then c:fail'alias is meaningless for scope import' end
				table.insert(c.doc.globals.scope, s)
			end
		else
			table.insert(c.doc.globals.scope, c.sec)
		end
	end;
	pragma = function(w,c)
	end;
	-- language control: `is` replaces the top of the language stack,
	-- `push`/`pop` grow and shrink it, `sec` tags the current section.
	-- afterwards c.lang mirrors the top of the stack
	lang = function(w,c)
		local _, op, l = w(2)
		local langstack = c.doc.stage.langstack
		if op == 'is' then
			langstack[math.max(1, #langstack)] = l
		elseif op == 'push' then
			table.insert(langstack, l)
		elseif op == 'pop' then
			if next(langstack) then
				langstack[#langstack] = nil
			end
		elseif op == 'sec' then
			c.sec.lang = l
		else c:fail('bad language directive “%s”', op) end
		c.lang = langstack[#langstack]
	end;
	-- sets the stage's expand_next counter to 1, or to 0 for `off`
	expand = function(w,c)
		local _, m = w(1)
		if m ~= 'off' then
			c.doc.stage.expand_next = 1
		else
			c.doc.stage.expand_next = 0
		end
	end;
}
-- line handler for table rows. the line is split into cells at each
-- delimiter: '|' or '│' opens a normal cell, '+' a header cell. a ':'
-- next to a delimiter sets the cell alignment (left after the opening
-- delimiter, right before the closing one, center for both), and a
-- backslash escapes '|', '+' and ':'. the finished row is appended to
-- the table directly above it, or starts a new table
local function insert_table_row(l,c,j)
	-- the control sequence table routes rows that start with the box
	-- drawing bar here, but it was not recognised as a delimiter, so such
	-- a row hit `buf.str` while `buf` was still nil
	local BOXBAR = '│'
	local row = {}
	local buf
	-- closes the current cell (if any) and opens an empty one
	local flush = function()
		if buf then
			buf.str = buf.str:gsub('%s+$','')
			table.insert(row, buf)
		end
		buf = { str = '' }
	end
	for ch,p in ss.str.each(c.doc.enc,l) do
		if ch == '|' or ch == BOXBAR
		or ch == '+' and (p.code == 1 or l:sub(p.byte-1,p.byte-1)~='\\') then
			flush()
			buf.header = ch == '+'
		elseif ch == ':' then
			local lst = l:sub(p.byte-#ch,p.byte-#ch)
			local nxt = l:sub(p.next.byte,p.next.byte)
			-- the box drawing bar is multibyte, so compare whole sequences
			local afterbar = p.byte > #BOXBAR
				and l:sub(p.byte-#BOXBAR, p.byte-1) == BOXBAR
			local beforebar =
				l:sub(p.next.byte, p.next.byte+#BOXBAR-1) == BOXBAR
			if lst == '|' or afterbar
			or lst == '+' and l:sub(p.byte-2,p.byte-2) ~= '\\' then
				buf.align = 'left'
			elseif nxt == '|' or nxt == '+' or beforebar then
				if buf.align == 'left' then
					buf.align = 'center'
				else
					buf.align = 'right'
				end
			else
				buf.str = buf.str .. ch
			end
		elseif ch:match '%s' then
			-- leading whitespace in a cell is dropped
			if buf.str ~= '' then buf.str = buf.str .. ch end
		elseif ch == '\\' then
			local nxt = l:sub(p.next.byte,p.next.byte)
			if nxt == '|' or nxt == '+' or nxt == ':' then
				-- escaped delimiter: keep it literally and skip over it
				buf.str = buf.str .. nxt
				p.next.byte = p.next.byte + #nxt
				p.next.code = p.next.code + 1
			else
				buf.str = buf.str .. ch
			end
		else
			buf.str = buf.str .. ch
		end
	end
	-- a trailing cell with no closing delimiter still counts
	if buf and buf.str ~= '' then flush() end
	for _,v in pairs(row) do
		v.spans = ct.parse_span(v.str, c)
		c.doc.docjob:hook('meddle_span', v.spans, v)
	end
	if next(c.sec.blocks) and c.sec.blocks[#c.sec.blocks].kind == 'table' then
		local tbl = c.sec.blocks[#c.sec.blocks]
		table.insert(tbl.rows, row)
		j:hook('block_table_attach', c, tbl, row, l)
		j:hook('block_table_row_insert', c, tbl, row, l)
	else
		local tbl = {
			kind = 'table';
			rows = {row};
			origin = c:clone();
		}
		table.insert(c.sec.blocks, tbl)
		j:hook('block_table_insert', c, tbl, l)
		j:hook('block_table_row_insert', c, tbl, tbl.rows[1], l)
	end
end
-- returns a line handler for link blocks introduced by `seq`. if
-- whitespace separates the sequence from the target, the target is a
-- URI; if the target follows immediately, it is a reference ID. the
-- rest of the line is parsed as the link text
local function insert_link_block(seq)
	return blockwrap(function(s,c)
		local body = s:sub(#seq+1)
		local gap, target, label = body:match('^(%s*)([^%s]*)%s*(.*)$')
		local node = { kind = 'link' }
		if gap ~= '' then
			node.uri = ss.uri(target) or nil
		else
			node.ref = target
		end
		node.spans = ct.parse_span(label, c)
		return node
	end)
end
ct.ctlseqs = {
{seq = '.', fn = insert_paragraph};
{seq = '¶', fn = insert_paragraph};
{seq = '❡', fn = insert_paragraph};
{seq = '#', fn = insert_section()};
{seq = '§', fn = insert_section()};
{seq = '^', fn = insert_section 'namespace'};
{seq = '--',fn = insert_caption};
{seq = '+', fn = insert_table_row};
{seq = '|', fn = insert_table_row};
{seq = '│', fn = insert_table_row};
{seq = '!', fn = function(l,c,j,d)
local last = d[#d]
local txt = l:match '^%s*!%s*(.-)$'
if (not last) or last.kind ~= 'aside' then
local aside = {
kind = 'aside';
lines = { ct.parse_span(txt, c) };
origin = c:clone();
}
c.doc.docjob:hook('meddle_span', aside.lines[1], aside)
table.insert(d,aside)
j:hook('block_aside_insert', c, aside, l)
j:hook('block_aside_line_insert', c, aside, aside.lines[1], l)
j:hook('block_insert', c, aside, l)
else
local sp = ct.parse_span(txt, c)
c.doc.docjob:hook('meddle_span', sp, last)
table.insert(last.lines, sp)
j:hook('block_aside_attach', c, last, sp, l)
j:hook('block_aside_line_insert', c, last, sp, l)
end
end};
{pred = function(s,c) return s:match'^[*:]' end,
fn = blockwrap(function(l,c) -- list
local stars = l:match '^([*:]+)'
local depth = utf8.len(stars)
local id, txt = l:sub(#stars+1):match '^(.-)%s*(.-)$'
local ordered = stars:sub(#stars) == ':'
if id == '' then id = nil end
return {
kind = 'list-item';
depth = depth;
ordered = ordered;
spans = ct.parse_span(txt, c);
}
end)};
{seq = '\t\t', fn = function(l,c,j,d)
local last = d[#d]
if (not last) or (last.kind ~= 'reference') then
c:fail('reference continuations must immediately follow a reference')
end
local str = l:match '^\t\t(.-)%s*$'
if last.val == '' then
last.val = str
else
last.val = last.val .. '\n' .. str
end
c.sec.refs[last.key] = last.val
end};
{seq = '\t', pred = function(l)
return (l:match '\t+([^:]+):%s*(.*)$')
end; fn = blockwrap(function(l,c,j,d)
local ref, val = l:match '\t+([^:]+):%s*(.*)$'
local last = d[#d]
local rsrc
if last and (last.kind == 'resource'
or last.kind == 'embed'
or last.kind == 'macro') then
last.props = last.props or {}
last.props[ref] = val
j:hook('set_prop', c, last, ref, val, l)
rsrc = last
elseif last and last.kind == 'reference' and last.rsrc then
last.rsrc.props[ref] = val
rsrc = last.rsrc
else
c.sec.refs[ref] = val
end
j:hook('section_ref_attach', c, ref, val, l)
return {
kind = 'reference';
rsrc = rsrc;
key = ref;
val = val;
}
end)};
{seq = '%', fn = function(l,c,j,d) -- directive
local crit, cmdline = l:match '^%%([!%%]?)%s*(.*)$'
local words = function(i)
local wds = {}
if i == 0 then return cmdline end
for w,pos in cmdline:gmatch '([^%s]+)()' do
table.insert(wds, w)
i = i - 1
if i == 0 then
table.insert(wds,(cmdline:sub(pos):match('^%s*(.-)%s*$')))
return table.unpack(wds)
end
end
end
local cmd, rest = words(1)
if ct.directives[cmd] then
ct.directives[cmd](words,c,j)
elseif cmd == c.doc.stage.mode['render:format'] then
-- this is a directive for the renderer; insert it into the tree as is
local dir = {
kind = 'directive';
critical = crit == '!';
words = words;
origin = c;
}
table.insert(d, dir)
j:hook('block_directive_render', j, c, dir)
elseif c.doc:allow_ext(cmd) then -- extension directives begin with their id
local ext = ct.ext.loaded[cmd]
if ext.directives then
local _, topcmd = words(2)
if ext.directives[topcmd] then
ext.directives[topcmd](j:delegate(ext), c, words)
elseif ext.directives[true] then -- catch-all
ext.directives[true](j:delegate(ext), c, words)
elseif crit == '!' then
c:fail('extension %s does not support critical directive %s', cmd, topcmd)
end
end
elseif crit == '!' then
c:fail('critical directive %s not supported',cmd)
end
end;};
{
	-- quote lines start with `>`, optionally followed directly by an id
	pred = function(line)
		return line:match '^>[^>%s]*%s*.*$'
	end,
	-- parses the text after the marker inside a sub-document; consecutive
	-- quote lines with no id, or with the same id, extend the previous
	-- quote block rather than opening a new one
	fn = function(line, origin, job, blocks)
		local tag, body = line:match '^>([^>%s]*)%s*(.*)$'
		if tag == '' then tag = nil end
		local prev = blocks[#blocks]
		local continues = prev and prev.kind == 'quote'
			and (tag == nil or tag == prev.id)
		local quote, qctx
		if continues then
			quote = prev
			qctx = quote.ctx
			qctx.line = origin.line -- is this enough??
		else
			local qdoc
			qdoc, qctx = origin.doc:sub(origin)
			quote = {
				kind = 'quote';
				doc = qdoc;
				ctx = qctx;
				id = tag;
				origin = origin;
			}
			table.insert(blocks, quote)
			job:hook('block_insert', origin, quote, line)
		end
		ct.parse_line(body, qctx, qctx.sec.blocks)
	end;
};
{seq = '~~~', fn = blockwrap(function(line, ctx, job)
	-- opens a code listing and switches the context into `code` mode.
	-- three header forms are recognised:
	--   ~~~                        no arguments
	--   ~~~ [lang] #id title ~~~   cortav style
	--   ~~~ lang                   markdown shorthand

	-- cuts the first match of `ptn` out of `str`; returns the matched
	-- text (nil when there is none) and the text that is left over
	local function cut(ptn, str)
		local from, to = str:find(ptn)
		if from == nil then return nil, str end
		return str:sub(from, to), str:sub(1, from-1) .. str:sub(to+1)
	end

	local lang, id, title
	if not line:match '^~~~%s*$' then
		if line:match '^~~~.*~~~%s*$' then -- CT style
			local args = line:match '^~~~%s*(.-)%s*~~~%s*$'
			lang, args = cut('%b[]', args)
			if lang then lang = lang:sub(2,-2) end -- drop the brackets
			id, title = cut('#[^%s]+', args)
			if id then id = id:sub(2) end -- drop the leading `#`
		elseif line:match '^~~~' then -- MD shorthand style
			lang = line:match '^~~~%s*(.-)%s*$'
		end
	end

	local mode = {
		kind = 'code';
		listing = {
			kind = 'listing';
			lang = lang, id = id, title = title and ct.parse_span(title,ctx);
			lines = {};
		}
	}
	-- consume one pending expansion request, if the stage holds any
	local stage = ctx.doc.stage
	if stage.expand_next and stage.expand_next > 0 then
		stage.expand_next = stage.expand_next - 1
		mode.expand = true
	end
	job:hook('mode_switch', ctx, mode)
	ctx.mode = mode
	if id then
		-- listings with an id are registered as section references
		if ctx.sec.refs[id] then ctx:fail('duplicate ID %s', id) end
		ctx.sec.refs[id] = ctx.mode.listing
	end
	return ctx.mode.listing;
end)};
{pred = function(s,c)
	-- horizontal rule: a line made up solely of rule characters.
	-- ASCII form: starts with `-` or `_`, then only `*`, `_`, `-` or
	-- whitespace up to the end of the line. the `$` anchor matters:
	-- without it a line such as `- some text` is taken for a rule and
	-- its text is discarded, which disagrees with the em-dash form below
	if s:match '^[%-_][*_%-%s]+$' then return true end
	if startswith(s, '—') then
		-- em-dash form: every character must be one of these
		local rulechars = {
			['—'] = true, ['-'] = true, [' '] = true;
			['*'] = true, ['_'] = true, ['\t'] = true;
		}
		for ch in ss.str.each(c.doc.enc,s) do
			if not rulechars[ch] then return false end
		end
		return true
	end
end; fn = blockwrap(function()
	return { kind = 'horiz-rule' }
end)};
-- the ASCII arrow and its Unicode equivalent are both handled by a function
-- obtained from insert_link_block; each entry passes its own leading sequence
{seq='=>', fn = insert_link_block '=>'};
{seq='⇒', fn = insert_link_block '⇒'};
{seq='@', fn=function(line, ctx, job, blocks)
	-- resource declaration: `@id src…`, optionally ending in an opening
	-- bracket sequence, in which case the following lines are collected
	-- as the resource's inline body until the mirrored sequence appears
	local partner = {
		['['] = ']'; [']'] = '[';
		['{'] = '}'; ['}'] = '{';
		['('] = ')'; [')'] = '(';
		['<'] = '>'; ['>'] = '<';
	}
	-- builds the closing sequence for an opening one: the bytes are
	-- reversed and every bracket is swapped for its counterpart
	local function mirror(open)
		return (open:reverse():gsub('.', partner))
	end

	local id, rest = line:match '^@([^%s]*)%s*(.*)$'
	-- a trailing word that starts with an opening bracket begins an inline body
	local at, opener = rest:match '()([{[(<][^%s]*)%s*$'
	local src = rest
	if opener then
		src = (rest:sub(1, at-1):gsub('%s+$',''))
	end
	if src == '' then src = nil end
	if id == '' then id = nil end

	local rsrc = {
		kind = 'resource';
		props = {src = src};
		id = id;
		origin = ctx;
	}
	if opener then
		rsrc.bracket = {
			open = opener;
			close = mirror(opener);
		}
		rsrc.raw = '';
		-- an inline body with no explicit source defaults to cortav text
		if src == nil then
			rsrc.props.src = 'text/x.cortav'
		end
	end

	if id then
		if ctx.sec.refs[id] then
			ctx:fail('an object with id “%s” already exists in that section',id)
		else
			ctx.sec.refs[id] = rsrc
		end
	end
	table.insert(blocks, rsrc)
	job:hook('block_insert', ctx, rsrc, line)

	if id == nil then --shorthand syntax
		-- an anonymous resource is embedded right where it is declared
		local embed = {
			kind = 'embed';
			rsrc = rsrc;
			origin = ctx;
			mode = 'inline';
		}
		table.insert(blocks, embed)
		job:hook('block_insert', ctx, embed, line)
	end

	if opener then
		-- from here on, lines are collected as the resource body
		ctx.mode = {
			kind = 'inline-rsrc';
			rsrc = rsrc;
			indent = nil;
			depth = 0;
		}
	end
end};
{seq='$', fn=blockwrap(function(line, ctx)
	-- block macro invocation: `$name arg|arg|…`
	local name, arglist = line:match('^%$([^%s]+)%s?(.-)$')
	if not name or name == '' then
		ctx:fail 'malformed macro block'
	end
	-- arguments are split on `|` using the document's encoding
	local enc = ctx.doc.enc
	local separator = enc.encodeUCS'|'
	local argv = ss.str.split(enc, arglist, separator, {esc=true})
	return {
		kind = 'macro';
		macro = name;
		args = argv;
	}
end)};
{seq='&', fn=blockwrap(function(line, ctx)
	-- embed block: `&ref caption`, where a `-` or `+` directly after the
	-- `&` selects the closed or open mode; the mode is inline otherwise
	local flag, ref, caption = line:match('^&([-+]?)([^%s]+)%s*(.-)%s*$')
	if not ref or ref == '' then
		ctx:fail 'malformed embed block'
	end
	if caption == '' then caption = nil end
	local mode_for = { ['-'] = 'closed', ['+'] = 'open' }
	return {
		kind = 'embed';
		ref = ref;
		cap = caption;
		mode = mode_for[flag] or 'inline';
	}
end)};
-- catch-all: this entry has neither `seq` nor `pred`, so any line that
-- reaches it is handed to insert_paragraph
{fn = insert_paragraph};
}
-- Feeds a single source line to the parser.
--   rawline: the line as read from the source (may be nil)
--   ctx:     parse context; its `mode` field selects between normal block
--            parsing, code listings, inline resource bodies and
--            extension-provided modes
--   dest:    block list that receives newly parsed blocks
-- Fires the `line_read` hook before the line is processed and the
-- `line_end` hook afterwards.
function ct.parse_line(rawline, ctx, dest)
	local job = ctx.doc.stage.job
	job:hook('line_read',ctx,rawline)
	local l
	if rawline then
		l = rawline:gsub("^ +","") -- trim leading spaces
	end
	if ctx.mode then
		if ctx.mode.kind == 'code' then
			if l and l:match '^~~~%s*$' then
				-- closing fence: leave code mode
				job:hook('block_listing_end',ctx,ctx.mode.listing)
				-- the context has to be passed here; the previous code
				-- passed an undefined global, so handlers received nil
				job:hook('mode_switch', ctx, nil)
				ctx.mode = nil
			else
				-- TODO handle formatted code
				local newline
				if ctx.mode.expand
					then newline = ct.parse_span(l, ctx)
					else newline = {l}
				end
				table.insert(ctx.mode.listing.lines, newline)
				job:hook('block_listing_newline',ctx,ctx.mode.listing,newline)
			end
		elseif ctx.mode.kind == 'inline-rsrc' then
			-- the body of an inline resource is accumulated verbatim from
			-- the untrimmed line until the closing bracket sequence
			local r = ctx.mode.rsrc
			if rawline then
				if rawline == r.bracket.close then
					if ctx.mode.depth == 0 then
						-- TODO how to handle depth?
						ctx.mode = nil
					end
				else
					if r.indent ~= nil then
						-- not the first body line: separate from the previous one
						r.raw = r.raw .. '\n'
					else
						-- the first body line decides whether the body is tab-indented
						r.indent = (rawline:sub(1,1) == '\t')
					end
					if r.indent == true then
						-- strip a single leading tab from indented bodies
						if rawline:sub(1,1) == '\t' then
							rawline = rawline:sub(2)
						end
					end
					r.raw = r.raw .. rawline
				end
			end
		else
			-- any other mode must be implemented by an extension
			local mf = job:proc('modes', ctx.mode.kind)
			if not mf then
				ctx:fail('unimplemented syntax mode %s', ctx.mode.kind)
			end
			mf(job, ctx, l, dest) --NOTE: you are responsible for triggering the appropriate hooks if you insert anything!
		end
	else
		if l and l ~= '' then
			-- runs the first entry of `seqs` whose leading sequence and
			-- predicate (each optional) accept the line; returns whether
			-- any entry ran
			local function tryseqs(seqs, ...)
				for _, i in pairs(seqs) do
					if ((not i.seq ) or startswith(l, i.seq)) and
					   ((not i.pred) or i.pred    (l, ctx  )) then
						i.fn(l, ctx, job, dest, ...)
						return true
					end
				end
				return false
			end
			-- built-in sequences take precedence over extension blocks
			if not tryseqs(ct.ctlseqs) then
				local found = false
				for eb, ext, state in job:each 'blocks' do
					if tryseqs(eb, state) then found = true break end
				end
				if not found then
					ctx:fail 'incomprehensible input line'
				end
			end
		else
			-- blank line: record one break, but never two in a row
			if next(dest) and dest[#dest].kind ~= 'break' then
				local brk = {kind='break', origin = ctx:clone()}
				job:hook('block_break', ctx, brk, l)
				table.insert(dest, brk)
			end
		end
	end
	job:hook('line_end',ctx,l)
end
-- Parses a whole document.
--   file:  object whose :lines() iterator yields the source lines
--   src:   passed to ct.ctx.mk and ctx:init when the root context is built
--   mode:  table of settings; this function reads 'meta:lang' and
--          'parse:enc', and the table is stored as the parse stage's mode
--   setup: optional callback, called with the root context before any
--          line is read
-- Works in three passes: every line is fed to ct.parse_line, then the
-- sources of resource blocks are resolved, then block macros are expanded.
-- Returns the resulting document (ctx.doc).
function ct.parse(file, src, mode, setup)
-- this object is threaded down through the parse tree
-- and copied to store information like the origin of the
-- element in the source code
local ctx = ct.ctx.mk(src)
ctx:init(ct.doc.mk(), src)
ctx.lang = mode['meta:lang']
if mode['parse:enc'] then
-- an explicitly requested encoding must be one known to ss.str.enc
local e = ss.str.enc[mode['parse:enc']]
if not e then
ct.exns.enc('requested encoding not supported',mode['parse:enc']):throw()
end
ctx.doc.enc = e
end
-- create states for extension hooks
local job = ctx.doc:job('parse',nil,ctx)
ctx.doc.stage = {
kind = 'parse';
mode = mode;
job = job;
langstack = {ctx.lang};
fontstack = {};
}
-- NOTE(review): is_whitespace is only referenced by the commented-out
-- code in the loop below
local function
is_whitespace(cp)
return ctx.doc.enc.iswhitespace(cp)
end
if setup then setup(ctx) end
-- pass 1: feed every source line to the line parser
for full_line in file:lines() do ctx.line = ctx.line + 1
-- local l
-- for p, c in utf8.codes(full_line) do
-- if not is_whitespace(c) then
-- l = full_line:sub(p)
-- break
-- end
-- end
ct.parse_line(full_line, ctx, ctx.sec.blocks)
end
-- pass 2: resolve the sources of every resource block. each line of the
-- `src` property describes one candidate source
for i, sec in ipairs(ctx.doc.secorder) do
for n, r in pairs(sec.blocks) do
if r.kind == 'resource' and r.props.src then
local lines = ss.str.breaklines(ctx.doc.enc, r.props.src)
local srcs = {}
for i,l in ipairs(lines) do
local args = ss.str.breakwords(ctx.doc.enc, l, 2, {escape=true})
-- at most three words are accepted, or two for a resource with an inline body
if #args > 3 or (r.raw and #args > 2) then
r.origin:fail('invalid syntax for resource %s', r.id or '(anonymous)')
end
local p_mode, p_mime, p_uri
if r.raw then
-- a resource with an inline body is embedded unless a mode is assigned below
p_mode = 'embed'
end
if #args == 1 then
if r.raw then -- inline content
p_mime = ss.mime(args[1])
else
p_uri = args[1]
end
elseif #args == 2 then
-- try to read the first word as a mime-type
local ok, m = pcall(ss.mime, args[1])
if r.raw then
if not ok then
r.origin:fail('invalid mime-type “%s”', args[1])
end
-- NOTE(review): args[1] has just been validated as a mime-type
-- yet is also stored as the mode here, and args[2] is never
-- read in this branch; this looks unintended, confirm the
-- expected word order for inline resources
p_mode, p_mime = args[1], m
else
if ok then
p_mime, p_uri = m, args[2]
else
p_mode, p_uri = table.unpack(args)
end
end
else
p_mode, p_mime, p_uri = table.unpack(args)
p_mime = ss.mime(args[2])
end
-- NOTE(review): the fallback mime below is a plain string while the
-- other paths store the result of ss.mime; the `<` comparison further
-- down then compares against a string, confirm ss.mime supports that
local resource = {
mode = p_mode;
mime = p_mime or 'text/x.cortav';
uri = p_uri and ss.uri(p_uri) or nil;
}
if resource.mode == 'embed' or resource.mode == 'auto' then
-- the resource must be available for reading within this job
-- open it and read its source into memory
if resource.uri then
if resource.uri:canfetch() then
resource.raw = resource.uri:fetch()
elseif resource.mode == 'auto' then
-- resource cannot be accessed; force linking
resource.mode = 'link'
else
r.origin:fail('resource “%s” wants to embed unfetchable URI “%s”',
r.id or "(anonymous)", tostring(resource.uri))
end
elseif r.raw then
resource.raw = r.raw
else
r.origin:fail('resource “%s” is not inline and supplies no URI',
r.id or "(anonymous)")
end
-- the resource has been cached. check the mime-type to see if
-- we need to parse it or if it is suitable as-is
if ss.mime 'text/x.cortav' < resource.mime then
-- cortav sources are parsed into a sub-document of their own
local sd, sc = r.origin.doc:sub(r.origin)
-- we store the resource block itself in the declaration
-- slot so that its properties (e.g. context variables)
-- can affect the way the document is rendered
sc.declaration = r
local lines = ss.str.breaklines(r.origin.doc.enc, resource.raw, {})
for i, ln in ipairs(lines) do
sc.line = sc.line + 1
ct.parse_line(ln, sc, sc.sec.blocks)
end
resource.doc = sd
end
end
table.insert(srcs, resource)
end
r.srcs = srcs
-- note that resources do not themselves have kinds. when a
-- document requests to insert a resource, the renderer must
-- iterate through the sources and find the first source it
-- is capable of emitting. this allows constructions like
-- emitting a video for HTML outputs, a photo for printers,
-- and a screenplay for tty/plaintext outputs.
end
end
end
-- expand block macros
for i, sec in ipairs(ctx.doc.secorder) do
for n, r in pairs(sec.blocks) do
if r.kind == 'macro' then
local mc = r.origin
local mac = mc:ref(r.macro)
if not mac then
mc:fail('no such reference or resource “%s”', r.macro)
end
-- the macro body is parsed into a sub-document of its own, with the
-- invoking block recorded on the sub-context
local subdoc, subctx = ctx.doc:sub(mc)
local rawbody
subctx.invocation = r
if type(mac) == 'string' then
-- a string reference is used as the body directly
rawbody = mac
elseif mac.raw then
-- otherwise the target must carry a raw inline body
rawbody = mac.raw
subctx.declaration = mac
else
mc:fail('block macro “%s” must be either a reference or an embedded text/x.cortav resource', r.macro)
end
local lines = ss.str.breaklines(ctx.doc.enc, rawbody)
for i, ln in ipairs(lines) do
ct.parse_line(ln, subctx, subctx.sec.blocks)
end
r.doc = subdoc
end
end
end
-- parsing is finished: drop the stage and fire the AST hook on the document job
ctx.doc.stage = nil
ctx.doc.docjob:hook('meddle_ast')
return ctx.doc
end
-- Resolves a variable node to its value.
-- Positional variables (`v.pos`) read the matching argument of the macro
-- invocation recorded on the node's origin; all other variables are
-- looked up through the origin document's context_var.
-- Returns the raw value and true when `v.raw` is set; otherwise returns
-- the value parsed into spans and false.
function ct.expand_var(v)
	local origin = v.origin
	local val
	if v.pos then
		local inv = origin.invocation
		if not inv then
			origin:fail 'positional arguments can only be used in a macro invocation'
		elseif not inv.args[v.pos] then
			inv.origin:fail('macro invocation %s missing positional argument #%u', inv.macro, v.pos)
		end
		val = inv.args[v.pos]
	else
		val = origin.doc:context_var(v.var, origin)
	end
	if v.raw then
		return val, true
	end
	return ct.parse_span(val, origin), false
end
-- Picks the spans used to display a link.
-- A link that carries spans of its own keeps them. Otherwise link.ref is
-- resolved through link.origin:ref, which yields three values:
--   * first value truthy: a one-element list holding the second value
--     (the id) is returned
--   * otherwise the third value is treated as a section, and the spans of
--     its heading node are returned when it has one
-- Returns nil when none of these apply, leaving the choice of
-- representation to the renderer.
function ct.linkspans(link)
	if link.spans and next(link.spans) then return link.spans end
	local o, id, s = link.origin:ref(link.ref)
	if o then --TODO
		return {id}
	end
	-- it's a section link. `s` is checked first: when the reference does
	-- not resolve there is no section, and indexing it would raise an
	-- error instead of reaching the fallback below
	if s and s.heading_node then
		return s.heading_node.spans
	end
	-- give up; the renderer decides how to represent this link
	return nil
end