-- cortav  cortav.lua at [5a78370f0f]
--
-- File cortav.lua artifact 940c3efd41 part of check-in 5a78370f0f


-- [ʞ] cortav.lua
--  ~ lexi hale <lexi@hale.su>
--  © AGPLv3
--  ? reference implementation of the cortav document language

local ss = require 'sirsem'
-- aliases for commonly used sirsem funcs
local startswith = ss.str.begins
local dump = ss.dump
local declare = ss.declare

-- make this module available to require() when linked into a lua bytecode program with luac
local ct = ss.namespace 'cortav'
-- package metadata, plus helpers that render version and credit strings
ct.info = {
	version = ss.version {0,1; 'devel'};
	package_name = 'cortav';
	contributors = {
		{ name = 'lexi hale', handle = 'velartrill';
		  mail = 'lexi@hale.su', homepage = 'https://hale.su' };
	};
	-- returns "<package name> <version>"
	ident_string = function(i)
		return string.format('%s %s', i.package_name, i.version)
	end;
	-- builds the complete contributor list: the core contributors (given
	-- a default role where none is set) followed by the contributors of
	-- every loaded extension. a contributor already present (matched by
	-- handle) has the extension appended to their role instead of being
	-- listed twice
	credits = function(i)
		local all = ss.copy(i.contributors)
		for _,who in pairs(all) do
			who.role = who.role or 'core functionality'
		end
		for name,ext in pairs(ct.ext.loaded) do
			if ext.contributors then
				for _,c in pairs(ext.contributors) do
					local ofs, ref = ss.find(all, function(a)
						return a.handle == c.handle
					end)
					if ofs then
						ref.role = string.format('%s; %s extension', ref.role, name)
					else
						-- previously unseen contributor: clone the entry so
						-- the extension's own table is not modified, and
						-- actually add it to the result
						local nc = ss.clone(c)
						nc.role = name .. ' extension'
						table.insert(all, nc)
					end
				end
			end
		end
		return all
	end;
	-- formats a contributor list (as produced by credits) as plain text,
	-- one " ~ name “handle” <mail> (homepage): role" line per entry
	credits_ascii = function(contributors)
		local lines = {}
		for _, c in pairs(contributors) do
			local str
			if c.handle then
				str = string.format('%s “%s” <%s>', c.name, c.handle, c.mail)
			else
				str = string.format('%s <%s>', c.name, c.mail)
			end
			if c.homepage then
				str = string.format('%s (%s)', str, c.homepage)
			end
			if c.role then
				str = string.format('%s: %s', str, c.role)
			end
			lines[#lines+1] = string.format(' ~ %s\n', str)
		end
		return table.concat(lines)
	end;
	-- identification string followed by the formatted credits
	about = function(i)
		return i:ident_string() .. '\n' ..
		       i.credits_ascii(i:credits())
	end;
}


-- table of renderer backends
ct.render = {}

-- exception kinds raised by cortav. where a formatter function is given,
-- it receives the message format followed by the remaining arguments and
-- prefixes the message with context taken from the leading arguments
-- (presumably the arguments supplied when the exception is created; see
-- ct.ctx.fns.fail for the translation-error case)
ct.exns = {
	tx = ss.exnkind('translation error', function(msg,...)
		return string.format("(%s:%u) "..msg, ...)
	end);
	io = ss.exnkind('IO error', function(msg, ...)
		return string.format("<%s %s> "..msg, ...)
	end);
	cli = ss.exnkind 'command line parse error';
	mode = ss.exnkind('bad mode', function(msg, ...)
		return string.format("mode “%s” "..msg, ...)
	end);
	unimpl = ss.exnkind 'feature not implemented';
	ext = ss.exnkind 'extension error';
	-- the prefix is separated from the message by a space, in line with
	-- the other kinds above
	enc = ss.exnkind('encoding error', function(msg, ...)
		return string.format('[%s] ' .. msg, ...)
	end);
	rdr = ss.exnkind('could not render', function(msg, ...)
		return string.format('(backend %s) '..msg, ...)
	end);
}

-- parsing context: records the source, document, section and line being
-- processed, and resolves IDs relative to that position
ct.ctx = declare {
	mk = function(src) return {src = src} end;
	ident = 'context';
	cast = {
		-- renders as "file:line [generation]"
		string = function(me)
			return string.format("%s:%s [%u]", me.src.file, me.line, me.generation or 0)
		end;
	};
	-- a clone receives every field of the original and a generation
	-- counter one higher than the original's (1 if it had none)
	clonesetup = function(new, old)
		for k,v in pairs(old) do new[k] = v end
		if old.generation then
			new.generation = old.generation + 1
		else
			new.generation = 1
		end
	end;
	fns = {
		-- raises a translation error tagged with the current file and line
		fail = function(self, msg, ...)
			ct.exns.tx(msg, self.src.file, self.line or 0, ...):throw()
		end;
		-- appends a block to the current section, stamping it with a
		-- snapshot of this context; returns the block
		insert = function(self, block)
			block.origin = self:clone()
			table.insert(self.sec.blocks,block)
			return block
		end;
		-- binds the context to a document and source and opens the
		-- document's toplevel section
		init = function(ctx, doc, src)
			ctx.line = 0
			ctx.doc = doc
			ctx.doc.src = src
			ctx.sec = doc:mksec() -- toplevel section
			ctx.sec.origin = ctx:clone()
		end;
		-- resolves an ID. returns three values: the referenced object
		-- (nil when the ID names a section), the ID local to its section,
		-- and the section. fails when nothing matches
		ref = function(self,id)
			if self.invocation then
				-- allow IDs to contain template substitutions by mimicking the [#n] syntax
				id = id:gsub('%b[]', function(sp)
					-- should indirection be allowed here? TODO
					if sp:sub(2,2) == '#' then
						local n = tonumber(sp:sub(3,-2))
						if n == nil then
							self:fail('invalid template substitution “%s” in ID “%s”', sp, id)
						end
						local arg = self.invocation.args[n]
						if arg == nil then
							self:fail('template instantiation requires at least %u arguments (in ID “%s”)',n,id)
						end
						return arg
					else return sp end
				end)

			end

			-- looks the ID up starting from `sec` (may be nil) within `doc`
			local function checkFromSec(sec,doc)
				if not id:find'%.' then
					if sec then
						local rid = sec.refs[id]
						if rid then
							return rid, id, sec
						end
					end

					if doc.sections[id] then
						return nil, id, doc.sections[id]
					end
				else
					-- "section.ref" form: split at the first dot
					local secid, ref = string.match(id, "(.-)%.(.+)")
					local s = doc.sections[secid]
					if s then
						if s.refs[ref] then
							return s.refs[ref], ref, s
						end
					end
				end
			end

			local function scanParents(doc)
				for _, p in ipairs(doc.parents) do
					-- TODO figure out a way to ref the embedding section
					local o,i,s = checkFromSec(nil, p)
					if o or s then return o,i,s end
				end
				-- breadth-first search
				for _, p in ipairs(doc.parents) do
					local o,i,s = scanParents(p)
					if o or s then return o,i,s end
				end
			end

			local o,i,s = checkFromSec(self.sec, self.doc)

			if o or s then return o,i,s end

			--nothing in the current section, but this ID could be looked up in the context of a macro expansion. if so, check section of the site of invocation as well
			if self.invocation then
				local dp = id:find'%.'
				if dp == 1 then
					-- leading dot: look only in the invoking section
					local isec = self.invocation.origin.sec
					local ref = id:sub(2)
					if isec and isec.refs[ref] then
						return isec.refs[ref], ref, isec
					end
				elseif not dp then
					-- kept local so that the lookup no longer leaks a
					-- global named `rid`
					local rid = self.invocation.origin:ref(id)
					if rid then
						return rid, id, self.invocation.origin.sec
					end
				end
			end

			o,i,s = scanParents(self.doc)
			if o or s then return o,i,s end

			self:fail("ID “%s” does not name an object or section", id)
		end
	};
}

-- a document section: an ordered list of blocks plus the references
-- defined inside it
ct.sec = declare {
	ident = 'section';
	mk = function()
		return {
			blocks = {};
			refs = {};
			depth = 0;
			kind = 'ordinary';
		}
	end;
	construct = function(self, id, depth)
		self.id = id
		self.depth = depth or self.depth
	end;
	fns = {
		-- reports whether the section would produce any visible output:
		-- false for nonprinting sections and for sections that hold only
		-- blocks of an invisible kind or blocks flagged `invisible`
		visible = function(self)
			if self.kind == 'nonprinting' then return false end
			local hidden_kinds = {
				['break'] = true;
				reference = true;
				resource = true;
				directive = true;
			}
			for _, blk in pairs(self.blocks) do
				-- extensions that add invisible nodes to the AST must
				-- mark them as such for rendering to work properly!
				local hidden = hidden_kinds[blk.kind] or blk.invisible
				if not hidden then return true end
			end
			return false
		end;
	}
}

-- a document: sections, metadata, variables, extension policy and the
-- list of parent documents it is embedded in
ct.doc = declare {
	ident = 'doc';
	fns = {
		-- creates a section, registers it under `id` (if given) and
		-- appends it to the section order; returns the section
		mksec = function(self, id, depth)
			local o = ct.sec(id, depth)
			if id then self.sections[id] = o end
			table.insert(self.secorder, o)
			return o
		end;
		-- decides whether the named extension may act on this document:
		-- it must be loaded and not inhibited; it is allowed if the
		-- document needs or uses it, otherwise the extension's own
		-- `default` flag decides
		allow_ext = function(self,name)
			if not ct.ext.loaded[name] then return false end
			if self.ext.inhibit[name] then return false end
			if self.ext.need[name] or self.ext.use[name] then
				return true
			end
			return ct.ext.loaded[name].default
		end;
		-- resolves a context variable. when `test` is true, failures
		-- and unknown variables yield false instead of raising an error
		-- or returning the empty string
		context_var = function(self, var, ctx, test)
			local fail = function(...)
				if test then return false end
				ctx:fail(...)
			end
			-- asks each parent document for `name`; returns the first
			-- answer that is not false. the loop variable must not
			-- shadow `name`, or the parents get queried with the index
			local scanParents = function(name)
				for _,p in pairs(self.parents) do
					local v = p:context_var(name, ctx, true)
					if v ~= false then return v end
				end
			end
			if startswith(var, 'cortav.') then
				local v = var:sub(8)
				if v == 'page' then
					if ctx.page then return tostring(ctx.page)
						else return '(unpaged)' end
				elseif v == 'renderer' then
					if not self.stage then
						return fail 'document is not being rendererd'
					end
					return self.stage.format
				elseif v == 'datetime' then
					return os.date()
				elseif v == 'time' then
					return os.date '%H:%M:%S'
				elseif v == 'date' then
					return os.date '%A %d %B %Y'
				elseif v == 'id' then
					return 'cortav.lua (reference implementation)'
				elseif v == 'file' then
					return self.src.file
				else
					return fail('unimplemented predefined variable %s', var)
				end
			elseif startswith(var, 'env.') then
				local v = var:sub(5)
				local val = os.getenv(v)
				if not val then
					return fail('undefined environment variable %s', v)
				end
				-- the looked-up value was previously dropped
				return val
			elseif self.stage and self.stage.kind == 'render' and startswith(var, self.stage.format..'.') then
				-- TODO query the renderer somehow
				return fail('renderer %s does not implement variable %s', self.stage.format, var)
			elseif startswith(var, 'super.') then
				-- 'super.' is six characters long, so the name starts at 7
				local sp = scanParents(var:sub(7))
				if sp == nil then
					if test then return false else return '' end
				else
					return sp
				end
			elseif self.vars[var] then
				return self.vars[var]
			elseif ctx.invocation
				and ctx.invocation.props
				and ctx.invocation.props['.' .. var] then
				return ctx.invocation.props['.' .. var]
			elseif ctx.declaration
				and ctx.declaration.props['.' .. var] then
				return ctx.declaration.props['.' .. var]
			else
				local sp = scanParents(var)
				if sp then return sp end
				if test then return false end
				return '' -- is this desirable behavior?
			end
		end;
		job = function(self, name, pred, ...) -- convenience func
			return self.docjob:fork(name, pred, ...)
		end;
		sub = function(self, ctx)
			-- convenience function for single-inheritance structure
			-- sets up a doc/ctx pair for a subdocument embedded in the source
			-- of a greater document, pointing subdoc props to global tables/values
			local newdoc = ct.doc.mk(self)
			newdoc.meta = self.meta
			newdoc.ext = self.ext
			newdoc.enc = self.enc
			newdoc.stage = self.stage
			-- vars are handled through proper recursion across all parents and
			-- are intentionally excluded here; subdocs can have their own vars
			-- without losing access to parent vars
			local nctx = ctx:clone()
			nctx:init(newdoc, ctx.src)
			nctx.line = ctx.line
			nctx.docDepth = (ctx.docDepth or 0) + ctx.sec.depth - 1
			return newdoc, nctx
		end;
	};
	-- the arguments become the new document's parents
	mk = function(...) return {
		sections = {};
		secorder = {};
		embed = {};
		meta = {};
		vars = {};
		parents = {...};
		ext = {
			inhibit = {};
			need = {};
			use = {};
		};
		enc = ss.str.enc.utf8;
	} end;
	construct = function(me)
		me.docjob = ct.ext.job('doc', me, nil)
	end;
}

-- FP helper functions

-- returns a function that formats its arguments with the fixed format
-- string `str`
local function fmtfn(str)
	return function(...) return str.format(str, ...) end
end

-- extension registry; `loaded` maps extension IDs to extension tables
ct.ext = { loaded = {} }

-- registers an extension under its `id` field. raises an extension
-- error when the field is missing or the ID is already taken
function ct.ext.install(ext)
	local id = ext.id
	if not id then
		ct.exns.ext('extension missing “id” field'):throw()
	end
	local registry = ct.ext.loaded
	if registry[id] then
		ct.exns.ext('there is already an extension with ID “%s” loaded', id):throw()
	end
	registry[id] = ext
end

-- builds extension-query functions bound to `doc`. when `doc` is nil
-- every loaded extension is considered; otherwise only those the
-- document allows
function ct.ext.bind(doc)
	local fns = {}
	-- iterator over loaded extensions. the arguments form a path that is
	-- walked into each extension table; each step yields the value found
	-- there (when not nil) and the extension it belongs to
	function fns.each(...)
		local id, ext
		local path = {...}
		return function()
			while true do
				-- next() yields the key (the extension ID) first and the
				-- extension table second; both are needed below
				id, ext = next(ct.ext.loaded, id)
				if id == nil then return nil end
				if doc == nil or doc:allow_ext(id) then
					local v = ss.walk(ext, table.unpack(path))
					if v ~= nil then
						return v, ext
					end
				end
			end
		end
	end

	function fns.hook(h, ...)
		-- this is the raw hook invocation function, used when hooks won't need
		-- private state to hold onto between invocation. if private state is
		-- necessary, construct a job instead
		local ret = {} -- for hooks that compile lists of responses from extensions
		for hook in fns.each('hook', h) do table.insert(ret,(hook(...))) end
		return ret
	end

	return fns
end

do local globalfns = ct.ext.bind()
	-- use these functions when document restrictions don't matter
	ct.ext.each, ct.ext.hook = globalfns.each, globalfns.hook
end

-- a job gives each participating extension a private state table and
-- dispatches hooks and procedures to the extensions a document allows
ct.ext.job = declare {
	ident = 'ext-job';
	init = {
		states = {};
	};
	construct = function(me,name,doc,pred,...)
		-- prepare contexts for relevant extensions
		me.name = name
		me.doc = doc -- for reqs + limiting
		for _, ext in pairs(ct.ext.loaded) do
			if pred == nil or pred(ext) then
				me.states[ext] = {}
			end
		end
		me:hook('init', ...)
	end;
	fns = {
		fork = function(me, name, pred, ...)
			-- generate a branch job linked to this job
			local branch = getmetatable(me)(name, me.doc, pred, ...)
			branch.parent = me
			return branch
		end;
		delegate = function(me, ext) -- creates a delegate for state access
			local submethods = {
				-- returns a delegate for the same extension on the job
				-- `n` parent links above the delegate's target
				unwind = function(self, n)
					local function
					climb(dlg, job, n)
						if n == 0 then
							return job:delegate(dlg.extension)
						else
							return climb(dlg, job.parent, n-1)
						end
					end

					return climb(self._delegate_state, self._delegate_state.target, n)
				end;
			}
			local d = setmetatable({
				_delegate_state = {
					target = (me._delegate_state and me._delegate_state.target) or me;
					extension = ext;
				};
			}, {
				__name = 'job:delegate';
				-- `state` reads the extension's private state; anything
				-- else is a submethod or is forwarded to the target job
				__index = function(self, key)
					local D = self._delegate_state
					if key == 'state' then
						return D.target.states[self._delegate_state.extension]
					elseif submethods[key] then
						return submethods[key]
					end
					return D.target[key]
				end;
				-- mirror of __index: `state` replaces the private state,
				-- any other key is written through to the target job
				-- under that same key
				__newindex = function(self, key, value)
					local D = self._delegate_state
					if key == 'state' then
						D.target.states[D.extension] = value
					else
						D.target[key] = value
					end
				end;
			});
			return d;
		end;
		-- iterator over the job's extensions that the document allows.
		-- walks the given path into each one and yields the value found
		-- (when truthy), the extension, and its state
		each = function(me, ...)
			local ek
			local path = {...}
			return function()
				while true do
					ek = next(me.states, ek)
					if not ek then return nil end
					if me.doc:allow_ext(ek.id) then
						local v = ss.walk(ek, table.unpack(path))
						if v then
							return v, ek, me.states[ek]
						end
					end
				end
			end
		end;
		-- finds the single extension value at the given path. raises an
		-- extension error if more than one extension defines it. a
		-- function is returned wrapped so that it receives a delegate
		-- as its first argument, along with its owner and state; other
		-- values are returned as they are
		proc = function(me, ...)
			local p
			local owner
			local state
			for func, ext, s in me:each(...) do
				if p == nil then
					p = func
					owner = ext
					state = s
				else
					ct.exns.ext('extensions %s and %s define conflicting procedures for %s', owner.id, ext.id, table.concat({...},'.')):throw()
				end
			end
			if p == nil then return nil end
			if type(p) ~= 'function' then return p end
			return function(...)
				return p(me:delegate(owner), ...)
			end, owner, state
		end;
		hook = function(me, hook, ...)
			-- used when extensions may need to persist state across
			-- multiple functions or invocations
			local ret = {}
			local hook_id = me.name ..'_'.. hook
			for hookfn, ext in me:each('hook', hook_id) do
				table.insert(ret, (hookfn(me:delegate(ext),...)))
			end
			return ret
		end;
	};
}

-- common renderer utility functions
ct.tool = {}

-- splits an equation string into spans by character class: runs of
-- numerals become 'literal' format spans, letters 'variable', math
-- operators and symbols 'strong'; any other non-space run is kept as a
-- plain string. whitespace is dropped
function ct.tool.mathfmt(ctx, eqn)
	local enc = ctx.doc.enc
	local m = ss.enum {'num','var','op'}
	local spans = {}
	local buf = ''
	local lsc = 0 -- class of the run being accumulated; 0 = unclassified

	-- wraps the pending run in a format span of the given style
	local function styled(style)
		return {
			kind = 'format';
			style = style;
			spans = {buf};
		}
	end

	-- emits the pending run (if any) and resets the accumulator
	local function flush()
		if buf ~= '' then
			local o
			if     lsc == 0     then o = buf
			elseif lsc == m.num then o = styled 'literal'
			elseif lsc == m.var then o = styled 'variable'
			elseif lsc == m.op  then o = styled 'strong'
			end
			if o then
				table.insert(spans, o)
			end
		end
		buf = ''
		lsc = 0
	end

	for ch in ss.str.each(enc, eqn) do
		local cl = ss.str.classify(enc, ch)
		if not cl.space then
			local nc = 0
			if cl.numeral then
				nc = m.num
			elseif cl.mathop or cl.symbol then
				nc = m.op
			elseif cl.letter then
				nc = m.var
			end
			-- a change of class ends the current run
			if nc ~= lsc then
				flush()
				lsc = nc
			end
			buf = buf .. ch
		end
	end
	flush()
	return spans
end

function ct.tool.namespace()
-- some renderers need to be able to generate unique IDs for
-- objects, including ones that users have not assigned IDs
-- to, and objects with the same name in different unlabeled
-- sections. to handle this, we provide a "namespace" mechanism,
-- where some lua table (really its address in memory) is used
-- as a handle for the object and a unique ID is attached to it.
-- if the object has an ID of its own that is still free, it is
-- returned as is; if it is already taken, a numeric suffix is
-- appended to the prefixed ID; an object with no ID receives a
-- generic id of the form `x-%x` (prefixed), where %x is a hex
-- integer that increments until an unused ID is found.
--
-- returns a function (obj, pfx) -> id. repeated calls with the
-- same object always return the same id.
	local ids = {}
	local canonicalID = {}
	return function(obj,pfx)
		pfx = pfx or ''
		if canonicalID[obj] then
			return canonicalID[obj]
		elseif obj.id and (ids[pfx .. obj.id] or ids[obj.id]) then
			-- first-come objects are registered under their bare ID
			-- (see the branch below), so the bare ID must be checked as
			-- well as the prefixed one; otherwise a non-empty prefix
			-- lets two objects with the same ID share a canonical ID
			local objid = pfx .. obj.id
			local newid
			local i = 1
			repeat newid = objid .. string.format('-%x', i)
				i = i + 1 until not ids[newid]
			ids[newid] = obj
			canonicalID[obj] = newid
			return newid
		else
			local cid = obj.id
			if not cid then
				local i = 1
				repeat cid = string.format('%sx-%x', pfx, i)
					i = i + 1 until not ids[cid]
			end
			ids[cid] = obj
			canonicalID[obj] = cid
			return cid
		end
	end
end

-- renderer engines

do -- define span control sequences
	-- returns a span parser producing a 'format' node of style `sty`
	-- whose children are the parsed contents of the span
	local function formatter(sty)
		return function(s,c)
			local node = { kind = 'format', style = sty }
			node.spans = ct.parse_span(s, c)
			node.origin = c:clone()
			return node
		end
	end
	-- parses "<target> [label…]" into a 'link' node. the label, when
	-- present, is parsed as spans; otherwise the span list is empty
	local function insert_link(s, c)
		local target, label = s:match '^([^%s]+)%s*(.-)$'
		if target == nil then c:fail('invalid link syntax >%s', s) end
		local spans
		if label ~= nil and label ~= '' then
			spans = ct.parse_span(label, c) or {}
		else
			spans = {}
		end
		local node = { kind = 'link', spans = spans, ref = target }
		node.origin = c:clone()
		return node
	end
	-- returns a span parser for variable references. a numeric span
	-- body is stored as the position `pos`; anything else is stored as
	-- the variable name `var`. `raw` is copied onto the node unchanged
	local function insert_var_ref(raw)
		return function(s, c)
			local index = tonumber(s)
			local name
			if not index then name = s end
			local node = { kind = 'var', pos = index, raw = raw, var = name }
			node.origin = c:clone()
			return node
		end
	end
	-- returns a span parser for inline extension directives. the first
	-- word of the span is split into an extension name, a run of break
	-- symbols and a command; the rest of the span is the argument.
	-- `crit` and `failthru` are copied onto the node; with `failthru`
	-- the argument is also parsed as spans
	local function insert_span_directive(crit, failthru)
		return function(s,c)
			local enc = c.doc.enc
			local args = ss.str.breakwords(enc, s, 1)
			-- characters that separate the extension name from the command
			local brkhash = {}
			for _, b in ipairs {
				'.', ',', ':', ';', '!', '$', '&', '^',
				'/', '?', '@', '='
			} do
				brkhash[enc.encodeUCS(b)] = true
			end

			local extname = ''
			local sym
			local cmd = ''
			for ch in ss.str.each(enc, args[1] or '') do
				if sym == nil then
					if brkhash[ch] then
						sym = ch
					else
						extname = extname .. ch
					end
				elseif brkhash[ch] then
					sym = sym .. ch
				else
					cmd = cmd .. ch
				end
			end
			if cmd == '' then cmd = nil end
			local spans if failthru then
				-- a directive without an argument has no second word
				spans = ct.parse_span(args[2] or '', c)
			end
			return {
				kind = 'directive';
				ext = extname;
				cmd = cmd;
				args = args;
				crit = crit;
				failthru = failthru;
				spans = spans;
				origin = c:clone();
			}
		end
	end
	-- parses a span as raw literal text: each character is passed
	-- through unless the iterator position reports an escape via
	-- `esc()`, in which case the escape result is used and the iterator
	-- is advanced by its length. yields a 'literal' format node wrapping
	-- one 'raw' node
	local function rawcode(s, c) -- raw
		local origin = c:clone()
		local pieces = {}
		for ch, pos in ss.str.each(c.doc.enc, s) do
			local esc = pos:esc()
			if esc then
				pieces[#pieces+1] = esc
				pos.next.byte = pos.next.byte + #esc
			else
				pieces[#pieces+1] = ch
			end
		end
		local raw = {
			kind = 'raw';
			spans = {table.concat(pieces)};
			origin = origin;
		}
		return {
			kind = 'format';
			style = 'literal';
			spans = {raw};
			origin = origin;
		}
	end
	-- span control sequences, tried in order against the start of a
	-- bracketed span; longer sequences must precede their prefixes
	-- (e.g. '##' before '#'). each parser receives the span text with
	-- the sequence already removed, plus the context
	ct.spanctls = {
		{seq = '!', parse = formatter 'emph'};
		{seq = '*', parse = formatter 'strong'};
		{seq = '~', parse = formatter 'strike'};
		{seq = '+', parse = formatter 'insert'};
		{seq = '\\', parse = function(s, c) -- raw
			return {
				kind = 'raw';
				spans = {s};
				origin = c:clone();
			}
		end};
		{seq = '`', parse = formatter 'literal'};
		{seq = '"', parse = rawcode};
		{seq = '$', parse = formatter 'variable'};
		{seq = '^', parse = function(s, c)
		-- TODO support for footnote sections
			local fn, t = s:match '^([^%s]+)%s*(.-)$'
			return {
				kind = 'footnote';
				spans = (t and t~='') and ct.parse_span(t, c) or {};
				ref = fn;
				origin = c:clone();
			}
		end};
		{seq = '=', parse = function(s,c) --math mode
			-- substitute typographic operators and superscript exponents
			local tx = {
				['%*'] = '×';
				['/'] = '÷';
			}
			for k,v in pairs(tx) do s = s:gsub(k,v) end
			local sup = {'⁰','¹','²','³','⁴','⁵','⁶','⁷','⁸','⁹'};
			s=s:gsub('%^([0-9]+)', function(num)
				local r = ''
				for i=1,#num do
					r = r .. sup[1 + (num:byte(i) - 0x30)]
				end
				return r
			end)
			--TODO
			return {
				kind = 'math';
				original = s;
				spans = {s};
				origin = c:clone();
			};
		end};
		{seq = '&', parse = function(s, c)
			local r, t = s:match '^([^%s]+)%s*(.-)$'
			return {
				kind = 'deref';
				spans = (t and t ~= "") and ct.parse_span(t, c) or {};
				ref = r;
				origin = c:clone();
			}
		end};
		{seq = '>', parse = insert_link};
		{seq = '→', parse = insert_link};
		{seq = '🔗', parse = insert_link};
		{seq = '##', parse = insert_var_ref(true)};
		{seq = '#', parse = insert_var_ref(false)};
		{seq = '%%', parse = function (s,c)
			-- the '%%' sequence has already been stripped from `s`, so
			-- only leading whitespace remains to be skipped
			local com = s:match '^%s*(.*)$'
			return {
				kind = 'comment';
				comment = com;
			}
		end};
		{seq = '%!', parse = insert_span_directive(true,false)};
		{seq = '%:', parse = insert_span_directive(false,true)};
		{seq = '%', parse = insert_span_directive(false,false)};
	}
end

-- parses inline markup in `str` into a list of spans: plain strings,
-- 'raw' nodes for escapes, 'macro' nodes for {…}, the result of the
-- matching span control sequence for […], and 'line-break' nodes for
-- newlines. `ctx` supplies the document encoding and the origin that is
-- stamped on the nodes created here
function ct.parse_span(str,ctx)
	-- extracts a delimited substring; a failure inside ss.str.delimit is
	-- converted into a translation error raised through the context
	local function delimited(start, stop, s)
		local r = { pcall(ss.str.delimit, nil, start, stop, s) }
		if r[1] then return table.unpack(r, 2) end
		ctx:fail(tostring(r[2]))
	end
	local buf = ""
	local spans = {}
	-- emits the accumulated plain text, if any, as a string span
	local function flush()
		if buf ~= "" then
			-- disabled hook:
			-- for fn, ext in ctx.doc.docjob:each('hook','doc_meddle_string') do
			-- 	buf = fn(ctx.doc.docjob:delegate(ext), ctx, buf)
			-- end
			table.insert(spans, buf)
			buf = ""
		end
	end
	local skip = false
	for c,p in ss.str.each(ctx.doc.enc,str) do
		-- NOTE(review): parse_escape appears to return the byte length,
		-- the codepoint count and the replacement text of an escape at
		-- this position — confirm against the encoding implementation
		local ba, ca, es = ctx.doc.enc.parse_escape(str:sub(p.byte))
		if es then
			flush()
			table.insert(spans, {
				kind = 'raw';
				spans = {es};
				origin = ctx:clone()
			})
			-- skip over the rest of the escape sequence
			p.next.byte = p.next.byte + ba;
			p.next.code = p.next.code + ca;
		elseif c == '{' then
			flush()
			local substr, following = delimited('{','}',str:sub(p.byte))
			-- the macro name runs up to the first whitespace; the
			-- remainder is the argument string
			local splitstart, splitstop = substr:find'%s+'
			local id, argstr
			if splitstart then
				id, argstr = substr:sub(1,splitstart-1), substr:sub(splitstop+1)
			else
				id, argstr = substr, ''
			end
			local o = {
				kind = 'macro';
				macro = id;
				args = {};
				origin = ctx:clone();
			}

			-- split the argument string on '|', treating '\|' as a
			-- literal pipe
			do local start = 1
				local i = 1
				while i <= #argstr do
					while i<=#argstr and (argstr:sub(i,i) ~= '|' or argstr:sub(i-1,i) == '\\|') do
						i = i + 1
					end
					local arg = argstr:sub(start, i == #argstr and i or i-1)
					start = i+1
					arg=arg:gsub('\\|','|')
					table.insert(o.args, arg)
					i = i + 1
				end
			end

			p.next.byte = p.next.byte + following - 1
			table.insert(spans,o)
		elseif c == '[' then
			flush()
			local substr, following = delimited('[',']',str:sub(p.byte))
			p.next.byte = following + p.byte
			-- dispatch to the first control sequence that prefixes the
			-- span contents
			local found = false
			for _,i in pairs(ct.spanctls) do
				if ss.str.begins(substr, i.seq) then
					found = true
					table.insert(spans, i.parse(substr:sub(1+#i.seq), ctx))
					break
				end
			end
			if not found then
				-- NOTE(review): only the bracket itself is kept here while
				-- the iterator has already been advanced — confirm that
				-- this is the intended handling of unrecognized spans
				buf = buf .. c
			end
		elseif c == '\n' then
			flush()
			table.insert(spans,{kind='line-break',origin=ctx:clone()})
		else
			buf = buf .. c
		end
	end
	flush()
	return spans
end

-- wraps a block parser `fn(l,c,j,d)`. when the parser returns a block,
-- the wrapper stamps it with a snapshot of the context, appends it to
-- the destination list `d`, and fires the 'block_insert' job hook,
-- followed by the document-level 'meddle_span' hook if the block has
-- spans. the wrapper itself returns nothing
local function blockwrap(fn)
	return function(l,c,j,d)
		local block = fn(l,c,j,d)
		if not block then return end
		block.origin = c:clone()
		d[#d+1] = block
		j:hook('block_insert', c, block, l)
		local spans = block.spans
		if spans then
			c.doc.docjob:hook('meddle_span', spans, block)
		end
	end
end

-- paragraph block: a single leading '.' is dropped, and the remainder of
-- the line is parsed as spans
local insert_paragraph = blockwrap(function(l,c)
	local text = (l:sub(1,1) == '.') and l:sub(2) or l
	local para = { kind = "paragraph" }
	para.spans = ct.parse_span(text, c)
	return para
end)

-- opens a new section from a heading line: a run of '#'/'§' markers
-- (whose count gives the depth), an optional ID directly after them,
-- and an optional title. fails on a duplicate section ID. the new
-- section becomes the context's current section and the
-- 'section_attach' hook is fired
local insert_section = function(l,c,j)
	local marks, id, title = l:match '^([#§]+)([^%s]*)%s*(.-)$'
	if not id or id == "" then
		id = nil
	elseif c.doc.sections[id] then
		c:fail('duplicate section name “%s”', id)
	end

	local level = utf8.len(marks)
	local s = c.doc:mksec(id, level)
	s.depth = level
	s.origin = c:clone()
	s.blocks={}

	if title and title ~= "" then
		-- the title becomes a 'label' block captioning the section
		local heading = { kind = "label" }
		heading.spans = ct.parse_span(title,c)
		heading.origin = s.origin
		heading.captions = s
		c.doc.docjob:hook('meddle_span', heading.spans, heading)
		table.insert(s.blocks, heading)
		s.heading_node = heading
	end
	c.sec = s

	j:hook('section_attach', c, s)
end

-- metadata directive: the directive name is the key and the rest of the
-- line the value. stores the pair in the document metadata and fires
-- the 'metadata_set' hook
local dsetmeta = function(w,c,j)
	local field, value = w(1)
	local meta = c.doc.meta
	meta[field] = value
	j:hook('metadata_set', field, value)
end
-- extension control directive: marks every whitespace-separated
-- extension name after the mode word as used, needed or inhibited,
-- according to the mode ('uses', 'needs', 'inhibits'). any other mode
-- changes nothing
local dextctl = function(w,c)
	local mode, exts = w(1)
	local slot = ({
		uses = 'use';
		needs = 'need';
		inhibits = 'inhibit';
	})[mode]
	for e in exts:gmatch '([^%s]+)' do
		if slot then
			c.doc.ext[slot][e] = true
		end
	end
end
-- conditional directive ('when' / 'unless'). only the mode word is
-- consulted here: the context's `hide_next` flag is set for 'unless'
-- and cleared otherwise; the condition itself is not evaluated
local dcond = function(w,c)
	local mode = w(2)
	c.hide_next = (mode == 'unless')
end;
-- built-in block directives, keyed by the first word of the directive.
-- each handler receives the word-splitting function, the context and
-- the job
ct.directives = {
	author = dsetmeta;
	license = dsetmeta;
	keywords = dsetmeta;
	desc = dsetmeta;
	when = dcond;
	unless = dcond;
	-- accepted and ignored
	pragma = function(w,c)
	end;
	-- language stack manipulation: 'is' replaces the top entry, 'push'
	-- and 'pop' grow and shrink the stack, 'sec' tags the current
	-- section only. afterwards the context language is the stack top
	lang = function(w,c)
		local _, op, l = w(2)
		local stack = c.doc.stage.langstack
		if op == 'is' then
			local top = math.max(1, #stack)
			stack[top] = l
		elseif op == 'push' then
			table.insert(stack, l)
		elseif op == 'pop' then
			if next(stack) then
				stack[#stack] = nil
			end
		elseif op == 'sec' then
			c.sec.lang = l
		else
			c:fail('bad language directive “%s”', op)
		end
		c.lang = stack[#stack]
	end;
	-- arms span expansion for the next code listing unless the argument
	-- is 'off'
	expand = function(w,c)
		local _, m = w(1)
		c.doc.stage.expand_next = (m ~= 'off') and 1 or 0
	end;
}

-- parses a table row. cells are introduced by '|' or '│' (ordinary
-- cell) or '+' (header cell); a ':' directly after the opening
-- delimiter and/or directly before the next one selects left, right or
-- center alignment; a backslash escapes a following delimiter or colon.
-- the row is appended to the table that ends the current section, or
-- starts a new table there, and the matching hooks are fired
local function insert_table_row(l,c,j)
	local BOX = '│' -- accepted as a row prefix, so also as a delimiter
	local function is_delim(ch)
		return ch == '|' or ch == '+' or ch == BOX
	end
	-- returns the delimiter starting at byte offset `at` of the line, if any
	local function delim_at(at)
		local b = l:sub(at,at)
		if b == '|' or b == '+' then return b end
		if l:sub(at, at + #BOX - 1) == BOX then return BOX end
	end

	local row = {}
	local buf
	local flush = function()
		if buf then
			buf.str = buf.str:gsub('%s+$','')
			table.insert(row, buf)
		end
		buf = { str = '' }
	end
	-- previous character of the line and whether it was backslash-escaped
	local prev, prev_escaped
	for ch,p in ss.str.each(c.doc.enc,l) do
		local consumed = false
		if is_delim(ch) then
			-- escaped delimiters never get here: the backslash branch
			-- below consumes them
			flush()
			buf.header = ch == '+'
		elseif ch == ':' then
			if is_delim(prev) and not prev_escaped then
				buf.align = 'left'
			elseif delim_at(p.next.byte) then
				if buf.align == 'left' then
					buf.align = 'center'
				else
					buf.align = 'right'
				end
			else
				buf.str = buf.str .. ch
			end
		elseif ch:match '%s' then
			-- leading whitespace of a cell is dropped
			if buf.str ~= '' then buf.str = buf.str .. ch end
		elseif ch == '\\' then
			local nxt = delim_at(p.next.byte)
			if not nxt and l:sub(p.next.byte,p.next.byte) == ':' then
				nxt = ':'
			end
			if nxt then
				buf.str = buf.str .. nxt
				p.next.byte = p.next.byte + #nxt
				p.next.code = p.next.code + 1
				prev, prev_escaped = nxt, true
				consumed = true
			else
				buf.str = buf.str .. ch
			end
		else
			buf.str = buf.str .. ch
		end
		if not consumed then
			prev, prev_escaped = ch, false
		end
	end
	if buf.str ~= '' then flush() end
	for _,v in pairs(row) do
		v.spans = ct.parse_span(v.str, c)
		c.doc.docjob:hook('meddle_span', v.spans, v)
	end
	if next(c.sec.blocks) and c.sec.blocks[#c.sec.blocks].kind == 'table' then
		local tbl = c.sec.blocks[#c.sec.blocks]
		table.insert(tbl.rows, row)
		j:hook('block_table_attach', c, tbl, row, l)
		j:hook('block_table_row_insert', c, tbl, row, l)
	else
		local tbl = {
			kind = 'table';
			rows = {row};
			origin = c:clone();
		}
		table.insert(c.sec.blocks, tbl)
		j:hook('block_table_insert', c, tbl, l)
		j:hook('block_table_row_insert', c, tbl, tbl.rows[1], l)
	end
end

-- returns a block parser for link lines introduced by `seq`. when
-- whitespace separates the sequence from the target, the target is
-- parsed as a URI; when the target follows directly, it is kept as a
-- reference. the rest of the line is the link text
local function insert_link_block(seq)
	return blockwrap(function(s,c)
		local body = s:sub(#seq+1)
		local pad, target, label = body:match('^(%s*)([^%s]*)%s*(.*)$')
		local node = { kind = 'link' }
		if pad ~= '' then
			node.uri = ss.uri(target) or nil
		else
			node.ref = target
		end
		node.spans = ct.parse_span(label, c)
		return node
	end)
end

ct.ctlseqs = {
	-- entries are tried in order by ct.parse_line; the first one whose
	-- `seq` prefixes the line (if set) and whose `pred` accepts it (if
	-- set) handles the line.
	-- explicit paragraph markers
	{seq = '.', fn = insert_paragraph};
	{seq = '¶', fn = insert_paragraph};
	{seq = '❡', fn = insert_paragraph};
	-- section headings
	{seq = '#', fn = insert_section};
	{seq = '§', fn = insert_section};
	-- table rows
	{seq = '+', fn = insert_table_row};
	{seq = '|', fn = insert_table_row};
	{seq = '│', fn = insert_table_row};
	-- aside line: extends the aside that ends the destination list, or
	-- starts a new aside block if the last block is not one
	{seq = '!', fn = function(l,c,j,d)
		local last = d[#d]
		local txt = l:match '^%s*!%s*(.-)$'
		local sp = ct.parse_span(txt, c)
		if last and last.kind == 'aside' then
			c.doc.docjob:hook('meddle_span', sp, last)
			table.insert(last.lines, sp)
			j:hook('block_aside_attach', c, last, sp, l)
			j:hook('block_aside_line_insert', c, last, sp, l)
			return
		end
		local aside = {
			kind = 'aside';
			lines = { sp };
			origin = c:clone();
		}
		c.doc.docjob:hook('meddle_span', sp, aside)
		table.insert(d,aside)
		j:hook('block_aside_insert', c, aside, l)
		j:hook('block_aside_line_insert', c, aside, sp, l)
		j:hook('block_insert', c, aside, l)
	end};
	-- list item: a run of '*' / ':' markers gives the depth, and the
	-- last marker decides whether the item is ordered (':')
	{pred = function(s,c) return s:match'^[*:]' end,
	 fn   = blockwrap(function(l,c) -- list
		local marks = l:match '^([*:]+)'
		local text = l:sub(#marks+1):match '^%s*(.-)$'
		local item = {
			kind = 'list-item';
			depth = #marks; -- markers are single-byte characters
			ordered = (marks:sub(-1) == ':');
		}
		item.spans = ct.parse_span(text, c)
		return item
	end)};
	-- reference continuation: appends a further line to the value of the
	-- reference block that ends the destination list
	{seq = '\t\t', fn = function(l,c,j,d)
		local last = d[#d]
		if (not last) or (last.kind ~= 'reference') then
			c:fail('reference continuations must immediately follow a reference')
		end
		local str = l:match '^\t\t(.-)%s*$'
		if last.val == '' then
			last.val = str
		else
			last.val = last.val .. '\n' .. str
		end
		if last.rsrc then
			-- the reference is a property of a resource, embed or macro
			-- block: update that property rather than creating a
			-- section-level reference of the same name
			last.rsrc.props = last.rsrc.props or {}
			last.rsrc.props[last.key] = last.val
		else
			c.sec.refs[last.key] = last.val
		end
	end};
	-- reference definition ("<tab>key: value"). directly after a
	-- resource, embed or macro block (or after a reference that already
	-- belongs to one) the pair is stored as a property of that block;
	-- otherwise it becomes a reference of the current section
	{seq = '\t', pred = function(l)
		return (l:match '\t+([^:]+):%s*(.*)$')
	end; fn = blockwrap(function(l,c,j,d)
		local ref, val = l:match '\t+([^:]+):%s*(.*)$'
		local last = d[#d]
		local lk = last and last.kind
		local rsrc
		if lk == 'resource' or lk == 'embed' or lk == 'macro' then
			if not last.props then last.props = {} end
			last.props[ref] = val
			j:hook('set_prop', c, last, ref, val, l)
			rsrc = last
		elseif lk == 'reference' and last.rsrc then
			rsrc = last.rsrc
			rsrc.props[ref] = val
		else
			c.sec.refs[ref] = val
		end
		j:hook('section_ref_attach', c, ref, val, l)
		return {
			kind = 'reference';
			rsrc = rsrc;
			key = ref;
			val = val;
		}
	end)};
	-- block directive. dispatches, in order, to a built-in directive, to
	-- the active renderer (by inserting a 'directive' block), or to an
	-- allowed extension whose ID is the first word. an unsupported
	-- directive is an error only when marked critical with '!'
	{seq = '%', fn = function(l,c,j,d) -- directive
		local crit, cmdline = l:match '^%%([!%%]?)%s*(.*)$'
		-- words(i) returns the first i words of the command line followed
		-- by the trimmed remainder; words(0) returns the whole line.
		-- returns nothing when the line has fewer than i words
		local words = function(i)
			local wds = {}
			if i == 0 then return cmdline end
			for w,pos in cmdline:gmatch '([^%s]+)()' do
				table.insert(wds, w)
				i = i - 1
				if i == 0 then
					table.insert(wds,(cmdline:sub(pos):match('^%s*(.-)%s*$')))
					return table.unpack(wds)
				end
			end
		end

		local cmd = words(1)
		if ct.directives[cmd] then
			ct.directives[cmd](words,c,j)
		elseif cmd == c.doc.stage.mode['render:format'] then
			-- this is a directive for the renderer; insert it into the tree as is
			local dir = {
				kind = 'directive';
				critical = crit == '!';
				words = words;
				-- snapshot rather than the live context, which keeps
				-- changing as later lines are parsed
				origin = c:clone();
			}
			table.insert(d, dir)
			j:hook('block_directive_render', j, c, dir)
		elseif c.doc:allow_ext(cmd) then -- extension directives begin with their id
			local ext = ct.ext.loaded[cmd]
			if ext.directives then
				local _, topcmd = words(2)
				if ext.directives[topcmd] then
					ext.directives[topcmd](j:delegate(ext), c, words)
				elseif ext.directives[true] then -- catch-all
					ext.directives[true](j:delegate(ext), c, words)
				elseif crit == '!' then
					c:fail('extension %s does not support critical directive %s', cmd, topcmd)
				end
			end
		elseif crit == '!' then
			c:fail('critical directive %s not supported',cmd)
		end
	end;};
	-- quote line (">id text"). consecutive quote lines with no ID or the
	-- same ID extend the quote that ends the destination list; otherwise
	-- a new quote with its own subdocument and context is opened. the
	-- text is parsed as a line of that subdocument
	{pred = function(s) return s:match '^>[^>%s]*%s*.*$' end,
	 fn   = function(l,c,j,d)
		local id,txt = l:match '^>([^>%s]*)%s*(.*)$'
		if id == '' then id = nil end
		local last = d[#d]
		local node
		local ctx
		if last and last.kind == 'quote' and (id == nil or id == last.id) then
			node = last
			ctx = node.ctx
			ctx.line = c.line -- is this enough??
		else
			local doc
			doc, ctx = c.doc:sub(c)
			-- origin is a snapshot; the live context keeps changing as
			-- later lines are parsed
			node = { kind = 'quote', doc = doc, ctx = ctx, id = id, origin = c:clone() }
			table.insert(d, node)
			j:hook('block_insert', c, node, l)
		end

		ct.parse_line(txt, ctx, ctx.sec.blocks)
	end};
	-- opens a code listing and switches the context into 'code' mode.
	-- "~~~ [lang] #id title ~~~" sets language, ID and title; the
	-- shorthand "~~~lang" sets the language only. a listing with an ID
	-- is registered as a reference of the current section
	{seq = '~~~', fn = blockwrap(function(l,c,j)
		-- cuts the first match of `ptn` out of `str`; returns the match
		-- (nil if there is none) and the remaining string
		local function extract(ptn, str)
			local first, last = str:find(ptn)
			if first == nil then return nil, str end
			return str:sub(first,last), str:sub(1,first-1) .. str:sub(last+1)
		end
		local lang, id, title
		if not l:match '^~~~%s*$' then -- otherwise: no args
			if l:match '^~~~.*~~~%s*$' then -- CT style
				local s = l:match '^~~~%s*(.-)%s*~~~%s*$'
				lang, s = extract('%b[]', s)
				if lang then lang = lang:sub(2,-2) end
				id, title = extract('#[^%s]+', s)
				if id then id = id:sub(2) end
			elseif l:match '^~~~' then -- MD shorthand style
				lang = l:match '^~~~%s*(.-)%s*$'
			end
		end
		local listing = {
			kind = 'listing';
			lang = lang, id = id, title = title and ct.parse_span(title,c);
			lines = {};
		}
		local mode = { kind = 'code', listing = listing }
		local stage = c.doc.stage
		if stage.expand_next and stage.expand_next > 0 then
			stage.expand_next = stage.expand_next - 1
			mode.expand = true
		end
		j:hook('mode_switch', c, mode)
		c.mode = mode
		if id then
			if c.sec.refs[id] then c:fail('duplicate ID %s', id) end
			c.sec.refs[id] = c.mode.listing
		end
		return c.mode.listing;
	end)};
	-- horizontal rule: a line made up only of rule characters, starting
	-- with '-', '_' or an em dash. the pattern is anchored at both ends
	-- so that a line which merely begins with rule characters is not
	-- taken for a rule and its text discarded
	{pred = function(s,c)
		if s:match '^[%-_][*_%-%s]+$' then return true end
		if startswith(s, '—') then
			local rulechars = {
				['—'] = true, ['-'] = true, [' '] = true;
				['*'] = true, ['_'] = true, ['\t'] = true;
			}
			for ch in ss.str.each(c.doc.enc,s) do
				if rulechars[ch] ~= true then return false end
			end
			return true
		end
	end; fn = blockwrap(function()
		return { kind = 'horiz-rule' }
	end)};
	-- link blocks
	{seq='=>', fn = insert_link_block '=>'};
	{seq='⇒',  fn = insert_link_block '⇒'};
	-- resource declaration ("@id src" or "@id src {"). a trailing
	-- bracket token opens an inline resource whose body runs up to the
	-- mirrored closing token. a resource without an ID is embedded
	-- inline on the spot
	{seq='@', fn=function(s,c,j,d)
		-- reverses `b`, swapping each bracket for its counterpart
		local function mirror(b)
			local ch = {}
			local rev = {
				['['] = ']'; [']'] = '[';
				['{'] = '}'; ['}'] = '{';
				['('] = ')'; [')'] = '(';
				['<'] = '>'; ['>'] = '<';
			}
			for i = 1,#b do
				local x = string.sub(b,-i,-i)
				ch[i] = rev[x] or x
			end
			return table.concat(ch)
		end

		local id,rest = s:match '^@([^%s]*)%s*(.*)$'
		local bs, brak = rest:match '()([{[(<][^%s]*)%s*$'
		local src
		if brak then
			src = rest:sub(1,bs-1):gsub('%s+$','')
		else src = rest end
		if src == '' then src = nil end
		if id == '' then id = nil end
		local rsrc = {
			kind = 'resource';
			props = {src = src};
			id = id;
			-- snapshot rather than the live context, which keeps
			-- changing as later lines are parsed
			origin = c:clone();
		}
		if brak then
			rsrc.bracket = {
				open = brak;
				close = mirror(brak);
			}
			rsrc.raw = '';
			if src == nil then
				rsrc.props.src = 'text/x.cortav'
			end
		end
		if id then
			if c.sec.refs[id] then
				c:fail('an object with id “%s” already exists in that section',id)
			else
				c.sec.refs[id] = rsrc
			end
		end
		table.insert(d, rsrc)
		j:hook('block_insert', c, rsrc, s)
		if id == nil then --shorthand syntax
			local embed = {
				kind = 'embed';
				rsrc = rsrc;
				origin = c:clone();
				mode = 'inline';
			}
			table.insert(d, embed)
			j:hook('block_insert', c, embed, s)
		end

		if brak then
			c.mode = {
				kind = 'inline-rsrc';
				rsrc = rsrc;
				indent = nil;
				depth = 0;
			}
		end
	end};
	-- macro invocation block ("$id arg|arg|…"): arguments are separated
	-- by '|', split with escape handling enabled
	{seq='$', fn=blockwrap(function(s,c)
		local id, args = s:match('^%$([^%s]+)%s?(.-)$')
		if not id or id == '' then
			c:fail 'malformed macro block'
		end
		local enc = c.doc.enc
		local node = { kind = 'macro', macro = id }
		node.args = ss.str.split(enc, args, enc.encodeUCS'|', {esc=true})
		return node
	end)};
	-- embed block ("&id caption"): a '-' or '+' directly after the '&'
	-- selects the 'closed' or 'open' mode; the default is 'inline'
	{seq='&', fn=blockwrap(function(s,c)
		local flag, id, cap = s:match('^&([-+]?)([^%s]+)%s*(.-)%s*$')
		if not id or id == '' then
			c:fail 'malformed embed block'
		end
		if cap == '' then cap = nil end
		local modes = { ['-'] = 'closed', ['+'] = 'open' }
		return {
			kind = 'embed';
			ref = id;
			cap = cap;
			mode = modes[flag] or 'inline';
		}
	end)};
	-- fallback: any other line is a paragraph
	{fn = insert_paragraph};
}

-- parse one line of input within the context `ctx`, appending any blocks
-- it produces to `dest`.
--  * rawline: the line exactly as read, or nil
--  * ctx:     the active parse context. if ctx.mode is set, the line is
--             handed to that syntax mode instead of the ordinary
--             control-sequence dispatch
--  * dest:    the block list that new blocks are inserted into
-- the 'line_read' hook fires before anything else and 'line_end' fires
-- once the line has been dealt with.
function ct.parse_line(rawline, ctx, dest)
	local job = ctx.doc.stage.job
	job:hook('line_read',ctx,rawline)
	local l
	if rawline then
		l = rawline:gsub("^ +","") -- trim leading spaces
	end
	if ctx.mode then
		if ctx.mode.kind == 'code' then
			if l and l:match '^~~~%s*$' then
				-- closing fence: the listing is finished, leave code mode
				job:hook('block_listing_end',ctx,ctx.mode.listing)
				-- hand the hook the real context (this used to pass an
				-- undefined global `c`, so hooks always received nil)
				job:hook('mode_switch', ctx, nil)
				ctx.mode = nil
			else
				-- TODO handle formatted code
				local newline
				if ctx.mode.expand
					then newline = ct.parse_span(l, ctx)
					else newline = {l}
				end
				table.insert(ctx.mode.listing.lines, newline)
				job:hook('block_listing_newline',ctx,ctx.mode.listing,newline)
			end
		elseif ctx.mode.kind == 'inline-rsrc' then
			-- body of an inline resource: accumulate raw text until a line
			-- consisting solely of the closing bracket is seen
			local r = ctx.mode.rsrc
			if rawline then
				if rawline == r.bracket.close then
					if ctx.mode.depth == 0 then
						-- TODO how to handle depth?
						ctx.mode = nil
					end
				else
					if r.indent ~= nil then
						-- not the first body line; separate it from the last
						r.raw = r.raw .. '\n'
					else
						-- the first body line decides whether one leading
						-- tab is stripped from every line of the body
						r.indent = (rawline:sub(1,1) == '\t')
					end

					if r.indent == true then
						if rawline:sub(1,1) == '\t' then
							rawline = rawline:sub(2)
						end
					end

					r.raw = r.raw .. rawline
				end
			end
		else
			-- any other mode must be supplied through the job's 'modes' procs
			local mf = job:proc('modes', ctx.mode.kind)
			if not mf then
				ctx:fail('unimplemented syntax mode %s', ctx.mode.kind)
			end
			mf(job, ctx, l, dest) --NOTE: you are responsible for triggering the appropriate hooks if you insert anything!
		end
	else
		if l and l ~= '' then
			-- run the first entry of `seqs` whose prefix (if any) and
			-- predicate (if any) both accept the line; returns whether
			-- an entry was run
			local function tryseqs(seqs, ...)
				for _, i in pairs(seqs) do
					if ((not i.seq ) or startswith(l, i.seq)) and
					   ((not i.pred) or i.pred    (l, ctx  )) then
						i.fn(l, ctx, job, dest, ...)
						return true
					end
				end
				return false
			end

			-- built-in control sequences take priority over the block
			-- handlers iterated from the job
			if not tryseqs(ct.ctlseqs) then
				local found = false

				for eb, ext, state in job:each 'blocks' do
					if tryseqs(eb, state) then found = true break end
				end

				if not found then
					ctx:fail 'incomprehensible input line'
				end
			end
		else
			-- blank (or absent) line: insert a break, unless dest is
			-- empty or already ends in one
			if next(dest) and dest[#dest].kind ~= 'break' then
				local brk = {kind='break', origin = ctx:clone()}
				job:hook('block_break', ctx, brk, l)
				table.insert(dest, brk)
			end
		end
	end
	job:hook('line_end',ctx,l)
end

-- parse a complete document.
--  * file:  object whose :lines() iterator yields the source lines
--  * src:   handed to ct.ctx.mk and ctx:init
--  * mode:  settings table; 'meta:lang' and 'parse:enc' are consulted here
--           and the table as a whole is stored in the parse stage
--  * setup: optional function, called with the fresh context before the
--           first line is parsed
-- returns the populated document object (ctx.doc). throws a ct.exns.enc
-- exception if mode['parse:enc'] names an encoding absent from ss.str.enc.
--
-- parsing happens in three passes: every line is fed to ct.parse_line;
-- then the source lists of all resource blocks are resolved; finally
-- block macros are expanded into sub-documents.
function ct.parse(file, src, mode, setup)
	-- this object is threaded down through the parse tree
	-- and copied to store information like the origin of the
	-- element in the source code
	local ctx = ct.ctx.mk(src)
	ctx:init(ct.doc.mk(), src)
	ctx.lang = mode['meta:lang']
	if mode['parse:enc'] then
		local e = ss.str.enc[mode['parse:enc']]
		if not e then
			ct.exns.enc('requested encoding not supported',mode['parse:enc']):throw()
		end
		ctx.doc.enc = e
	end

	-- create states for extension hooks
	local job = ctx.doc:job('parse',nil,ctx)
	ctx.doc.stage = {
		kind = 'parse';
		mode = mode;
		job = job;
		langstack = {ctx.lang};
		fontstack = {};
	}

	-- currently only referenced by the disabled whitespace-trimming code
	-- in the line loop below
	local function
	is_whitespace(cp)
		return ctx.doc.enc.iswhitespace(cp)
	end

	if setup then setup(ctx) end

	-- pass 1: parse the source line by line
	for full_line in file:lines() do
		ctx.line = ctx.line + 1
	-- 		local l
	-- 		for p, c in utf8.codes(full_line) do
	-- 			if not is_whitespace(c) then
	-- 				l = full_line:sub(p)
	-- 				break
	-- 			end
	-- 		end
		ct.parse_line(full_line, ctx, ctx.sec.blocks)
	end

	-- pass 2: resolve resource sources. each line of a resource's `src`
	-- property describes one candidate source
	for _, sec in ipairs(ctx.doc.secorder) do
		for _, r in pairs(sec.blocks) do
			if r.kind == 'resource' and r.props.src then
				local lines = ss.str.breaklines(ctx.doc.enc, r.props.src)
				local srcs = {}
				for _, l in ipairs(lines) do
					local args = ss.str.breakwords(ctx.doc.enc, l, 2, {escape=true})
					if #args > 3 or (r.raw and #args > 2) then
						r.origin:fail('invalid syntax for resource %s', r.id or '(anonymous)')
					end
					local p_mode, p_mime, p_uri
					if r.raw then
						-- inline resources carry their own content
						p_mode = 'embed'
					end
					if #args == 1 then
						if r.raw then -- inline content
							p_mime = ss.mime(args[1])
						else
							p_uri = args[1]
						end
					elseif #args == 2 then
						-- the first word is a mime-type if it parses as one
						local ok, m = pcall(ss.mime, args[1])
						if r.raw then
							if not ok then
								r.origin:fail('invalid mime-type “%s”', args[1])
							end
							-- NOTE(review): this takes both the mode and the
							-- mime-type from args[1] and never reads args[2];
							-- confirm whether `mode mime` was intended here
							p_mode, p_mime = args[1], m
						else
							if ok then
								p_mime, p_uri = m, args[2]
							else
								p_mode, p_uri = table.unpack(args)
							end
						end
					else
						p_mode, p_mime, p_uri = table.unpack(args)
						p_mime = ss.mime(args[2])
					end
					local resource = {
						mode = p_mode;
						mime = p_mime or 'text/x.cortav';
						uri = p_uri and ss.uri(p_uri) or nil;
					}
					if resource.mode == 'embed' or resource.mode == 'auto' then
						-- the resource must be available for reading within this job
						-- open it and read its source into memory
						if resource.uri then
							if resource.uri:canfetch() then
								resource.raw = resource.uri:fetch()
							elseif resource.mode == 'auto' then
								-- resource cannot be accessed; force linking
								resource.mode = 'link'
							else
								r.origin:fail('resource “%s” wants to embed unfetchable URI “%s”',
									r.id or "(anonymous)", tostring(resource.uri))
							end
						elseif r.raw then
							resource.raw = r.raw
						else
							r.origin:fail('resource “%s” is not inline and supplies no URI',
								r.id or "(anonymous)")
						end

						-- the resource has been cached. check the mime-type to see if
						-- we need to parse it or if it is suitable as-is.
						-- an `auto` resource that was just demoted to a link has no
						-- cached text, so there is nothing to parse in that case

						if resource.raw and (ss.mime 'text/x.cortav' < resource.mime) then
							local sd, sc = r.origin.doc:sub(r.origin)
							-- we store the resource block itself in the declaration
							-- slot so that its properties (e.g. context variables)
							-- can affect the way the document is rendered
							sc.declaration = r
							local sublines = ss.str.breaklines(r.origin.doc.enc, resource.raw, {})
							for _, ln in ipairs(sublines) do
								sc.line = sc.line + 1
								ct.parse_line(ln, sc, sc.sec.blocks)
							end
							resource.doc = sd
						end
					end
					table.insert(srcs, resource)
				end
				r.srcs = srcs
				-- note that resources do not themselves have kinds. when a
				-- document requests to insert a resource, the renderer must
				-- iterate through the sources and find the first source it
				-- is capable of emitting. this allows constructions like
				-- emitting a video for HTML outputs, a photo for printers,
				-- and a screenplay for tty/plaintext outputs.
			end
		end
	end

	-- expand block macros
	for _, sec in ipairs(ctx.doc.secorder) do
		for _, r in pairs(sec.blocks) do
			if r.kind == 'macro' then
				local mc = r.origin
				local mac = mc:ref(r.macro)
				if not mac then
					mc:fail('no such reference or resource “%s”', r.macro)
				end

				local subdoc, subctx = ctx.doc:sub(mc)
				local rawbody
				-- lets ct.expand_var find the positional arguments
				subctx.invocation = r

				if type(mac) == 'string' then
					rawbody = mac
				elseif mac.raw then
					rawbody = mac.raw
					subctx.declaration = mac
				else
					mc:fail('block macro “%s” must be either a reference or an embedded text/x.cortav resource', r.macro)
				end

				local lines = ss.str.breaklines(ctx.doc.enc, rawbody)
				for _, ln in ipairs(lines) do
					ct.parse_line(ln, subctx, subctx.sec.blocks)
				end
				r.doc = subdoc
			end
		end
	end

	-- the parse stage is over; run the 'meddle_ast' hook on the document job
	ctx.doc.stage = nil
	ctx.doc.docjob:hook('meddle_ast')
	return ctx.doc
end

-- resolve a variable node `v` to its value.
-- if v.pos is set, the value is that positional argument of the macro
-- invocation recorded on v.origin; otherwise it is looked up with the
-- document's context_var using v.var.
-- returns (value, true) when v.raw is set, and otherwise
-- (the value run through ct.parse_span, false).
function ct.expand_var(v)
	local origin = v.origin
	local val
	if v.pos then
		local inv = origin.invocation
		if not inv then
			origin:fail 'positional arguments can only be used in a macro invocation'
		elseif not inv.args[v.pos] then
			inv.origin:fail('macro invocation %s missing positional argument #%u', inv.macro, v.pos)
		end
		val = inv.args[v.pos]
	else
		val = origin.doc:context_var(v.var, origin)
	end
	if not v.raw then
		return ct.parse_span(val, origin), false
	end
	return val, true
end