cortav  Artifact [5feb0b86b1]

Artifact 5feb0b86b11f58650cb721035af92a70bac5255d01ec2952f397f1e57ab2a690:


-- [ʞ] cortav.lua
--  ~ lexi hale <lexi@hale.su>
--  © AGPLv3
--  ? reference implementation of the cortav document language

local ss = require 'sirsem'
-- aliases for commonly used sirsem funcs
local startswith = ss.str.begins
local dump = ss.dump
local declare = ss.declare

-- make this module available to require() when linked into a lua bytecode program with luac
local ct = ss.namespace 'cortav'
ct.info = {
	version = ss.version {0,1; 'devel'};
	package_name = 'cortav';
	contributors = {
		{ name = 'lexi hale', handle = 'velartrill';
		  mail = 'lexi@hale.su', homepage = 'https://hale.su' };
	};
	-- returns the "<package> <version>" identification line
	ident_string = function(i)
		return string.format('%s %s', i.package_name, i.version)
	end;
	-- builds the list of people to credit: the core contributors plus the
	-- contributors declared by every loaded extension. a contributor who
	-- is already listed (matched by handle) has the extension appended to
	-- their role; anyone else is added as a new entry
	credits = function(i)
		local all = ss.copy(i.contributors)
		for _,who in pairs(all) do
			who.role = who.role or 'core functionality'
		end
		for name,ext in pairs(ct.ext.loaded) do
			if ext.contributors then
				for _,c in pairs(ext.contributors) do
					local ofs, ref = ss.find(all, function(a)
						return a.handle == c.handle
					end)
					if ofs then
						ref.role = string.format('%s; %s extension', ref.role, name)
					else
						-- copy the contributor record itself (not some other
						-- field of the extension) so that the role can be
						-- attached without touching the extension's table,
						-- and actually add it to the result
						local entry = ss.clone(c)
						entry.role = name .. ' extension'
						table.insert(all, entry)
					end
				end
			end
		end
		return all
	end;
	-- renders a contributor list (as returned by credits) as plain text,
	-- one ` ~ name “handle” <mail> (homepage): role` line per person
	credits_ascii = function(contributors)
		local body = ''
		for _, c in pairs(contributors) do
			local str
			if c.handle then
				str = string.format('%s “%s” <%s>', c.name, c.handle, c.mail)
			else
				str = string.format('%s <%s>', c.name, c.mail)
			end
			if c.homepage then
				str = string.format('%s (%s)', str, c.homepage)
			end
			if c.role then
				str = string.format('%s: %s', str, c.role)
			end
			body = body .. string.format(' ~ %s\n', str)
		end
		return body
	end;
	-- identification line followed by the formatted credits
	about = function(i)
		return i:ident_string() .. '\n' ..
		       i.credits_ascii(i:credits())
	end;
}


ct.render = {}

do
	-- builds a message formatter: `prefix` is a format string that consumes
	-- the leading arguments, and the caller-supplied message format is
	-- appended to it before everything is handed to string.format
	local function prefixed(prefix)
		return function(msg, ...)
			return string.format(prefix .. msg, ...)
		end
	end

	-- exception kinds thrown by the translator
	ct.exns = {
		tx     = ss.exnkind('translation error', prefixed "(%s:%u) ");
		io     = ss.exnkind('IO error', prefixed "<%s %s> ");
		cli    = ss.exnkind 'command line parse error';
		mode   = ss.exnkind('bad mode', prefixed "mode “%s” ");
		unimpl = ss.exnkind 'feature not implemented';
		ext    = ss.exnkind 'extension error';
		enc    = ss.exnkind('encoding error', prefixed '[%s]');
	}
end

-- parse context: records where in the source we are (file, line) along
-- with the document and section currently being built
ct.ctx = declare {
	mk = function(src) return {src = src} end;
	ident = 'context';
	cast = {
		-- "file:line [generation]"
		string = function(me)
			local gen = me.generation or 0
			return string.format("%s:%s [%u]", me.src.file, me.line, gen)
		end;
	};
	-- a clone receives every field of its parent and a generation
	-- counter one higher than the parent's (1 for the first clone)
	clonesetup = function(new, old)
		for k,v in pairs(old) do new[k] = v end
		new.generation = (old.generation or 0) + 1
	end;
	fns = {
		-- throw a translation error tagged with the current file and line
		fail = function(self, msg, ...)
			ct.exns.tx(msg, self.src.file, self.line or 0, ...):throw()
		end;
		-- append a block to the current section, stamping its origin
		insert = function(self, block)
			block.origin = self:clone()
			table.insert(self.sec.blocks,block)
			return block
		end;
		-- resolve a reference. `name` is looked up in the current section,
		-- `section.name` in the named section; fails if either is missing.
		-- NOTE(review): the third return value is the section object for
		-- unqualified ids but the section *name* for qualified ones —
		-- confirm which one callers expect
		ref = function(self,id)
			if id:find'%.' then
				local secid, refid = string.match(id, "(.-)%.(.+)")
				local target = self.doc.sections[secid]
				if not target then
					self:fail("no such section %s", secid)
				elseif not target.refs[refid] then
					self:fail("no such ref %s in section %s", refid, secid)
				else
					return target.refs[refid], refid, secid
				end
			else
				local found = self.sec.refs[id]
				if found then
					return found, id, self.sec
				end
				self:fail("no such ref %s in current section", id or '')
			end
		end
	};
}

-- a document section: an ordered list of blocks plus the references
-- defined inside it
ct.sec = declare {
	ident = 'section';
	mk = function()
		local fresh = {}
		fresh.blocks = {}
		fresh.refs = {}
		fresh.depth = 0
		fresh.kind = 'ordinary'
		return fresh
	end;
	-- id may be nil for unlabeled sections
	construct = function(self, id, depth)
		self.id, self.depth = id, depth
	end;
}

-- the document object: sections, metadata, variables and the extension
-- policy declared by the document
ct.doc = declare {
	ident = 'doc';
	fns = {
		-- create a section, register it by id (when it has one) and
		-- append it to the document's section order
		mksec = function(self, id, depth)
			local o = ct.sec(id, depth)
			if id then self.sections[id] = o end
			table.insert(self.secorder, o)
			return o
		end;
		-- may the named extension act on this document? it must be loaded
		-- and not inhibited; beyond that it is allowed if the document
		-- uses/needs it, or if the extension is enabled by default
		allow_ext = function(self,name)
			if not ct.ext.loaded[name] then return false end
			if self.ext.inhibit[name] then return false end
			if self.ext.need[name] or self.ext.use[name] then
				return true
			end
			return ct.ext.loaded[name].default
		end;
		-- resolve a context variable to a string.
		--   var:  variable name; `cortav.*` are predefined, `env.*` read
		--         the process environment, anything else is looked up in
		--         the document's own variables
		--   ctx:  context used for error reporting
		--   test: when truthy, failures return false instead of throwing
		context_var = function(self, var, ctx, test)
			local fail = function(...)
				if test then return false end
				ctx:fail(...)
			end
			if startswith(var, 'cortav.') then
				local v = var:sub(8)
				if v == 'page' then
					if ctx.page then return tostring(ctx.page)
						else return '(unpaged)' end
				elseif v == 'renderer' then
					if not self.stage then
						return fail 'document is not being rendered'
					end
					return self.stage.format
				elseif v == 'datetime' then
					return os.date()
				elseif v == 'time' then
					return os.date '%H:%M:%S'
				elseif v == 'date' then
					return os.date '%A %d %B %Y'
				elseif v == 'id' then
					return 'cortav.lua (reference implementation)'
				elseif v == 'file' then
					return self.src.file
				else
					return fail('unimplemented predefined variable %s', var)
				end
			elseif startswith(var, 'env.') then
				local v = var:sub(5)
				local val = os.getenv(v)
				if not val then
					return fail('undefined environment variable %s', v)
				end
				return val
			-- the stage is cleared once parsing finishes, so it has to be
			-- checked before it is inspected
			elseif self.stage and self.stage.kind == 'render' and startswith(var, self.stage.format..'.') then
				-- TODO query the renderer somehow
				return fail('renderer %s does not implement variable %s', self.stage.format, var)
			elseif self.vars[var] then
				return self.vars[var]
			else
				if test then return false end
				return '' -- is this desirable behavior?
			end
		end;
		job = function(self, name, pred, ...) -- convenience func
			return self.docjob:fork(name, pred, ...)
		end
	};
	mk = function() return {
		sections = {};
		secorder = {};
		embed = {};
		meta = {};
		vars = {};
		ext = {
			inhibit = {};
			need = {};
			use = {};
		};
		enc = ss.str.enc.utf8;
	} end;
	-- every document owns a root extension job that other jobs fork from
	construct = function(me)
		me.docjob = ct.ext.job('doc', me, nil)
	end;
}

-- FP helper functions

-- partially applies string.format: fmtfn(f)(...) == string.format(f, ...)
local function fmtfn(str)
	local function formatted(...)
		return string.format(str, ...)
	end
	return formatted
end

ct.ext = { loaded = {} }

-- registers an extension under its `id`; throws an extension error when
-- the id is missing or already taken
ct.ext.install = function(ext)
	local id = ext.id
	if not id then
		ct.exns.ext 'extension missing “id” field':throw()
	end
	local registry = ct.ext.loaded
	if registry[id] then
		ct.exns.ext('there is already an extension with ID “%s” loaded', id):throw()
	end
	registry[id] = ext
end

-- builds the `each` and `hook` helpers for a document. when `doc` is nil
-- the helpers consider every loaded extension; otherwise only those the
-- document allows (doc:allow_ext)
function ct.ext.bind(doc)
	local fns = {}

	-- iterator over the loaded extensions that have a non-nil value at the
	-- given path (as resolved by ss.walk). yields that value followed by
	-- the extension it came from
	function fns.each(...)
		local cursor -- key (extension ID) of the last entry visited
		local path = {...}
		return function()
			while true do
				-- next() returns the key *and* the value; the extension
				-- table is the value, the key is only the traversal cursor
				local ext
				cursor, ext = next(ct.ext.loaded, cursor)
				if cursor == nil then return nil end
				if doc == nil or doc:allow_ext(ext.id) then
					local v = ss.walk(ext, table.unpack(path))
					if v ~= nil then
						return v, ext
					end
				end
			end
		end
	end

	function fns.hook(h, ...)
		-- this is the raw hook invocation function, used when hooks won't need
		-- private state to hold onto between invocation. if private state is
		-- necessary, construct a job instead
		local ret = {} -- for hooks that compile lists of responses from extensions
		for hook in fns.each('hook', h) do table.insert(ret,(hook(...))) end
		return ret
	end
	
	return fns
end

do -- use these functions when document restrictions don't matter
	local unrestricted = ct.ext.bind()
	ct.ext.each = unrestricted.each
	ct.ext.hook = unrestricted.hook
end

-- a job gives each participating extension a private state table that
-- lasts across multiple hook invocations
ct.ext.job = declare {
	ident = 'ext-job';
	init = {
		states = {};
	};
	-- name: job name, used as the prefix of hook ids (`<name>_<hook>`)
	-- doc:  document the job belongs to (consulted for extension policy)
	-- pred: optional filter; only extensions it accepts get a state
	-- ...:  forwarded to the job's `init` hook
	construct = function(me,name,doc,pred,...)
		-- prepare contexts for relevant extensions
		me.name = name
		me.doc = doc -- for reqs + limiting
		for _, ext in pairs(ct.ext.loaded) do
			if pred == nil or pred(ext) then
				me.states[ext] = {}
			end
		end
		me:hook('init', ...)
	end;
	fns = {
		fork = function(me, name, pred, ...)
			-- generate a branch job linked to this job
			local branch = getmetatable(me)(name, me.doc, pred, ...)
			branch.parent = me
			return branch
		end;
		delegate = function(me, ext) -- creates a delegate for state access
			-- a delegate behaves like the job itself, except that `state`
			-- reads and writes the private state of one extension
			local submethods = {
				-- returns a delegate for the same extension on the job
				-- `n` levels up the parent chain
				unwind = function(self, n)
					local function
					climb(dlg, job, n)
						if n == 0 then
							return job:delegate(dlg.extension)
						else
							return climb(dlg, job.parent, n-1)
						end
					end

					return climb(self._delegate_state, self._delegate_state.target, n)
				end;
			}
			local d = setmetatable({
				_delegate_state = {
					-- delegating from a delegate targets the underlying job
					target = (me._delegate_state and me._delegate_state.target) or me;
					extension = ext;
				};
			}, {
				__name = 'job:delegate';
				__index = function(self, key)
					local D = self._delegate_state
					if key == 'state' then
						return D.target.states[self._delegate_state.extension]
					elseif submethods[key] then
						return submethods[key]
					end
					return D.target[key]
				end;
				__newindex = function(self, key, value)
					local D = self._delegate_state
					if key == 'state' then
						D.target.states[D.extension] = value
					else
						-- mirror __index: any other field belongs to the job
						-- and is stored under the key that was assigned
						D.target[key] = value
					end
				end;
			});
			return d;
		end;
		-- iterator over the job's extensions that the document allows and
		-- that have a truthy value at the given path; yields the value,
		-- the extension and the extension's state
		each = function(me, ...)
			local ek
			local path = {...}
			return function()
				while true do
					ek = next(me.states, ek)
					if not ek then return nil end
					if me.doc:allow_ext(ek.id) then
						local v = ss.walk(ek, table.unpack(path))
						if v then
							return v, ek, me.states[ek]
						end
					end
				end
			end
		end;
		-- looks up a procedure that at most one extension may provide.
		-- returns nil if nobody provides it, the raw value if it is not a
		-- function, and otherwise a wrapper that calls it with a delegate
		-- as its first argument (plus the owning extension and its state).
		-- throws if two extensions define the same procedure
		proc = function(me, ...)
			local p
			local owner
			local state
			for func, ext, s in me:each(...) do
				if p == nil then
					p = func
					owner = ext
					state = s
				else
					ct.exns.ext('extensions %s and %s define conflicting procedures for %s', owner.id, ext.id, table.concat({...},'.')):throw()
				end
			end
			if p == nil then return nil end
			if type(p) ~= 'function' then return p end
			return function(...)
				return p(me:delegate(owner), ...)
			end, owner, state
		end;
		hook = function(me, hook, ...)
			-- used when extensions may need to persist state across
			-- multiple functions or invocations
			local ret = {}
			local hook_id = me.name ..'_'.. hook
			for hookfn, ext, state in me:each('hook', hook_id) do
				table.insert(ret, (hookfn(me:delegate(ext),...)))
			end
			return ret
		end;
	};
}

-- common renderer utility functions
ct.tool = {}

-- splits an equation into runs of same-class characters and wraps each
-- run in a format span: numerals become 'literal', letters 'variable',
-- math operators and symbols 'strong'. characters of no recognized class
-- are emitted as plain strings; whitespace is dropped
function ct.tool.mathfmt(ctx, eqn)
	local m = ss.enum {'num','var','op'}
	local spans = {}
	local pending = '' -- text of the run being collected
	local class = 0    -- class of that run; 0 means unclassified

	local function styled(style)
		return {
			kind = 'format';
			style = style;
			spans = {pending};
		}
	end

	-- emit the pending run (if any) and start over
	local function flush()
		if pending ~= '' then
			local o
			if     class == 0     then o = pending
			elseif class == m.num then o = styled 'literal'
			elseif class == m.var then o = styled 'variable'
			elseif class == m.op  then o = styled 'strong'
			end
			if o then
				table.insert(spans, o)
			end
		end
		pending, class = '', 0
	end

	for ch in ss.str.each(ctx.doc.enc, eqn) do
		local cl = ss.str.classify(ctx.doc.enc, ch)
		if not cl.space then
			local nc = 0
			if cl.numeral then
				nc = m.num
			elseif cl.mathop or cl.symbol then
				nc = m.op
			elseif cl.letter then
				nc = m.var
			end
			-- a change of class ends the current run
			if nc ~= class then
				flush()
				class = nc
			end
			pending = pending .. ch
		end
	end
	flush()
	return spans
end

function ct.tool.namespace()
-- some renderers need to be able to generate unique IDs for
-- objects, including ones that users have not assigned IDs
-- to, and objects with the same name in different unlabeled
-- sections. to handle this, we provide a "namespace" mechanism,
-- where some lua table (really its address in memory) is used
-- as a handle for the object and a unique ID is attached to it.
-- if the object has an ID of its own, the (prefixed) ID is used
-- when it is still free and given a `-%x` suffix otherwise; an
-- object without an ID gets a generic id of the form `x-%x`,
-- where %x is an integer that increments for every new object.
-- the same object always maps to the same ID
	local ids = {}         -- id string -> object that owns it
	local canonicalID = {} -- object -> id string assigned to it
	return function(obj,pfx)
		pfx = pfx or ''
		if canonicalID[obj] then
			return canonicalID[obj]
		end
		local cid
		if obj.id then
			-- candidate ids are always registered under the same
			-- prefixed key that the collision check looks at; without
			-- that, two objects sharing an id and a non-empty prefix
			-- would both be handed the same id
			local base = pfx .. obj.id
			cid = base
			local i = 1
			while ids[cid] do
				cid = base .. string.format('-%x', i)
				i = i + 1
			end
		else
			local i = 1
			repeat cid = string.format('%sx-%x', pfx, i)
				i = i + 1 until not ids[cid]
		end
		ids[cid] = obj
		canonicalID[obj] = cid
		return cid
	end
end

-- renderer engines

do -- define span control sequences
	-- every parser below receives the text following the control sequence
	-- (`s`) and the parse context (`c`) and returns a span node

	-- parser for spans that merely apply a style to their contents
	local function formatter(sty)
		return function(s,c)
			return {
				kind = 'format';
				style = sty;
				spans = ct.parse_span(s, c);
				origin = c:clone();
			}
		end
	end
	-- `[>target optional label]`
	local function insert_link(s, c)
		local to, t = s:match '^([^%s]+)%s*(.-)$'
		if not to then c:fail('invalid link syntax >%s', s) end
		if t == "" then t = nil end
		return {
			kind = 'link';
			spans = t and ct.parse_span(t, c) or {};
			ref = to;
			origin = c:clone();
		}
	end
	-- `[#name]` / `[##name]`: a numeric name is a positional reference
	-- (`pos`), anything else a named variable (`var`)
	local function insert_var_ref(raw)
		return function(s, c)
			local pos = tonumber(s)
			return {
				kind = 'var';
				pos = pos;
				raw = raw;
				var = not pos and s or nil;
				origin = c:clone();
			}
		end
	end
	-- `[%ext<sym>cmd args]`: the first word is split into an extension
	-- name and a command at the first run of break symbols
	local function insert_span_directive(crit, failthru)
		return function(s,c)
			local enc = c.doc.enc
			local args = ss.str.breakwords(enc, s, 1)
			-- set of break symbols, converted with the encoding's encodeUCS
			-- (presumably into the document encoding — TODO confirm)
			local brkhash = {}
			for _, brk in ipairs {
				'.', ',', ':', ';', '!', '$', '&', '^',
				'/', '?', '@', '='
			} do
				brkhash[enc.encodeUCS(brk)] = true
			end

			local extname = ''
			local sym
			local cmd = ''
			for ch,p in ss.str.each(enc, args[1]) do
				if sym == nil then
					if brkhash[ch] then
						sym = ch
					else
						extname = extname .. ch
					end
				elseif brkhash[ch] then
					sym = sym .. ch
				else
					cmd = cmd .. ch
				end
			end
			if cmd == '' then cmd = nil end
			local spans if failthru then
				spans = ct.parse_span(args[2], c)
			end
			return {
				kind = 'directive';
				ext = extname;
				cmd = cmd;
				args = args;
				crit = crit;
				failthru = failthru;
				spans = spans;
				origin = c:clone();
			}
		end
	end
	-- NOTE: entries are tried in order and the first whose `seq` prefixes
	-- the span wins, so longer sequences must come before their prefixes
	ct.spanctls = {
		{seq = '!', parse = formatter 'emph'};
		{seq = '*', parse = formatter 'strong'};
		{seq = '~', parse = formatter 'strike'};
		{seq = '+', parse = formatter 'insert'};
		{seq = '\\', parse = function(s, c) -- raw
			return {
				kind = 'raw';
				spans = {s};
				origin = c:clone();
			}
		end};
		{seq = '`\\', parse = function(s, c) -- raw
			local o = c:clone();
			local str = ''
			for ch, p in ss.str.each(c.doc.enc, s) do
				local q = p:esc()
				if q then
					str = str ..  q
					p.next.byte = p.next.byte + #q
				else
					str = str .. ch
				end
			end
			return {
				kind = 'format';
				style = 'literal';
				spans = {{
					kind = 'raw';
					spans = {str};
					origin = o;
				}};
				origin = o;
			}
		end};
		{seq = '`', parse = formatter 'literal'};
		{seq = '$', parse = formatter 'variable'};
		{seq = '^', parse = function(s,c) --footnotes
			local r, t = s:match '^([^%s]+)%s*(.-)$'
			return {
				kind = 'footnote';
				ref = r;
				-- the text is optional; don't hand nil to the span parser
				spans = (t and t ~= '') and ct.parse_span(t, c) or {};
				origin = c:clone();
			}
		-- TODO support for footnote sections
		end};
		{seq = '=', parse = function(s,c) --math mode
			local tx = {
				['%*'] = '×';
				['/'] = '÷';
			}
			for k,v in pairs(tx) do s = s:gsub(k,v) end
			-- render `^123` as superscript digits
			s=s:gsub('%^([0-9]+)', function(num)
				local sup = {'⁰','¹','²','³','⁴','⁵','⁶','⁷','⁸','⁹'};
				local r = ''
				for i=1,#num do
					r = r .. sup[1 + (num:byte(i) - 0x30)]
				end
				return r
			end)
			--TODO structured math spans
			return {
				kind = 'math';
				original = s;
				spans = {s};
				origin = c:clone();
			};
		end};
		{seq = '&', parse = function(s, c)
			local r, t = s:match '^([^%s]+)%s*(.-)$'
			return {
				kind = 'deref';
				spans = (t and t ~= "") and ct.parse_span(t, c) or {};
				ref = r;
				origin = c:clone();
			}
		end};
		{seq = '>', parse = insert_link};
		{seq = '→', parse = insert_link};
		{seq = '🔗', parse = insert_link};
		{seq = '##', parse = insert_var_ref(true)};
		{seq = '#', parse = insert_var_ref(false)};
		{seq = '%%', parse = function() --[[NOP]] end};
		{seq = '%!', parse = insert_span_directive(true,false)};
		{seq = '%:', parse = insert_span_directive(false,true)};
		{seq = '%', parse = insert_span_directive(false,false)};
	}
end

-- parses inline markup into a list of spans. plain text is returned as
-- strings; escapes, `{macro args}` invocations, `[control sequences]`
-- and line breaks become span nodes
function ct.parse_span(str,ctx)
	-- wraps ss.str.delimit so that its errors are reported as
	-- translation errors at the current position
	local function delimited(start, stop, s)
		local r = { pcall(ss.str.delimit, nil, start, stop, s) }
		if r[1] then return table.unpack(r, 2) end
		ctx:fail(tostring(r[2]))
	end
	local buf = ""
	local spans = {}
	-- emit the plain text collected so far
	local function flush()
		if buf ~= "" then
	-- 			for fn, ext in ctx.doc.docjob:each('hook','doc_meddle_string') do
	-- 				buf = fn(ctx.doc.docjob:delegate(ext), ctx, buf)
	-- 			end
			table.insert(spans, buf)
			buf = ""
		end
	end
	for c,p in ss.str.each(ctx.doc.enc,str) do
		local ba, ca, es = ctx.doc.enc.parse_escape(str:sub(p.byte))
		if es then
			flush()
			table.insert(spans, {
				kind = 'raw';
				spans = {es};
				origin = ctx:clone()
			})
			-- skip over the rest of the escape sequence
			p.next.byte = p.next.byte + ba;
			p.next.code = p.next.code + ca;
		elseif c == '{' then
			flush()
			local substr, following = delimited('{','}',str:sub(p.byte))
			-- the macro name is everything up to the first whitespace
			local splitstart, splitstop = substr:find'%s+'
			local id, argstr
			if splitstart then
				id, argstr = substr:sub(1,splitstart-1), substr:sub(splitstop+1)
			else
				id, argstr = substr, ''
			end
			local o = {
				kind = 'macro';
				macro = id;
				args = {};
				origin = ctx:clone();
			}

			-- arguments are separated by `|`; `\|` is a literal pipe
			do local start = 1
				local i = 1
				while i <= #argstr do
					while i<=#argstr and (argstr:sub(i,i) ~= '|' or argstr:sub(i-1,i) == '\\|') do
						i = i + 1
					end
					-- i now sits on the separator (or one past the end of
					-- the string), so the argument always ends at i-1
					local arg = argstr:sub(start, i-1)
					start = i+1
					arg=arg:gsub('\\|','|')
					table.insert(o.args, arg)
					i = i + 1
				end
			end

			p.next.byte = p.next.byte + following - 1
			table.insert(spans,o)
		elseif c == '[' then
			flush()
			local substr, following = delimited('[',']',str:sub(p.byte))
			p.next.byte = following + p.byte
			local found = false
			-- the first control sequence prefixing the span handles it
			for _,i in pairs(ct.spanctls) do
				if ss.str.begins(substr, i.seq) then
					found = true
					table.insert(spans, i.parse(substr:sub(1+#i.seq), ctx))
					break
				end
			end
			if not found then
				ctx:fail('no recognized control sequence in [%s]', substr)
			end
		elseif c == '\n' then
			flush()
			table.insert(spans,{kind='line-break',origin=ctx:clone()})
		else
			buf = buf .. c
		end
	end
	flush()
	return spans
end

-- turns a function that merely *builds* a block into a complete line
-- handler: the block is stamped with its origin, appended to the
-- destination list and announced through the block_insert hook (and,
-- when it carries spans, the document job's meddle_span hook)
local function blockwrap(fn)
	local function handler(line, ctx, job, dest)
		local blk = fn(line, ctx, job, dest)
		blk.origin = ctx:clone()
		table.insert(dest, blk)
		job:hook('block_insert', ctx, blk, line)
		if blk.spans then
			ctx.doc.docjob:hook('meddle_span', blk.spans, blk)
		end
	end
	return handler
end

-- paragraph handler; a leading `.` is only a marker and is stripped
local insert_paragraph = blockwrap(function(l,c)
	local text = l
	if text:sub(1,1) == '.' then
		text = text:sub(2)
	end
	local para = { kind = "paragraph" }
	para.spans = ct.parse_span(text, c)
	return para
end)

-- section header handler: `#id title` / `§id title`. the number of
-- marker characters gives the depth; id and title are both optional
local insert_section = function(l,c,j)
	local marks, id, title = l:match '^([#§]+)([^%s]*)%s*(.-)$'
	if not id or id == "" then
		id = nil
	elseif c.doc.sections[id] then
		c:fail('duplicate section name “%s”', id)
	end

	local level = utf8.len(marks)
	local sec = c.doc:mksec(id, level)
	sec.depth = level
	sec.origin = c:clone()
	sec.blocks = {}

	if title and title ~= "" then
		-- the title becomes a label block that heads the section
		local label = {
			kind = "label";
			spans = ct.parse_span(title, c);
			origin = sec.origin;
			captions = sec;
		}
		c.doc.docjob:hook('meddle_span', label.spans, label)
		table.insert(sec.blocks, label)
		sec.heading_node = label
	end

	-- everything parsed from here on lands in the new section
	c.sec = sec
	j:hook('section_attach', c, sec)
end

-- directive helpers. `w(n)` returns the first n words of the directive
-- line followed by the remainder of the line

-- `%key value`: store the value in the document metadata
local dsetmeta = function(w,c,j)
	local name, value = w(1)
	c.doc.meta[name] = value
	j:hook('metadata_set', name, value)
end
-- `%uses|needs|inhibits ext...`: record the extension policy
local dextctl = function(w,c)
	local mode, exts = w(1)
	local slot = ({
		uses = 'use';
		needs = 'need';
		inhibits = 'inhibit';
	})[mode]
	for e in exts:gmatch '([^%s]+)' do
		if slot then
			c.doc.ext[slot][e] = true
		end
	end
end
-- `%when` / `%unless`: only the mode is looked at for now
local dcond = function(w,c)
	local mode = w(2)
	c.hide_next = (mode == 'unless')
end;
-- built-in block directives, keyed by the first word of the directive
-- line. handlers are called with the word splitter, the parse context
-- and the parse job
ct.directives = {
	author = dsetmeta;
	license = dsetmeta;
	keywords = dsetmeta;
	desc = dsetmeta;
	when = dcond;
	unless = dcond;
	-- extension policy; dextctl dispatches on these three names, so they
	-- need to be registered for it to ever run
	uses = dextctl;
	needs = dextctl;
	inhibits = dextctl;
	pragma = function(w,c)
	end;
	-- `%lang is|push|pop|sec <lang>`: maintain the language stack
	lang = function(w,c)
		local _, op, l = w(2)
		local langstack = c.doc.stage.langstack
		if op == 'is' then
			-- replace the top of the stack (or fill an empty stack)
			langstack[math.max(1, #langstack)] = l
		elseif op == 'push' then
			table.insert(langstack, l)
		elseif op == 'pop' then
			if next(langstack) then
				langstack[#langstack] = nil
			end
		elseif op == 'sec' then
			c.sec.lang = l
		else c:fail('bad language directive “%s”', op) end
		c.lang = langstack[#langstack]
	end;
	-- `%expand [off]`: request span expansion for the next listing
	expand = function(w,c)
		local _, m = w(1)
		if m ~= 'off' then
			c.doc.stage.expand_next = 1
		else
			c.doc.stage.expand_next = 0
		end
	end;
}

-- table row handler. cells are introduced by `|` (ordinary) or `+`
-- (header); a `:` directly after the delimiter and/or directly before
-- the next one sets the alignment; `\|`, `\+` and `\:` are literals.
-- the row is appended to the table that ends the current section, or
-- starts a new table if there is none.
-- NOTE(review): rows introduced with `│` reach this function, but only
-- `|` and `+` are recognized as delimiters below — confirm intent
local function insert_table_row(l,c,j)
	local row = {}
	local buf
	-- finish the current cell (if any) and start a new one
	local flush = function()
		if buf then
			buf.str = buf.str:gsub('%s+$','')
			table.insert(row, buf)
		end
		buf = { str = '' }
	end
	for ch,p in ss.str.each(c.doc.enc,l) do
		if ch == '|' or ch == '+' and (p.code == 1 or l:sub(p.byte-1,p.byte-1)~='\\') then
			flush()
			buf.header = ch == '+'
		elseif ch == ':' then
			local lst = l:sub(p.byte-#ch,p.byte-#ch)
			local nxt = l:sub(p.next.byte,p.next.byte)
			if lst == '|' or lst == '+' and l:sub(p.byte-2,p.byte-2) ~= '\\' then
				buf.align = 'left'
			elseif nxt == '|' or nxt == '+' then
				-- a colon on both sides of the cell centers it
				if buf.align == 'left' then
					buf.align = 'center'
				else
					buf.align = 'right'
				end
			else
				buf.str = buf.str .. ch
			end
		elseif ch:match '%s' then
			-- leading whitespace is dropped
			if buf.str ~= '' then buf.str = buf.str .. ch end
		elseif ch == '\\' then
			local nxt = l:sub(p.next.byte,p.next.byte)
			if nxt == '|' or nxt == '+' or nxt == ':' then
				buf.str = buf.str .. nxt
				p.next.byte = p.next.byte + #nxt
				p.next.code = p.next.code + 1
			else
				buf.str = buf.str .. ch
			end
		else
			buf.str = buf.str .. ch
		end
	end
	if buf.str ~= '' then flush() end 
	for _,v in pairs(row) do
		v.spans = ct.parse_span(v.str, c)
		c.doc.docjob:hook('meddle_span', v.spans, v)
	end
	local blocks = c.sec.blocks
	-- continue the preceding table, even when it is the only block so far
	if #blocks > 0 and blocks[#blocks].kind == 'table' then
		local tbl = blocks[#blocks]
		table.insert(tbl.rows, row)
		j:hook('block_table_attach', c, tbl, row, l)
		j:hook('block_table_row_insert', c, tbl, row, l)
	else
		local tbl = {
			kind = 'table';
			rows = {row};
			origin = c:clone();
		}
		table.insert(blocks, tbl)
		j:hook('block_table_insert', c, tbl, l)
		j:hook('block_table_row_insert', c, tbl, tbl.rows[1], l)
	end
end

-- block-level control sequences. entries are tried in order; an entry
-- applies when the line begins with its `seq` (if it has one) and its
-- `pred` (if it has one) accepts the line. handlers are called as
-- fn(line, context, job, destination block list)
ct.ctlseqs = {
	{seq = '.', fn = insert_paragraph};
	{seq = '¶', fn = insert_paragraph};
	{seq = '❡', fn = insert_paragraph};
	{seq = '#', fn = insert_section};
	{seq = '§', fn = insert_section};
	{seq = '+', fn = insert_table_row};
	{seq = '|', fn = insert_table_row};
	{seq = '│', fn = insert_table_row};
	{seq = '!', fn = function(l,c,j,d) -- aside
		-- consecutive `!` lines accumulate into a single aside block
		local last = d[#d]
		local txt = l:match '^%s*!%s*(.-)$'
		if (not last) or last.kind ~= 'aside' then
			local aside = {
				kind = 'aside';
				lines = { ct.parse_span(txt, c) };
				origin = c:clone();
			}
			c.doc.docjob:hook('meddle_span', aside.lines[1], aside)
			table.insert(d,aside)
			j:hook('block_aside_insert', c, aside, l)
			j:hook('block_aside_line_insert', c, aside, aside.lines[1], l)
			j:hook('block_insert', c, aside, l)
		else
			local sp = ct.parse_span(txt, c)
			c.doc.docjob:hook('meddle_span', sp, last)
			table.insert(last.lines, sp)
			j:hook('block_aside_attach', c, last, sp, l)
			j:hook('block_aside_line_insert', c, last, sp, l)
		end
	end};
	{pred = function(s,c) return s:match'^[*:]' end, fn = blockwrap(function(l,c) -- list
		-- the number of markers is the nesting depth; the last marker
		-- decides whether the item is ordered (`:`) or not (`*`)
		local stars = l:match '^([*:]+)'
		local depth = utf8.len(stars)
		local id, txt = l:sub(#stars+1):match '^(.-)%s*(.-)$'
		local ordered = stars:sub(#stars) == ':'
		if id == '' then id = nil end
		return {
			kind = 'list-item';
			depth = depth;
			ordered = ordered;
			spans = ct.parse_span(txt, c);
		}
	end)};
	{seq = '\t\t', fn = function(l,c,j,d) -- reference continuation
		local last = d[#d]
		if (not last) or (last.kind ~= 'reference') then
			c:fail('reference continuations must immediately follow a reference')
		end
		local str = l:match '^\t\t(.-)%s*$'
		last.val = last.val .. '\n' .. str
		-- write the extended value back to wherever the reference was
		-- stored: the owning resource's properties, or the section refs
		if last.rsrc then
			last.rsrc.props[last.key] = last.val
		else
			c.sec.refs[last.key] = last.val
		end
	end};
	{seq = '\t', fn = blockwrap(function(l,c,j,d) -- reference / resource property
		local ref, val = l:match '\t+([^:]+):%s*(.*)$'
		if not ref then
			-- report a translation error rather than indexing with nil
			c:fail('invalid reference syntax')
		end
		local last = d[#d]
		local rsrc
		-- a reference that directly follows a resource (or one of its
		-- properties) is a property of that resource
		if last and last.kind == 'resource' then
			last.props[ref] = val
			rsrc = last
		elseif last and last.kind == 'reference' and last.rsrc then
			last.rsrc.props[ref] = val
			rsrc = last.rsrc
		else
			c.sec.refs[ref] = val
		end
		j:hook('section_ref_attach', c, ref, val, l)
		return {
			kind = 'reference';
			rsrc = rsrc;
			key = ref;
			val = val;
		}
	end)};
	{seq = '%', fn = function(l,c,j,d) -- directive
		local crit, cmdline = l:match '^%%([!%%]?)%s*(.*)$'
		-- words(i) returns the first i words of the directive followed by
		-- the rest of the line; words(0) returns the whole line. nothing
		-- is returned if the line has fewer than i words
		local words = function(i)
			local wds = {}
			if i == 0 then return cmdline end
			for w,pos in cmdline:gmatch '([^%s]+)()' do
				table.insert(wds, w)
				i = i - 1
				if i == 0 then
					table.insert(wds,(cmdline:sub(pos):match('^%s*(.-)%s*$')))
					return table.unpack(wds)
				end
			end
		end

		local cmd = words(1)
		if ct.directives[cmd] then
			ct.directives[cmd](words,c,j)
		elseif cmd == c.doc.stage.mode['render:format'] then
			-- this is a directive for the renderer; insert it into the tree as is
			local dir = {
				kind = 'directive';
				critical = crit == '!';
				words = words;
				-- snapshot the context; the live one keeps moving as
				-- parsing proceeds
				origin = c:clone();
			}
			table.insert(d, dir)
			j:hook('block_directive_render', j, c, dir)
		elseif c.doc:allow_ext(cmd) then -- extension directives begin with their id
			local ext = ct.ext.loaded[cmd]
			if ext.directives then
				local _, topcmd = words(2)
				if ext.directives[topcmd] then
					ext.directives[topcmd](j:delegate(ext), c, words)
				elseif ext.directives[true] then -- catch-all
					ext.directives[true](j:delegate(ext), c, words)
				elseif crit == '!' then
					c:fail('extension %s does not support critical directive %s', cmd, topcmd)
				end
			end
		elseif crit == '!' then
			c:fail('critical directive %s not supported',cmd)
		end
	end;};
	{seq = '~~~', fn = blockwrap(function(l,c,j) -- code listing
		-- cut the first match of ptn out of str; returns the match (or
		-- nil) and what is left of the string
		local extract = function(ptn, str)
			local start, stop = str:find(ptn)
			if not start then return nil, str end
			local ex = str:sub(start,stop)
			local n = str:sub(1,start-1) .. str:sub(stop+1)
			return ex, n
		end
		local lang, id, title
		if l:match '^~~~%s*$' then -- no args
		elseif l:match '^~~~.*~~~%s*$' then -- CT style
			local s = l:match '^~~~%s*(.-)%s*~~~%s*$'
			lang, s = extract('%b[]', s)
			if lang then lang = lang:sub(2,-2) end
			id, title = extract('#[^%s]+', s)
			if id then id = id:sub(2) end
		elseif l:match '^~~~' then -- MD shorthand style
			lang = l:match '^~~~%s*(.-)%s*$'
		end
		local mode = {
			kind = 'code';
			listing = {
				kind = 'listing';
				lang = lang, id = id, title = title and ct.parse_span(title,c);
				lines = {};
			}
		}
		if c.doc.stage.expand_next and c.doc.stage.expand_next > 0 then
			c.doc.stage.expand_next = c.doc.stage.expand_next - 1
			mode.expand = true
		end
		j:hook('mode_switch', c, mode)
		c.mode = mode
		if id then
			if c.sec.refs[id] then c:fail('duplicate ID %s', id) end
			c.sec.refs[id] = c.mode.listing
		end
		-- blockwrap inserts the listing and fires block_insert for it,
		-- so the hook must not be fired a second time from here
		return c.mode.listing;
	end)};
	{pred = function(s,c) -- horizontal rule
		if s:match '^[%-_][*_%-%s]+' then return true end
		if startswith(s, '—') then
			-- a rule starting with an em dash must consist solely of
			-- rule characters and whitespace
			for c, p in ss.str.each(c.doc.enc,s) do
				if ({
					['—'] = true, ['-'] = true, [' '] = true;
					['*'] = true, ['_'] = true, ['\t'] = true;
				})[c] ~= true then return false end
			end
			return true
		end
	end; fn = blockwrap(function()
		return { kind = 'horiz-rule' }
	end)};
	{seq='@', fn=blockwrap(function(s,c) -- resource
		local id = s:match '^@%s*(.-)%s*$'
		local rsrc = {
			kind = 'resource';
			props = {};
			id = id;
		}
		if c.sec.refs[id] then
			c:fail('an object with id “%s” already exists in that section',id)
		else
			c.sec.refs[id] = rsrc
		end
		return rsrc
	end)};
	{fn = insert_paragraph};
}

-- parses one source line into `dest` (a list of blocks).
--   l:    the line to parse, or nil for a blank line
--   ctx:  parse context
--   dest: block list that receives whatever the line produces
function ct.parse_line(l, ctx, dest)
	local job = ctx.doc.stage.job
	job:hook('line_read',ctx,l)
	if ctx.mode then
		-- a syntax mode is active and takes over line handling
		if ctx.mode.kind == 'code' then
			if l and l:match '^~~~%s*$' then
				job:hook('block_listing_end',ctx,ctx.mode.listing)
				job:hook('mode_switch', ctx, nil)
				ctx.mode = nil
			else
				-- TODO handle formatted code
				local newline
				if ctx.mode.expand
					-- blank lines arrive as nil; parse them as empty text
					then newline = ct.parse_span(l or '', ctx)
					else newline = {l}
				end
				table.insert(ctx.mode.listing.lines, newline)
				job:hook('block_listing_newline',ctx,ctx.mode.listing,newline)
			end
		else
			-- any other mode has to be implemented by an extension
			local mf = job:proc('modes', ctx.mode.kind)
			if not mf then
				ctx:fail('unimplemented syntax mode %s', ctx.mode.kind)
			end
			mf(job, ctx, l, dest) --NOTE: you are responsible for triggering the appropriate hooks if you insert anything!
		end
	else
		if l then
			-- run the first entry of `seqs` that applies to the line;
			-- returns whether one was found
			local function tryseqs(seqs, ...)
				for _, i in pairs(seqs) do
					if ((not i.seq ) or startswith(l, i.seq)) and
					   ((not i.pred) or i.pred    (l, ctx  )) then
						i.fn(l, ctx, job, dest, ...)
						return true
					end
				end
				return false
			end

			if not tryseqs(ct.ctlseqs) then
				local found = false

				-- fall back to block syntaxes supplied by extensions
				for eb, ext, state in job:each('blocks') do
					if tryseqs(eb, state) then found = true break end
				end

				if not found then
					ctx:fail 'incomprehensible input line'
				end
			end
		else
			-- a blank line becomes a break block, but consecutive blank
			-- lines (or one at the very start) don't pile up
			if next(dest) and dest[#dest].kind ~= 'break' then
				local brk = {kind='break', origin = ctx:clone()}
				job:hook('block_break', ctx, brk, l)
				table.insert(dest, brk)
			end
		end
	end
	job:hook('line_end',ctx,l)
end

-- parses a whole document.
--   file:  object with a lines() method yielding the source lines
--   src:   source descriptor; stored on the context and the document
--   mode:  table of mode settings ('meta:lang', 'parse:enc', ...)
--   setup: optional callback invoked with the context before parsing
-- returns the finished document
function ct.parse(file, src, mode, setup)

	local ctx = ct.ctx.mk(src)
	ctx.line = 0
	ctx.doc = ct.doc.mk()
	ctx.doc.src = src
	ctx.sec = ctx.doc:mksec() -- toplevel section
	ctx.sec.origin = ctx:clone()
	ctx.lang = mode['meta:lang']
	if mode['parse:enc'] then
		local e = ss.str.enc[mode['parse:enc']]
		if not e then
			ct.exns.enc('requested encoding not supported',mode['parse:enc']):throw()
		end
		ctx.doc.enc = e
	end

	-- create states for extension hooks
	local job = ctx.doc:job('parse',nil,ctx)
	ctx.doc.stage = {
		kind = 'parse';
		mode = mode;
		job = job;
		langstack = {ctx.lang};
		fontstack = {};
	}

	local function
	is_whitespace(cp)
		return ctx.doc.enc.iswhitespace(cp)
	end

	if setup then setup(ctx) end


	for full_line in file:lines() do ctx.line = ctx.line + 1
		-- strip leading whitespace; a line consisting of nothing but
		-- whitespace is handed on as nil
		local l
		for p, c in utf8.codes(full_line) do
			if not is_whitespace(c) then
				l = full_line:sub(p)
				break
			end
		end
		ct.parse_line(l, ctx, ctx.sec.blocks)
	end

	-- post-process resources: break their `src` property down into a
	-- list of sources, one per line, each `mode mimetype uri`
	local mimeclasses = {
		['application/svg+xml'] = 'image';
	}
	for _, sec in ipairs(ctx.doc.secorder) do
		-- refs are keyed by id, so this has to be a pairs() traversal
		for refid, r in pairs(sec.refs) do
			if type(r) == 'table' and r.kind == 'resource' and r.props.src then
				local lines = ss.str.breaklines(ctx.doc.enc, r.props.src)
				local srcs = {}
				for _, srcline in ipairs(lines) do
					local args = ss.str.breakwords(ctx.doc.enc, srcline, 2, {escape=true})
					if #args < 3 then
						r.origin:fail('invalid syntax for resource %s', refid)
					end
					-- mime type split at the first `/`
					local mime = ss.str.split(ctx.doc.enc, args[2], '/', 1, {escape=true})
					-- class overrides are keyed by the full type string;
					-- everything else is classified by its major type
					local class = mimeclasses[args[2]]
					table.insert(srcs, {
						mode = args[1];
						mime = mime;
						uri = args[3];
						class = class or mime[1];
					})
				end
				 --ideally move this into its own mimetype lib
				local kind = r.props.as or (srcs[1] and srcs[1].class)
				r.class = kind
				r.srcs = srcs
			end
		end
	end
	ctx.doc.stage = nil
	ctx.doc.docjob:hook('meddle_ast')
	return ctx.doc
end