cortav  sirsem.lua at [e51980e07a]

File sirsem.lua artifact dc1f0ae1fb part of check-in e51980e07a


-- [ʞ] sirsem.lua
--  ~ lexu hale <lexi@hale.su>
--  ? utility library with functionality common to
--    cortav.lua and its extensions
--    from Ranuir "software utility"
--  > local ss = require 'sirsem.lua'

local ss
do -- pull ourselves up by our own bootstraps
	local package = _G.package -- prevent namespace from being broken by env shenanigans
	local function namespace(name, tbl)
		local pkg = tbl or {}
		if package then
			package.loaded[name] = pkg
		end
		return pkg
	end
	ss = namespace 'sirsem'
	ss.namespace = namespace
end

function ss.map(fn, lst)
	local new = {}
	for k,v in pairs(lst) do
		table.insert(new, fn(v,k))
	end
	return new
end
function ss.reduce(fn, acc, lst)
	for i,v in ipairs(lst) do
		acc = fn(acc, v, i)
	end
	return acc
end
function ss.filter(list, fn)
	local new = {}
	for i, v in ipairs(list) do
		if fn(v,i) then table.insert(new, v) end
	end
	return new
end
function ss.kmap(fn, list)
	local new = {}
	for k, v in pairs(list) do
		local nk,nv = fn(k,v)
		new[nk or k] = nv or v
	end
	return new
end

function ss.kfilter(list, fn)
	local new = {}
	for k, v in pairs(list) do
		if fn(k,v) then new[k] = v end
	end
	return new
end

function ss.find(tbl, pred, ...)
	pred = pred or function(a,b)
		return a == b
	end
	for k,v in pairs(tbl) do
		if pred(v,k,...) then return k,v end
	end
	return nil
end

function ss.clone(tbl) -- performs a shallow copy
	if tbl.clone then return tbl:clone() end
	local new = {}
	for k,v in pairs(tbl) do
		new[k] = v
	end
	return new
end

function ss.copy(tbl) -- performs a deep copy
	local new = {}
	for k,v in pairs(tbl) do
		if type(v) == 'table' then
			if v.clone then
				new[k] = v:clone() -- this may or may not do a deep copy!
			else
				new[k] = ss.copy(v)
			end
		else
			new[k] = v
		end
	end
	return new
end

function ss.push(tbl, ...)
	local idx = #tbl + 1
	local function rec(v, ...)
		tbl[idx] = v
		idx = idx + 1
		if ss.tuple.any(...) then rec(...) end
	end
	rec(...)
	return tbl
end

function ss.delegate(tbl,tpl) -- returns a table that looks up keys it lacks from
                              -- tbl (lightweight alternative to shallow copies)
	tpl = tpl or {}
	return setmetatable({}, {__index=tbl})
end

ss.str = {}

function ss.str.begins(str, pfx)
	-- appallingly, this is actually ~2/5ths faster than either
	-- of the below. i hate scripting languages so much
	return string.find(str, pfx, 1, true) == 1
	-- to my shock, disgust, and horror, even writing my own
	-- string scanning library for lua IN C only sped this up by
	-- a tiny fraction. i am just speechless.
-- 	return string.sub(str, 1, #pfx) == pfx

-- 	local pl = string.len(pfx)
-- 	local sl = string.len(str)
-- 	if sl < pl then return false end
-- 	for i=1,pl do
-- 		if string.byte(str,i) ~= string.byte(pfx,i) then
-- 			return false
-- 		end
-- 	end
-- 	return true
end

function ss.enum(syms)
	local e = {}
	for i,v in pairs(syms) do
		e[v] = i
		e[i] = v
	end
	return e
end

function ss.bitmask_bytes(n,ofs)
	ofs = ofs or 0
	local function rec(i)
		if i > n then return end
		return 1<<(i+ofs), rec(i+1)
	end
	return 1<<ofs, rec(1)
end

function ss.bitmask(tbl,ofs)
	local codes = {ss.bitmask_bytes(#tbl,ofs)}
	local m = {}
	local maxbit
	for i, s in ipairs(tbl) do
		m[s] = codes[i]
		m[codes[i]] = s
		maxbit = i
	end
	m[true] = {ofs or 0,maxbit}
	return m
end

ss.str.charclass = ss.enum {
	'numeral'; 'letter'; 'symbol'; 'punct';
	'space'; 'ctl'; 'glyph'; -- hanji
}
ss.str.charprop = ss.bitmask({
	'hexnumeral', -- character that can be used to write hexadecimal notation
	'upper', 'lower';
	'diac'; -- diacritic/modifier letter
	'wordbreak'; -- char causes following characters to be treated as a separate word (e.g. punctuation)
	'wordsep'; -- char causes previous and following characters to be treated as separate words; char constitutes a word of its own in between (e.g. interpunct)
	'breakokay'; -- is it okay to break words at this character? (eg hyphen)
	'mathop'; -- char is a mathematical operator
	'disallow', -- char is not allowed in narrative text
	'brack', 'right', 'left', -- brackets
	'noprint', -- character deposits no ink
	'superimpose' -- character is superimposed over previous
}, 3)

ss.str.enc_generics = {
	pfxescape = function(ch, enc, chain)
		local bytes = #ch
		local codes = enc.len(ch)
		return function(s)
			if s == ch then
				return 0, 0, ch
			elseif ss.str.begins(s, ch) then
				local nc = enc.char(enc.codepoint(s, bytes + 1))
				return bytes, codes, nc
			elseif chain then
				return chain(s)
			end
		end
	end;
};

local cc,cp = ss.str.charclass, ss.str.charprop
ss.str.enc = {
	utf8 = {
		char = utf8.char;
		codepoint = utf8.codepoint;
		len = utf8.len;
		encodeUCS = function(str) return str end;
		iswhitespace = function(c)
			return (c == ' ') or (c == '\t') or (c == '\n')
				or (c == '\u{3000}')
				or (c == '\u{200B}')
      end;
	};
	ascii = {
		len = string.len; char = string.char; codepoint = string.byte;
		iswhitespace = function(c)
			return (c == ' ') or (c == '\t') or (c == '\n')
      end;
		ranges = {
			{0x00,0x1a, cc.ctl};
			{0x1b,0x1b, cc.ctl | cp.disallow};
			{0x1c,0x1f, cc.ctl};
			{0x20,0x20, cc.space};
			{0x21,0x22, cc.punct};
			{0x23,0x26, cc.symbol};
			{0x27,0x29, cc.punct};
			{0x2a,0x2b, cc.symbol};
			{0x2c,0x2f, cc.punct};
			{0x30,0x39, cc.numeral | cp.hexnumeral};
			{0x3a,0x3b, cc.punct};
			{0x3c,0x3e, cc.symbol | cp.mathop};
			{0x3f,0x3f, cc.punct};
			{0x40,0x40, cc.symbol};
			{0x41,0x46, cc.letter | cp.upper | cp.hexnumeral};
			{0x47,0x5a, cc.letter | cp.upper};
			{0x5b,0x5d, cc.symbol | cp.mathop};
			{0x5e,0x5e, cc.symbol | cp.mathop};
			{0x5f,0x60, cc.symbol};
			{0x61,0x66, cc.letter | cp.lower | cp.hexnumeral};
			{0x67,0x7a, cc.letter | cp.lower};
			{0x7b,0x7e, cc.symbol};
			{0x7f,0x7f, cc.ctl, cp.disallow};
		}
	};
	raw = {len = string.len; char = string.char; codepoint = string.byte;
		encodeUCS = function(str) return str end;
		iswhitespace = function(c)
			return (c == ' ') or (c == '\t') or (c == '\n')
      end;
   };
}

-- unicode ranges are optionally generated from consortium data
-- files and injected through a generated source file. if this
-- part of the build process is disabled (e.g. due to lack of
-- internet access, or to keep the size of the executable as
-- small as possible), we still at least can make the ascii
-- ranges available to UTF8 (UTF8 being a superset of ascii)
ss.str.enc.utf8.ranges = ss.str.enc.ascii.ranges

function ss.str.enc.ascii.encodeUCS(str)
	local newstr = ''
	for c,p in ss.str.each(ss.str.enc.utf8, str, true) do
		if c > 0x7F then
			newstr = newstr .. '?'
		else
			newstr = newstr .. string.char(c)
		end
	end
end

for _, v in pairs{'utf8','ascii','raw'} do
	ss.str.enc[v].parse_escape = ss.str.enc_generics.pfxescape('\\',ss.str.enc[v])
end

function ss.bitmask_expand(ty, v)
	local bitrange = ty[true]
	local fb
	if bitrange[1] ~= 0 then
		fb = v & ((1<<bitrange[1]) - 1) -- first N bits
	end
	local tbl = {}
	for j=bitrange[1], bitrange[2] do
		if (fb & (1<<j)) ~= 0 then
			tbl[ty[1<<j]] = true
		end
	end
	return tbl, fb
end

function ss.str.classify(enc, ch)
	if not enc.ranges then return {} end
	if type(ch)=='string' then ch = enc.codepoint(ch) end

	for _, r in pairs(enc.ranges) do
		if ch >= r[1] and ch <= r[2] then
			local p,b = ss.bitmask_expand(ss.str.charprop, r[3])
			if b then p[ss.str.charclass[b]] = true end
			return p
		end
	end

	return {}
end


function ss.str.each(enc, str, ascode)
	if enc.each then return enc.each(enc,str,ascode) end
	local pm = {
		__index = {
			esc = function(self)
				local ba, bc, nc = enc.parse_escape(str:sub(self.byte))
				if ba then
					self.next.byte = self.next.byte + ba - 1
					self.next.code = self.next.code + bc - 1
					return nc
				end
			end;
		};
	}
	local pos = {
		code = 1;
		byte = 1;
	}
	return function()
		if pos.byte > #str then return nil end
		local thischar = enc.codepoint(str, pos.byte)
		local lastpos = setmetatable({
			code = pos.code;
			byte = pos.byte;
			next = pos;
		},pm)
		if not ascode then
			thischar = enc.char(thischar)
			pos.byte = pos.byte + #thischar
		else
			pos.byte = pos.byte + #enc.char(thischar)
		end
		pos.code = pos.code + 1
		return thischar, lastpos
	end
end

function ss.str.breakwords(enc, str, max, opts)
	if enc.breakwords then return enc.breakwords(str) end
	local words = {}
	opts = opts or {}
	local buf = ''
	local flush = function()
		if buf ~= '' then table.insert(words,buf) buf = '' end
	end
	for c, p in ss.str.each(enc,str) do
		local nc
		if opts.escape then
			nc = p:esc()
		end
		if nc then
			buf = buf + nc
		elseif enc.iswhitespace(c) then
			flush()
			if max and #words == max then
				local rs = str:sub(p.next.byte)
				if rs ~= '' then
					table.insert(words, rs)
				end
				break
			end
		else
			buf = buf .. c
		end
	end
	flush()
	return words
end
function ss.str.mergewords(enc, lst)
	if enc.mergewords then return enc.mergewords(lst) end
	return table.concat(lst, enc.wordsep or ' ')
end
function ss.str.breaklines(enc, str, opts)
	if enc.breaklines then return enc.breaklines(lst,opts) end
	return ss.str.split(enc, str, enc.encodeUCS'\n', opts)
end

function ss.str.split(enc, str, delim, opts)
	if enc.split then return enc.split(str,delim,opts) end
	opts = opts or {}
	local elts = {}
	local buf = ''
	local flush = function()
		if buf ~= '' or opts.keep_empties then
			table.insert(elts,buf)
			buf = ''
		end
	end
	local esc = enc.parse_escape
	local tryesc if opts.escape then
		tryesc = function(str, p)
			local ba, ca, escd = enc.parse_escape(str:sub(p.byte))
			if ba then
				p.next.byte = p.next.byte + ba
				p.next.code = p.next.code + ca
				buf = buf .. escd
				return true
			end
		end
	else
		tryesc = function(...)  end
	end

	if type(delim) == 'function' then
		for c, p in ss.str.each(enc,str) do
			if not tryesc(str,p) then
				local skip = delim(str:sub(p.byte))
				if skip then
					flush()
					p.next.byte = p.next.byte + skip - 1
				else
					buf = buf .. c
				end
			end
		end
	elseif enc.len(delim) == 1 then
		for c, p in ss.str.each(enc,str) do
			if not tryesc(str,p) then
				if c == delim then
					flush()
				else
					buf = buf .. c
				end
			end
		end
	else
		local dlcode = enc.len(delim)
		for c, p in ss.str.each(enc,str) do
			if not tryesc(str,p) then
				if str:sub(p.byte, p.byte+#delim-1) == delim then
					flush()
					p.next.byte = p.next.byte + #delim - 1
					p.next.code = p.next.code + dlcode
				else
					buf = buf .. c
				end
			end
		end
	end
	flush()
	return elts
end

function ss.str.langmatch(tbl, lang, enc)
	-- this performs primitive language matching. NOTE: THIS IS NOT
	-- STANDARDS COMPLIANT. it's "good enough" for now, but in the
	-- long term it needs to be rewritten to actually understand the
	-- format, primarily so that e.g. 'en-US-Latn' and 'en-Latn-US'
	-- match -- currently order is significant. it shouldn't be
	-- ref: IETF BCP 47 (RFC 5646) https://www.ietf.org/rfc/bcp/bcp47.html
	local dash = enc.encodeUCS'-'
	local tags = ss.str.split(enc, lang, dash, {escape=true})
	local bestlen = 0
	local bestmatch
	for k,v in pairs(tbl) do
		if k ~= true then
			local kt = ss.str.split(enc, k, dash, {escape=true})
			for i=1,math.min(#kt,#tags) do
				if kt[i] ~= tags[i] then goto skip end
			end
			if #kt > bestlen then
				-- match the most specific matching tag
				bestmatch = k
				bestlen = #kt
			end
		end
	::skip::end
	return tbl[bestmatch] or tbl[true], bestmatch
end

ss.math = {}

function ss.math.lerp(t, a, b)
	return (1-t)*a + (t*b)
end

function ss.dump(o, state, path, depth)
	state = state or {tbls = {}}
	depth = depth or 0
	local pfx = string.rep('   ', depth)
	if type(o) == "table" then
		local str = ''
		for k,p in pairs(o) do
			local done = false
			local exp
			if type(p) == 'table' then
				if state.tbls[p] then
					exp = '<' .. state.tbls[p] ..'>'
					done = true
				else
					state.tbls[p] = path and string.format('%s.%s', path, k) or k
				end
			end
			if not done then
				local function dodump() return ss.dump(
					p, state,
					path and string.format("%s.%s", path, k) or k,
					depth + 1
				) end
				-- boy this is ugly
				if type(p) ~= 'table' or
					getmetatable(p) == nil or
					getmetatable(p).__tostring == nil then
					exp = dodump()
				end
				if type(p) == 'table' then
					exp = string.format('{\n%s%s}', exp, pfx)
					local meta = getmetatable(p)
					if meta then
						if meta.__tostring then
							exp = tostring(p)
						end
						if meta.__name then
							exp = meta.__name .. ' ' .. exp
						end
					end
				end
			end
			str = str .. pfx .. string.format("%s = %s\n", k, exp)
		end
		return str
	elseif type(o) == "string" then
		return string.format('“%s”', o)
	else
		return tostring(o)
	end
end

function ss.hexdump(s)
	local hexlines, charlines = {},{}
	for i=1,#s do
		local line = math.floor((i-1)/16) + 1
		hexlines[line] = (hexlines[line] or '') .. string.format("%02x ",string.byte(s, i))
		charlines[line] = (charlines[line] or '') .. ' ' .. string.gsub(string.sub(s, i, i), '[^%g ]', '\x1b[;35m·\x1b[36;1m') .. ' '
	end
	local str = ''
	for i=1,#hexlines do
		str = str .. '\x1b[1;36m' .. charlines[i] .. '\x1b[m\n' .. hexlines[i] .. '\n'
	end
	return str
end

function ss.declare(c)
	local cls = setmetatable({
		__name = c.ident;
	}, {
		__name = 'class';
		__tostring = function() return c.ident or '(class)' end;
	})

	cls.__call = c.call
	cls.__index = function(self, k)
		if c.default and c.default[k] then
			return c.default[k]
		end
		if k == 'clone' then
			return function(self)
				local new = cls.mk()
				for k,v in pairs(self) do
					new[k] = v
				end
				if c.clonesetup then
					c.clonesetup(new, self)
				end
				return new
			end
		elseif k == 'to' then
			return function(self, to, ...)
				if to == 'string' then return tostring(self)
				elseif to == 'number' then return tonumber(self)
				elseif to == 'int' then return math.floor(tonumber(self))
				elseif c.cast and c.cast[to] then
					return c.cast[to](self, ...)
				elseif type(to) == 'table' and getmetatable(to) and getmetatable(to).cvt and getmetatable(to).cvt[cls] then
				else error((c.ident or 'class') .. ' is not convertible to ' .. (type(to) == 'string' and to or tostring(to))) end
			end
		end
		if c.fns and c.fns[k] then return c.fns[k] end
		if c.index then return c.index(self,k) end
	end

	if c.cast then
		if c.cast.string then
			cls.__tostring = c.cast.string
		end
		if c.cast.number then
			cls.__tonumber = c.cast.number
		end
	end

	cls.mk = function(...)
		local val = setmetatable(c.mk and c.mk(...) or {}, cls)
		if c.init then
			for k,v in pairs(c.init) do
				val[k] = v
			end
		end
		if c.construct then
			c.construct(val, ...)
		end
		return val
	end
	getmetatable(cls).__call = function(_, ...) return cls.mk(...) end
	cls.is = function(o) return getmetatable(o) == cls end
	cls.__metatable = cls -- lock metatable
	return cls
end

-- tidy exceptions

ss.exn = ss.declare {
	ident = 'exn';
	mk = function(kind, ...)
		return {
			vars = {...};
			kind = kind;
		}
	end;
	cast = {
		string = function(me)
			return me.kind.report(table.unpack(me.vars))
		end;
	};
	fns = {
		throw = function(me) error(me) end;
	}
}
ss.exnkind = ss.declare {
	ident = 'exn-kind';
	mk = function(desc, report)
		return {
			desc = desc;
			report = report or function(msg,...)
				return string.format(msg,...)
			end;
		}
	end;
	call = function(me, ...)
		return ss.exn(me, ...)
	end;
}
ss.str.exn = ss.exnkind 'failure while string munging'
ss.bug = ss.exnkind 'tripped over bug'

function ss.str.delimit(encoding, start, stop, s)
	local depth = 0
	encoding = encoding or ss.str.enc.utf8
	if not ss.str.begins(s, start) then return nil end
	for c,p in ss.str.each(encoding,s) do
		if c == (encoding.escape or '\\') then
			p.next.byte = p.next.byte + #encoding.char(encoding.codepoint(s, p.next.byte))
			p.next.code = p.next.code + 1
		elseif c == start then
			depth = depth + 1
		elseif c == stop then
			depth = depth - 1
			if depth == 0 then
				return s:sub(1+#start, p.byte - #stop), p.byte -- FIXME
			elseif depth < 0 then
				ss.str.exn('out of place token “%s”', stop):throw()
			end
		end
	end

	ss.str.exn('token “%s” expected before end of line', stop):throw()
end

ss.version = ss.declare {
	name = 'version';
	mk = function(tbl) return tbl end;
	fns = {
		pre = function(self,other) end;
		post = function(self,other) end;
		string = function(self) return tostring(self) end;		
	};
	cast = {
		string = function(vers)
			if not(next(vers)) then return '0.0' end
			local str = ''
			for _,v in pairs(vers) do
				if type(v) == 'string' then
					if str ~= '' then str = str .. '-' end
					str = str .. v
				else
					if str ~= '' then str = str .. '.' end
					str = str .. tostring(v)
				end
			end
			return str
		end
	};
}

function ss.classinstance(o)
	local g = getmetatable(o)
	if not o then return nil end
	local mm = getmetatable(g)
	if not o then return nil end
	if mm.__name == 'class' then
		return g
	else
		return nil
	end
end

function ss.walk(o, key, ...)
	if o[key] then
		if select('#', ...) == 0 then
			return o[key]
		else
			return ss.walk(o[key], ...)
		end
	end
	return nil
end

function ss.coalesce(x, ...)
	if x ~= nil then
		return x
	elseif select('#', ...) == 0 then
		return nil
	else
		return ss.coalesce(...)
	end
end

ss.tuple = {}
function ss.tuple.any(...)
	return select('#',...) > 0
end

function ss.tuple.cat(...)
	local a = {...}
	return function(...)
		ss.push(a, ...)
		return table.unpack(a)
	end
end

function ss.tuple.suffix(sfx,n,...)
	if n ~= nil then
		return n, ss.tuple.suffix(...)
	else
		return sfx
	end
end

function ss.tuple.cdr(x, ...) return ... end

ss.stack = ss.declare {
	ident = 'stack';
	mk = function() return {
		top = 0;
		store = {};
   } end;
	index = function(me, i)
		if i <= 0 then
			return me.store[me.top + i]
		else
			return me.store[i]
		end
	end;
	fns = {
		push = function(me, val, ...)
         if val~=nil then
	         me.top = me.top + 1
	         me.store[me.top] = val
	         me:push(...)
         end
         return val, ...
      end;
      pop = function(me,n) n = n or 1
         local r = {}
			if n < me.top then
				for i = 0,n-1 do
					r[i+1] = me.store[me.top - i]
					me.store[me.top - i] = nil
				end
				me.top = me.top - n
         else
	         r = me.store
				me.store = {}
         end
			return table.unpack(r)
      end;
      set = function(me,val)
         if me.top == 0 then
	         me.top = me.top + 1 --autopush
         end
         me.store[me.top] = val
      end;
      all = function(me) return table.unpack(me.store) end;
      each = function(forward)
         if forward then
	         local idx = 0
	         return function()
		         idx = idx + 1
		         if idx > top
						then return nil
						else return me.store[idx], idx
					end
	         end
         else
	         local idx = top + 1
	         return function()
		         idx = idx - 1
		         if idx == 0
						then return nil
						else return me.store[idx], idx
					end
	         end
         end
      end;
	};
}

ss.automat = ss.declare {
	ident = 'automat';
	mk = function() return {
		state = ss.stack();
		states = {};
		ttns = {};
		mem = {};
		match = function(sym, ttn, mach)
			if ttn.pred and ttn:pred(mach, sym)~=true then
				return false
			end
			if ttn.on then
				return sym == ttn.on
			end
			return false
		end;
	} end;

	construct = function(me, def)
		for k,v in pairs{'states','ttns','mem','syms'} do
			if def[k] then me[k] = v end
		end
	end;

	fns = {
		react = function(me,sym)
			local s = me.states[me.state.id]
			if s and s.input then
				s:react(me, sym)
			end
		end;

		drop = function(me,n)
			for i = 0, math.min(n-1,me.state.top-1) do
				local s = me.states[me.state[-i].id]
				if s.exit then s:exit(s.mem, me) end
			end
			if n < me.state.top then
				local newtop = me.states[me.state[-n].id]
				if newtop.activate then newtop:activate(me.state[-n].mem, me, n) end
			end
			return me.state:pop(n)
		end;
		clear = function(me) return me:drop(me.state.top) end;

		transition = function(me,ttn,oldstates)
			local s = me.state:push {id = ttn.to, mem = {}}
			local to = me.states[ttn.to]
			if to.enter then
				to:enter(s.mem, me)
			end
		end;

		input = function(me,sym)
			local ttns = me.ttns[me.state.id]
			local _, ttn = ss.find(ttns, function(ttn)
			                        return me.match(sym, ttn, me)
			                       end)
			if ttn then
				if ttn.pop then
					local oldstates = {me.state:drop(ttn.pop)}
					me:transition(ttn, sym, oldstates)
				else
					me:transition(ttn, sym)
				end
			else
				me:react(sym)
			end
		end;
	};
}