-- [ʞ] sirsem.lua
-- ~ lexu hale <lexi@hale.su>
-- ? utility library with functionality common to
-- cortav.lua and its extensions
-- from Ranuir "software utility"
-- > local ss = require 'sirsem.lua'
local ss
do -- pull ourselves up by our own bootstraps
local package = _G.package -- prevent namespace from being broken by env shenanigans
local function namespace(name, tbl)
local pkg = tbl or {}
if package then
package.loaded[name] = pkg
end
return pkg
end
ss = namespace 'sirsem'
ss.namespace = namespace
end
function ss.map(fn, lst)
local new = {}
for k,v in pairs(lst) do
table.insert(new, fn(v,k))
end
return new
end
function ss.reduce(fn, acc, lst)
for i,v in ipairs(lst) do
acc = fn(acc, v, i)
end
return acc
end
function ss.filter(list, fn)
local new = {}
for i, v in ipairs(list) do
if fn(v,i) then table.insert(new, v) end
end
return new
end
function ss.kmap(fn, list)
local new = {}
for k, v in pairs(list) do
local nk,nv = fn(k,v)
new[nk or k] = nv or v
end
return new
end
function ss.kfilter(list, fn)
local new = {}
for k, v in pairs(list) do
if fn(k,v) then new[k] = v end
end
return new
end
function ss.find(tbl, pred, ...)
pred = pred or function(a,b)
return a == b
end
for k,v in pairs(tbl) do
if pred(v,k,...) then return k,v end
end
return nil
end
function ss.clone(tbl) -- performs a shallow copy
if tbl.clone then return tbl:clone() end
local new = {}
for k,v in pairs(tbl) do
new[k] = v
end
return new
end
function ss.copy(tbl) -- performs a deep copy
local new = {}
for k,v in pairs(tbl) do
if type(v) == 'table' then
if v.clone then
new[k] = v:clone() -- this may or may not do a deep copy!
else
new[k] = ss.copy(v)
end
else
new[k] = v
end
end
return new
end
function ss.push(tbl, ...)
local idx = #tbl + 1
local function rec(v, ...)
tbl[idx] = v
idx = idx + 1
if ss.tuple.any(...) then rec(...) end
end
rec(...)
return tbl
end
function ss.delegate(tbl,tpl) -- returns a table that looks up keys it lacks from
-- tbl (lightweight alternative to shallow copies)
tpl = tpl or {}
return setmetatable({}, {__index=tbl})
end
ss.str = {}
function ss.str.begins(str, pfx)
-- appallingly, this is actually ~2/5ths faster than either
-- of the below. i hate scripting languages so much
return string.find(str, pfx, 1, true) == 1
-- to my shock, disgust, and horror, even writing my own
-- string scanning library for lua IN C only sped this up by
-- a tiny fraction. i am just speechless.
-- return string.sub(str, 1, #pfx) == pfx
-- local pl = string.len(pfx)
-- local sl = string.len(str)
-- if sl < pl then return false end
-- for i=1,pl do
-- if string.byte(str,i) ~= string.byte(pfx,i) then
-- return false
-- end
-- end
-- return true
end
function ss.enum(syms)
local e = {}
for i,v in pairs(syms) do
e[v] = i
e[i] = v
end
return e
end
function ss.bitmask_bytes(n,ofs)
ofs = ofs or 0
local function rec(i)
if i > n then return end
return 1<<(i+ofs), rec(i+1)
end
return 1<<ofs, rec(1)
end
function ss.bitmask(tbl,ofs)
local codes = {ss.bitmask_bytes(#tbl,ofs)}
local m = {}
local maxbit
for i, s in ipairs(tbl) do
m[s] = codes[i]
m[codes[i]] = s
maxbit = i
end
m[true] = {ofs or 0,maxbit}
return m
end
ss.str.charclass = ss.enum {
'numeral'; 'letter'; 'symbol'; 'punct';
'space'; 'ctl'; 'glyph'; -- hanji
}
ss.str.charprop = ss.bitmask({
'hexnumeral', -- character that can be used to write hexadecimal notation
'upper', 'lower';
'diac'; -- diacritic/modifier letter
'wordbreak'; -- char causes following characters to be treated as a separate word (e.g. punctuation)
'wordsep'; -- char causes previous and following characters to be treated as separate words; char constitutes a word of its own in between (e.g. interpunct)
'breakokay'; -- is it okay to break words at this character? (eg hyphen)
'mathop'; -- char is a mathematical operator
'disallow', -- char is not allowed in narrative text
'brack', 'right', 'left', -- brackets
'noprint', -- character deposits no ink
'superimpose' -- character is superimposed over previous
}, 3)
ss.str.enc_generics = {
pfxescape = function(ch, enc, chain)
local bytes = #ch
local codes = enc.len(ch)
return function(s)
if s == ch then
return 0, 0, ch
elseif ss.str.begins(s, ch) then
local nc = enc.char(enc.codepoint(s, bytes + 1))
return bytes, codes, nc
elseif chain then
return chain(s)
end
end
end;
};
local cc,cp = ss.str.charclass, ss.str.charprop
ss.str.enc = {
utf8 = {
char = utf8.char;
codepoint = utf8.codepoint;
len = utf8.len;
encodeUCS = function(str) return str end;
iswhitespace = function(c)
return (c == ' ') or (c == '\t') or (c == '\n')
or (c == '\u{3000}')
or (c == '\u{200B}')
end;
};
ascii = {
len = string.len; char = string.char; codepoint = string.byte;
iswhitespace = function(c)
return (c == ' ') or (c == '\t') or (c == '\n')
end;
ranges = {
{0x00,0x1a, cc.ctl};
{0x1b,0x1b, cc.ctl, cp.disallow};
{0x1c,0x1f, cc.ctl};
{0x20,0x20, cc.space};
{0x21,0x22, cc.punct};
{0x23,0x26, cc.symbol};
{0x27,0x29, cc.punct};
{0x2a,0x2b, cc.symbol};
{0x2c,0x2f, cc.punct};
{0x30,0x39, cc.numeral, cp.hexnumeral};
{0x3a,0x3b, cc.punct};
{0x3c,0x3e, cc.symbol, cp.mathop};
{0x3f,0x3f, cc.punct};
{0x40,0x40, cc.symbol};
{0x41,0x46, cc.letter, cp.ucase, cp.hexnumeral};
{0x47,0x5a, cc.letter, cp.ucase};
{0x5b,0x5d, cc.symbol, cp.mathop};
{0x5e,0x5e, cc.symbol, mathop};
{0x5f,0x60, cc.symbol};
{0x61,0x66, cc.letter, cp.lcase, cp.hexnumeral};
{0x67,0x7a, cc.letter, cp.lcase};
{0x7b,0x7e, cc.symbol};
{0x7f,0x7f, cc.ctl, cp.disallow};
}
};
raw = {len = string.len; char = string.char; codepoint = string.byte;
encodeUCS = function(str) return str end;
iswhitespace = function(c)
return (c == ' ') or (c == '\t') or (c == '\n')
end;
};
}
-- unicode ranges are optionally generated from consortium data
-- files and injected through a generated source file. if this
-- part of the build process is disabled (e.g. due to lack of
-- internet access, or to keep the size of the executable as
-- small as possible), we still at least can make the ascii
-- ranges available to UTF8 (UTF8 being a superset of ascii)
ss.str.enc.utf8.ranges = ss.delegate(ss.str.enc.ascii.ranges)
function ss.str.enc.ascii.encodeUCS(str)
local newstr = ''
for c,p in ss.str.each(ss.str.enc.utf8, str, true) do
if c > 0x7F then
newstr = newstr .. '?'
else
newstr = newstr .. string.char(c)
end
end
end
for _, v in pairs{'utf8','ascii','raw'} do
ss.str.enc[v].parse_escape = ss.str.enc_generics.pfxescape('\\',ss.str.enc[v])
end
function ss.str.classify(enc, ch)
if not enc.ranges then return {} end
if type(ch)=='string' then ch = enc.codepoint(ch) end
-- TODO
end
function ss.str.each(enc, str, ascode)
if enc.each then return enc.each(enc,str,ascode) end
local pm = {
__index = {
esc = function(self)
local ba, bc, nc = enc.parse_escape(str:sub(self.byte))
if ba then
self.next.byte = self.next.byte + ba - 1
self.next.code = self.next.code + bc - 1
return nc
end
end;
};
}
local pos = {
code = 1;
byte = 1;
}
return function()
if pos.byte > #str then return nil end
local thischar = enc.codepoint(str, pos.byte)
local lastpos = setmetatable({
code = pos.code;
byte = pos.byte;
next = pos;
},pm)
if not ascode then
thischar = enc.char(thischar)
pos.byte = pos.byte + #thischar
else
pos.byte = pos.byte + #enc.char(thischar)
end
pos.code = pos.code + 1
return thischar, lastpos
end
end
function ss.str.breakwords(enc, str, max, opts)
if enc.breakwords then return enc.breakwords(str) end
local words = {}
opts = opts or {}
local buf = ''
local flush = function()
if buf ~= '' then table.insert(words,buf) buf = '' end
end
for c, p in ss.str.each(enc,str) do
local nc
if opts.escape then
nc = p:esc()
end
if nc then
buf = buf + nc
elseif enc.iswhitespace(c) then
flush()
if max and #words == max then
local rs = str:sub(p.next.byte)
if rs ~= '' then
table.insert(words, rs)
end
break
end
else
buf = buf .. c
end
end
flush()
return words
end
function ss.str.mergewords(enc, lst)
if enc.mergewords then return enc.mergewords(lst) end
return table.concat(lst, enc.wordsep or ' ')
end
function ss.str.breaklines(enc, str, opts)
if enc.breaklines then return enc.breaklines(lst,opts) end
return ss.str.split(enc, str, enc.encodeUCS'\n', opts)
end
function ss.str.split(enc, str, delim, opts)
if enc.split then return enc.split(str,delim,opts) end
opts = opts or {}
local elts = {}
local buf = ''
local flush = function()
if buf ~= '' or opts.keep_empties then
table.insert(elts,buf)
buf = ''
end
end
local esc = enc.parse_escape
local tryesc if opts.escape then
tryesc = function(str, p)
local ba, ca, escd = enc.parse_escape(str:sub(p.byte))
if ba then
p.next.byte = p.next.byte + ba
p.next.code = p.next.code + ca
buf = buf .. escd
return true
end
end
else
tryesc = function(...) end
end
if type(delim) == 'function' then
for c, p in ss.str.each(enc,str) do
if not tryesc(str,p) then
local skip = delim(str:sub(p.byte))
if skip then
flush()
p.next.byte = p.next.byte + skip - 1
else
buf = buf .. c
end
end
end
elseif enc.len(delim) == 1 then
for c, p in ss.str.each(enc,str) do
if not tryesc(str,p) then
if c == delim then
flush()
else
buf = buf .. c
end
end
end
else
local dlcode = enc.len(delim)
for c, p in ss.str.each(enc,str) do
if not tryesc(str,p) then
if str:sub(p.byte, p.byte+#delim-1) == delim then
flush()
p.next.byte = p.next.byte + #delim - 1
p.next.code = p.next.code + dlcode
else
buf = buf .. c
end
end
end
end
flush()
return elts
end
function ss.str.langmatch(tbl, lang, enc)
-- this performs primitive language matching. NOTE: THIS IS NOT
-- STANDARDS COMPLIANT. it's "good enough" for now, but in the
-- long term it needs to be rewritten to actually understand the
-- format, primarily so that e.g. 'en-US-Latn' and 'en-Latn-US'
-- match -- currently order is significant. it shouldn't be
-- ref: IETF BCP 47 (RFC 5646) https://www.ietf.org/rfc/bcp/bcp47.html
local dash = enc.encodeUCS'-'
local tags = ss.str.split(enc, lang, dash, {escape=true})
local bestlen = 0
local bestmatch
for k,v in pairs(tbl) do
if k ~= true then
local kt = ss.str.split(enc, k, dash, {escape=true})
for i=1,math.min(#kt,#tags) do
if kt[i] ~= tags[i] then goto skip end
end
if #kt > bestlen then
-- match the most specific matching tag
bestmatch = k
bestlen = #kt
end
end
::skip::end
return tbl[bestmatch] or tbl[true], bestmatch
end
ss.math = {}
function ss.math.lerp(t, a, b)
return (1-t)*a + (t*b)
end
function ss.dump(o, state, path, depth)
state = state or {tbls = {}}
depth = depth or 0
local pfx = string.rep(' ', depth)
if type(o) == "table" then
local str = ''
for k,p in pairs(o) do
local done = false
local exp
if type(p) == 'table' then
if state.tbls[p] then
exp = '<' .. state.tbls[p] ..'>'
done = true
else
state.tbls[p] = path and string.format('%s.%s', path, k) or k
end
end
if not done then
local function dodump() return ss.dump(
p, state,
path and string.format("%s.%s", path, k) or k,
depth + 1
) end
-- boy this is ugly
if type(p) ~= 'table' or
getmetatable(p) == nil or
getmetatable(p).__tostring == nil then
exp = dodump()
end
if type(p) == 'table' then
exp = string.format('{\n%s%s}', exp, pfx)
local meta = getmetatable(p)
if meta then
if meta.__tostring then
exp = tostring(p)
end
if meta.__name then
exp = meta.__name .. ' ' .. exp
end
end
end
end
str = str .. pfx .. string.format("%s = %s\n", k, exp)
end
return str
elseif type(o) == "string" then
return string.format('“%s”', o)
else
return tostring(o)
end
end
function ss.hexdump(s)
local hexlines, charlines = {},{}
for i=1,#s do
local line = math.floor((i-1)/16) + 1
hexlines[line] = (hexlines[line] or '') .. string.format("%02x ",string.byte(s, i))
charlines[line] = (charlines[line] or '') .. ' ' .. string.gsub(string.sub(s, i, i), '[^%g ]', '\x1b[;35m·\x1b[36;1m') .. ' '
end
local str = ''
for i=1,#hexlines do
str = str .. '\x1b[1;36m' .. charlines[i] .. '\x1b[m\n' .. hexlines[i] .. '\n'
end
return str
end
function ss.declare(c)
local cls = setmetatable({
__name = c.ident;
}, {
__name = 'class';
__tostring = function() return c.ident or '(class)' end;
})
cls.__call = c.call
cls.__index = function(self, k)
if c.default and c.default[k] then
return c.default[k]
end
if k == 'clone' then
return function(self)
local new = cls.mk()
for k,v in pairs(self) do
new[k] = v
end
if c.clonesetup then
c.clonesetup(new, self)
end
return new
end
elseif k == 'to' then
return function(self, to, ...)
if to == 'string' then return tostring(self)
elseif to == 'number' then return tonumber(self)
elseif to == 'int' then return math.floor(tonumber(self))
elseif c.cast and c.cast[to] then
return c.cast[to](self, ...)
elseif type(to) == 'table' and getmetatable(to) and getmetatable(to).cvt and getmetatable(to).cvt[cls] then
else error((c.ident or 'class') .. ' is not convertible to ' .. (type(to) == 'string' and to or tostring(to))) end
end
end
if c.fns and c.fns[k] then return c.fns[k] end
if c.index then return c.index(self,k) end
end
if c.cast then
if c.cast.string then
cls.__tostring = c.cast.string
end
if c.cast.number then
cls.__tonumber = c.cast.number
end
end
cls.mk = function(...)
local val = setmetatable(c.mk and c.mk(...) or {}, cls)
if c.init then
for k,v in pairs(c.init) do
val[k] = v
end
end
if c.construct then
c.construct(val, ...)
end
return val
end
getmetatable(cls).__call = function(_, ...) return cls.mk(...) end
cls.is = function(o) return getmetatable(o) == cls end
cls.__metatable = cls -- lock metatable
return cls
end
-- tidy exceptions
ss.exn = ss.declare {
ident = 'exn';
mk = function(kind, ...)
return {
vars = {...};
kind = kind;
}
end;
cast = {
string = function(me)
return me.kind.report(table.unpack(me.vars))
end;
};
fns = {
throw = function(me) error(me) end;
}
}
ss.exnkind = ss.declare {
ident = 'exn-kind';
mk = function(desc, report)
return {
desc = desc;
report = report or function(msg,...)
return string.format(msg,...)
end;
}
end;
call = function(me, ...)
return ss.exn(me, ...)
end;
}
ss.str.exn = ss.exnkind 'failure while string munging'
ss.bug = ss.exnkind 'tripped over bug'
function ss.str.delimit(encoding, start, stop, s)
local depth = 0
encoding = encoding or ss.str.enc.utf8
if not ss.str.begins(s, start) then return nil end
for c,p in ss.str.each(encoding,s) do
if c == (encoding.escape or '\\') then
p.next.byte = p.next.byte + #encoding.char(encoding.codepoint(s, p.next.byte))
p.next.code = p.next.code + 1
elseif c == start then
depth = depth + 1
elseif c == stop then
depth = depth - 1
if depth == 0 then
return s:sub(1+#start, p.byte - #stop), p.byte -- FIXME
elseif depth < 0 then
ss.str.exn('out of place token “%s”', stop):throw()
end
end
end
ss.str.exn('token “%s” expected before end of line', stop):throw()
end
ss.version = ss.declare {
name = 'version';
mk = function(tbl) return tbl end;
fns = {
pre = function(self,other) end;
post = function(self,other) end;
string = function(self) return tostring(self) end;
};
cast = {
string = function(vers)
if not(next(vers)) then return '0.0' end
local str = ''
for _,v in pairs(vers) do
if type(v) == 'string' then
if str ~= '' then str = str .. '-' end
str = str .. v
else
if str ~= '' then str = str .. '.' end
str = str .. tostring(v)
end
end
return str
end
};
}
function ss.classinstance(o)
local g = getmetatable(o)
if not o then return nil end
local mm = getmetatable(g)
if not o then return nil end
if mm.__name == 'class' then
return g
else
return nil
end
end
function ss.walk(o, key, ...)
if o[key] then
if select('#', ...) == 0 then
return o[key]
else
return ss.walk(o[key], ...)
end
end
return nil
end
function ss.coalesce(x, ...)
if x ~= nil then
return x
elseif select('#', ...) == 0 then
return nil
else
return ss.coalesce(...)
end
end
ss.tuple = {}
function ss.tuple.any(...)
return select('#',...) > 0
end
function ss.tuple.cat(...)
local a = {...}
return function(...)
ss.push(a, ...)
return table.unpack(a)
end
end
function ss.tuple.suffix(sfx,n,...)
if n ~= nil then
return n, ss.tuple.suffix(...)
else
return sfx
end
end
function ss.tuple.cdr(x, ...) return ... end
ss.stack = ss.declare {
ident = 'stack';
mk = function() return {
top = 0;
store = {};
} end;
index = function(me, i)
if i <= 0 then
return me.store[me.top + i]
else
return me.store[i]
end
end;
fns = {
push = function(me, val, ...)
if val~=nil then
me.top = me.top + 1
me.store[me.top] = val
me:push(...)
end
return val, ...
end;
pop = function(me,n) n = n or 1
local r = {}
if n < me.top then
for i = 0,n-1 do
r[i+1] = me.store[me.top - i]
me.store[me.top - i] = nil
end
me.top = me.top - n
else
r = me.store
me.store = {}
end
return table.unpack(r)
end;
set = function(me,val)
if me.top == 0 then
me.top = me.top + 1 --autopush
end
me.store[me.top] = val
end;
all = function(me) return table.unpack(me.store) end;
each = function(forward)
if forward then
local idx = 0
return function()
idx = idx + 1
if idx > top
then return nil
else return me.store[idx], idx
end
end
else
local idx = top + 1
return function()
idx = idx - 1
if idx == 0
then return nil
else return me.store[idx], idx
end
end
end
end;
};
}
ss.automat = ss.declare {
ident = 'automat';
mk = function() return {
state = ss.stack();
states = {};
ttns = {};
mem = {};
match = function(sym, ttn, mach)
if ttn.pred and ttn:pred(mach, sym)~=true then
return false
end
if ttn.on then
return sym == ttn.on
end
return false
end;
} end;
construct = function(me, def)
for k,v in pairs{'states','ttns','mem','syms'} do
if def[k] then me[k] = v end
end
end;
fns = {
react = function(me,sym)
local s = me.states[me.state.id]
if s and s.input then
s:react(me, sym)
end
end;
drop = function(me,n)
for i = 0, math.min(n-1,me.state.top-1) do
local s = me.states[me.state[-i].id]
if s.exit then s:exit(s.mem, me) end
end
if n < me.state.top then
local newtop = me.states[me.state[-n].id]
if newtop.activate then newtop:activate(me.state[-n].mem, me, n) end
end
return me.state:pop(n)
end;
clear = function(me) return me:drop(me.state.top) end;
transition = function(me,ttn,oldstates)
local s = me.state:push {id = ttn.to, mem = {}}
local to = me.states[ttn.to]
if to.enter then
to:enter(s.mem, me)
end
end;
input = function(me,sym)
local ttns = me.ttns[me.state.id]
local _, ttn = ss.find(ttns, function(ttn)
return me.match(sym, ttn, me)
end)
if ttn then
if ttn.pop then
local oldstates = {me.state:drop(ttn.pop)}
me:transition(ttn, sym, oldstates)
else
me:transition(ttn, sym)
end
else
me:react(sym)
end
end;
};
}