-- [ʞ] parvan.lua
-- ? tool for maintaining and searching dictionaries
-- [ CONTROL CLASS: GREEN ]
-- [ CODEWORDS: - GENERAL ACCESS - ]
-- [ CONTROLLING AUTHORITY: - INTERDIRECTORIAL -
-- < Commission for Defense Communication >
-- +WCO Worlds Culture Overdirectorate
-- SSD Social Sciences Directorate
-- ELS External Linguistics Subdirectorate
-- +WSO Worlds Security Overdirectorate
-- EID External Influence Directorate ]
--# parvan
--
--## orthographies
-- parvan supports encoding words using multiple
-- orthographies. every database has a "primary"
-- orthography, which must be Unicode- or ASCII-
-- compatible, and which is used as the basis of the
-- uniform object paths. other orthographies can be
-- managed with the [$script] command, which can set
-- how they are displayed to the user. every word
-- can have zero or more representations mapped to a
-- particular orthography.
--
--## file format
-- parvan defines two separate file formats, both
-- representations of a dictionary. one, the "working
-- format", is binary; the other, the "exchange format"
-- is comprised of UTF8 codepoint sequences and can be
-- (to some degree) written and read by human beings,
-- tho its primary purpose is as a line-based format
-- that allows parvan dictionaries to be managed with
-- conventional source control solutions like fossil
-- or git
--
--## magic numbers
-- all parvan files share the same 4-byte header. it
-- is comprised of the sequence
-- [$ 0x50 0x56 $VERS $SUBTYPE ]
-- where [$$VERS] is a byte that is altered whenever a
-- breaking change is made to the format. [$$SUBTYPE]
-- indicates whether the file is binary- or text-based.
-- the byte 0x20 indicates an exchange file, while
-- 0x02 indicates a binary database.
--
--## extensions
-- parvan recommends the use of the extension [$*.pv]
-- for its binary databases, and [$*.pvx] for the
-- exchange format.
--
--## styled text
-- text in parvan documents should be written using
-- cortav syntax. at some future time this will
-- hopefully be used to generate styled output where
-- possible
-- material implication (a → b): false only when [a] is truthy and
-- [b] is not equal to it
local function implies(a,b)
	if not a then return true end
	return a == b
end
-- build a new table by applying [fn](value, key) to every entry of
-- [lst]. [fn] returns the new value and, optionally, a replacement
-- key; a nil/false replacement keeps the original key
local function map(lst,fn)
	local out = {}
	for key, val in pairs(lst) do
		local mapped, rekey = fn(val, key)
		if rekey then
			out[rekey] = mapped
		else
			out[key] = mapped
		end
	end
	return out
end
-- in-place variant of map: every entry of [lst] is overwritten with
-- the result of [fn](value, key), and moved when [fn] also returns a
-- different key. returns [lst] itself
local function mapD(lst,fn) --destructive
	-- WARNING: this will not work if nk names an existing key!
	for key, val in pairs(lst) do
		local mapped, rekey = fn(val, key)
		local moved = rekey ~= nil and rekey ~= key
		if moved then
			lst[key] = nil
			lst[rekey] = mapped
		else
			lst[key] = mapped
		end
	end
	return lst
end
-- store the given values into [dest] at consecutive indices starting
-- from [idx]; stops at the first nil or false value
local function pushi(dest, idx, src, ...)
	local rest = table.pack(...)
	local cur, n = src, 0
	while cur do
		dest[idx + n] = cur
		n = n + 1
		cur = rest[n]
	end
end
-- append the given values to the end of [dest] (see pushi for the
-- stopping rule)
local function push(dest, ...)
	local first = #dest + 1
	pushi(dest, first, ...)
end
-- return a new list made of [car] followed by the sequence part of [cdr]
local function cons(car, cdr)
	local lst = {car}
	local i = 1
	while cdr[i] ~= nil do
		lst[i + 1] = cdr[i]
		i = i + 1
	end
	return lst
end
-- destructively append the contents of each list argument to [dest],
-- in order; stops at the first nil argument
local function tcatD(dest, ...)
	local at = #dest
	for n = 1, select('#', ...) do
		local src = select(n, ...)
		if src == nil then break end
		local len = #src
		for j = 1, len do
			dest[at + j] = src[j]
		end
		at = at + len
	end
end
-- copy every key of each table argument into [dest], later tables
-- overriding earlier ones; stops at the first nil argument.
-- returns [dest]
local function mergeD(dest, tbl, ...)
	local rest = table.pack(...)
	local cur, n = tbl, 0
	while cur ~= nil do
		for k, v in pairs(cur) do dest[k] = v end
		n = n + 1
		cur = rest[n]
	end
	return dest
end
-- non-destructive mergeD: returns a fresh table holding the keys of
-- all the table arguments
local function merge(...)
	local fresh = {}
	return mergeD(fresh, ...)
end
-- return a shallow copy of each table argument, in order.
-- stops at the first nil argument. (the previous version had no base
-- case and recursed until the stack overflowed on every call.)
local function copy(t,...)
	if t == nil then return end
	local dup = {}
	for k, v in pairs(t) do dup[k] = v end
	return dup, copy(...)
end
-- remove the element at [idx] by overwriting it with the last element
-- and dropping the tail slot. O(1), but does not preserve order.
-- returns the same table
local function fastDelete(table,idx)
	local last = #table
	local tail = table[last]
	table[idx] = tail
	table[last] = nil
	return table
end
-- concatenate the list arguments into a newly allocated list
local function tcat(...)
	local joined = {}
	tcatD(joined, ...)
	return joined
end
-- return the integers n..m as multiple values; with a single
-- argument, returns 1..n. an empty range (n > m) yields no values
-- instead of recursing without end
local function iota(n,m)
	if not m then return iota(1,n) end
	if n > m then return end
	if n == m then return n end
	return n, iota(n+1,m)
end
-- collect the keys of [m] into a list (in traversal order, which is
-- unspecified)
local function keys(m)
	local ks = {}
	for k in next, m do
		ks[#ks + 1] = k
	end
	return ks
end
-- terminal styling support.
-- [ansi.levels] ranks terminal capability tiers; the numbers are
-- compared with <= so a larger number is treated as a superset of the
-- smaller ones.
local ansi = {
levels = {
plain = 0;
ansi = 1;
color = 2;
color8b = 3;
color24b = 4;
};
}
-- each entry is {minimum level, "on" sequence, "off" sequence}; the
-- sequences are stored without the leading escape byte, which
-- ansi.formatter prepends when it builds the styling functions
ansi.seqs = {
br = {ansi.levels.ansi, "[1m", "[22m"};
hl = {ansi.levels.ansi, "[7m", "[27m"};
ul = {ansi.levels.ansi, "[4m", "[24m"};
em = {ansi.levels.ansi, "[3m", "[23m"};
};
-- guess which ansi.levels tier applies to the handle [fd].
-- a handle that can report a seek position is taken to be a regular
-- file and gets no styling; otherwise $COLORTERM and $TERM decide
function ansi.termclass(fd) -- awkwardly emulate isatty
	local lv = ansi.levels
	if fd:seek('cur',0) then return lv.plain end
	if os.getenv('COLORTERM') == 'truecolor' then return lv.color24b end
	local term = os.getenv('TERM')
	if not term then return lv.plain end
	if term:match '-256color' then return lv.color8b end
	if term:match '-color' then return lv.color end
	return lv.ansi
end
-- build a table of styling functions suited to the capabilities of
-- the handle [fd] (as judged by ansi.termclass). the table contains
-- one function per entry of ansi.seqs, plus
--   color(str, n, bright, bg)  palette colour [n]
--   rgb(str, r, g, b, bg)      colour from components in 0..1
--   resetLine()                sequence that clears the current line
-- functions the terminal cannot honour degrade to returning their
-- text unchanged (or '' for resetLine).
--
-- fixes relative to the previous version:
--  * color() read the undefined globals [br] and [bg], so [bright]
--    was silently ignored; it now honours [bright] and accepts an
--    optional trailing [bg] flag
--  * palette indices 8-15 produced invalid sequences; they are now
--    emitted as the bright variants of colours 0-7
--  * the 256-colour cube scaled blue by 6 instead of 5, so b=1
--    overflowed into the next green step
--  * background colours are reset with SGR 49 rather than SGR 39
function ansi.formatter(fd)
	local cl = ansi.termclass(fd)
	local id = function(...) return ... end
	local esc = '\27'
	local f = {}
	for k,v in pairs(ansi.seqs) do
		local lvl, on, off = table.unpack(v)
		if lvl <= cl then
			f[k] = function(s)
				return (esc..on) .. s .. (esc..off)
			end
		else f[k] = id end
	end
	local function ftoi(r,g,b)
		return math.ceil(r*0xff),
		       math.ceil(g*0xff),
		       math.ceil(b*0xff)
	end
	-- sequence restoring the default foreground or background colour
	local function reset(bg)
		return bg and "\27[49m" or "\27[39m"
	end
	function f.color(str, n, bright, bg)
		if n<=15 then
			if n >= 8 then -- upper half of the 16-colour palette
				bright = true
				n = n - 8
			end
			return string.format("\27[%s%cm",
					(bg and 4 or 3) +
					(bright and 6 or 0), 0x30+n)
				.. str .. reset(bg)
		else
			return string.format("\27[%c8;5;%sm",
					(bg and 0x34 or 0x33), n)
				.. str .. reset(bg)
		end
	end
	function f.resetLine()
		return '\27[1K\13'
	end
	if cl == ansi.levels.color24b then
		function f.rgb(str, r,g,b, bg)
			return string.format("\27[%c8;2;%u;%u;%um", bg and 0x34 or 0x33,
				ftoi(r,g,b)) .. str .. reset(bg)
		end
	elseif cl == ansi.levels.color8b then
		function f.rgb(str, r,g,b, bg)
			-- 6x6x6 colour cube: every channel is quantised to 0..5
			local code = 16 + math.floor(r * 5)*36 + math.floor(g * 5)*6 + math.floor(b * 5)
			return string.format("\27[%c8;5;%um", bg and 0x34 or 0x33, code)
				.. str .. reset(bg)
		end
	elseif cl == ansi.levels.color then
		function f.rgb(str, r,g,b, bg)
			local code = 0x30 + 1 -- TODO
			return string.format("\27[%c%cm", bg and 0x34 or 0x33, code)
				.. str .. reset(bg)
		end
	else
		function f.rgb(s) return s end
		function f.color(s) return s end
		function f.resetLine() return '' end
	end
	return f
end
-- render an arbitrary value as a human-readable string, for
-- debugging. tables are printed recursively with [pfx] as the
-- current indent, metatables are printed in front of their table
-- followed by '::', and tables already visited (tracked in [cyc])
-- are shown as '<...>'
local function dump(v,pfx,cyc,ismeta)
	pfx = pfx or ''
	cyc = cyc or {}
	local kind = type(v)
	if kind == 'string' then return string.format('%q', v) end
	if kind ~= 'table' then return string.format('%s', v) end
	if cyc[v] then return '<...>' end
	cyc[v] = true

	local np = pfx .. ' '
	local rows = {}
	for key, val in pairs(v) do
		local tkey, tval = dump(key,np,cyc), dump(val,np,cyc)
		rows[#rows + 1] = string.format('%s[%s] = %s\n', np, tkey, tval)
	end
	local body = table.concat(rows)

	local meta = ''
	local mt = getmetatable(v)
	if mt then
		meta = dump(mt,pfx,cyc,true) .. '::'
	end
	if ismeta then
		return string.format('%s<|\n%s%s|>',meta,body,pfx)
	end
	return meta..'{\n' .. body .. pfx .. '}'
end
-- minimal "class" helper. calling [struct] with a table turns that
-- table into a class: it gains an is(o) predicate and has [struct]
-- installed as its metatable, so that calling the class forwards to
-- its own mk method
local struct = {}
function struct.__call(s, ...)
	return s:mk(...)
end
function struct.mk(self, s)
	s.is = function(o)
		return getmetatable(o) == s
	end
	return setmetatable(s, self)
end
setmetatable(struct, struct)
-- cursor over a binary string. stream(str) wraps [str]; each call
-- to :next(f) unpacks the little-endian string.pack format [f] at the
-- cursor, advances past it, and returns the decoded values
local stream = struct {
	__index = {
		next = function(self, f)
			local got = table.pack(string.unpack('<'..f, self.data, self.index))
			-- string.unpack's last result is the next read position
			self.index = got[got.n]
			return table.unpack(got, 1, got.n - 1)
		end;
	};
	mk = function(self, str)
		local inst = { data = str; index = 1 }
		return setmetatable(inst, self)
	end;
}
-- namespace for the binary format codecs and schemas
local fmt = {}
-- error object for mistakes made by the user rather than by the
-- program; carries a message in .msg and stringifies to it
local userError = struct {
	mk = function(self, s)
		local err = { msg = s }
		return setmetatable(err, self)
	end;
	__tostring = function(self) return self.msg end;
}
-- raise a userError whose message is built from string.format
-- arguments; level 0 keeps position information out of the error
local function id10t(...)
	local msg = string.format(...)
	error(userError(msg), 0)
end
-- packer(f): function that packs its arguments with the
-- little-endian string.pack format [f].
-- unpacker(f): function that reads format [f] from a stream object
-- via its :next method
local function packer(f)
	return function(...)
		return string.pack("<"..f, ...)
	end
end
local function unpacker(f)
	return function(s)
		return s:next(f)
	end
end
-- codec ({encode, decode} pair) for a single string.pack format
local function qpack(f)
	local codec = {}
	codec.encode = packer(f)
	codec.decode = unpacker(f)
	return codec
end
-- forward declarations: the codecs below refer to these functions
-- before their bodies are defined further down the file
local parse, marshal
-- primitive codecs, each wrapping one string.pack format.
-- s4/s2/s1 are strings preceded by a 4-, 2- or 1-byte length;
-- I1..I4 are unsigned integers of that many bytes
fmt.string = qpack "s4"
fmt.label = qpack "s2"
fmt.tag = qpack "s1"
fmt.u8 = qpack "I1"
fmt.u16 = qpack "I2"
fmt.u24 = qpack "I3"
fmt.u32 = qpack "I4"
fmt.path = {
	-- encodes a FIXED path to an arbitrary type of object.
	-- the encoding is a depth byte followed by that many components
	-- taken in the order w, dn, mn, nn; encoding stops at the first
	-- component the path lacks
	encode = function(a)
		local order = {
			{'w', fmt.label};
			{'dn', fmt.u8};
			{'mn', fmt.u8};
			{'nn', fmt.u8};
		}
		local kind, vals = 0, {}
		for depth, comp in ipairs(order) do
			local field, ty = comp[1], comp[2]
			if not a[field] then break end
			kind = depth
			table.insert(vals, marshal(ty, a[field]))
		end
		return marshal(fmt.u8,kind) .. table.concat(vals)
	end;
	decode = function(s)
		local kind <const> = parse(fmt.u8, s)
		local components <const> = {
			{'w',fmt.label};
			{'dn',fmt.u8};
			{'mn',fmt.u8};
			{'nn',fmt.u8};
		}
		local path = {}
		local depth = 1
		while depth <= kind do
			local label, ty = table.unpack(components[depth])
			path[label] = parse(ty,s)
			depth = depth + 1
		end
		return path
	end;
}
-- codec constructor for a homogeneous list of [t] values, stored as
-- an element count of type [ty] (u32 unless given) followed by the
-- elements
fmt.list = function(t,ty)
	ty = ty or fmt.u32
	local codec = {}
	function codec.encode(a)
		local count = #a
		local parts = {marshal(ty, count)}
		for i = 1, count do
			table.insert(parts, marshal(t, a[i]))
		end
		return table.concat(parts)
	end
	function codec.decode(s)
		local count = parse(ty, s)
		local out = {}
		for _ = 1, count do
			table.insert(out, parse(t, s))
		end
		return out
	end
	return codec
end
-- codec constructor for a tagged union. [struct] lists the variants:
-- a bare string is a variant without payload, a {name, type} pair is
-- a variant carrying a payload of that type. payload-less values are
-- represented as the variant name, the others as {name, payload}
fmt.any = function(struct)
	local payload, names = {}, {}
	for _, variant in ipairs(struct) do
		local name
		if type(variant) == 'string' then
			name = variant
			payload[name] = true
		else
			name = variant[1]
			payload[name] = variant[2]
		end
		table.insert(names, name)
	end
	local tdisc = fmt.enum(table.unpack(names))
	local codec = {}
	function codec.encode(a)
		if type(a) == 'string' and payload[a] == true then
			return marshal(tdisc, a)
		end
		local tname, obj = table.unpack(a)
		assert(payload[tname] ~= true, '`any` enumeration '..tostring(tname)..' has no associated struct')
		return marshal(tdisc, tname) ..
		       marshal(payload[tname], obj)
	end
	function codec.decode(s)
		local tname = parse(tdisc, s)
		if payload[tname] == true then return tname end
		local obj = parse(payload[tname], s)
		return {tname,obj}
	end
	return codec
end
-- codec constructor for a key→value table, stored as a list of
-- key/val records whose count has type [ity]
fmt.map = function(from,to,ity)
	local record = {
		{'key', from},
		{'val', to}
	}
	local ent = fmt.list(record, ity)
	local codec = {}
	function codec.encode(a)
		local pairsList = {}
		for k, v in pairs(a) do
			pairsList[#pairsList + 1] = {key=k, val=v}
		end
		return ent.encode(pairsList)
	end
	function codec.decode(s)
		local out = {}
		for _, p in pairs(ent.decode(s)) do
			out[p.key] = p.val
		end
		return out
	end
	function codec.null()
		return {}
	end
	return codec
end
-- codec constructor for an enumeration of the given names. a value
-- is stored as its zero-based position in the argument list, using
-- the narrowest of u8/u16/u32 that can hold the number of members
fmt.enum = function(...)
	local vals,rmap = {...},{}
	for pos, name in pairs(vals) do rmap[name] = pos - 1 end
	local ty
	if #vals > 0xffff then ty = fmt.u32 -- just in pathological case
	elseif #vals > 0xff then ty = fmt.u16
	else ty = fmt.u8 end
	return {
		encode = function(a)
			local ord = rmap[a]
			if not ord then error(string.format('"%s" is not part of enum "%s"', a, table.concat(vals,'","')),3) end
			return marshal(ty, ord)
		end;
		decode = function(s)
			local n = parse(ty,s)
			local pos = n + 1
			if pos > #vals then error(string.format('enum "%s" does not have %u members', table.concat(vals,'","'),n),3) end
			return vals[pos]
		end;
	}
end
-- aliases: unique ids are u32s, blobs are length-prefixed strings
fmt.uid = fmt.u32
fmt.blob = fmt.string
-- schema template: returns a copy of the schema [ty] with a trailing
-- 'rels' field holding a list of relation-set uids
fmt.relatable = function(ty)
	local relField = {'rels', fmt.list(fmt.uid, fmt.u16)}
	return tcat(ty, {relField})
end
-- record schemas. each schema is an ordered list of {name, type}
-- fields; marshal and parse walk the list in order, so the order of
-- the fields here IS the on-disk layout.

-- a note attached to a meaning: a kind tag and its paragraphs
fmt.note = {
{'kind', fmt.tag};
{'paras', fmt.list(fmt.string)};
}
-- a usage example: the quoted text and a label for its source
fmt.example = {
{'quote',fmt.string};
{'src',fmt.label};
}
-- one meaning of a definition or phrase
fmt.meaning = fmt.relatable {
{'lit', fmt.string};
{'examples', fmt.list(fmt.example,fmt.u8)};
{'notes', fmt.list(fmt.note,fmt.u8)};
}
-- a phrase belonging to a definition, with its own meanings
fmt.phrase = fmt.relatable {
{'str',fmt.label};
{'means',fmt.list(fmt.meaning,fmt.u8)};
}
-- representations of an item keyed by orthography uid
fmt.ortho = fmt.map(fmt.uid, fmt.blob, fmt.u8)
-- UID <0> is always the UTF-8 representation of the primary ortho
fmt.writing = {
{'enc',fmt.ortho}; -- if empty, print the morphs in sequence
{'info',fmt.label};
{'morphs',fmt.list(fmt.uid,fmt.u16)};
}
-- one definition of a word
fmt.def = fmt.relatable {
{'writings', fmt.list(fmt.writing,fmt.u8)};
-- for japanese-like languages where words that are
-- pronounced/written the same under the indexing
-- orthography have alternate writings that are
-- definition-specific. e.g. words よう and さま
-- would both have a definition written as 様
-- ordinary languages will have only 1 writing
{'part', fmt.u8};
{'branch', fmt.list(fmt.label,fmt.u8)};
{'means', fmt.list(fmt.meaning,fmt.u8)};
{'forms', fmt.map(fmt.u16,fmt.label,fmt.u16)};
{'phrases', fmt.list(fmt.phrase,fmt.u16)};
}
-- a dictionary entry: its definitions plus alternate encodings
fmt.word = fmt.relatable {
{'defs', fmt.list(fmt.def,fmt.u8)};
-- store secondary encodings of this word
{'enc', fmt.ortho};
}
-- description of one orthography: its uid, a name, and a tagged
-- union ('repr') saying how values in it are to be displayed
fmt.orthography = {
{'uid', fmt.uid};
{'name', fmt.tag};
{'repr', fmt.any{
'utf8'; -- display as utf-8 compatible text
'opaque'; -- do not display at all; used only by other tools
'bytes'; -- display as raw hexadecimal bytes
{'int',fmt.u8}; -- display as a series of integers (n=byte len)
{'glyphs',{ -- map to a palette of custom glyphs.
-- treated as 'opaque' in text-only environments
{'glyphs', fmt.list {
{'image',fmt.blob};
{'name',fmt.tag};
}};
{'encoding', fmt.u8}; -- number of bytes per codepoint
{'format',fmt.enum('svg','bmp','png')};
}};
}};
}
-- dictionary-wide metadata, stored ahead of the words.
-- partsOfSpeech is the table that the numeric 'part' field of a
-- definition indexes into (see writeDict/readDict)
fmt.dictHeader = {
{'lang', fmt.tag};
{'meta', fmt.string};
{'partsOfSpeech', fmt.list(fmt.tag,fmt.u16)};
{'inflectionForms', fmt.list({
{'name', fmt.tag};
{'abbrev', fmt.tag};
{'desc', fmt.string};
{'parts', fmt.list(fmt.tag,fmt.u8)};
-- which parts of speech does this form apply to?
-- leave empty if not relevant
},fmt.u16)};
{'orthographies', fmt.list(fmt.orthography,fmt.u8)}
}
-- a relation set (group of related objects)
fmt.relSet = {
{'uid', fmt.uid};
-- IDs are persistent random values so they can be used
-- as reliable identifiers even when merging exports in
-- a parvan-unaware VCS
{'kind', fmt.enum('syn','ant','met')};
-- membership is stored in individual objects, using a field
-- attached by the 'relatable' template
}
-- schema template for a key/value record; the key defaults to a tag
-- and the value to a blob when not given
fmt.pair = function(k,v) return {
{'key',k or fmt.tag};
{'val', v or fmt.blob};
} end
-- a morph: name, per-orthography encodings, free-form metadata
-- pairs, and a list of uids in 'rads'
fmt.morph = {
{'name',fmt.tag};
{'enc', fmt.ortho};
{'meta', fmt.list(fmt.pair(nil,fmt.string),fmt.u16)};
{'rads', fmt.list(fmt.uid,fmt.u16)};
}
-- top-level layout of a binary dictionary (follows the magic number)
fmt.dict = {
{'header', fmt.dictHeader};
{'words', fmt.map(fmt.string,fmt.word)};
{'relsets', fmt.list(fmt.relSet)};
{'morphs', fmt.map(fmt.uid,fmt.morph)};
}
-- serialize [val] according to [ty] and return the bytes.
-- [ty] is either a codec (has an encode function) or a record schema:
-- an ordered list of {name, type, version} fields. fields whose
-- version is newer than [pvers] are skipped; a field that is missing
-- from [val] raises an error
function marshal(ty, val, pvers)
	pvers = pvers or 0
	if ty.encode then
		return ty.encode(val)
	end
	local ac = {}
	for _, fld in ipairs(ty) do
		local name, fty, vers = table.unpack(fld)
		if pvers >= (vers or 0) then
			local field = assert(val[name],
				string.format('marshalling error: missing field %s', name))
			table.insert(ac, marshal(fty, field, pvers))
		end
	end
	return table.concat(ac)
end
-- inverse of marshal: read a value of type [ty] from [stream].
-- for record schemas, a field whose version is newer than [pvers] is
-- not read and takes the default given as the fourth element of its
-- field description instead
function parse(ty, stream, pvers)
	pvers = pvers or 0
	if ty.decode then
		return ty.decode(stream)
	end
	local obj = {}
	for _, fld in ipairs(ty) do
		local name, fty, vers, dflt = table.unpack(fld)
		local present = pvers >= (vers or 0)
		if present then
			obj[name] = parse(fty, stream, pvers)
		else
			obj[name] = dflt
		end
	end
	return obj
end
-- create an interning function: it maps each distinct value to a
-- small integer, handing out 1, 2, 3… in order of first appearance.
-- returns the function and the value→integer table behind it
local function atomizer()
	local map, nextId = {}, 1
	local function atomize(v)
		local known = map[v]
		if known then return known end
		local id = nextId
		map[v] = id
		nextId = id + 1
		return id
	end
	return atomize, map
end
local function rebuildRelationCache(d)
	-- (re)build a dictionary's relation cache; needed
	-- at load time and whenever any changes to relsets
	-- are made. the result is stored in d._relCache and is
	-- indexed by set uid; an entry holds the list of
	-- members (.mems, each {path=, obj=}) and the relset
	-- record itself (.set). entries created only from
	-- d.relsets, i.e. for sets no object refers to, carry
	-- no .mems list
	local cache = {}
	local function scan(obj, path)
		for _, uid in pairs(obj.rels) do
			local slot = cache[uid]
			if not slot then
				slot = {mems = {}}
				cache[uid] = slot
			end
			table.insert(slot.mems, {path = path, obj = obj})
		end
	end
	local function scanMeanings(means, w, dn, pn)
		for mk, mv in pairs(means) do
			scan(mv, {w = w, dn = dn, pn = pn, mn = mk})
		end
	end
	for wk, wv in pairs(d.words) do
		scan(wv, {w = wk})
		for dk, dv in pairs(wv.defs) do
			scan(dv, {w = wk, dn = dk})
			scanMeanings(dv.means, wk, dk, nil)
			for pk, pv in pairs(dv.phrases) do
				scan(pv, {w = wk, dn = dk, pn = pk})
				scanMeanings(pv.means, wk, dk, pk)
			end
		end
	end
	for _, set in pairs(d.relsets) do
		local slot = cache[set.uid]
		if not slot then
			slot = {}
			cache[set.uid] = slot
		end
		slot.set = set
	end
	d._relCache = cache
end
-- serialize dictionary [d] to the binary working format and return
-- the bytes. NOTE: this modifies [d] — every definition's part of
-- speech is replaced by its index into a freshly rebuilt
-- d.header.partsOfSpeech table
local function writeDict(d)
	local atomizePoS, posMap = atomizer()
	for _, w in pairs(d.words) do
		for _, def in ipairs(w.defs) do
			def.part = atomizePoS(def.part)
		end
	end
	local parts = {}
	for name, idx in pairs(posMap) do
		parts[idx] = name
	end
	d.header.partsOfSpeech = parts
	return 'PV0\2'..marshal(fmt.dict, d)
end
-- decode a binary dictionary from the string [file].
-- raises a userError if the magic number marks an exchange-format
-- file or is not a parvan header at all. part-of-speech indices are
-- expanded back into their names and the relation cache is built
-- before the dictionary is returned
local function readDict(file)
	local s = stream(file)
	local magic = s:next 'c4'
	if magic == 'PV0 ' then
		id10t 'text-based dictionaries must be translated to binary using the `import` command before they can be used'
	end
	if magic ~= 'PV0\2' then
		id10t 'not a parvan0 file'
	end
	local d = parse(fmt.dict, s)
	-- handle atoms
	local parts = d.header.partsOfSpeech
	for _, w in pairs(d.words) do
		for _, def in ipairs(w.defs) do
			def.part = parts[def.part]
		end
	end
	-- cachemaps give faster lookup for structures that would
	-- otherwise require expensive scans
	rebuildRelationCache(d)
	return d
end
-- split [str] into words, shell-style. words are separated by
-- space, tab or newline. a word may be quoted with "…" or '…', or
-- with the nestable pairs […], {…} and “…”; a backslash takes the
-- next character literally, except that \n yields a newline.
-- if [maxwords] is given, splitting stops after that many words and
-- the unparsed remainder of the string is appended as one final
-- element.
--
-- states: 0 between words, 1 in a bare word, 2 in a quote,
-- 11/12 after a backslash in a bare word/quote, 100 word limit hit.
--
-- fixes relative to the previous version:
--  * the newline escape tested for 0x63 ('c') although the comment
--    and strsan() both use 'n' (0x6e), so text sanitized by strsan
--    did not survive a round trip
--  * when the word limit was reached exactly at the end of the
--    input, a spurious '\' word was appended
local function strwords(str,maxwords) -- here be dragons
	local wds = {}
	local w = {}
	local state, d, quo, dquo = 0,0
	local function flush(n,final)
		if next(w) or state ~= 0 and state < 10 then
			table.insert(wds, utf8.char(table.unpack(w)))
			w = {}
		elseif final and (state == 11 or state == 12) then
			-- input ended on a dangling backslash
			table.insert(wds, '\\')
		end
		state = n
		quo = nil
		dquo = nil
		d = 0
		if #wds == maxwords then
			state = 100
		end
	end
	local function isws(c)
		return c == 0x20 or c == 0x09 or c == 0x0a
	end
	for p,cp in utf8.codes(str) do
		if state == 0 then -- begin
			if not(isws(cp)) then
				if cp == 0x22 or cp == 0x27 then
					quo = cp
				elseif cp == 0x5b then -- boxquote
					quo = 0x5d
					dquo = 0x5b
				elseif cp == 0x7b then -- curlquote
					quo = 0x7d
					dquo = 0x7b
				elseif cp == 0x201c then -- fancyquote
					quo = 0x201d
					dquo = 0x201c
				end
				if quo then
					state = 2
					d = 1
				elseif cp == 0x5c then -- escape
					state = 11
				else
					state = 1
					table.insert(w, cp)
				end
			end
		elseif state == 1 then -- word
			if isws(cp) then flush(0)
			elseif cp == 0x5c then state = 11 else
				table.insert(w,cp)
			end
		elseif state == 2 then -- (nested?) quote
			if cp == 0x5c then state = 12
			elseif cp == quo then
				d = d - 1
				if d == 0 then
					flush(0)
				else
					table.insert(w,cp)
				end
			else
				if cp == dquo then d = d + 1 end
				table.insert(w,cp)
			end
		elseif state == 11 or state == 12 then -- escape
			-- 12 = quote escape, 11 = raw escape
			if cp == 0x6e then --n
				table.insert(w,0x0a)
			else
				table.insert(w,cp)
			end
			state = state - 10
		elseif state == 100 then -- word limit reached
			-- triggered from flush
			table.insert(wds, string.sub(str, p))
			return wds
		end
	end
	flush(nil,true)
	return wds
end
-- wrap [str] in square brackets so that strwords() reads it back as
-- one word: newlines become the two characters \n, and square
-- brackets that have no partner are prefixed with a backslash
local function strsan(str)
	local out = {}
	local open = {} -- positions in [out] of '[' still awaiting a ']'
	for _, cp in utf8.codes(str) do
		if cp == 0x0a then
			out[#out + 1] = 0x5c
			out[#out + 1] = 0x6e
		elseif cp == 0x5b then
			open[#open + 1] = #out + 1
			out[#out + 1] = cp
		elseif cp == 0x5d then
			if #open > 0 then
				open[#open] = nil
			else
				out[#out + 1] = 0x5c
			end
			out[#out + 1] = cp
		else
			out[#out + 1] = cp
		end
	end
	-- escape unmatched openers back to front, so that the positions
	-- recorded earlier are still correct when they are used
	for j = #open, 1, -1 do
		table.insert(out, open[j], 0x5c)
	end
	return '[' .. utf8.char(table.unpack(out)) .. ']'
end
-- table of filter predicates; filled in further down
local predicates
-- compile the filter expression [str] into a function taking an
-- entry and returning whether it matches. a single word is shorthand
-- for the [lit] predicate, an empty expression matches everything,
-- otherwise the first word names the predicate and the remaining
-- words are its arguments
local function parsefilter(str)
	local f = strwords(str)
	if #f == 1 then
		return function(e)
			return predicates.lit.fn(e, f[1])
		end
	end
	if not next(f) then
		-- null predicate matches all
		return function() return true end
	end
	local spec = predicates[f[1]]
	if not spec then
		id10t('no such predicate %s',f[1])
	else
		local p = spec.fn
		return function(e)
			return p(e, table.unpack(f,2))
		end
	end
end
do
-- true when every sub-predicate matches [e]; evaluation stops at
-- the first one that fails
local function p_all(e,pred,...)
	local rest, n = table.pack(...), 0
	while pred ~= nil do
		if not parsefilter(pred)(e) then return false end
		n = n + 1
		pred = rest[n]
	end
	return true
end;
-- true as soon as one sub-predicate matches [e]
local function p_any(e,pred,...)
	local rest, n = table.pack(...), 0
	while pred ~= nil do
		if parsefilter(pred)(e) then return true end
		n = n + 1
		pred = rest[n]
	end
	return false
end;
-- true when no sub-predicate matches [e]; evaluation stops at the
-- first one that does
local function p_none(e,pred,...)
	local rest, n = table.pack(...), 0
	while pred ~= nil do
		if parsefilter(pred)(e) then return false end
		n = n + 1
		pred = rest[n]
	end
	return true
end;
-- count how many sub-predicates match [e] and compare that number
-- to [count] using the comparator named by [cmp] (eq, ne, lt, gt).
-- all sub-predicates are evaluated before the comparison is made
local function p_some(e,cmp,count,...)
	local cfn = {
		eq = function(a,b) return a == b end;
		ne = function(a,b) return a ~= b end;
		lt = function(a,b) return a < b end;
		gt = function(a,b) return a > b end;
	}
	local compare = cfn[cmp]
	if not compare then
		id10t('[some %s]: invalid comparator', cmp)
	end
	count = tonumber(count)
	local preds = table.pack(...)
	local hits = 0
	for i = 1, preds.n do
		local pred = preds[i]
		if pred == nil then break end
		if parsefilter(pred)(e) then
			hits = hits + 1
		end
	end
	return compare(hits, count)
end;
-- turn the arguments into a lookup set; also returns how many
-- arguments were passed
local function prepScan(...)
	local tgt = select('#',...)
	local wanted = {...}
	local map = {}
	for _, v in pairs(wanted) do
		map[v] = true
	end
	return map,tgt
end
predicates = {
-- logical combinators over sub-predicates
all = {
	syntax = '<pred>…';
	help = 'every sub-<pred> matches';
	fn = p_all;
};
any = {
	syntax = '<pred>…';
	help = 'any sub-<pred> matches';
	fn = p_any;
};
none = {
	syntax = '<pred>…';
	help = 'no sub-<pred> matches (also useful to force evaluation for side effects without creates matches)';
	fn = p_none;
};
some = {
	syntax = '(eq|ne|lt|gt) <count> <pred>…';
	help = '<count> [or more/less] sub-<pred>s match';
	fn = p_some;
};
seq = {
	syntax = "<wrap> '[' <arg>… ']' <pred>…";
	help = 'reuse the same stack of arguments';
	-- appends [args] to every sub-predicate, then evaluates the
	-- results as the arguments of the [wrap] predicate
	fn = function(e,wrap,args,...)
		local lst = {}
		for i = 1, select('#', ...) do
			local pred = select(i, ...)
			if not pred then break end
			lst[#lst + 1] = pred .. ' ' .. args
		end
		local quoted = map(lst, strsan)
		local filter = wrap .. ' ' .. table.concat(quoted, ' ')
		return parsefilter(filter)(e)
	end;
};
mark = {
	syntax = '<mark> [<pred>]';
	help = 'apply <mark> to the words that match <pred>, or all the words that are tested if no <pred> is supplied. use to visually indicate the reason that a given term matched the query';
	-- adds [val] to e.mark unless it is already there; yields
	-- nothing when a <pred> was given and did not match
	fn = function(e, val, pred)
		if pred ~= nil and not parsefilter(pred)(e) then return end
		e.mark = e.mark or {}
		for _, existing in pairs(e.mark) do
			if existing == val then return true end
		end
		table.insert(e.mark, val)
		return true
	end;
};
clear = {
	syntax = '<mark> [<pred>]';
	help = 'like [mark] but clears marks instead of setting them';
	-- removes the first occurrence of [val] from e.mark; yields
	-- nothing when a <pred> was given and did not match
	fn = function(e, val, pred)
		if pred ~= nil and not parsefilter(pred)(e) then return end
		e.mark = e.mark or {}
		for pos, existing in pairs(e.mark) do
			if existing == val then
				table.remove(e.mark, pos)
				break
			end
		end
		return true
	end;
};
marked = {
	syntax = '(by <mark> [pred]|in <pred>)';
	help = 'tests for an existing <mark> on the result';
	-- [marked by <mark>] asks for one specific mark, [marked in
	-- <pred>] for any mark at all. yields nothing when the <pred>
	-- fails or the requested mark is not among those set
	fn = function(e, mode, val, pred)
		if mode == 'in' then
			pred, val = val, nil
			if pred == nil then
				id10t '[marked in <pred>] requires a predicate'
			end
		elseif mode == 'by' then
			if val == nil then
				id10t '[marked by <mark>] requires a mark'
			end
		else
			id10t('invalid form [marked %s]', mode)
		end
		if pred ~= nil and not parsefilter(pred)(e) then return end
		if e.mark == nil or not next(e.mark) then return false end
		if not val then return true end
		for _, existing in pairs(e.mark) do
			if existing == val then return true end
		end
	end;
};
def = {
	help = 'word has at least one definition that contains all <keyword>s';
	syntax = '<keyword>…';
	-- a word matches when some single meaning's text contains
	-- every keyword (plain substring search, no patterns)
	fn = function(e,...)
		local kw = {...}
		local function hasAll(text)
			for _, needle in ipairs(kw) do
				if not string.find(text, needle, 1, true) then
					return false
				end
			end
			return true
		end
		for _, d in ipairs(e.word.defs) do
			for _, m in ipairs(d.means) do
				if hasAll(m.lit) then return true end
			end
		end
		return false
	end;
};
lit = {
	help = 'word is, begins with, matches, or ends with <search> in <script> or the primary orthography ("also" enables searching the primary as well as the listed scripts)';
	syntax = '<search> [(pfx|sfx|match)] [any|(in|also) <script>…]';
	-- compares the entry's literal (e.lit) to [val]: exactly by
	-- default, as prefix/suffix with pfx/sfx, or as a Lua pattern
	-- with match.
	-- NOTE(review): the script options are parsed and validated but
	-- the resulting [scripts] list is not consulted yet; only e.lit
	-- is ever compared.
	-- fix: the list of scripts used to be filled with repeated
	-- copies of the in/also keyword (opts[oc]) rather than the
	-- script names that follow it.
	fn = function(e,val,...)
		local opts,oc = {...},1
		local scripts, op = {0}
		if opts[oc] == 'pfx' or opts[oc] == 'sfx' or opts[oc] == 'match' then
			op = opts[oc]
			oc = oc + 1
		end
		if opts[oc] then
			if opts[oc] == 'any' then
				scripts = nil
			else
				if opts[oc] == 'in' then
					scripts = {}
				elseif opts[oc] ~= 'also' then
					id10t('[lit … %s]: invalid spec', opts[oc])
				end
				if #opts < oc+1 then
					id10t('[lit … %s]: missing argument', opts[oc])
				end
				for i=oc+1,#opts do
					table.insert(scripts, opts[i])
				end
			end
		end
		if not op then
			return e.lit == val
		elseif op == 'pfx' then
			return val == string.sub(e.lit,1,#val)
		elseif op == 'sfx' then
			return val == string.sub(e.lit,(#e.lit) - #val + 1)
		elseif op == 'match' then
			return string.find(e.lit, val) ~= nil
		else
			id10t('[lit %s %s] is not a valid filter, “%s” should be “pfx”, “sfx”, or “match”',val,op,op)
		end
	end;
};
-- NOTE(review): this entry documents the predicate but supplies no
-- [fn]; parsefilter would therefore end up calling a nil value if a
-- filter named it — looks unimplemented, confirm before relying on it
morph = {
help = 'find words with specific morphs';
syntax = "(any|all|only) <script> [seq] ((lit|rec) <repr>|rad '[' <repr>… ']')…";
};
form = {
	help = 'match against word\'s inflected forms';
	syntax = '(<inflect> | (of <form>|has) [any] ([un]set | is <inflect> | (pfx|sfx|sub) <affix>)…)';
	-- arguments as the code consumes them: fn(e, k, mode, spec…)
	--  * no arguments: true if any definition has any form at all
	--  * k alone: true if some form of the word equals k
	--  * k 'of' spec…: tests the form stored under key k
	--  * k 'has' spec…: tests every form of every definition
	-- a spec is a run of tests (set, unset, is X, pfx X, sfx X,
	-- sub X) that must all hold, or, after a leading 'any', of
	-- which one must hold.
	-- NOTE(review): the syntax string puts of/has first, while the
	-- code expects it second; and forms are stored under u16 keys
	-- (see fmt.def) while k arrives as a string — confirm what the
	-- intended key for 'of' is.
	--
	-- fixes relative to the previous version:
	--  * 'any' assigned the accidental global [nc] instead of
	--    advancing [mc], so 'any' was then read as a test name
	--  * the argument of is/pfx/sfx/sub never reached the test
	--    function, which received nil in its place
	--  * a leftover debugging print() has been removed
	fn = function(e, k, mode, ...)
		if k == nil then -- eq [form has set]
			for _,d in pairs(e.word.defs) do
				if next(d.forms) then return true end
			end
		elseif mode == 'of' or mode == 'has' then
			local match,mc = {...},1
			if not next(match) then
				id10t('[form %s]: missing spec',mode)
			end
			local any = match[1]=='any'
			local eval = function()return true end;
			if any then
				mc = 2 -- skip over the 'any' keyword
				eval = function()return false end;
			end
			local ok = false
			local fns = {
				set = function(a) return a~=nil end;
				unset = function(a) return a==nil end;
				is = function(a,b)return a and a==b end;
				pfx = function(a,b)return a and string.sub(a,1,#b) == b end;
				sfx = function(a,b)return a and string.sub(a,0-#b) == b end;
				sub = function(a,b)return a and string.find(a,b) ~= nil end;
			}
			repeat local n, op, arg = 1, table.unpack(match,mc)
				if not op then id10t "missing argument for [form]" end
				if not fns[op] then
					id10t('[form … %s] is not a valid filter', op)
				end
				if op ~= "set" and op ~= 'unset' then
					n = 2
					if not arg then
						id10t('[form … %s]: missing argument', op)
					end
				end
				-- chain this test onto those gathered so far; op and
				-- arg are fresh locals on each pass, so every closure
				-- keeps its own pair
				local oe = eval
				eval = any and function(a)
					return fns[op](a,arg) or oe(a)
				end or function(a)
					return fns[op](a,arg) and oe(a)
				end
				ok = true
			mc = mc + n until mc > #match
			if not ok then
				id10t '[form]: incomplete spec'
			end
			for _,d in pairs(e.word.defs) do
				if mode=='has' then
					for cat,infd in pairs(d.forms) do
						if eval(infd) then return true end
					end
				else
					if eval(d.forms[k]) then return true end
				end
			end
		elseif mode ~= nil then
			id10t('[form %s]: invalid mode', mode)
		else
			for _,d in pairs(e.word.defs) do
				for _,v in pairs(d.forms) do
					if v == k then return true end
				end
			end
		end
		return false
	end;
};
part = {
	help = 'word has definitions for every <part> of speech';
	syntax = '<part>…';
	-- true when each requested part of speech is the part of at
	-- least one definition of the word.
	-- fix: the previous version compared the number of matching
	-- DEFINITIONS with the number of arguments, so a word with two
	-- noun definitions failed [part noun], and a part named twice
	-- could only match words defining it twice
	fn = function(e,...)
		local wanted, need = {}, 0
		for i = 1, select('#', ...) do
			local p = select(i, ...)
			if p ~= nil and not wanted[p] then
				wanted[p] = true
				need = need + 1
			end
		end
		local seen, found = {}, 0
		for _, d in ipairs(e.word.defs) do
			local p = d.part
			if p ~= nil and wanted[p] and not seen[p] then
				seen[p] = true
				found = found + 1
			end
		end
		return found == need
	end
};
root = {
	help = 'match an entry that derives from every <word>';
	syntax = '<word>…';
	-- looks for one definition whose branch list contains as many
	-- hits on the given words as there are arguments; yields
	-- nothing (not false) when no definition qualifies
	fn = function(e,...)
		local map, tgt = prepScan(...)
		for _, d in ipairs(e.word.defs) do
			local missing = tgt
			for _, r in ipairs(d.branch) do
				if map[r] then missing = missing - 1 end
			end
			if missing == 0 then return true end
		end
	end
};
-- NOTE(review): [phrase] has no [fn] at all and [ex] has one that
-- does nothing (and so never matches) — both look like placeholders
-- for predicates that are not implemented yet; confirm
phrase = {
syntax = '<pred>…';
help = 'match only words with phrases';
};
ex = {
syntax= '[by <source>] [(any|all) <term>…]';
help = 'entry has an example by <source> with any/all of <term>s';
fn = function() end;
};
note = {
	help = 'entry has a matching note';
	syntax = '([kind <kind> [<term>]] | term <term> | (min|max|count) <n>)';
	-- kind/term search the notes of every meaning: [kind] filters
	-- on the note's kind and optionally on a term in its text,
	-- [term] on the text alone (plain substring search over the
	-- paragraphs joined by newlines). min/max/count compare the
	-- total number of notes on the word with <n>. yields nothing
	-- when a kind/term search fails or the operation is unknown
	fn = function(e, op, k, t)
		if op == 'kind' or op == 'term' then
			local byTerm = (op == 'term')
			if byTerm and t then
				id10t('too many arguments for [note term <term>]')
			end
			for _,d in ipairs(e.word.defs) do
				for _,m in ipairs(d.means) do
					for _,n in ipairs(m.notes) do
						if byTerm or n.kind == k then
							if not byTerm and t == nil then return true end
							local body = table.concat(n.paras,'\n')
							if string.find(body, t or k, 1, true) ~= nil then
								return true
							end
						end
					end
				end
			end
		elseif op == 'min' or op == 'max' or op == 'count' then
			if t then
				id10t('too many arguments for [note %s <n>]',op)
			end
			local n = math.floor(tonumber(k))
			local total = 0
			for _,d in ipairs(e.word.defs) do
				for _,m in ipairs(d.means) do
					total = total + #m.notes
					-- bail out as soon as the answer is settled
					if op == 'min' and total >= n then return true end
					if op == 'max' and total > n then return false end
				end
			end
			if op == 'count' then return total == n end
			if op == 'max' then return total <= n end
			return false
		end
	end;
};
}
end
-- open [file] with io.open when it is a path (extra arguments are
-- passed along), raising a userError if that fails; anything that is
-- not a string is assumed to be an open handle and returned as is
local function safeopen(file,...)
	if type(file) ~= 'string' then
		return file
	end
	local fd = io.open(file,...)
	if not fd then error(userError("cannot open file " .. file),2) end
	return fd
end
-- parse the textual object path [p] into a table with the fields
-- w (word), dn (definition), pn (phrase), mn (meaning), nn (note)
-- and xn (example). the numeric components are converted with
-- tonumber; components the path does not contain are nil. the
-- path '.' yields an empty table.
-- the textual shape is <word>.<dn>[/p<pn>][/m<mn>][/n<nn> | /x<xn>]
local function pathParse(p)
-- this is cursed, rewrite without regex pls TODO
if p == '.' then return {} end
-- pattern fragment for one component: a prefix followed by a
-- captured run of digits
local function comp(pfx)
return pfx .. '([0-9]+)'
end
-- try to match the whole of [p] as a word followed by exactly the
-- given sequence of components; returns the captures, or nil
local function mtry(...)
local mstr = '^(.+)'
for _, v in ipairs{...} do
mstr = mstr .. comp(v)
end
return p:match(mstr .. '$')
end
local xn
-- paths that go through a phrase, from most to least specific
local w,dn,pn,mn,nn = mtry('%.','/p','/m','/n')
if not w then w,dn,pn,mn,xn = mtry('%.','/p','/m','/x') end
if not w then w,dn,pn,mn = mtry('%.','/p','/m') end
if not w then w,dn,pn= mtry('%.','/p') end
if not w then
-- paths without a phrase: try ever shorter prefixes of
-- .dn/m/n, retrying with /x wherever the last component is /n
local comps = {'%.','/m','/n'}
for i=#comps, 1, -1 do
local args = {table.unpack(comps,1,i)}
w,dn,mn,nn = mtry(table.unpack(args))
if not w and args[i] == '/n' then
args[i] = '/x'
w,dn,mn,xn = mtry(table.unpack(args))
end
if w then break end
end
end
-- nothing matched: the whole string, less one trailing dot if
-- present, is the word
if not w then w=p:match('^(.-)%.?$') end
return {w = w, dn = tonumber(dn), mn = tonumber(mn), pn=tonumber(pn); nn = tonumber(nn), xn = tonumber(xn)}
end
-- render the path table [p] as text; the empty path is '.'.
-- when [styler] (a table of styling functions as built by
-- ansi.formatter) is given, the components are decorated with it.
-- a path naming only a word gets a trailing '.' so that it can be
-- parsed again, unless [display] is set
local function pathString(p,styler,display)
	local function s(text, st, ...)
		if not styler then return text end
		return styler[st](tostring(text),...)
	end
	local function comp(c,n,...)
		local slash = s('/','color',5)
		return slash .. s(string.format("%s%u",c,n), 'color',...)
	end
	if not p.w then return '.' end
	local t = { s(p.w,'ul') }
	if p.dn then t[2] = string.format(".%s", s(p.dn,'br')) end
	if p.pn then t[#t+1] = comp('p',p.pn,4,true) end
	if p.mn then t[#t+1] = comp('m',p.mn,5,true) end
	if p.xn then
		t[#t+1] = comp('x',p.xn,6,true)
	elseif p.nn then
		t[#t+1] = comp('n',p.nn,4)
	end
	local bareWord = (t[2] == nil)
	if bareWord and not display then
		return p.w .. '.' --make sure paths are always valid
	end
	return s(table.concat(t),'em')
end
-- true when the two paths name the same object, i.e. agree on every
-- component
local function pathMatch(a,b)
	for _, field in ipairs{'w','dn','mn','pn','nn','xn'} do
		if a[field] ~= b[field] then return false end
	end
	return true
end
-- follow the path [a] through ctx.dict and return a table of the
-- objects met on the way: word, def, phrase, meaning, and ex or
-- note, as far as the path goes. the empty path yields an empty
-- table. a component that names nothing raises a userError
local function pathResolve(ctx, a)
	local res = {}
	if not a.w then return res end -- empty paths are valid!
	local function lookup(seg, tbl,val)
		if not tbl then error('bad table',2) end
		local found = tbl[val]
		if not found then
			id10t('bad %s in path: %s', seg, val)
		end
		return found
	end
	res.word = lookup('word', ctx.dict.words, a.w)
	if not a.dn then return res end
	res.def = lookup('definition', res.word.defs, a.dn)
	if not (a.pn or a.mn) then return res end
	-- a meaning belongs to the phrase if the path names one, and
	-- directly to the definition otherwise
	local means
	if a.pn then
		res.phrase = lookup('phrase', res.def.phrases, a.pn)
		if not a.mn then return res end
		means = res.phrase.means
	else
		means = res.def.means
	end
	res.meaning = lookup('meaning', means, a.mn)
	if a.xn then
		res.ex = lookup('example',res.meaning.examples,a.xn)
	elseif a.nn then
		res.note = lookup('note',res.meaning.notes,a.nn)
	end
	return res
end
-- like pathResolve, but returns the objects as five values:
-- word, def, phrase, meaning, and the example or note
local function pathNav(...)
	local t = pathResolve(...)
	local leaf = t.ex or t.note
	return t.word, t.def, t.phrase, t.meaning, leaf
end
-- return the innermost object that the path [a] refers to
local function pathRef(ctx, a)
	local chain = table.pack(pathNav(ctx,a))
	-- pathNav yields word, def, phrase, meaning, example/note;
	-- the last one present is the target
	for i = 5, 2, -1 do
		if chain[i] then return chain[i] end
	end
	return chain[1]
end
-- true when the object named by [sub] is the one named by [super]
-- or lies somewhere beneath it; the empty path contains everything.
--
-- fixes relative to the previous version:
--  * the definition number was never compared, so word.1 was
--    reported to contain word.2
--  * a meaning directly under a definition (word.1/m2) was reported
--    to contain the meaning of the same number under any phrase
--    (word.1/p3/m2)
local function pathSub(super,sub)
	if super.w == nil then return true end
	if sub.w ~= super.w then return false end
	if super.dn == nil then return true end
	if sub.dn ~= super.dn then return false end
	if super.pn ~= nil and sub.pn ~= super.pn then return false end
	if super.mn == nil then return true end
	-- from here on [super] names one meaning, so [sub] has to be in
	-- the same phrase (or in none, alike) and at the same meaning
	if sub.pn ~= super.pn then return false end
	if sub.mn ~= super.mn then return false end
	if super.xn then
		if sub.nn then return false end
		if sub.xn ~= super.xn then return false end
	elseif super.nn then
		if sub.xn then return false end
		if sub.nn ~= super.nn then return false end
	end
	return true
end
-- expand the inline markup of [text] using the styling functions in
-- [sty] (as built by ansi.formatter). markup is a bracketed segment
-- whose first character selects the style:
--   [*…] bright  [!…] emphasis  [_…] underline  [$…] coloured
--   [>…] an object path  [\…] the contents, verbatim
-- styled segments may nest. a backslash directly in front of the
-- opening bracket suppresses expansion; the backslash is dropped.
-- segments with any other first character are left untouched.
--
-- fix: for such unrecognised segments the character in front of the
-- bracket (which the pattern has to capture in order to spot the
-- backslash) was discarded, so 'a[foo]' came out as '[foo]'
function ansi.formatMarkup(text, sty)
	return (string.gsub(text, '(.?)(%b[])', function(esc,seg)
		if esc == '\\' then return seg end
		local mode, body = seg:match('^%[(.)%s*(.-)%]$')
		local r
		if mode == '\\' then
			r = body
		elseif mode == '*' then
			r = sty.br(ansi.formatMarkup(body,sty))
		elseif mode == '!' then
			r = sty.em(ansi.formatMarkup(body,sty))
		elseif mode == '_' then
			r = sty.ul(ansi.formatMarkup(body,sty))
		elseif mode == '$' then
			r = sty.color(ansi.formatMarkup(body,sty),6,true)
		elseif mode == '>' then
			r = pathString(pathParse(body),sty,true)
		else
			-- not markup: hand back the match as it was found
			return esc..seg
		end
		return esc..r
	end))
end
local cmds = {
create = {
	help = "initialize a new dictionary file";
	syntax = "<lang>";
	raw = true;
	-- writes an empty dictionary for the language [lang] to
	-- ctx.file (a path or an already open handle)
	exec = function(ctx, lang)
		if not lang then
			id10t 'for what language?'
		end
		local fd = safeopen(ctx.file,"wb")
		local header = {
			lang = lang;
			meta = "";
			partsOfSpeech = {};
			inflectionForms = {};
			orthographies = {};
		}
		local blank = {
			header = header;
			words = {};
			relsets = {};
			morphs = {};
		}
		fd:write(writeDict(blank))
		fd:close()
	end;
};
coin = {
	help = "add a new word";
	syntax = "<word>";
	write = true;
	-- registers [word] with no definitions; refuses if the word
	-- is already in the dictionary
	exec = function(ctx,word)
		local words = ctx.dict.words
		if words[word] then
			id10t "word already coined"
		end
		words[word] = {defs={},rels={},enc={}}
	end;
};
-- `def <word> <part-of-speech> [<meaning> [<root>…]]`: append a new
-- definition to <word>, creating the word entry if it does not exist.
-- when <meaning> is given it becomes the definition's first meaning;
-- every further argument is stored as a root in the `branch` list.
def = {
	help = "define a word",
	syntax = "<word> <part-of-speech> [<meaning> [<root>…]]",
	write = true,
	exec = function(ctx,word,part,means,...)
		local etym = {...}
		if not (word and part) then
			id10t 'bad definition'
		end
		local lexicon = ctx.dict.words
		local entry = lexicon[word]
		if not entry then
			entry = {defs={},rels={},enc={}}
			lexicon[word] = entry
		end
		-- the meaning list starts empty unless a meaning was supplied
		local meanings = {}
		if means then
			meanings[1] = {
				lit = means,
				examples = {},
				notes = {},
				rels = {},
			}
		end
		local n = #entry.defs + 1
		entry.defs[n] = {
			part = part,
			writings = {},
			branch = etym,
			means = meanings,
			forms = {},
			phrases = {},
			rels = {},
		}
		local msg = string.format('added definition #%u to “%s”', n, word)
		ctx.log('info', msg)
	end,
};
-- `mean <word> <def#> <meaning>`: append a meaning (with empty note,
-- example and relation lists) to definition <def#> of <word>.
mean = {
	help = "add a meaning to a definition",
	syntax = "<word> <def#> <meaning>",
	write = true,
	exec = function(ctx,word,dn,m)
		local target = { w = word, dn = tonumber(dn) }
		local resolved = pathResolve(ctx, target)
		local fresh = {lit=m,notes={},examples={},rels={}}
		table.insert(resolved.def.means, fresh)
	end,
};
-- `rel <op> …`: manage relation sets (groups of synonyms, antonyms or
-- metonyms). a relation set is a {uid, kind} record in dict.relsets;
-- membership is stored on the member objects themselves, as a list of
-- set uids in their `rels` field. dict._relCache maps a uid to the set
-- and its members and must be rebuilt after every change.
--
-- operations handled here:
--   new <rel> <path>…        create a set of kind <rel> from the paths
--   show <path>              print every set matched for <path>
--   destroy <path> <group#>  dissolve a set; <group#> is the number
--                            printed by `show` for the same path
--   link / drop              accepted but not implemented yet: they
--                            only rebuild the relation cache
-- `purge` passes the initial validation but has no implementation and
-- ends in the 'invalid operation' error.
-- returns false from `show` to signal that nothing was modified.
rel = {
	help = "manage groups of related words";
	syntax = {
		"(show|purge) <path> [<kind>]";
		"(link|drop) <word> <group#> <path>…";
		"new <rel> <path> <path>…";
		"destroy <word> [<group#>]";
		-- was "(syn|ant|co)", but the code below accepts `met`, not `co`
		"rel ::= (syn|ant|met)"
	};
	write = true;
	exec = function(ctx, op, ...)
		local fo = ctx.sty[io.stdout]
		if op == nil then id10t "not enough arguments" end
		if not (op=='new' or op=='link' or op=='drop' or op=='destroy' or op=='show' or op=='purge') then
			id10t('invalid operation “%s” for `rel`', op)
		end
		if op == 'new' then
			local rel = ...
			if rel ~= 'syn' and rel ~= 'ant' and rel ~= 'met' then
				id10t 'relationships must be synonymous, antonymous, or metonymous'
			end
			-- resolve every member before touching the dictionary, so
			-- that a bad path cannot leave a half-linked set behind
			local members = {}
			for i,l in ipairs{select(2,...)} do
				local obj = pathRef(ctx, pathParse(l))
				if not obj then
					id10t('no such object “%s”', l)
				end
				members[i] = obj
			end
			local newstruct = {
				uid=math.random(1,0xffffFFFF);
				kind = rel;
			}
			table.insert(ctx.dict.relsets, newstruct)
			for _, obj in ipairs(members) do
				table.insert(obj.rels,newstruct.uid)
			end
			rebuildRelationCache(ctx.dict)
		else -- assemble a list of groups
			local groups = {}
			local tgtw = ...
			if tgtw == nil then id10t 'missing path' end
			local wp = pathParse(tgtw)
			-- result unused; presumably this call rejects paths that
			-- do not exist -- TODO confirm against pathResolve
			pathResolve(ctx, wp)
			for i,rs in pairs(ctx.dict.relsets) do
				local allMembers = ctx.dict._relCache[rs.uid].mems
				for j,s in ipairs(allMembers) do
					-- NOTE(review): the member path is passed as the
					-- *containing* path here; confirm the argument
					-- order is the intended one
					if pathSub(s.path, wp) then
						table.insert(groups, {
							set = {
								uid = rs.uid;
								kind = rs.kind;
								members = allMembers;
							};
							mem = s;
							id = i;
						})
						break
					end
				end
			end
			if op == 'show' then
				local cdw = ctx.dict.words
				-- human-readable label for a member path; `w` is the
				-- word entry the path belongs to
				local function label(path,w)
					local repr = path.w
					if path.dn then
						repr = repr .. string.format("(%s)", w.defs[path.dn].part)
						if path.mn then
							-- NOTE(review): prints the definition
							-- number in front of the meaning text;
							-- path.mn may have been intended
							repr = repr .. string.format(": %u. %s", path.dn, w.defs[path.dn].means[path.mn].lit)
						else
							-- summarize the definition by its meanings.
							-- (definitions have no `lit` of their own,
							-- so reading it off w.defs yields nothing)
							local fulldef = {}
							for i,v in ipairs(w.defs[path.dn].means) do
								fulldef[i] = v.lit
							end
							if fulldef[1] then
								repr = repr..': '..table.concat(fulldef, '; ')
							end
						end
					end
					return repr
				end
				local kls = {
					syn = fo.color('synonyms',2,true)..' of';
					ant = fo.color('antonyms',1,true)..' of';
					met = fo.color('metonyms',4,true)..' of';
				}
				for i, g in ipairs(groups) do
					local others = {}
					for j, oo in ipairs(g.set.members) do
						local o = oo.path
						if (g.set.kind == 'ant' or not pathMatch(o, g.mem.path)) and
							--exclude antonym headwords
							not (g.set.kind == 'ant' and j==1) then
							-- look the word up directly, as is done for
							-- the heading below
							table.insert(others, ' '..label(o,cdw[o.w]))
						end
					end
					-- antonym sets are headed by their first member;
					-- other sets by the member that matched the query
					local head = g.mem.path
					if g.set.kind == 'ant' then
						head = g.set.members[1].path
					end
					-- the word name lives in the member's path; members
					-- carry no `w` field of their own
					local llab = fo.br(label(head,cdw[head.w]) or '')
					io.stdout:write(string.format("% 4d) %s\n%s", i, fo.ul(kls[g.set.kind] .. ' ' .. llab), table.concat(others,'\n')) .. '\n')
				end
				return false -- no changes made
			elseif op == 'link' or op == 'drop' then
				-- TODO not implemented: the group number and paths
				-- (arguments 2 and 3…) are currently ignored
				rebuildRelationCache(ctx.dict)
			elseif op == 'destroy' then
				local _, tgtn = ...
				if not tgtn then id10t 'missing group number' end
				local delendum = groups[tonumber(tgtn)]
				if not delendum then id10t 'bad group number' end
				for _,v in pairs(delendum.set.members) do
					local rels = v.obj.rels
					-- walk backwards by index: deleting while iterating
					-- with pairs() can skip entries, and an object may
					-- hold the same uid more than once
					for idx = #rels, 1, -1 do
						if rels[idx] == delendum.set.uid then
							fastDelete(rels,idx)
						end
					end
				end
				fastDelete(ctx.dict.relsets, delendum.id)
				rebuildRelationCache(ctx.dict)
			else
				id10t 'invalid operation'
			end
		end
	end;
};
-- `mod`: restructure the dictionary. only the metadata lives here;
-- the handler is attached further down as cmds.mod.exec.
mod = {
	help = "move, merge, split, or delete words or definitions",
	syntax = {
		"<path> (drop | [(to|move)|merge|clobber] <path>)",
		"path ::= <word>[.[<def#>[/p<phrase#>][/m<meaning#>[(/n<note#>|/x<example#>)]]]]",
	},
	write = true,
};
-- `morph`: help text and grammar for morph management. this entry
-- carries no handler and no `write`/`raw`/`nofile` markers of its own.
morph = {
	help = "manage and attach morphs (morphemes/composable glyphs)",
	syntax = {
		"(<ls>|<define>|<mod>)",
		"define ::= def (id <name>|as) [<form>]… [from <morph>…]",
		"ls ::= ls (<morph>|meta <key> <value>|has <key>)…",
		"mod ::= <morph> (drop|[un]link <path>|meta <key> [<value>]|inc [<morph>])",
		"morph ::= (id <name>|enc <form>)",
		"form ::= [<script>]=<repr>",
	},
};
-- `note`: attach notes to a meaning.
--   <m-path> add <kind> <para>…   create a new note of <kind>
--   <m-path> for <kind> <para>…   append paragraphs to the first
--                                 existing note of <kind>
--   <note-path> <para>…           append paragraphs to the note the
--                                 path itself designates
-- every remaining argument is one paragraph.
note = {
	help = "add a note to a definition or a paragraph to a note";
	syntax = {"(<m-path> (add|for) <kind> | <m-path>:<note#>) <para>…";
		"m-path ::= <word>.<def#>[/p<phrase#>]/m<meaning#>"};
	write = true;
	exec = function(ctx,path,...)
		if not path then id10t 'missing path' end
		local dest = pathParse(path)
		-- resolve the *parsed* path: like every other caller, this
		-- must hand pathResolve a path table, not the raw string
		local t = pathResolve(ctx,dest)
		-- resolved paths expose the meaning as `meaning`
		local mng = t.meaning
		if not mng then
			id10t('%s does not designate a meaning', path)
		end
		local paras
		if dest.nn then
			paras = {...}
		else
			local op, kind = ...
			paras = { select(3, ...) }
			if op == 'add' then
				dest.nn = #(mng.notes) + 1
				mng.notes[dest.nn] = {kind=kind, paras=paras}
				return
			elseif op == 'for' then
				for i,nn in ipairs(mng.notes) do
					if nn.kind == kind then
						dest.nn = i break
					end
				end
				if not dest.nn then
					id10t('no note of kind %s in %s',kind,path)
				end
			else
				-- anything else would leave dest.nn unset and crash
				-- on the note lookup below
				id10t('invalid operation “%s” for `note`', tostring(op))
			end
		end
		local target = mng.notes[dest.nn]
		if not target then
			id10t('no note #%s in %s', tostring(dest.nn), path)
		end
		local dpa = target.paras
		local top = #dpa
		for i,p in ipairs(paras) do
			dpa[top+i] = p
		end
	end
};
-- `shell`: metadata only; the handler is attached further down as
-- cmds.shell.exec. `raw` because it opens and reads the file itself.
shell = {
	help = "open an interactive prompt",
	raw = true,
};
-- `help`: metadata only; the handler is attached further down as
-- cmds.help.exec. `nofile`: no dictionary is loaded for it.
help = {
	help = "show help",
	nofile = true,
	syntax = "[<command>]",
};
-- `predicates`: metadata only; the handler is attached further down
-- as cmds.predicates.exec. `nofile`: no dictionary is loaded for it.
predicates = {
	help = "show available filter predicates",
	nofile = true,
	syntax = "[<predicate>]",
};
-- `export`: metadata only; the handler is attached further down as
-- cmds.export.exec. not marked `write`: the dictionary is only read.
export = {
	help = "create a text file dump compatible with source control",
	syntax = "[<target-file>]",
};
-- `import`: metadata only; the handler is attached further down as
-- cmds.import.exec. `raw` because it builds the dictionary from the
-- exchange file and writes the target file itself.
import = {
	help = "generate a usable dictionary from a text export file",
	syntax = "[<input-file>]",
	raw = true,
	write = true,
};
-- `dump`: print the loaded dictionary through the `dump` helper.
-- has no `help` field, so the help listing skips it.
dump = {
	exec = function(ctx)
		local dict = ctx.dict
		print(dump(dict))
	end,
};
-- `ls`: metadata only; the handler is attached further down as
-- cmds.ls.exec.
ls = {
	help = "list all words that meet any given <filter>",
	syntax = {
		"[<filter>…]",
		"filter ::= (<word>|<pred> <arg>…)",
		"arg ::= (<atom>|'['(<string>|<pred> <arg>…)']')",
	},
}
}
-- `predicates [<predicate>]`: print the syntax and help text of every
-- documented filter predicate to stderr, or of just <predicate> when
-- one is named. predicates without a `help` field are not listed.
function cmds.predicates.exec(ctx, pred)
	local list = predicates
	if pred then
		if not predicates[pred] then
			id10t('no such predicate “%s”', pred)
		end
		-- keep the name as the key: it is printed as the label below
		list = {[pred] = predicates[pred]}
	end
	local f = ctx.sty[io.stderr]
	-- iterate the selection, not the full table, so that the
	-- argument actually narrows the output
	for k,p in pairs(list) do
		if p.help then
			io.stderr:write(
				f.br(' - ' ..
					f.rgb('[',1,0,.5) ..
					k .. ' ' ..
					-- fall back to an ellipsis when the predicate
					-- declares no syntax, without handing nil to
					-- the formatter
					(p.syntax and f.color(p.syntax,5) or '…') ..
					f.rgb(']',1,0,.5)) .. ': ' ..
				f.color(p.help,4,true) .. '\n')
		end
	end
end
-- `ls [<filter>…]`: print every word accepted by at least one filter
-- (or every word, when no filter is given), sorted by headword.
-- for each word the output holds the headword, its definitions with
-- their parts of speech, the numbered meanings with their notes, and
-- relation lists. ctx.flags.ident adds ‹path› identifiers to meanings
-- and notes; ctx.flags.rels adds definition- and word-level relations.
function cmds.ls.exec(ctx,...)
	local filter = nil
	local out = {}
	local args = {...}
	-- fold the filters into one predicate. building from the last
	-- argument backwards makes the first filter the outermost one,
	-- so filters are tried in the order they were given
	for i=#args,1,-1 do local f <const> = args[i]
		local fn = parsefilter(f)
		local of = filter or function() return false end
		filter = function(e)
			return fn(e) or of(e)
		end
	end
	for lit,w in pairs(ctx.dict.words) do
		local e = {lit=lit, word=w, dict=ctx.dict}
		if filter == nil or filter(e) then
			table.insert(out, e)
		end
	end
	table.sort(out, function(a,b) return a.lit < b.lit end)
	local fo = ctx.sty[io.stdout]

	-- collect the paths related to the object at `path`, grouped by
	-- relation kind. returns {syn=…, ant=…, met=…}, each a flat list
	-- of member paths. in an antonym set the first member is the
	-- headword: the headword sees the other members as antonyms,
	-- while the other members see each other as synonyms and the
	-- headword as their antonym
	local function gatherRelSets(path)
		local antonymSets, synonymSets, metonymSets = {},{},{}
		local obj = pathRef(ctx,path)
		if next(obj.rels) then
			for i, relid in ipairs(obj.rels) do
				local specuset,tgt,anto = {}
				local rel = ctx.dict._relCache[relid].set
				for j, mbr in ipairs(ctx.dict._relCache[relid].mems) do
					if pathMatch(mbr.path, path) then
						if rel.kind == 'syn' then tgt = synonymSets
						elseif rel.kind == 'met' then tgt = metonymSets
						elseif rel.kind == 'ant' then
							if j == 1 -- is this the headword?
								then tgt = antonymSets
								else tgt = synonymSets
							end
						end
					elseif j == 1 and rel.kind == 'ant' then
						anto = mbr.path
					else
						table.insert(specuset, mbr.path)
					end
				end
				if tgt then
					table.insert(tgt, specuset)
					if anto then
						table.insert(antonymSets, {anto})
					end
				end
			end
		end
		local function flatten(lst)
			local new = {}
			for i, l in ipairs(lst) do tcatD(new, l) end
			return new
		end
		return {
			syn = flatten(synonymSets);
			ant = flatten(antonymSets);
			met = flatten(metonymSets);
		}
	end

	-- append one line per non-empty relation kind to `lines`,
	-- indented by `padlen` spaces
	local function formatRels(lines, rls, padlen)
		-- optimize for the common case
		if next(rls.syn) == nil and
		   next(rls.ant) == nil and
		   next(rls.met) == nil then return {} end
		local pad = string.rep(' ',padlen)
		local function format(label, set)
			local each = map(set, function(e)
				local ew,ed = pathNav(ctx, e)
				local str = fo.ul(e.w)
				if ed then str = string.format('%s(%s)',str,ed.part) end
				if e.mn then str = string.format('%s§%u',str,e.mn) end
				return str
			end)
			return fo.em(string.format("%s%s %s",pad,label,table.concat(each,', ')))
		end
		local function add(l,c,lst)
			table.insert(lines, format(fo.color(l,c,true),lst))
		end
		if next(rls.syn) then add('synonyms:',2,rls.syn) end
		if next(rls.ant) then add('antonyms:',1,rls.ant) end
		if next(rls.met) then add('metonyms:',4,rls.met) end
		return lines
	end

	-- append the lines for one meaning `obj` (located at `path`) to
	-- `m`: the numbered meaning itself, its relations, then its notes
	local function formatMeaning(m, obj, path, indent) -- m = dest tbl
		local pad = string.rep(' ', indent)
		local function note(j,n,markup)
			if not next(n.paras) then return end
			local pad = string.rep(' ',#(n.kind) + 9)
			local nid = ''
			if ctx.flags.ident then
				nid='‹'..pathString(merge(path,{nn=j}), fo)..'›'
			end
			table.insert(m, nid..' ' .. fo.hl(' ' .. n.kind .. ' ') .. ' ' .. markup(n.paras[1]))
			for i=2,#n.paras do
				-- print paragraph i; indexing with a constant 2 here
				-- would repeat the second paragraph on every line
				table.insert(m, pad..markup(n.paras[i]))
			end
		end
		local id = ''
		if ctx.flags.ident then id='‹'..pathString(path, fo)..'›' end
		table.insert(m, string.format('%s%s %u. %s', pad, id, path.mn, ansi.formatMarkup(obj.lit,fo)))
		formatRels(m,gatherRelSets(path), 6)
		for j,n in ipairs(obj.notes) do
			note(j,n, function(v) return ansi.formatMarkup(v,fo) end)
		end
	end

	-- append the lines for every meaning of `def` to `m`
	local function defnMeanings(m, w,def,path,indent)
		for i=1,#def.means do local v = def.means[i]
			formatMeaning(m, v, merge(path,{mn=i}),indent)
		end
	end

	-- returns the styled "(part)" header of a definition and the
	-- length of its unstyled text
	local function parthead(def)
		local str = string.format('(%s)', def.part)
		return fo.color(fo.em(str), 199), #str
	end

	-- marks are assigned colors from `markcolors` in order of first
	-- appearance, cycling when the palette runs out
	local markcolor, markcolors, markmap = 0, {
		117, 75, 203, 48, 200, 190, 26, 48, 226, 198
	}, {}
	for i, w in ipairs(out) do
		local lines = { fo.ul(fo.br(w.lit)) }
		local ndefs = #w.word.defs
		if ndefs == 1 then
			-- a lone definition shares the headword's line
			local header = parthead(w.word.defs[1])
			lines[1] = lines[1] .. ' ' .. header
		end
		local mark
		local markline
		if w.mark then
			local marks = {}
			for _,m in pairs(w.mark) do
				if not markmap[m] then
					markmap[m] = markcolors[markcolor+1]
					markcolor = (markcolor+1)%#markcolors
				end
				local c = markmap[m]
				table.insert(marks, fo.hl(fo.color(string.format(' %s ',m),c)))
			end
			mark = table.concat(marks, ' ')
			markline = #lines
		end
		for j, d in ipairs(w.word.defs) do
			local top = #lines -- :/
			local header, hdln = parthead(d)
			defnMeanings(lines, w, d, {w=w.lit, dn=j}, ndefs==1 and 0 or hdln+1)
			if ctx.flags.rels then
				formatRels(lines, gatherRelSets{w=w.lit,dn=j}, 0)
			end
			if ndefs > 1 then
				if d.means[1] then
					-- the first meaning line was indented to leave
					-- room for the header: swap the padding for it
					lines[top+1] = ' ' .. header .. string.sub(lines[top+1],hdln+2)
				else
					-- no meaning line to splice the header into (the
					-- slot is empty or holds a relation line), so
					-- give the header a line of its own
					table.insert(lines, top+1, ' ' .. header)
				end
			end
		end
		if ctx.flags.rels then
			formatRels(lines,gatherRelSets{w=w.lit}, 2)
		end
		if markline then
			lines[markline] = mark .. ' ' .. lines[markline]
		end
		io.stdout:write(table.concat(lines,'\n')..'\n')
	end
end
-- `import [<input-file>]`: build a dictionary from an exchange-format
-- file (or stdin) and write it to ctx.file in the working format.
--
-- the exchange format is line-based; the first word of each line is a
-- control word. `state` tracks how deep in a word's structure the
-- parser currently is, and `path` identifies the current object:
--    0  expecting the `PV0 <lang> <meta>` header
--    1  header seen, no word opened yet
--   10  inside a word (`w`)       11  inside a definition (`d`)
--   12  inside a phrase (`p`)     13  inside a meaning (`m`)
--   14  inside a note (`n`)
-- lines starting with `*` or `meta` are skipped. unknown control words
-- are ignored for the sake of forward compatibility.
function cmds.import.exec(ctx,file)
	local ifd = io.stdin
	if file then
		ifd = safeopen(file,'r')
	end
	local new = {
		header = {
			-- placeholder; replaced by the PV0 header line
			lang = "";
			meta = "";
			partsOfSpeech = {};
			inflectionForms = {};
			orthographies = {};
		};
		words = {};
		relsets = {};
		morphs = {};
	}
	local state = 0
	local relsets = {}
	local path = {}
	local inflmap, lastinfl = {}
	local orthoIDs, lastOrtho = {}
	local morphIDs, lastMorph = {}
	local lastWriting
	-- map an exported id to a numeric uid. hexadecimal ids are used
	-- as-is; any other token is a symbolic name that is assigned a
	-- random uid on first use, remembered in `tbl`
	local function getuid(tbl, uid)
		local n = tonumber(uid,16)
		if n ~= nil then return n end
		if not tbl[uid] then
			tbl[uid] = math.random(0,0xffffFFFF)
		end
		return tbl[uid]
	end
	for l in ifd:lines() do
		local words = strwords(l)
		local c = words[1]
		-- check that the current command has between mn and mx
		-- arguments (no upper bound when mx is omitted)
		local function syn(mn,mx)
			local nw = #words - 1
			if nw < mn or (mx ~= nil and nw > mx) then
				if mx ~= nil then
					id10t('command %s needs between %u~%u words',c,mn,mx)
				else
					id10t('command %s needs at least %u words',c,mn)
				end
			end
		end
		if c ~= '*' and c~='meta' then -- comments
			if state == 0 then
				if c ~= 'PV0' then
					id10t "not a parvan export"
				end
				new.header.lang = words[2] or new.header.lang
				new.header.meta = words[3] or new.header.meta
				state = 1
			else
				local T = pathResolve({dict=new}, path)
				local W,D,P,M,N,X =
					T.word,
					T.def,
					T.phrase,
					T.meaning,
					T.note,
					T.ex
				if c == 'w' then syn(1) state = 10
					path = {w=words[2]}
					new.words[words[2]] = {defs={},rels={},enc={}}
					lastMorph = nil
					-- a writing never carries over into another word
					lastWriting = nil
				elseif c == 'f' then syn(1)
					local nf = {
						name = words[2];
						abbrev = words[3] or "";
						desc = words[4] or "";
						parts = {};
					}
					table.insert(new.header.inflectionForms, nf)
					inflmap[words[2]] = #(new.header.inflectionForms)
					lastinfl = nf
				elseif c == 'fp' then syn(1)
					if not lastinfl then
						id10t 'fp can only be used after f' end
					table.insert(lastinfl.parts,words[2])
				elseif c == 's' then syn(2)
					relsets[words[3]] = relsets[words[3]] or {}
					relsets[words[3]].kind = words[2]
					relsets[words[3]].uid = tonumber(words[3],16)
					relsets[words[3]].members = relsets[words[3]].members or {}
				elseif c == 'mo' then syn(1)
					local uid,name = table.unpack(words,2)
					uid = getuid(morphIDs, uid)
					new.morphs[uid] = {
						name = name or '';
						enc = {};
						meta = {};
						rads = {};
					}
					lastMorph = new.morphs[uid]
				elseif lastMorph and state < 10 and c == 'M' then syn(2)
					local key, val = table.unpack(words,2)
					table.insert(lastMorph.meta, {key=key,val=val})
				elseif lastMorph and state < 10 and c == 'r' then syn(1)
					local r = getuid(morphIDs, words[2])
					table.insert(lastMorph.rads, r)
				elseif c == 'e' then syn(2)
					local scr, blob = table.unpack(words,2)
					scr = getuid(orthoIDs, scr)
					if state <= 10 and lastMorph then
						lastMorph.enc[scr] = blob
					elseif state == 10 then
						W.enc[scr] = blob
					elseif state >= 11 and lastWriting then
						lastWriting.enc[scr] = blob
					else
						id10t('encoding “%s” declared at bad position', blob)
					end
				elseif c == 'o' then syn(3)
					local uid, name, repr = table.unpack(words,2)
					repr = strwords(repr)
					uid = getuid(orthoIDs, uid)
					if #repr > 1 then
						local kind, p1,p2 = table.unpack(repr)
						repr = {kind,{}}
						if kind == 'glyphs' then
							repr[2].format = p1
							repr[2].glyphs = {}
							repr[2].encoding = p2
						elseif kind == 'int' then
							-- NOTE(review): parsed as hexadecimal, while
							-- the exporter emits this value through
							-- tostring() -- confirm the intended radix
							repr[2] = tonumber(p1,16)
						end
					else repr=repr[1] end
					table.insert(new.header.orthographies, {
						uid = uid;
						name = name;
						repr = repr;
					})
					lastOrtho = new.header.orthographies[#(new.header.orthographies)]
				elseif c == 'og' then syn(2)
					if not lastOrtho then
						id10t '`og` must follow an orthography declaration'
					elseif lastOrtho.repr[1] ~= 'glyphs' then
						id10t('orthography declares %s representation', lastOrtho.repr[1])
					end
					local name, data = table.unpack(words,2)
					table.insert(lastOrtho.repr[2].glyphs, {
						-- TODO decode base?? data for binary encodings
						name = name, image = data
					})
				-- parenthesized: `and` binds tighter than `or`, so
				-- without the grouping an `rh` line would be accepted
				-- before any word has been opened
				elseif state >= 10 and (c == 'r' or c == 'rh') then syn(1)
					relsets[words[2]] = relsets[words[2]] or {
						uid = tonumber(words[2],16) or math.random(0,0xffffFFFF);
						members={};
					}
					local mems = relsets[words[2]].members
					if c == 'rh' and next(mems) then
						-- the headword always occupies slot 1; move
						-- the previous occupant to the end
						mems[#mems+1] = mems[1]
						mems[1] = path
					else
						table.insert(mems,path)
					end
				elseif state >= 10 and c == 'd' then syn(1) state = 11
					table.insert(W.defs, {
						part = words[2];
						writings = {};
						branch = {};
						means = {};
						forms = {};
						phrases = {};
						rels = {};
					})
					path = {w = path.w, dn = #(W.defs)}
					-- a writing never carries over into another
					-- definition
					lastWriting = nil
				elseif state >= 11 and c == 'dr' then syn(1)
					table.insert(D.branch, words[2])
				elseif state >= 11 and c == 'df' then syn(2)
					if not inflmap[words[2]] then
						id10t('no inflection form %s defined', words[2])
					end
					D.forms[inflmap[words[2]]] = words[3]
				elseif state >= 11 and c == 'p' then syn(1) state = 12
					table.insert(D.phrases, {
						str = words[2];
						means = {};
						rels = {};
					})
					path = {w = path.w, dn = path.dn, pn = #(D.phrases)}
				elseif state >= 11 and c == 'm' then syn(1) state = 13
					-- meanings attach to the open phrase if there is
					-- one, otherwise to the definition
					table.insert((P or D).means, {
						lit = words[2];
						notes = {};
						examples = {};
						rels = {};
					});
					path = {w = path.w, dn = path.dn, pn=path.pn, mn = #((P or D).means)}
				elseif state >= 11 and c == 'W' then
					table.insert(D.writings, {
						info = words[2] or '';
						enc = {};
						morphs = {};
					})
					lastWriting = D.writings[#(D.writings)]
				elseif state >= 11 and lastWriting and c == 'Wmo' then syn(1)
					local morph = getuid(morphIDs, words[2])
					table.insert(lastWriting.morphs, morph)
				elseif state >= 13 and c == 'x' then syn(1)
					table.insert(M.examples, {
						quote = words[2];
						src = words[3] or '';
					})
				elseif state >= 13 and c == 'n' then syn(1) state = 14
					table.insert(M.notes, {kind=words[2], paras={}})
					path = {w = path.w, dn = path.dn, pn = path.pn, mn = path.mn, nn = #(M.notes)};
				elseif state >= 14 and c == 'np' then syn(1)
					table.insert(N.paras, words[2])
				end
				-- we ignore invalid ctls, for sake of forward-compat
			end
		end
	end
	if file then ifd:close() end
	for k,v in pairs(relsets) do
		if not v.uid then
			--handle non-numeric export ids
			v.uid = math.random(0,0xffffFFFF)
		end
		table.insert(new.relsets, v)
		for q,m in pairs(v.members) do
			table.insert(pathRef({dict=new},m).rels, v.uid)
		end
	end
	-- serialize first: opening with "w+b" truncates the target, so it
	-- must not be touched until writeDict has succeeded
	local o = writeDict(new);
	local ofd = safeopen(ctx.file,"w+b")
	ofd:write(o)
	ofd:close()
end
-- `export [<target-file>]`: write the loaded dictionary in the
-- line-based exchange format to <target-file>, or to stdout.
-- order of output: header, inflection forms, orthographies, morphs,
-- words (each with its definitions, writings, meanings, phrases,
-- examples and notes), and finally the relation set declarations.
-- with ctx.flags.human set, nested lines are indented with tabs.
function cmds.export.exec(ctx,file)
	local ofd = io.stdout
	if file then ofd = safeopen(file, 'w+') end
	local san = strsan
	-- emit one line: string.format(...) at nesting level `lvl`
	local function o(lvl,...)
		local pfx = ''
		if ctx.flags.human and lvl > 0 then
			pfx = string.rep('\t', lvl)
		end
		ofd:write(pfx..string.format(...)..'\n')
	end
	local d = ctx.dict
	o(0,'PV0 %s %s', san(d.header.lang), san(d.header.meta))
	-- emit the relation memberships of `obj`: `rh` where obj is the
	-- first (head) member of the set, plain `r` otherwise
	local function checksyn(obj,lvl)
		for k,v in pairs(obj.rels) do
			if d._relCache[v].mems[1].obj == obj
				then o(lvl,'rh %x',v)
				else o(lvl,'r %x',v)
			end
		end
	end
	for i,f in pairs(d.header.inflectionForms) do
		o(0,'f %s %s %s', san(f.name), san(f.abbrev), san(f.desc))
		for j,p in pairs(f.parts) do
			o(1,'fp %s', san(p))
		end
	end
	for i,s in pairs(d.header.orthographies) do
		local repr
		if type(s.repr) == 'string'
			then repr = s.repr
			else repr = s.repr[1] end
		if repr == 'int' then
			-- NOTE(review): written through tostring(), while the
			-- importer parses this value as hexadecimal -- confirm
			-- the intended radix
			repr = repr .. ' ' .. tostring(s.repr[2])
		elseif repr == 'glyphs' then
			repr = repr .. ' ' .. s.repr[2].format .. ' ' .. tostring(s.repr[2].encoding)
		end
		o(0, 'o %x %s %s', s.uid, san(s.name), san(repr))
		if s.repr[1] == 'glyphs' then
			for _,g in ipairs(s.repr[2].glyphs) do
				o(1, 'og %s %s', san(g.name), san(g.image))
			end
		end
	end
	local function scanMeans(tbl,lvl)
		for j,m in ipairs(tbl) do
			o(lvl,'m %s', san(m.lit))
			checksyn(m,lvl+1)
			for k,x in ipairs(m.examples) do
				-- sanitize quote and source separately: the format
				-- has two %s slots and needs one value for each
				o(lvl+1,'x %s %s', san(x.quote), san(x.src))
			end
			for k,n in ipairs(m.notes) do
				o(lvl+1,'n %s', san(n.kind))
				for a,p in ipairs(n.paras) do
					o(lvl+2,'np %s', san(p))
				end
			end
		end
	end
	local function scanMeta(n, meta)
		for i,m in ipairs(meta) do
			o(n, 'M %s %s', san(m.key), san(m.val)) end
	end
	local function scanEnc(n, tbl)
		for uid,enc in pairs(tbl)
			do o(n, 'e %x %s',uid,san(enc)) end
	end
	for uid, m in pairs(d.morphs) do
		o(0, 'mo %x %s', uid, san(m.name))
		scanMeta(1, m.meta)
		scanEnc(1, m.enc)
	end
	for lit, w in pairs(d.words) do
		o(0,'w %s',san(lit))
		checksyn(w,1)
		scanEnc(1, w.enc)
		for i,def in ipairs(w.defs) do
			o(1,'d %s',san(def.part))
			for _, writ in ipairs(def.writings) do
				if writ.info == '' then o(2,'W') else
					o(2,'W %s',san(writ.info)) end
				for mid,uid in pairs(writ.morphs) do
					o(3, 'Wmo %x', uid) end
				for uid,enc in pairs(writ.enc)
					do o(3, 'e %x %s',uid,san(enc)) end
			end
			checksyn(def,2)
			for j,r in ipairs(def.branch) do
				o(2,'dr %s',san(r))
			end
			scanMeans(def.means, 2)
			for j,p in ipairs(def.phrases) do
				o(2,'p %s',san(p.str))
				scanMeans(p.means, 3)
			end
		end
	end
	for _,s in ipairs(d.relsets) do o(0,'s %s %x', s.kind, s.uid) end
	-- close (and thereby flush) files we opened; stdout is left alone
	if file then ofd:close() end
end
-- Destructively filter the list `lst` in place, keeping only the
-- elements for which fn(element, index) returns a truthy value.
-- Elements are visited from the last index down to the first; a
-- rejected element is overwritten with the current last element and
-- the list is shortened by one. Order is NOT preserved.
-- Returns `lst` itself.
local function filterD(lst, fn)
	local last = #lst
	local idx = last
	while idx >= 1 do
		local item = lst[idx]
		local keep = fn(item, idx)
		if not keep then
			-- two separate stores: when idx == last both refer to
			-- the same slot, and the nil must win
			lst[idx] = lst[last]
			lst[last] = nil
			last = last - 1
		end
		idx = idx - 1
	end
	return lst
end
-- `mod` handler. currently a scaffold: it lays out which operations
-- are defined for each kind of source object but performs none of
-- them. the arguments are ignored, the `ops` table is never consulted,
-- and the only effect is a rebuild of the relation cache.
--
-- layout of `ops`: ops[<source kind>].mask[<destination kind>] names
-- the operations listed for that pairing, and ops[<source kind>].<op>
-- is the (still empty) implementation taking (from, to).
function cmds.mod.exec(ctx, orig, oper, dest, ...)
	-- placeholder shared by every operation that is not written yet
	local function todo(from,to) end
	local ops = {
		word = {
			mask = {
				word = {move=true, merge=true, clobber=true},
			},
			move = todo, merge = todo, clobber = todo,
		},
		def = {
			mask = {
				word = {move=true},
				def = {merge=true, clobber=true},
			},
			move = todo, merge = todo, clobber = todo,
		},
		phrase = {
			mask = {
				def = {move=true},
				phrase = {clobber=true},
			},
			move = todo, clobber = todo,
		},
		meaning = {
			mask = {
				def = {move=true},
				phrase = {move=true},
				meaning = {merge=true, clobber=true},
			},
			move = todo, merge = todo, clobber = todo,
		},
		example = {
			mask = {
				meaning = {move=true},
				example = {merge=true, clobber=true},
			},
			move = todo, merge = todo, clobber = todo,
		},
		note = {
			mask = {
				meaning = {move=true},
				note = {merge=true, clobber=true},
			},
			move = todo, merge = todo, clobber = todo,
		},
	}
	rebuildRelationCache(ctx.dict)
end
-- Report whether `file` can be opened for reading.
-- Returns true if it can, false otherwise; the probe handle is closed
-- again before returning.
local function fileLegible(file)
	local fd = io.open(file,"rb")
	-- io.open yields nil on failure; calling close on it
	-- unconditionally would raise instead of reporting "not legible"
	if not fd then return false end
	fd:close()
	return true
end
-- Write the prompt string `p` to stderr and read a reply from stdin:
-- one line normally, or everything up to end of input when
-- `multiline` is truthy.
-- Returns whatever the read produced (a string, or nil when a line
-- read hits end of input), or false if the read raised an error --
-- which, per the original author's note, is how ^C shows up.
local function prompt(p,multiline)
	io.stderr:write(p)
	local function readReply()
		local fmt = 'l'
		if multiline then fmt = 'a' end
		return io.stdin:read(fmt)
	end
	local ok, res = pcall(readReply)
	if not ok then return false end
	return res
end
-- `shell`: interactive prompt over the dictionary in ctx.file.
-- the dictionary is loaded once; commands then run against the
-- in-memory copy and the file is only rewritten when the session ends
-- with pending changes.
--   <command> …        run a command (`raw` commands are refused, as
--                      are writing commands on a read-only file)
--   save | wq          leave, saving pending changes
--   quit | q           leave, refused while changes are pending
--   bail | abandon | q!  leave, discarding pending changes
-- end of input leaves and saves; an interrupted read leaves and
-- discards. returns 0 when changes were abandoned.
function cmds.shell.exec(ctx)
	if not fileLegible(ctx.file) then
		-- avoid accidentally creating a file without the
		-- proper document structure and metadata
		id10t("file %s must already exist and be at least readable", ctx.file)
	end
	local fd, rw = io.open(ctx.file,"r+b"), true
	if not fd then -- not writable
		ctx.log('warn',string.format('file %s is not writable', ctx.file))
		-- checked open: a bare io.open here could still hand back
		-- nil and crash on the read below
		fd, rw = safeopen(ctx.file, "rb"), false
	end
	ctx.fd = fd
	ctx.dict = readDict(fd:read 'a')
	fd:close()
	local written = false
	local fe = ctx.sty[io.stderr]
	repeat
		local cmd = prompt(fe.br(string.format('(parvan %s) ', ctx.file)))
		if cmd == false then
			io.stderr:write(fe.resetLine())
			if written then
				ctx.log('warn', 'abandoning changes!')
			end
			return 0
		end
		if cmd and cmd ~= '' then
			local words = strwords(cmd)
			if next(words) then
				-- match built-ins on the first word, exactly like
				-- regular commands, so that stray whitespace around
				-- `save` or `quit` does not turn them into errors
				local verb = words[1]
				if verb == 'bail' or
				   verb == 'abandon' or
				   verb == 'q!' then
					if written then
						ctx.log('warn', 'abandoning changes!')
					end
					return 0
				end
				local c = cmds[verb]
				if c then
					if c.raw then
						ctx.log('fatal', verb .. ' cannot be run from `shell`')
					elseif not implies(c.write, rw) then
						ctx.log('fatal', ctx.file .. ' is not writable')
					else
						local ok, outcome = ctx.try(c.exec, ctx, table.unpack(words,2))
						-- a handler returning false reports that it
						-- changed nothing
						if ok and outcome ~= false then written = written or c.write end
					end
				elseif verb == 'save' or verb == 'wq' then
					if not written then
						ctx.log('info', 'no changes to save')
					end
					cmd = nil
				elseif verb == 'quit' or verb == 'q' then
					if not written then cmd = nil else
						ctx.log('fatal', 'dictionary has unsaved changes')
					end
				else
					ctx.log('fatal', verb .. ' is not a command')
				end
			end
		end
	until cmd == nil
	if written then
		ctx.log('info', 'saving file')
		-- serialize before opening: "w+b" truncates the file
		local out = writeDict(ctx.dict)
		local ofd = safeopen(ctx.file,'w+b')
		ofd:write(out)
		ofd:close()
	end
end
-- Print the help entry for command `k`, described by the command
-- table `v`, to stderr: the name with its first syntax line, any
-- further syntax lines (typically grammar definitions) indented
-- beneath it, then the help text.
-- Commands without a `help` field are skipped silently; a missing
-- command table is reported through id10t.
local function
showHelp(ctx,k,v)
	if not v then
		id10t 'no such command'
	end
	if v.help then
		local fe = ctx.sty[io.stderr]
		local defs, synt = {}
		if type(v.syntax) == 'table' then
			synt = v.syntax[1]
			local pad = string.rep(' ', #k+5)
			for i=2,#v.syntax do
				defs[#defs+1] = pad .. fe.color(v.syntax[i],5) .. '\n'
			end
		else synt = v.syntax end
		-- the extra syntax lines are passed as an argument rather
		-- than spliced into the format string, so a `%` in a syntax
		-- line cannot be mistaken for a format directive
		io.stderr:write(string.format(
			" > %s %s\n%s %s\n",
			fe.br(k), synt and fe.br(fe.color(synt,5)) or '',
			table.concat(defs),
			fe.em(fe.color(v.help,4,true))))
	end
end
-- `help [<command>]`: show the help entry of <command>, or of every
-- registered command when none is named (in pairs() order).
function cmds.help.exec(ctx,cmd)
	if not cmd then
		for name, entry in pairs(cmds) do
			showHelp(ctx, name, entry)
		end
		return
	end
	showHelp(ctx, cmd, cmds[cmd])
end
-- command-line flags understood by every command.
-- key: the name the flag is stored under in the flags table
-- value: { short letter, long name, description }
local globalFlags <const> = {
	human = { 'h', 'human', 'enable human-readable exports' },
	ident = { 'i', 'ident', 'show identifier paths for all items' },
	rels  = { 'r', 'rels', 'show relationships between words' },
}
-- Print the usage banner, the list of global flags, and the help
-- entry of every command to stderr. `me` is the program name shown
-- in the banner.
-- Returns 64, which the caller uses as the exit status.
local function
usage(me,ctx)
	local fe = ctx.sty[io.stderr]
	-- one pass over the flags gathers both the short letters for the
	-- banner and the per-flag description lines
	local letters, flagHelp = {}, {}
	for _, spec in pairs(globalFlags) do
		letters[#letters+1] = spec[1]
		flagHelp[#flagHelp+1] = string.format(" -%s --%s: %s\n",table.unpack(spec))
	end
	local banner = string.format(fe.br"usage:".." %s [-%s] <file> [<command> [args…]]\n",me,table.concat(letters))
	io.stderr:write(banner .. table.concat(flagHelp))
	for name, entry in pairs(cmds) do
		showHelp(ctx,name,entry)
	end
	return 64
end
-- Parse the command line and run the requested command.
--
-- argv layout: [flags] <file> [<command> [args…]]. flags may appear
-- anywhere before a literal `--`; everything after `--` is positional.
-- long flags (`--name`) and bundled short flags (`-abc`) are matched
-- against globalFlags; unrecognized flags are ignored.
--
-- unless the command is marked `raw` or `nofile`, the dictionary is
-- read from <file> first. after a command marked `write` the
-- dictionary is written back, except for `raw` commands (which manage
-- the file themselves) and handlers that return false to report that
-- nothing changed.
--
-- returns 0 after running a command, or the result of usage() when no
-- known command was given.
local function
dispatch(argv, ctx)
	local loglevel = 2
	local args = {}
	local flags = {}
	local i = 1 while i <= #argv do
		local a = argv[i]
		if a == '--' then i=i+1 break
		elseif a:sub(1,2) == '--' then
			local fs <const> = a:sub(3)
			for k,v in pairs(globalFlags) do
				if v[2] == fs then flags[k] = true end
			end
		elseif a:sub(1,1) == '-' then
			-- walk the codepoints after the leading dash
			for _,cp in utf8.codes(a:sub(2)) do
				local c <const> = utf8.char(cp)
				for k,v in pairs(globalFlags) do
					if v[1] == c then flags[k] = true break end
				end
			end
		else table.insert(args, a) end
	i = i + 1 end
	for j=i,#argv do table.insert(args,argv[j]) end
	do local ll = os.getenv('parvan_log')
		-- keep the default when the variable is not a number;
		-- a nil level would crash the comparison in log()
		if ll then loglevel = tonumber(ll) or loglevel end
		-- index by name: bare `quiet` is an undefined global and bare
		-- `debug` is the debug library, so neither could ever match.
		-- NOTE(review): globalFlags declares neither flag, so these
		-- are still never set from the command line
		if flags.quiet then loglevel=0
		elseif flags.debug then loglevel=4 end
	end
	local file, cmd = table.unpack(args)
	if cmd and cmds[cmd] then
		local c,fd,dict = cmds[cmd]
		if (not c.raw) and not c.nofile then
			fd = safeopen(file, "rb")
			dict = readDict(fd:read 'a')
			fd:close()
			-- lua io has no truncate method, so we must
			-- rely on the clobbering behavior of the open()
			-- call instead :(
		end
		-- forward a message to ctx.log if its level is enabled
		local function log(lvl,...)
			local loglevels = {
				fatal = 1,
				warn = 2,
				info = 3,
				debug = 4
			}
			if loglevels[lvl] <= loglevel then
				ctx.log(lvl,...)
			end
		end
		local outcome = c.exec({
			sty = ctx.sty;
			try = ctx.try;
			log = log;
			flags = flags;
			file = file;
			fd = fd;
			dict = dict;
		}, table.unpack(args,3))
		-- a handler returning false reports that it changed nothing
		-- (the interactive shell honors the same convention), so the
		-- file is left untouched in that case
		if (not c.raw) and c.write and outcome ~= false then
			local output = writeDict(dict)
			-- writeDict should always be given a chance to
			-- bail before the previous file is destroyed!!
			-- you don't want one bug to wipe out your entire
			-- dictionary in one fell swoop
			fd = safeopen(file,'w+b')
			fd:write(output)
			fd:close()
		end
		return 0
	else
		return usage(argv[0], ctx)
	end
end
-- use the interpreter's global `arg` table when there is one;
-- otherwise build an equivalent table from the chunk's own arguments,
-- with a stand-in program name at index 0
local argv = arg or {[0] = 'parvan', ...}
-- one formatter per output stream, keyed by the stream handle itself
local sty = {}
for _, stream in ipairs{io.stdout, io.stderr} do
	sty[stream] = ansi.formatter(stream)
end
-- Write a log line of the form "(<level>) <message>" to stderr, with
-- the level tag emphasized and colored according to the level
-- (fatal, warn, info or debug).
local function log(lvl, msg)
	local palette = {fatal=1,warn=3,info=4,debug=2}
	local ferr = sty[io.stderr]
	-- the styled tag still contains the %s placeholder, so it can
	-- serve as the head of the format string
	local tag = ferr.color(ferr.br("(%s)"),palette[lvl])
	local line = string.format(tag.." %s\n", lvl, msg)
	io.stderr:write(line)
end
-- Message handler for xpcall: extend the error with a traceback that
-- starts at stack level 3.
local function stacktrace(err)
	local level <const> = 3
	return debug.traceback(err, level)
end
-- a wrapper around xpcall that produces a standard error
-- message format when an error occurs.
-- runs fn(...) with `stacktrace` as the message handler. on failure
-- the error (with traceback) is logged at level 'fatal'.
-- returns exactly what xpcall returned: true followed by the results
-- of fn, or false followed by the error message.
local function try(fn,...)
	-- table.pack records the true number of results in `n`; the
	-- length operator is not reliable on a list that may contain
	-- nils, so unpacking by `#` could drop results after a nil
	local res = table.pack(xpcall(fn,stacktrace,...))
	if not res[1] then
		log('fatal', res[2])
	end
	return table.unpack(res, 1, res.n)
end
-- entry point: run the dispatcher under xpcall so that an uncaught
-- error is reported through the regular log format. the process exits
-- with the dispatcher's result on success and with 1 on failure
local baseContext = {
	try = try, sty = sty, log = log
}
local ok, res = xpcall(dispatch, stacktrace, argv, baseContext)
if ok then
	os.exit(res)
end
log('fatal', res)
os.exit(1)