Index: parvan.lua ================================================================== --- parvan.lua +++ parvan.lua @@ -10,10 +10,58 @@ -- +WSO Worlds Security Overdirectorate -- EID External Influence Directorate ] local function implies(a,b) return a==b or not(a) end +local function map(lst,fn) + local new = {} + for k,v in pairs(lst) do + local nv, nk = fn(v,k) + new[nk or k] = nv + end + return new +end +local function mapD(lst,fn) --destructive + -- WARNING: this will not work if nk names an existing key! + for k,v in pairs(lst) do + local nv, nk = fn(v,k) + if nk == nil or k == nk then + lst[k] = nv + else + lst[k] = nil + lst[nk] = nv + end + end + return lst +end +local function pushi(dest, idx, src, ...) + if not src then return end + dest[idx] = src + pushi(dest, idx+1, ...) +end +local function push(dest, ...) pushi(dest,#dest+1,...) end +local function cons(car, cdr) + local new = {car} + for k,v in ipairs(cdr) do new[k+1] = v end + return new +end +local function tcatD(dest, ...) + local i = #dest + local function iter(src, ...) + if src == nil then return end + local sc = #src + for j=1,sc do dest[i+j] = src[j] end + i = i + sc + iter(...) + end + iter(...) +end +local function tcat(...) + local new = {} + tcatD(new, ...) + return new +end local ansi = { levels = { plain = 0; ansi = 1; color = 2; @@ -21,11 +69,11 @@ color24b = 4; }; } ansi.seqs = { - br = {ansi.levels.ansi, "[1m", "[21m"}; + br = {ansi.levels.ansi, "[1m", "[22m"}; hl = {ansi.levels.ansi, "[7m", "[27m"}; ul = {ansi.levels.ansi, "[4m", "[24m"}; em = {ansi.levels.ansi, "[3m", "[23m"}; }; @@ -85,11 +133,11 @@ return string.format("\27[%c8;2;%u;%u;%um", bg and 0x34 or 0x33, ftoi(r,g,b)) .. str .. reset end elseif cl == ansi.levels.color8b then function f.rgb(str, r,g,b, bg) - local code = 16 + (r * 5)*36 + (g * 5)*6 + (b * 6) + local code = 16 + math.floor(r * 5)*36 + math.floor(g * 5)*6 + math.floor(b * 6) return string.format("\27[%c8;5;%um", bg and 0x34 or 0x33, code) .. str .. reset end elseif cl == ansi.levels.color then function f.rgb(str, r,g,b, bg) @@ -107,11 +155,11 @@ local function dump(v,pfx,cyc,ismeta) pfx = pfx or '' cyc = cyc or {} - local np = pfx .. ' ' + local np = pfx .. ' ' if type(v) == 'table' then if cyc[v] then return '<...>' else cyc[v] = true end end @@ -128,11 +176,11 @@ meta = dump(getmetatable(v),pfx,cyc,true) .. '::' end if ismeta then return string.format('%s<|\n%s%s|>',meta,str,pfx) else - return meta..'{\n' .. str .. pfx .. '}\n' + return meta..'{\n' .. str .. pfx .. '}' end else return string.format('%s', v) end end @@ -188,10 +236,45 @@ fmt.tag = qpack "s1" fmt.u8 = qpack "I1" fmt.u16 = qpack "I2" fmt.u24 = qpack "I3" fmt.u32 = qpack "I4" +fmt.path = { + -- encodes a FIXED path to an arbitrary type of object + encode = function(a) + local kind = 0 + local vals = {} + if a.w then kind = 1 + table.insert(vals, marshal(fmt.label, a.w)) + if a.dn then kind = 2 + table.insert(vals, marshal(fmt.u8, a.dn)) + if a.mn then kind = 3 + table.insert(vals, marshal(fmt.u8, a.mn)) + if a.nn then kind = 4 + table.insert(vals, marshal(fmt.u8, a.nn)) + end + end + end + end + return marshal(fmt.u8,kind) .. table.concat(vals) + end; + decode = function(s) + local kind = parse(fmt.u8, s) + local path = {} + local components = { + {'w',fmt.label}; + {'dn',fmt.u8}; + {'mn',fmt.u8}; + {'nn',fmt.u8}; + } + for i=1,kind do + local label, ty = table.unpack(components[i]) + path[label] = parse(ty,s) + end + return path + end; +} fmt.list = function(t,ty) ty = ty or fmt.u32 return { encode = function(a) local vals = {marshal(ty, #a)} for i=1,#a do @@ -230,30 +313,56 @@ return m end; } end -fmt.form = { - {'form', fmt.u16}; - {'text', fmt.label}; -} +fmt.enum = function(...) + local vals,rmap = {...},{} + for k,v in pairs(vals) do rmap[v] = k-1 end + local ty = fmt.u8 + if #vals > 0xffff then ty = fmt.u32 -- just in pathological case + elseif #vals > 0xff then ty = fmt.u16 end + return { + encode = function(a) + if not rmap[a] then error(string.format('"%s" is not part of enum "%s"', a, table.concat(vals,'","')),3) end + return marshal(ty, rmap[a]) + end; + decode = function(s) + local n = parse(ty,s) + if (n+1) > #vals then error(string.format('enum "%s" does not have %u members', table.concat(vals,'","'),n),3) end + return vals[n+1] + end; + } +end fmt.note = { {'kind', fmt.tag}; {'paras', fmt.list(fmt.string)}; } +fmt.example = { + {'quote',fmt.string}; + {'src',fmt.label}; +} fmt.meaning = { {'lit', fmt.string}; + {'examples', fmt.list(fmt.example,fmt.u8)}; {'notes', fmt.list(fmt.note,fmt.u8)}; } + +fmt.phrase = { + {'str',fmt.label}; + {'means',fmt.list(fmt.meaning,fmt.u8)}; + {'xref',fmt.list(fmt.path,fmt.u16)}; +} fmt.def = { {'part', fmt.u8}; {'branch', fmt.list(fmt.label,fmt.u8)}; {'means', fmt.list(fmt.meaning,fmt.u8)}; - {'forms', fmt.list(fmt.form,fmt.u16)}; + {'forms', fmt.map(fmt.u16,fmt.label,fmt.u16)}; + {'phrases', fmt.list(fmt.phrase,fmt.u16)}; } fmt.word = { {'defs', fmt.list(fmt.def,fmt.u8)}; } @@ -260,26 +369,33 @@ fmt.dictHeader = { {'lang', fmt.tag}; {'meta', fmt.string}; {'partsOfSpeech', fmt.list(fmt.tag,fmt.u16)}; + {'inflectionForms', fmt.list({ + {'name', fmt.tag}; + {'abbrev', fmt.tag}; + {'desc', fmt.string}; + {'parts', fmt.list(fmt.tag,fmt.u8)}; + -- which parts of speech does this form apply to? + -- leave empty if not relevant + },fmt.u16)}; } -fmt.synonymSet = { +fmt.relSet = { {'uid', fmt.u32}; -- IDs are persistent random values so they can be used -- as reliable identifiers even when merging exports in -- a parvan-unaware VCS - {'members', fmt.list({ - {'word', fmt.label}, {'def', fmt.u8}; - },fmt.u16)}; + {'members', fmt.list(fmt.path,fmt.u16)}; + {'kind', fmt.enum('syn','ant','met')}; } fmt.dict = { {'header', fmt.dictHeader}; {'words', fmt.map(fmt.string,fmt.word)}; - {'synonyms', fmt.list(fmt.synonymSet)}; + {'relsets', fmt.list(fmt.relSet)}; } function marshal(ty, val) if ty.encode then return ty.encode(val) @@ -286,11 +402,15 @@ end local ac = {} for idx,fld in ipairs(ty) do local name, fty = table.unpack(fld) - table.insert(ac, marshal(fty, assert(val[name]))) + table.insert(ac, marshal(fty, + assert(val[name], + string.format('marshalling error: missing field %s', name) + ) + )) end return table.concat(ac) end @@ -317,10 +437,25 @@ i=i+1 return i-1 end end, map end + +local function rebuildRelationCache(d) +-- (re)build a dictionary's relation cache; needed +-- at load time and whenever any changes to relsets +-- are made (unless they're simple enough to update +-- the cache directly by hand, but that's very eeeh) + local sc = {} + for i,s in ipairs(d.relsets) do + for j,m in ipairs(s.members) do + sc[m.w] = sc[m.w] or {} + table.insert(sc[m.w], s) + end + end + d._relCache = sc +end local function writeDict(d) local atomizePoS, posMap = atomizer() for lit,w in pairs(d.words) do @@ -347,21 +482,29 @@ for lit,w in pairs(d.words) do for j,def in ipairs(w.defs) do def.part = d.header.partsOfSpeech[def.part] end end + + -- create cachemaps for complex data structures to + -- enable faster lookup that would otherwise require + -- expensive scans + rebuildRelationCache(d) return d end + local function strwords(str) -- here be dragons local wds = {} local w = {} local state, d, quo, dquo = 0,0 - local function flush(n) - if next(w) then + local function flush(n,final) + if next(w) or state ~= 0 and state < 10 then table.insert(wds, utf8.char(table.unpack(w))) w = {} + elseif final and state > 10 then + table.insert(wds, '\\') end state = n quo = nil dquo = nil d = 0 @@ -420,11 +563,11 @@ table.insert(w,cp) end state = state - 10 end end - flush() + flush(nil,true) return wds end local predicates local function parsefilter(str) @@ -633,16 +776,137 @@ local new = {} for k,v in pairs(tab) do new[k] = v end return new end -local function parsePath(p) - local w,dn,mn,nn = p:match('^(.+)@([0-9]+)/([0-9]+):([0-9]+)$') - if not w then w,dn,mn = p:match('^(.+)@([0-9]+)/([0-9]+)$') end - if not w then w,dn = p:match('^(.+)@([0-9]+)$') end +local function pathParse(p) +-- this is cursed, rewrite without regex pls TODO + if p == '.' then return {} end + local function comp(pfx) + return pfx .. '([0-9]+)' + end + local function mtry(...) + local mstr = '^(.+)' + for _, v in ipairs{...} do + mstr = mstr .. comp(v) + end + return p:match(mstr .. '$') + end + + local xn + local w,dn,pn,mn,nn = mtry('%.','/p','/m','/n') + if not w then w,dn,pn,mn,xn = mtry('%.','/p','/m','/x') end + if not w then w,dn,pn,mn = mtry('%.','/p','/m') end + if not w then w,dn,pn= mtry('%.','/p') end + if not w then + local comps = {'%.','/m','/n'} + for i=#comps, 1, -1 do + local args = {table.unpack(comps,1,i)} + w,dn,mn,nn = mtry(table.unpack(args)) + if not w and args[i] == '/n' then + args[i] = '/x' + w,dn,mn,xn = mtry(table.unpack(args)) + end + if w then break end + end + end if not w then w=p:match('^(.-)%.?$') end - return {w = w, dn = tonumber(dn), mn = tonumber(mn), nn = tonumber(nn)} + return {w = w, dn = tonumber(dn), mn = tonumber(mn), pn=tonumber(pn); nn = tonumber(nn), xn = tonumber(xn)} +end +local function pathString(p,styler) + local function s(s, st, ...) + if styler then + return styler[st](tostring(s),...) + else return s end + end + + local function comp(c,n,...) + return s('/','color',5) + .. s(string.format("%s%u",c,n), 'color',...) + end + local t = {} + if p.w then t[1] = s(p.w,'ul') else return '.' end + if p.dn then t[2] = string.format(".%s", s(p.dn,'br')) end + if p.pn then t[#t+1] = comp('p',p.pn,4,true) end + if p.mn then t[#t+1] = comp('m',p.mn,5,true) end + if p.xn then t[#t+1] = comp('x',p.xn,6,true) + elseif p.nn then t[#t+1] = comp('n',p.nn,4) end + if t[2] == nil then + return p.w .. '.' --make sure paths are always valid + end + return s(table.concat(t),'em') +end +local function pathMatch(a,b) + return a.w == b.w + and a.dn == b.dn + and a.mn == b.mn + and a.pn == b.pn + and a.nn == b.nn + and a.xn == b.xn +end +local function pathResolve(ctx, a) + if not a.w then return end -- empty paths are valid! + local function lookup(seg, tbl,val) + if not tbl then error('bad table',2) end + local v = tbl[val] + if v then return v end + id10t('bad %s in path: %s', seg, val) + end + + local res = {} + res.word = lookup('word', ctx.dict.words, a.w) + if not a.dn then return res end + + res.def = lookup('definition', w.defs, a.dn) + if (not a.pn) and (not a.mn) then return res end + + local m if a.pn then + res.phrase = lookup('phrase', d.phrases, a.pn) + res.meaning = lookup('meaning', p.means, a.mn) + else + res.meaning = lookup('meaning', d.means, a.mn) + end + + if a.xn then + res.ex = lookup('example',m.examples,a.xn) + elseif a.nn then + res.note = lookup('note',m.notes,a.nn) + end + + return res +end + +local function pathNav(...) + local t = pathResolve(...) + return t.word,t.def,t.phrase,t.meaning,t.ex or t.note +end + +local function pathRef(ctx, a) + local w,d,p,m,n = pathNav(ctx,a) + return n or m or p or d or w +end + +local function pathSub(super,sub) + if super.w == nil then return true end + if sub.w ~= super.w then return false end + + if super.pn == nil then goto checkMN end + if sub.pn ~= super.pn then return false end + + ::checkMN:: + if super.mn == nil then return true end + if sub.mn ~= super.mn then return false end + + if super.xn then + if sub.nn then return false end + if sub.xn ~= super.xn then return false end + elseif super.nn then + if sub.xn then return false end + if sub.nn ~= super.nn then return false end + end + + return true end local cmds = { create = { help = "initialize a new dictionary file"; @@ -656,14 +920,14 @@ local new = { header = { lang = lang; meta = ""; partsOfSpeech = {}; - branch = {}; + inflectionForms = {}; }; words = {}; - synonyms = {}; + relsets = {}; } local o = writeDict(new); fd:write(o) fd:close() end; @@ -693,11 +957,15 @@ end local n = #(ctx.dict.words[word].defs)+1 ctx.dict.words[word].defs[n] = { part = part; branch = etym; - means = {means and {lit=means,notes={}} or nil}; + means = {means and { + lit=means; + examples={}; + notes={}; + } or nil}; forms = {}; } ctx.log('info', string.format('added definition #%u to “%s”', n, word)) end; }; @@ -704,72 +972,135 @@ mean = { help = "add a meaning to a definition"; syntax = " "; write = true; exec = function(ctx,word,dn,m) - local _,d = safeNavWord(ctx,word,dn) - table.insert(d.means, {lit=m,notes={}}) + local t = pathResolve(ctx,{w=word,dn=dn}) + table.insert(t.d.means, {lit=m,notes={}}) end; }; - syn = { - help = "manage synonym groups"; + rel = { + help = "manage groups of related words"; syntax = { - "(show|purge) "; + "(show|purge) []"; "(link|drop) …"; - "new …"; - "clear []"; + "new …"; + "destroy []"; + "rel ::= (syn|ant|co)" }; write = true; - exec = function(ctx, op, tgtw, ...) + exec = function(ctx, op, ...) + local fo = ctx.sty[io.stdout] + if op == nil then id10t "not enough arguments" end local groups = {} - local wp = parsePath(tgtw) - local w,d = safeNavWord(ctx, wp.w, wp.dn) - if not (op=='new' or op=='link' or op=='drop' or op=='clear' or op=='show' or op=='purge') then - id10t('invalid operation “%s” for `syn`', op) + if not (op=='new' or op=='link' or op=='drop' or op=='destroy' or op=='show' or op=='purge') then + id10t('invalid operation “%s” for `rel`', op) end if op == 'new' then - local links = {{word = wp.w, def = wp.dn or 1}} - for i,l in ipairs{...} do - local parsed = parsePath(l) - links[i+1] = {word = parsed.w, def = parsed.dn or 1} + local rel = ... + if rel ~= 'syn' and rel ~= 'ant' and rel ~= 'met' then + id10t 'relationships must be synonymous, antonymous, or metonymous' end - table.insert(ctx.dict.synonyms, { - uid=math.random(0,0xffffFFFF); + local links={} + for i,l in ipairs{select(2,...)} do + links[i] = pathParse(l) + end + local newstruct = { + uid=math.random(1,0xffffFFFF); members=links; - }) + kind = rel; + } + table.insert(ctx.dict.relsets, newstruct) + + local rc = ctx.dict._relCache + for i,l in pairs(links) do + rc[l.w] = rc[l.w] or {} + table.insert(rc[l.w], newstruct) + end + rebuildRelationCache(ctx.dict) else -- assemble a list of groups - for i,ss in ipairs(ctx.dict.synonyms) do + local tgtw = ... + local wp = pathParse(tgtw) + local w,d,m = pathNav(ctx, wp) + for i,ss in ipairs(ctx.dict.relsets) do for j,s in ipairs(ss.members) do - if s.word == wp.w and (wp.dn == nil or s.def == wp.dn) then - table.insert(groups, {set = ss, mem = s}) + if pathSub(s, wp) then +-- if s.word == wp.w and (wp.dn == nil or s.def == wp.dn) then + table.insert(groups, {set = ss, mem = s, id = i}) break end end end if op == 'show' then for i, g in ipairs(groups) do - local w,d = safeNavWord(ctx, g.mem.word, g.mem.def) - local function label(wd,defn) - local fulldef = {} - for i,v in ipairs(defn.means) do - fulldef[i] = v.lit + local w = pathResolve(ctx, {w=g.mem.w}).w + local function label(path,w) + local repr = path.w + if path.dn then + repr = repr .. string.format("(%s)", w.defs[path.dn].part) + if path.mn then + repr = repr .. string.format(": %u. %s", path.dn, w.defs[path.dn].means[path.mn].lit) + else + local fulldef = {} + for i,v in ipairs(w.defs) do + fulldef[i] = v.lit + end + repr = repr..table.concat(fulldef, '; ') + end end - fulldef = table.concat(fulldef, '; ') - return string.format("%s(%s): %s",wd,defn.part,fulldef) + + return repr end + local others = {} for j, o in ipairs(g.set.members) do - if not (o.word == g.mem.word and o.def == (wp.dn or 1)) then - local ow, od = safeNavWord(ctx, o.word,o.def) - table.insert(others, ' '..label(o.word,od)) + local ow = pathResolve(ctx, {w=o.w}).w + if (g.set.kind == 'ant' or not pathMatch(o, g.mem)) and + --exclude antonym headwords + not (g.set.kind == 'ant' and j==1) then + table.insert(others, ' '..label(o,ow)) + end + end + local llab do + local cdw = ctx.dict.words + if g.set.kind == 'ant' then + local ap = g.set.members[1] + llab = fo.br(label(ap,cdw[ap.w]) or '') + else + llab = fo.br(label(g.mem,cdw[g.mem.w]) or '') end end - io.stdout:write(string.format("% 4u) %s\n%s", i, label(g.mem.word,d),table.concat(others,'\n'))) + local kls = { + syn = fo.color('synonyms',2,true)..' of'; + ant = fo.color('antonyms',1,true)..' of'; + met = fo.color('metonyms',4,true)..' of'; + } + io.stdout:write(string.format("% 4u) %s\n%s", i, fo.ul(kls[g.set.kind] .. ' ' .. llab), table.concat(others,'\n')) .. '\n') end + return false -- no changes made elseif op == 'link' or op == 'drop' then - local tgtn, paths = (...), { select(2, ...) } + local tgtn, paths = (select(2,...)), { select(3, ...) } + rebuildRelationCache(ctx.dict) + elseif op == 'destroy' then + local tgtw, tgtn = ... + if not tgtn then id10t 'missing group number' end + local delendum = groups[tonumber(tgtn)] + if not delendum then id10t 'bad group number' end + local rs = ctx.dict.relsets + local last = #rs + if delendum.id == last then + rs[delendum.id] = nil + else -- since order doesn't matter, we can use a + -- silly swapping trick to reduce the deletion + -- worst case from O(n) to O(2) + rs[delendum.id] = rs[last] + rs[last] = nil + end + rebuildRelationCache(ctx.dict) + else + id10t 'invalid operation' end end end; }; mod = { @@ -785,11 +1116,11 @@ syntax = {"( (add|for) | :) …"; "m-path ::= @/"}; write = true; exec = function(ctx,path,...) local paras, mng - local dest = parsePath(path) + local dest = pathParse(path) local _,_,m = safeNavWord(ctx,dest.w,dest.dn,dest.mn) if dest.nn then paras = {...} else local op, kind = ... @@ -830,10 +1161,17 @@ nofile = true; syntax = "[]"; }; export = { help = "create a text file dump compatible with source control"; + syntax = "[]"; + }; + import = { + help = "generate a usable dictionary from a text export file"; + syntax = "[]"; + raw = true; + write = true; }; dump = { exec = function(ctx) print(dump(ctx.dict)) end }; ls = { @@ -850,14 +1188,14 @@ local f = ctx.sty[io.stderr] for k,p in pairs(predicates) do if p.help then io.stderr:write( f.br(' - ' .. - f.rgb('[',.8,.3,1) .. + f.rgb('[',1,0,.5) .. k .. ' ' .. (f.color(p.syntax,5) or '…') .. - f.rgb(']',.8,.3,1)) .. ': ' .. + f.rgb(']',1,0,.5)) .. ': ' .. f.color(p.help,4,true) .. '\n') end end end @@ -877,11 +1215,79 @@ table.insert(out, e) end end table.sort(out, function(a,b) return a.lit < b.lit end) local fo = ctx.sty[io.stdout] - local function meanings(d,md,n) + + local function gatherRelSets(path) + local antonymSets, synonymSets, metonymSets = {},{},{} + if ctx.dict._relCache[path.w] then + for i, rel in ipairs(ctx.dict._relCache[path.w]) do + local specuset,tgt,anto = {} + for j, mbr in ipairs(rel.members) do + if pathMatch(mbr, path) then + if rel.kind == 'syn' then tgt = synonymSets + elseif rel.kind == 'met' then tgt = metonymSets + elseif rel.kind == 'ant' then + if j == 1 -- is this the headword? + then tgt = antonymSets + else tgt = synonymSets + end + end + elseif j == 1 and rel.kind == 'ant' then + anto = mbr + else + table.insert(specuset, mbr) + end + end + if tgt then + table.insert(tgt, specuset) + if anto then + table.insert(antonymSets, {anto}) + end + end + end + end + local function flatten(lst) + local new = {} + for i, l in ipairs(lst) do tcatD(new, l) end + return new + end + return { + syn = flatten(synonymSets); + ant = flatten(antonymSets); + met = flatten(metonymSets); + } + end + + local function formatRels(rls, padlen) + -- optimize for the common case + if next(rls.syn) == nil and + next(rls.ant) == nil and + next(rls.met) == nil then return {} end + local pad = string.rep(' ',padlen) + local function format(label, set) + local each = map(set, function(e) + local ew,ed = pathNav(ctx, e) + local str = fo.ul(e.w) + if ed then str = string.format('%s(%s)',str,ed.part) end + if e.mn then str = string.format('%s§%u',str,e.mn) end + return str + end) + return fo.em(string.format("%s%s %s",pad,label,table.concat(each,', '))) + end + local lines = {} + local function add(l,c,lst) + table.insert(lines, format(fo.color(l,c,true),lst)) + end + if next(rls.syn) then add('synonyms:',2,rls.syn) end + if next(rls.ant) then add('antonyms:',1,rls.ant) end + if next(rls.met) then add('metonyms:',4,rls.met) end + return lines + end + + local function meanings(w,d,md,n) local start = md and 2 or 1 local part = string.format('(%s)', d.part) local pad = md and string.rep(' ', #part) or '' local function note(n,insert) if not next(n.paras) then return end @@ -891,98 +1297,368 @@ insert(pad..n.paras[2]) end end local m = { (function() if d.means[1] then - if md then return - string.format(" %s 1. %s", fo.em(part), d.means[1].lit) + if md then + local id = '' + if ctx.flags.ident then + id=' ['..pathString({w=w.lit,dn=n,mn=1}, fo)..']' + end + return string.format(" %s %s 1. %s", id, fo.em(part), d.means[1].lit) end else return fo.em(string.format(' %s [empty definition #%u]', part,n)) end end)() } + tcatD(m, formatRels(gatherRelSets{w=w.lit,dn=n,mn=1}, 6)) for i=start,#d.means do local v = d.means[i] - table.insert(m, string.format(' %s %u. %s', pad, i, v.lit)) + local id = '' + if ctx.flags.ident then id='['..pathString({w=w.lit,dn=n,mn=n}, fo)..']' end + table.insert(m, string.format(' %s%s %u. %s', pad, id, i, v.lit)) + tcatD(m, formatRels(gatherRelSets{w=w.lit,dn=n,mn=i}, 6)) for j,n in ipairs(v.notes) do note(n, function(v) table.insert(m, v) end) end end return table.concat(m,'\n') end + local function autobreak(str) + if str ~= '' then return str..'\n' else return str end + end for i, w in ipairs(out) do - local d = fo.ul(w.lit) + local d = fo.ul(fo.br(w.lit)) + local wordrels = autobreak(table.concat( + formatRels(gatherRelSets{w=w.lit}, 2), + '\n' + )) + local wc = ctx.dict._relCache[w.lit] if #w.word.defs == 1 then - d=d .. ' ' .. fo.em('('..(w.word.defs[1].part)..')') ..'\n' - .. meanings(w.word.defs[1],false,1) + d=d .. ' ' + .. fo.rgb(fo.em('('..(w.word.defs[1].part)..')'),.8,.5,1) .. '\n' + .. meanings(w,w.word.defs[1],false,1) .. '\n' + .. autobreak(table.concat(formatRels(gatherRelSets{w=w.lit,dn=1}, 4), '\n')) + .. wordrels .. '\n' else for j, def in ipairs(w.word.defs) do - d=d .. '\n' .. meanings(def,true,j) - end - end - io.stdout:write(d..'\n') - end -end - -function cmds.export.exec(ctx) + local syn if wsc and wsc[j] then syn = wsc[j] end + d=d .. '\n' + .. meanings(w,syn,def,true,j) .. '\n' + .. autobreak(table.concat( + formatRels(gatherRelSets{w=w.lit,dn=j}, 4), + '\n' + )) + end + d=d .. wordrels .. '\n' + end + io.stdout:write(d) + end +end + +function cmds.import.exec(ctx,file) + local ifd = io.stdin + if file then + ifd = safeopen(file,'r') + end + + local new = { + header = { + lang = lang; + meta = ""; + partsOfSpeech = {}; + inflectionForms = {}; + }; + words = {}; + relsets = {}; + } + + local state = 0 + local relsets = {} + local path = {} + local inflmap, lastinfl = {} + for l in ifd:lines() do + local words = strwords(l) + local c = words[1] + local function syn(mn,mx) + local nw = #words - 1 + if nw < mn or (mx ~= nil and nw > mx) then + if mx ~= nil then + id10t('command %s needs between %u~%u words',c,mn,mx) + else + id10t('command %s needs at least %u words',c,mn) + end + end + end + if c ~= '*' and c~='meta' then -- comments + if state == 0 then + if c ~= 'pv0' then + id10t "not a parvan export" + end + new.header.lang = words[2] + new.header.meta = words[3] + state = 1 + else + print(pathString(path, ctx.sty[io.stderr])) + local W,D,M,N = pathNav({dict=new}, path) + if c == 'w' then syn(1) state = 2 + path = {w=words[2]} + new.words[words[2]] = {defs={}} + elseif c == 'f' then syn(1) + local nf = { + name = words[2]; + abbrev = words[3] or ""; + desc = words[4] or ""; + parts = {}; + } + table.insert(new.header.inflectionForms, nf) + inflmap[words[2]] = #(new.header.inflectionForms) + lastinfl = nf + elseif c == 'fp' then syn(1) + if not lastinfl then + id10t 'fp can only be used after f' end + table.insert(lastinfl.parts,words[2]) + elseif c == 's' then syn(2) + relsets[words[3]] = relsets[words[3]] or {} + relsets[words[3]].kind = words[2] + relsets[words[3]].uid = tonumber(words[3]) + relsets[words[3]].members = relsets[words[3]].members or {} + elseif state >= 2 and c == 'r' then syn(1) + relsets[words[2]] = relsets[words[2]] or { + uid = tonumber(words[2]); + members={}; + } + table.insert(relsets[words[2]].members, path) + elseif state >= 2 and c == 'd' then syn(1) state = 3 + table.insert(W.defs, { + part = words[2]; + branch = {}; + means = {}; + forms = {}; + phrases = {}; + }) + path = {w = path.w, dn = #(W.defs)} + elseif state >= 3 and c == 'dr' then syn(1) + table.insert(D.branch, words[2]) + elseif state >= 3 and c == 'df' then syn(2) + if not inflmap[words[2]] then + id10t('no inflection form %s defined', words[2]) + end + D.forms[inflmap[words[2]]] = words[3] + elseif state >= 3 and c == 'm' then syn(1) state = 4 + table.insert(D.means, { + lit = words[2]; + notes = {}; + examples = {}; + }); + path = {w = path.w, dn = path.dn, mn = #(D.means)} + elseif state >= 4 and c == 'n' then syn(1) state = 5 + table.insert(M.notes, {kind=words[2], paras={}}) + path = {w = path.w, dn = path.dn, mn = path.mn, nn = #(M.notes)}; + elseif state >= 5 and c == 'np' then syn(1) + table.insert(N.paras, words[2]) + end + -- we ignore invalid ctls, for sake of forward-compat + end + end + end + + for k,v in pairs(relsets) do + if not v.uid then + --handle non-numeric export ids + v.uid = math.random(0,0xffffFFFF) + end + table.insert(new.relsets, v) + end + + local ofd = safeopen(ctx.file,"w+b") + local o = writeDict(new); + ofd:write(o) + ofd:close() +end + +function cmds.export.exec(ctx,file) + local ofd = io.stdout + if file then ofd = safeopen(file, 'w+') end local function san(str) local d = 0 local r = {} for i,cp in utf8.codes(str) do -- insert backslashes for characters that would -- disrupt strwords() parsing - if cp == 0x5b then - d = d + 1 - elseif cp == 0x5d then - if d >= 1 then - d = d - 1 - else - table.insert(r, 0x5c) + if cp == 0x0a then + table.insert(r, 0x5c) + table.insert(r, 0x6e) + else + if cp == 0x5b then + d = d + 1 + elseif cp == 0x5d then + if d >= 1 then + d = d - 1 + else + table.insert(r, 0x5c) + end end + table.insert(r, cp) end - table.insert(r, cp) end return '[' .. utf8.char(table.unpack(r)) .. ']' end - local function o(...) io.stdout:write(string.format(...)..'\n') end + local function o(lvl,...) + local pfx = '' + if ctx.flags.human and lvl > 0 then + pfx = string.rep('\t', lvl) + end + ofd:write(pfx..string.format(...)..'\n') + end local d = ctx.dict - o('pv0 %s %s', san(d.header.lang), san(d.header.meta)) + o(0,'pv0 %s %s', san(d.header.lang), san(d.header.meta)) + local function checksyn(obj) +-- for _,s in ipairs(d.synonyms) do + local lvl = 0 + if obj.nn then lvl = 4 + elseif obj.mn then lvl = 3 + elseif obj.dn then lvl = 2 + elseif obj.w then lvl = 1 end + if not d._relCache[obj.w] then return end + for _,s in ipairs(d._relCache[obj.w]) do + for _,sm in ipairs(s.members) do + if pathMatch(obj, sm) then + o(lvl,'r %u',s.uid) + break + end + end + end + end + for i,f in pairs(d.header.inflectionForms) do + o(0,'f %s %s %s', san(f.name), san(f.abbrev), san(f.desc)) + for j,p in pairs(f.parts) do + o(1,'fp %s', san(p)) + end + end for lit, w in pairs(d.words) do - o('w %s',san(lit)) + o(0,'w %s',san(lit)) + checksyn{w=lit} for i,def in ipairs(w.defs) do - o('d %s',san(def.part)) - for _,s in ipairs(d.synonyms) do - for _,sm in ipairs(s.members) do - if sm.word == w and sm.def == i then - o('ds %u',s.uid) - break - end - end - end + o(1,'d %s',san(def.part)) + checksyn{w=lit,dn=i} for j,r in ipairs(def.branch) do - o('dr %s',san(r)) + o(2,'dr %s',san(r)) end for j,m in ipairs(def.means) do - o('m %s', san(m.lit)) + o(2,'m %s', san(m.lit)) + checksyn{w=lit,dn=i,mn=j} for k,n in ipairs(m.notes) do - o('n %s', san(n.kind)) + o(3,'n %s', san(n.kind)) for a,p in ipairs(n.paras) do - o('np %s', san(p)) + o(4,'np %s', san(p)) end end end end end - for _,s in ipairs(d.synonyms) do o('s %u', s.uid) end + for _,s in ipairs(d.relsets) do o(0,'s %s %u', s.kind, s.uid) end +end + +local function filterD(lst, fn) +-- cheap algorithm to destructively filter a list +-- DOES NOT preserve order!! + local top = #lst + for i=top,1,-1 do local m = lst[i] + if not fn(m,i) then + lst[i] = lst[top] + lst[top] = nil + top = top - 1 + end + end + return lst end function cmds.mod.exec(ctx, orig, oper, dest, ...) if (not orig) or not oper then id10t '`mod` requires at least an origin and an operation' end - local op, dp = parsePath(orig) + local op, dp = pathParse(orig) local w,d,m,n = safeNavWord(ctx, op.w,op.dn,op.mn,op.nn) + -- unfortunately, "pointers" exist elsewhere in the + -- structure, currently just from relsets, that must + -- be updated whenever an object moves or changes. + -- this is miserable and takes a lot of work, using + -- algorithms provided by the following functions. + -- note that we don't bother trying to update the + -- relcache as we go, it's simply not worth the work; + -- instead we simply rebuild the whole cache when + -- this command returns + local function cleanupRels(path, fn) + local rc = ctx.dict._relCache[path.w] + if rc then + for k,s in pairs(rc) do fn(s,k) end + end + end + local function cleanupRelsEach(path, fn) + cleanupRels(path, function(s,k) + local top = #s.members + for i=1,top do local m=s.members[i] + if pathSub(path, m) then + local val = fn(m,s,i) + if val ~= nil then + s.members[i] = val + end + end + end + end) + end + local function deleteRefsTo(path) + cleanupRels(path, function(s) + -- antonyms: delete the headword and transform the group + -- to a list of synonyms + if s.kind == 'ant' and pathSub(path,s.members[1]) then + s.kind = 'syn' + end + filterD(s.members, function(m) + return not pathSub(path, m) + end) + end) + if not path.dn then + ctx.dict._relCache[path.w] = nil + end + end + local function moveRelTree(op,dp) + cleanupRelsEach(op, function(old,set,idx) + local new = {} + for _,elt in pairs{'w','dn','mn','nn'} do + if dp[elt] ~= nil then + new[elt] = dp[elt] + else + new[elt] = op[elt] or old[elt] + end + end + return new + end) + end + local function shiftRelTree(dp, fld, nid, amt) + local cleanupTargetMask = ({ + dn = {w=dp.w}; + mn = {w=dp.w,dn=dp.dn}; + nn = {w=dp.w,dn=dp.dn,mn=dp.mn}; + })[fld] -- >____< + cleanupRelsEach(cleanupTargetMask, function(old,set,i) + if old[fld] >= nid then + old[fld] = old[fld] + amt + end + end) + end + local function insertAndMoveRelTree(tbl,n,op,dp,fld) + local nid = #tbl + local path = copy(dp) + path[fld] = nid + tbl[nid] = n + shiftRelTree(dp,fld,1) + moveRelTree(op, path) + end if oper == 'drop' then + -- clean out the cache and delete relationships + deleteRefsTo(op) if not d then ctx.dict.words[op.w] = nil elseif not m then table.remove(w.defs, op.dn) elseif not n then @@ -989,48 +1665,51 @@ table.remove(d.means, op.mn) else table.remove(m.notes, op.nn) end elseif oper == 'out' then - if n or not m then - id10t '`mod out` must target a meaning' - end + if n or not m then id10t '`mod out` must target a meaning' end if not dest then id10t '`mod out` requires at least a part of speech' end local newdef = { part = dest; branch = {...}; forms = {}; means = {m}; } + shiftRelTree(op, 'dn', op.dn, 1) table.insert(w.defs,op.dn+1, newdef) + moveRelTree(op,{w=op.w, dn=op.dn+1, mn=1}) table.remove(d.means,op.mn) elseif oper == 'move' or oper == 'merge' or oper == 'clobber' then if dest - then dp = parsePath(dest) + then dp = pathParse(dest) else id10t('`mod %s` requires a target',oper) end if n then if not dp.mn then id10t '`mod` on a note requires a note or meaning destination' end local _,_,dm = safeNavWord(ctx, dp.w,dp.dn,dp.mn) if dp.nn then if oper == 'move' then + shiftRelTree(dp, 'nn', dp.nn, 1) table.insert(dm.notes, dp.nn, n) elseif oper == 'merge' then local top = #(dm.notes[dp.nn].paras) for i, v in ipairs(n.paras) do dm.notes[dp.nn].paras[i+top] = v end elseif oper == 'clobber' then + deleteRefsTo(dp) dm.notes[dp.nn] = n end + moveRelTree(op,dp) else if oper ~= 'move' then id10t('`mod note %s` requires a note target', oper) end - table.insert(dm.notes, n) + insertAndMoveRelTree(dm.notes,n,op,dp,'nn') end if oper == 'move' and dp.nn and dm == m and op.nn > dp.nn then table.remove(m.notes,op.nn+1) else table.remove(m.notes,op.nn) @@ -1041,33 +1720,47 @@ part = d.part; branch = copy(d.branch); forms = copy(d.forms); means = {m}; } + local didx if ctx.dict.words[dp.w] then - table.insert(ctx.dict.words[dp.w].defs, newdef) + local defst = ctx.dict.words[dp.w].defs + didx = #defst + defst[didx] = newdef else ctx.dict.words[dp.w] = { defs = {newdef}; } + didx = 1 end + cleanupRelsEach(op, function(oldpath,set,mi) + return {w=dp.w,dn=didx,mn=1,nn=oldpath.nn} + end) table.remove(d.means,dp.mn) else local dw, dd = safeNavWord(ctx, dp.w, dp.dn) if dp.mn then if dd.means[dp.mn] and (oper == 'merge' or oper=='clobber') then if oper == 'merge' then dd.means[dp.mn] = dd.means[dp.mn] .. '; ' .. m elseif oper == 'clobber' then + deleteRefsTo(dp) dd.means[dp.mn] = m end else - if oper == clobber then dd.means = {} end + cleanupRelsEach({w=dp.w,dn=dp.dn}, function(old,set,i) + if old.mn >= dp.mn then + old.mn = old.mn + 1 + end + end) table.insert(dd.means, dp.mn, m) end + moveRelTree(op,dp) else - table.insert(dd.means, m) + insertAndMoveRelTree(dd.means,m, op,dp,'mn') +-- table.insert(dd.means, m) end if oper == 'move' and dp.mn and dd.means == d.means and op.mn > dp.mn then table.remove(d.means,op.mn+1) else table.remove(d.means,op.mn) @@ -1080,17 +1773,21 @@ local top = #(ddefs[dp.dn].means) for i,om in ipairs(d.means) do ddefs[dp.dn].means[top+i] = om end for k,p in pairs(d.forms) do + deleteRefsTo(dp) ddefs[dp.dn].forms[k] = p -- clobbers! end else + shiftRelTree(dp,'dn',dp.dn,1) table.insert(ddefs, dp.dn, d) end + moveRelTree(op,dp) else - table.insert(ddefs, d) + insertAndMoveRelTree(ddefs,d, op,dp,'dn') +-- table.insert(ddefs, d) end if oper == 'move' and dp.mn and w.defs == ddefs and op.mn > dp.mn then table.remove(w.defs,op.dn+1) else table.remove(w.defs,op.dn) @@ -1099,22 +1796,32 @@ if ctx.dict.words[dp.w] then if oper ~= 'merge' then id10t('the word “%s” already exists; use `merge` if you want to merge the words together', dp.w) end for i,def in ipairs(w.defs) do + local odp = copy(op) odp.dn = i + local ddp = {w=dp.w, dn=dp.dn+i-1} if dp.dn then + shiftRelTree(dp, 'dn', dp.dn+i-1, 1) table.insert(ctx.dict.words[dp.w].defs, dp.dn+i-1, def) + moveRelTree(odp,ddp) else - table.insert(ctx.dict.words[dp.w].defs, def) +-- table.insert(ctx.dict.words[dp.w].defs, def) + insertAndMoveRelTree(ctx.dict.words[dp.w].defs, def, + odp,dp,'dn') end end else ctx.dict.words[dp.w] = w + moveRelTree(op,dp) +-- ctx.dict._relCache[dp.w] = ctx.dict._relCache[op.w] +-- ctx.dict._relCache[op.w] = nil end ctx.dict.words[op.w] = nil end end + rebuildRelationCache(ctx.dict) end local function fileLegible(file) -- check if we can access the file local fd = io.open(file,"rb") @@ -1122,31 +1829,10 @@ if fd then ret = true end fd:close() return ret end -local function map(fn,lst) - local new = {} - for k,v in pairs(lst) do - local nv, nk = fn(v,k) - new[nk or k] = nv - end - return new -end -local function mapD(fn,lst) --destructive - -- WARNING: this will not work if nk names an existing key! - for k,v in pairs(lst) do - local nv, nk = fn(v,k) - if nk == nil or k == nk then - lst[k] = nv - else - lst[k] = nil - lst[nk] = nv - end - end - return lst -end local function prompt(p,multiline) -- returns string if successful, nil if EOF, false if ^C io.stderr:write(p) @@ -1201,12 +1887,12 @@ if c.raw then ctx.log('fatal', words[1] .. ' cannot be run from `shell`') elseif not implies(c.write, rw) then ctx.log('fatal', ctx.file .. ' is not writable') else - local ok = ctx.try(c.exec, ctx, table.unpack(words,2)) - if ok then written = written or c.write end + local ok, outcome = ctx.try(c.exec, ctx, table.unpack(words,2)) + if ok and outcome ~= false then written = written or c.write end end elseif cmd == 'save' or cmd == 'wq' then if not written then ctx.log('info', 'no changes to save') end @@ -1263,17 +1949,28 @@ for cmd,c in pairs(cmds) do showHelp(ctx, cmd, c) end end end + +local globalFlags = { + human = {'h','human','enable human-readable exports'}; + ident = {'i','ident','show identifier paths for all items'} +} local function usage(me,ctx) local ln = 0 local ct = {} local fe = ctx.sty[io.stderr] - io.stderr:write(string.format(fe.br"usage:".." %s [ [args…]]\n",me)) + local fstr = "" + local flagHelp = {} + for k,v in pairs(globalFlags) do + fstr = fstr .. v[1] + table.insert(flagHelp, string.format(" -%s --%s: %s\n",table.unpack(v))) + end + io.stderr:write(string.format(fe.br"usage:".." %s [-%s] [ [args…]]\n",me,fstr) .. table.concat(flagHelp)) --[[ for k,v in pairs(cmds) do local n = 1 + utf8.len(k) + utf8.len(v.syntax) ct[k] = n if n > ln then ln = n end @@ -1290,11 +1987,32 @@ end local function dispatch(argv, ctx) local ferr = ctx.sty[io.stderr] - local file, cmd = table.unpack(argv) + local args = {} + local flags = {} + local i = 1 while i <= #argv do + local a = argv[i] + if a == '--' then i=i+1 break + elseif a:sub(1,2) == '--' then + local fs = a:sub(3) + for k,v in pairs(globalFlags) do + if v[2] == fs then flags[k] = true end + end + elseif a:sub(1,1) == '-' then + for p,cp in utf8.codes(''), a, #'-' do + local c = utf8.char(cp) + for k,v in pairs(globalFlags) do + if v[1] == c then flags[k] = true break end + end + end + else table.insert(args, a) end + i = i + 1 end + for j=i,#argv do table.insert(args,argv[j]) end + + local file, cmd = table.unpack(args) if cmd and cmds[cmd] then local c,fd,dict = cmds[cmd] if (not c.raw) and not c.nofile then fd = safeopen(file, "rb") dict = readDict(fd:read 'a') @@ -1307,14 +2025,15 @@ cmds[cmd].exec({ sty = ctx.sty; try = ctx.try; log = ctx.log; + flags = flags; file = file; fd = fd; dict = dict; - }, table.unpack(argv,3)) + }, table.unpack(args,3)) if (not c.raw) and c.write then local output = writeDict(dict) -- writeDict should always be given a chance to -- bail before the previous file is destroyed!! @@ -1346,23 +2065,25 @@ local colors = {fatal=1,warn=3,info=4,debug=2} local ferr = sty[io.stderr] io.stderr:write(string.format( ferr.color(ferr.br("(%s)"),colors[lvl]).." %s\n", lvl, msg)) end -local function try(...) + +local function stacktrace(err) + return debug.traceback(err,3) +end + +local function try(fn,...) -- a wrapper around pcall that produces a standard error -- message format when an error occurs - local res = { pcall(...) } + local res = { xpcall(fn,stacktrace,...) } if not res[1] then log('fatal', res[2]) end return table.unpack(res) end -local function stacktrace(err) - return debug.traceback(err,3) -end local ok, res = xpcall(dispatch, stacktrace, argv, { try = try, sty = sty, log = log }) if not ok then