Index: parvan.lua ================================================================== --- parvan.lua +++ parvan.lua @@ -182,10 +182,12 @@ decode = unpacker(f); } end local parse, marshal fmt.string = qpack "s4" +fmt.label = qpack "s2" +fmt.tag = qpack "s1" fmt.u8 = qpack "I1" fmt.u16 = qpack "I2" fmt.u24 = qpack "I3" fmt.u32 = qpack "I4" fmt.list = function(t,ty) ty = ty or fmt.u32 @@ -230,15 +232,15 @@ } end fmt.form = { {'form', fmt.u16}; - {'text', fmt.string}; + {'text', fmt.label}; } fmt.note = { - {'kind', fmt.string}; + {'kind', fmt.tag}; {'paras', fmt.list(fmt.string)}; } fmt.meaning = { {'lit', fmt.string}; @@ -245,28 +247,39 @@ {'notes', fmt.list(fmt.note,fmt.u8)}; } fmt.def = { {'part', fmt.u8}; - {'branch', fmt.list(fmt.string,fmt.u8)}; + {'branch', fmt.list(fmt.label,fmt.u8)}; {'means', fmt.list(fmt.meaning,fmt.u8)}; {'forms', fmt.list(fmt.form,fmt.u16)}; } fmt.word = { {'defs', fmt.list(fmt.def,fmt.u8)}; } fmt.dictHeader = { - {'lang', fmt.string}; + {'lang', fmt.tag}; {'meta', fmt.string}; - {'partsOfSpeech', fmt.list(fmt.string,fmt.u16)}; + {'partsOfSpeech', fmt.list(fmt.tag,fmt.u16)}; +} + +fmt.synonymSet = { + {'uid', fmt.u32}; + -- IDs are persistent random values so they can be used + -- as reliable identifiers even when merging exports in + -- a parvan-unaware VCS + {'members', fmt.list({ + {'word', fmt.label}, {'def', fmt.u8}; + },fmt.u16)}; } fmt.dict = { {'header', fmt.dictHeader}; {'words', fmt.map(fmt.string,fmt.word)}; + {'synonyms', fmt.list(fmt.synonymSet)}; } function marshal(ty, val) if ty.encode then return ty.encode(val) @@ -317,16 +330,21 @@ end d.header.partsOfSpeech = {} for v,i in pairs(posMap) do d.header.partsOfSpeech[i] = v end - return marshal(fmt.dict, d) + return 'PV0\2'..marshal(fmt.dict, d) end local function readDict(file) - local d = parse(fmt.dict, stream(file)) + local s = stream(file) + local magic = s:next 'c4' + if magic ~= 'PV0\2' then + id10t 'not a parvan file' + end + local d = parse(fmt.dict, s) -- handle atoms for lit,w in pairs(d.words) do for j,def in ipairs(w.defs) do def.part = d.header.partsOfSpeech[def.part] end @@ -512,11 +530,11 @@ end end; }; form = { help = 'match against word\'s inflected forms'; - syntax = '( |
(set | is | pfx | sfx ))'; + syntax = '( | (set | is | (pfx|sfx|match) ))'; fn = function(e, k, op, v) end; }; part = { help = 'word has definitions for every of speech'; @@ -541,10 +559,44 @@ if map[r] then matches = matches + 1 end end if matches == tgt then return true end end end + }; + note = { + help = 'word has a matching note'; + syntax = '([kind []] | term | (min|max|count) )'; + fn = function(e, op, k, t) + if op == 'kind' or op == 'term' then + if op == 'term' and t then + id10t('too many arguments for [note term ]') + end + for _,d in ipairs(e.word.defs) do + for _,m in ipairs(d.means) do + for _,n in ipairs(m.notes) do + if op=='term' or n.kind == k then + if op=='kind' and t == nil then return true end + if string.find(table.concat(n.paras,'\n'), t or k, 1, true) ~= nil then return true end + end + end end end + elseif op == 'min' or op == 'max' or op == 'count' then + if t then + id10t('too many arguments for [note %s ]',op) + end + local n = math.floor(tonumber(k)) + local total = 0 + for i,d in ipairs(e.word.defs) do + for j,m in ipairs(d.means) do + total = total + #m.notes + if op == 'min' and total >= n then return true end + if op == 'max' and total > n then return false end + end end + if op == 'count' then return total == n end + if op == 'max' then return total <= n end + return false + end + end; }; } end local function @@ -607,10 +659,11 @@ meta = ""; partsOfSpeech = {}; branch = {}; }; words = {}; + synonyms = {}; } local o = writeDict(new); fd:write(o) fd:close() end; @@ -654,10 +707,72 @@ write = true; exec = function(ctx,word,dn,m) local _,d = safeNavWord(ctx,word,dn) table.insert(d.means, {lit=m,notes={}}) end; + }; + syn = { + help = "manage synonym groups"; + syntax = { + "(show|purge) "; + "(link|drop) …"; + "new …"; + "clear []"; + }; + write = true; + exec = function(ctx, op, tgtw, ...) + local groups = {} + local wp = parsePath(tgtw) + local w,d = safeNavWord(ctx, wp.w, wp.dn) + if not (op=='new' or op=='link' or op=='drop' or op=='clear' or op=='show' or op=='purge') then + id10t('invalid operation “%s” for `syn`', op) + end + if op == 'new' then + local links = {{word = wp.w, def = wp.dn or 1}} + for i,l in ipairs{...} do + local parsed = parsePath(l) + links[i+1] = {word = parsed.w, def = parsed.dn or 1} + end + table.insert(ctx.dict.synonyms, { + uid=math.random(0,0xffffFFFF); + members=links; + }) + else -- assemble a list of groups + for i,ss in ipairs(ctx.dict.synonyms) do + for j,s in ipairs(ss.members) do + if s.word == wp.w and (wp.dn == nil or s.def == wp.dn) then + table.insert(groups, {set = ss, mem = s}) + break + end + end + end + + if op == 'show' then + for i, g in ipairs(groups) do + local w,d = safeNavWord(ctx, g.mem.word, g.mem.def) + local function label(wd,defn) + local fulldef = {} + for i,v in ipairs(defn.means) do + fulldef[i] = v.lit + end + fulldef = table.concat(fulldef, '; ') + return string.format("%s(%s): %s",wd,defn.part,fulldef) + end + local others = {} + for j, o in ipairs(g.set.members) do + if not (o.word == g.mem.word and o.def == (wp.dn or 1)) then + local ow, od = safeNavWord(ctx, o.word,o.def) + table.insert(others, ' '..label(o.word,od)) + end + end + io.stdout:write(string.format("% 4u) %s\n%s", i, label(g.mem.word,d),table.concat(others,'\n'))) + end + elseif op == 'link' or op == 'drop' then + local tgtn, paths = (...), { select(2, ...) } + end + end + end; }; mod = { help = "move, merge, split, or delete words or definitions"; syntax = { " (drop | [move|merge|clobber] | out [ […]])"; @@ -713,10 +828,13 @@ predicates = { help = "show available filter predicates"; nofile = true; syntax = "[]"; }; + export = { + help = "create a text file dump compatible with source control"; + }; dump = { exec = function(ctx) print(dump(ctx.dict)) end }; ls = { help = "list all words that meet any given "; @@ -801,10 +919,62 @@ end end io.stdout:write(d..'\n') end end + +function cmds.export.exec(ctx) + local function san(str) + local d = 0 + local r = {} + for i,cp in utf8.codes(str) do + -- insert backslashes for characters that would + -- disrupt strwords() parsing + if cp == 0x5b then + d = d + 1 + elseif cp == 0x5d then + if d >= 1 then + d = d - 1 + else + table.insert(r, 0x5c) + end + end + table.insert(r, cp) + end + return '[' .. utf8.char(table.unpack(r)) .. ']' + end + local function o(...) io.stdout:write(string.format(...)..'\n') end + local d = ctx.dict + o('pv0 %s %s', san(d.header.lang), san(d.header.meta)) + for lit, w in pairs(d.words) do + o('w %s',san(lit)) + for i,def in ipairs(w.defs) do + o('d %s',san(def.part)) + for _,s in ipairs(d.synonyms) do + for _,sm in ipairs(s.members) do + if sm.word == w and sm.def == i then + o('ds %u',s.uid) + break + end + end + end + for j,r in ipairs(def.branch) do + o('dr %s',san(r)) + end + for j,m in ipairs(def.means) do + o('m %s', san(m.lit)) + for k,n in ipairs(m.notes) do + o('n %s', san(n.kind)) + for a,p in ipairs(n.paras) do + o('np %s', san(p)) + end + end + end + end + end + for _,s in ipairs(d.synonyms) do o('s %u', s.uid) end +end function cmds.mod.exec(ctx, orig, oper, dest, ...) if (not orig) or not oper then id10t '`mod` requires at least an origin and an operation' end