Differences From
Artifact [760cd798bc]:
-- Build a codec table for the pack-format string `f`: `encode` comes from
-- packer(f) and `decode` from unpacker(f).
local function qpack(f)
	local codec = {}
	codec.encode = packer(f)
	codec.decode = unpacker(f)
	return codec
end
184 184
-- forward declarations (marshal is defined further down in this file)
local parse, marshal

-- Scalar codecs. The format strings look like string.pack specifiers
-- (sN = string with an N-byte length prefix, IN = N-byte unsigned int);
-- NOTE(review): assumes packer/unpacker wrap string.pack/unpack — confirm.
fmt.string = qpack('s4')
fmt.label  = qpack('s2')
fmt.tag    = qpack('s1')
fmt.u8     = qpack('I1')
fmt.u16    = qpack('I2')
fmt.u24    = qpack('I3')
fmt.u32    = qpack('I4')
191 193 fmt.list = function(t,ty) ty = ty or fmt.u32
192 194 return {
193 195 encode = function(a)
................................................................................
228 230 return m
229 231 end;
230 232 }
231 233 end
232 234
-- Record schemas. Each schema is an ordered array of {field-name, field-type}
-- pairs; a field type is either a codec (table with encode/decode) or
-- another schema.

-- one inflected form of a word
fmt.form = {
	{'form', fmt.u16},
	{'text', fmt.label},
}

-- a note attached to a meaning: a kind tag plus its paragraphs
fmt.note = {
	{'kind', fmt.tag},
	{'paras', fmt.list(fmt.string)},
}

-- one meaning of a definition, with its notes
fmt.meaning = {
	{'lit', fmt.string},
	{'notes', fmt.list(fmt.note, fmt.u8)},
}

-- one definition of a word
fmt.def = {
	{'part', fmt.u8},
	{'branch', fmt.list(fmt.label, fmt.u8)},
	{'means', fmt.list(fmt.meaning, fmt.u8)},
	{'forms', fmt.list(fmt.form, fmt.u16)},
}

fmt.word = {
	{'defs', fmt.list(fmt.def, fmt.u8)},
}

fmt.dictHeader = {
	{'lang', fmt.tag},
	{'meta', fmt.string},
	{'partsOfSpeech', fmt.list(fmt.tag, fmt.u16)},
}

fmt.synonymSet = {
	{'uid', fmt.u32},
	-- IDs are persistent random values so they can be used
	-- as reliable identifiers even when merging exports in
	-- a parvan-unaware VCS
	{'members', fmt.list({
		{'word', fmt.label}, {'def', fmt.u8},
	}, fmt.u16)},
}

-- top-level layout of a dictionary
fmt.dict = {
	{'header', fmt.dictHeader},
	{'words', fmt.map(fmt.string, fmt.word)},
	{'synonyms', fmt.list(fmt.synonymSet)},
}
269 282
270 283 function marshal(ty, val)
271 284 if ty.encode then
272 285 return ty.encode(val)
273 286 end
274 287 local ac = {}
................................................................................
315 328 def.part = atomizePoS(def.part)
316 329 end
317 330 end
318 331 d.header.partsOfSpeech = {}
319 332 for v,i in pairs(posMap) do
320 333 d.header.partsOfSpeech[i] = v
321 334 end
322 - return marshal(fmt.dict, d)
335 + return 'PV0\2'..marshal(fmt.dict, d)
323 336 end
324 337
325 338 local function
326 339 readDict(file)
327 - local d = parse(fmt.dict, stream(file))
340 + local s = stream(file)
341 + local magic = s:next 'c4'
342 + if magic ~= 'PV0\2' then
343 + id10t 'not a parvan file'
344 + end
345 + local d = parse(fmt.dict, s)
328 346 -- handle atoms
329 347 for lit,w in pairs(d.words) do
330 348 for j,def in ipairs(w.defs) do
331 349 def.part = d.header.partsOfSpeech[def.part]
332 350 end
333 351 end
334 352 return d
................................................................................
510 528 else
511 529 id10t('[lit %s %s] is not a valid filter, “%s” should be either “pfx” or “sfx”',val,op,op)
512 530 end
513 531 end;
514 532 };
form = {
	help = "match against word's inflected forms";
	syntax = '(<inflect> | <form> (set | is <inflect> | (pfx|sfx|match) <affix>))';
	-- NOTE: the matcher body is still empty, so it returns nothing
	fn = function(e, k, op, v) end;
};
521 539 part = {
522 540 help = 'word has definitions for every <part> of speech';
523 541 syntax = '<part>…';
524 542 fn = function(e,...)
................................................................................
539 557 local matches = 0
540 558 for j,r in ipairs(d.branch) do
541 559 if map[r] then matches = matches + 1 end
542 560 end
543 561 if matches == tgt then return true end
544 562 end
545 563 end
564 + };
note = {
	help = 'word has a matching note';
	syntax = '([kind <kind> [<term>]] | term <term> | (min|max|count) <n>)';
	-- Predicate over the notes attached to the meanings of a word.
	--   e  : filter context; only e.word.defs is read here
	--   op : nil, 'kind', 'term', 'min', 'max' or 'count'
	--   k  : <kind> for `kind`, <term> for `term`, <n> for min/max/count
	--   t  : optional <term> for `kind`; rejected for every other op
	-- Returns true when the word satisfies the predicate, false otherwise.
	-- Malformed invocations are reported through id10t.
	fn = function(e, op, k, t)
		local defs = e.word.defs

		if op == nil then
			-- bare [note]: the syntax marks the kind clause as optional,
			-- so this matches any word carrying at least one note
			for _, d in ipairs(defs) do
				for _, m in ipairs(d.means) do
					if #m.notes > 0 then return true end
				end
			end
			return false
		end

		if op == 'kind' or op == 'term' then
			if op == 'term' then
				if t then
					id10t('too many arguments for [note term <term>]')
				end
				if k == nil then
					id10t('[note term <term>] requires a term')
				end
			end
			-- text to search for; nil means "kind match alone suffices"
			local needle = t
			if op == 'term' then needle = k end
			for _, d in ipairs(defs) do
				for _, m in ipairs(d.means) do
					for _, n in ipairs(m.notes) do
						if op == 'term' or n.kind == k then
							if needle == nil then return true end
							-- plain (non-pattern) substring search over all paragraphs
							local body = table.concat(n.paras, '\n')
							if string.find(body, needle, 1, true) ~= nil then
								return true
							end
						end
					end
				end
			end
			return false
		elseif op == 'min' or op == 'max' or op == 'count' then
			if t then
				id10t('too many arguments for [note %s <n>]', op)
			end
			local n = tonumber(k)
			if n == nil then
				id10t('[note %s <n>] requires a number, got “%s”', op, tostring(k))
			end
			n = math.floor(n)
			local total = 0
			for _, d in ipairs(defs) do
				for _, m in ipairs(d.means) do
					total = total + #m.notes
					-- stop counting as soon as the answer is certain
					if op == 'min' and total >= n then return true end
					if op == 'max' and total > n then return false end
				end
			end
			if op == 'count' then return total == n end
			if op == 'max' then return total <= n end
			-- min: re-check after the loop so `min 0` also holds for a
			-- word that has no meanings at all
			return total >= n
		end

		id10t('“%s” is not a valid operation for [note]', tostring(op))
	end;
};
547 599 }
548 600 end
549 601
550 602 local function
551 603 safeopen(file,...)
552 604 if type(file) == 'string' then
................................................................................
605 657 header = {
606 658 lang = lang;
607 659 meta = "";
608 660 partsOfSpeech = {};
609 661 branch = {};
610 662 };
611 663 words = {};
664 + synonyms = {};
612 665 }
613 666 local o = writeDict(new);
614 667 fd:write(o)
615 668 fd:close()
616 669 end;
617 670 };
618 671 coin = {
................................................................................
652 705 help = "add a meaning to a definition";
653 706 syntax = "<word> <def#> <meaning>";
654 707 write = true;
655 708 exec = function(ctx,word,dn,m)
656 709 local _,d = safeNavWord(ctx,word,dn)
657 710 table.insert(d.means, {lit=m,notes={}})
658 711 end;
712 + };
syn = {
	help = "manage synonym groups";
	syntax = {
		"(show|purge) <path>";
		"(link|drop) <word> <group#> <path>…";
		"new <path> <path>…";
		"clear <word> [<group#>]";
	};
	write = true;
	-- Manage the synonym sets stored in ctx.dict.synonyms.
	--   op   : 'new', 'show', 'link', 'drop', 'clear' or 'purge'
	--   tgtw : path of the target word/definition, parsed with parsePath
	--   ...  : further paths (used by `new`)
	-- `new` appends a set holding the target and every extra path, with a
	-- definition number of 1 where a path names none. `show` prints each
	-- set containing the target, followed by its other members.
	-- link/drop/clear/purge are accepted by the validator but have no
	-- implementation yet; they are reported instead of silently doing
	-- nothing on a command flagged write = true.
	exec = function(ctx, op, tgtw, ...)
		local wp = parsePath(tgtw)
		-- result unused; kept from the original, presumably so an
		-- unresolvable target is rejected up front (TODO confirm)
		safeNavWord(ctx, wp.w, wp.dn)
		if not (op=='new' or op=='link' or op=='drop' or op=='clear' or op=='show' or op=='purge') then
			id10t('invalid operation “%s” for `syn`', op)
		end

		if op == 'new' then
			local links = {{word = wp.w, def = wp.dn or 1}}
			for i,l in ipairs{...} do
				local parsed = parsePath(l)
				links[i+1] = {word = parsed.w, def = parsed.dn or 1}
			end
			table.insert(ctx.dict.synonyms, {
				uid=math.random(0,0xffffFFFF);
				members=links;
			})
			return
		end

		if op ~= 'show' then
			id10t('`syn %s` is not implemented yet', op)
		end

		-- collect every set containing the target; when the path names no
		-- definition, any definition of the word counts
		local groups = {}
		for _,ss in ipairs(ctx.dict.synonyms) do
			for _,s in ipairs(ss.members) do
				if s.word == wp.w and (wp.dn == nil or s.def == wp.dn) then
					table.insert(groups, {set = ss, mem = s})
					break
				end
			end
		end

		-- "<word>(<part>): <meaning>; <meaning>…"
		local function label(wd,defn)
			local lits = {}
			for i,v in ipairs(defn.means) do
				lits[i] = v.lit
			end
			return string.format("%s(%s): %s",wd,defn.part,table.concat(lits, '; '))
		end

		for i, g in ipairs(groups) do
			local _,d = safeNavWord(ctx, g.mem.word, g.mem.def)
			-- no space flag: it is meaningless for %u and rejected by
			-- newer Lua 5.4 releases
			local lines = { string.format("%4u) %s", i, label(g.mem.word,d)) }
			for _, o in ipairs(g.set.members) do
				-- skip the member that matched, using the definition it
				-- was actually matched on rather than the default of 1
				if not (o.word == g.mem.word and o.def == g.mem.def) then
					local _,od = safeNavWord(ctx, o.word,o.def)
					lines[#lines+1] = ' '..label(o.word,od)
				end
			end
			-- terminate each group so successive groups do not run together
			io.stdout:write(table.concat(lines,'\n'), '\n')
		end
	end;
};
660 775 mod = {
661 776 help = "move, merge, split, or delete words or definitions";
662 777 syntax = {
663 778 "<path> (drop | [move|merge|clobber] <path> | out [<part> [<root>…]])";
664 779 "path ::= <word>[(@<def#>[/<meaning#>[:<note#>]]|.)]";
665 780 };
................................................................................
711 826 syntax = "[<command>]";
712 827 };
predicates = {
	help = "show available filter predicates",
	nofile = true,
	syntax = "[<predicate>]",
};
-- help text only; the handler is attached later as cmds.export.exec
export = {
	help = "create a text file dump compatible with source control",
};
-- prints the dump() rendering of the loaded dictionary
dump = {
	exec = function(ctx)
		print(dump(ctx.dict))
	end,
};
721 839 ls = {
722 840 help = "list all words that meet any given <filter>";
723 841 syntax = {"[<filter>…]";
724 842 "filter ::= (<word>|<pred> <arg>…)";
................................................................................
799 917 for j, def in ipairs(w.word.defs) do
800 918 d=d .. '\n' .. meanings(def,true,j)
801 919 end
802 920 end
803 921 io.stdout:write(d..'\n')
804 922 end
805 923 end
924 +
-- Write a line-oriented text dump of ctx.dict to stdout.
-- Every line is a record tag followed by bracket-quoted fields:
--   pv0 <lang> <meta>   header
--   w   <word>          start of a word
--   d   <part>          start of a definition of that word
--   ds  <uid>           synonym set this definition belongs to
--   dr  <root>          one entry of the definition's `branch` list
--   m   <lit>           a meaning
--   n   <kind>          a note on that meaning
--   np  <text>          one paragraph of that note
--   s   <uid>           one line per synonym set, after all words
-- Words are emitted in sorted order so repeated exports of the same
-- dictionary produce identical text.
function cmds.export.exec(ctx)
	-- Quote `str` in [brackets]. Balanced inner brackets are left alone;
	-- any unbalanced one gets a backslash so it cannot disturb strwords()
	-- parsing. Works on bytes: '[' and ']' are ASCII and never occur
	-- inside a multi-byte UTF-8 sequence.
	-- NOTE(review): assumes strwords() accepts "\[" the same way it
	-- accepts "\]" — confirm.
	local function san(str)
		local pieces = {} -- output fragments, joined at the end
		local open = {}   -- indices in `pieces` of still-unmatched '['
		local last = 1
		for pos, ch in str:gmatch('()([%[%]])') do
			pieces[#pieces+1] = str:sub(last, pos-1)
			if ch == '[' then
				pieces[#pieces+1] = '['
				open[#open+1] = #pieces
			elseif #open > 0 then
				open[#open] = nil
				pieces[#pieces+1] = ']'
			else
				pieces[#pieces+1] = '\\]'
			end
			last = pos + 1
		end
		pieces[#pieces+1] = str:sub(last)
		-- whatever is still open never found its partner
		for _, idx in ipairs(open) do pieces[idx] = '\\[' end
		return '[' .. table.concat(pieces) .. ']'
	end
	local function o(...) io.stdout:write(string.format(...)..'\n') end

	local d = ctx.dict
	o('pv0 %s %s', san(d.header.lang), san(d.header.meta))

	-- pairs() order is unspecified; sort the keys for a stable dump
	local lits = {}
	for lit in pairs(d.words) do lits[#lits+1] = lit end
	table.sort(lits)

	for _, lit in ipairs(lits) do
		local w = d.words[lit]
		o('w %s',san(lit))
		for i,def in ipairs(w.defs) do
			o('d %s',san(def.part))
			for _,s in ipairs(d.synonyms) do
				for _,sm in ipairs(s.members) do
					-- members store the word's key string, so compare
					-- with `lit`, not with the word record `w`
					if sm.word == lit and sm.def == i then
						o('ds %u',s.uid)
						break -- one line per set is enough
					end
				end
			end
			for _,r in ipairs(def.branch) do
				o('dr %s',san(r))
			end
			for _,m in ipairs(def.means) do
				o('m %s', san(m.lit))
				for _,n in ipairs(m.notes) do
					o('n %s', san(n.kind))
					for _,p in ipairs(n.paras) do
						o('np %s', san(p))
					end
				end
			end
		end
	end
	for _,s in ipairs(d.synonyms) do o('s %u', s.uid) end
end
806 976
807 977 function cmds.mod.exec(ctx, orig, oper, dest, ...)
808 978 if (not orig) or not oper then
809 979 id10t '`mod` requires at least an origin and an operation'
810 980 end
811 981 local op, dp = parsePath(orig)
812 982 local w,d,m,n = safeNavWord(ctx, op.w,op.dn,op.mn,op.nn)