Overview
Comment: | add syn |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA3-256: | 0f6a5bda236d515918b5adcb8247c4e2 |
User & Date: | lexi on 2022-04-26 02:02:04 |
Other Links: | manifest | tags |
Context
2022-04-28
| ||
21:01 | commit to preserve old code im about to axe, parvan is broken currently check-in: f996abb5e5 user: lexi tags: trunk | |
2022-04-26
| ||
02:02 | add syn check-in: 0f6a5bda23 user: lexi tags: trunk | |
2022-04-25
| ||
21:01 | add first parvan revision check-in: bf5f4fd9ca user: lexi tags: trunk | |
Changes
Modified parvan.lua from [760cd798bc] to [2e5da05ad6].
180 180 local qpack = function(f) return { 181 181 encode = packer(f); 182 182 decode = unpacker(f); 183 183 } end 184 184 185 185 local parse, marshal 186 186 fmt.string = qpack "s4" 187 +fmt.label = qpack "s2" 188 +fmt.tag = qpack "s1" 187 189 fmt.u8 = qpack "I1" 188 190 fmt.u16 = qpack "I2" 189 191 fmt.u24 = qpack "I3" 190 192 fmt.u32 = qpack "I4" 191 193 fmt.list = function(t,ty) ty = ty or fmt.u32 192 194 return { 193 195 encode = function(a) ................................................................................ 228 230 return m 229 231 end; 230 232 } 231 233 end 232 234 233 235 fmt.form = { 234 236 {'form', fmt.u16}; 235 - {'text', fmt.string}; 237 + {'text', fmt.label}; 236 238 } 237 239 238 240 fmt.note = { 239 - {'kind', fmt.string}; 241 + {'kind', fmt.tag}; 240 242 {'paras', fmt.list(fmt.string)}; 241 243 } 242 244 243 245 fmt.meaning = { 244 246 {'lit', fmt.string}; 245 247 {'notes', fmt.list(fmt.note,fmt.u8)}; 246 248 } 247 249 248 250 fmt.def = { 249 251 {'part', fmt.u8}; 250 - {'branch', fmt.list(fmt.string,fmt.u8)}; 252 + {'branch', fmt.list(fmt.label,fmt.u8)}; 251 253 {'means', fmt.list(fmt.meaning,fmt.u8)}; 252 254 {'forms', fmt.list(fmt.form,fmt.u16)}; 253 255 } 254 256 255 257 fmt.word = { 256 258 {'defs', fmt.list(fmt.def,fmt.u8)}; 257 259 } 258 260 259 261 fmt.dictHeader = { 260 - {'lang', fmt.string}; 262 + {'lang', fmt.tag}; 261 263 {'meta', fmt.string}; 262 - {'partsOfSpeech', fmt.list(fmt.string,fmt.u16)}; 264 + {'partsOfSpeech', fmt.list(fmt.tag,fmt.u16)}; 265 +} 266 + 267 +fmt.synonymSet = { 268 + {'uid', fmt.u32}; 269 + -- IDs are persistent random values so they can be used 270 + -- as reliable identifiers even when merging exports in 271 + -- a parvan-unaware VCS 272 + {'members', fmt.list({ 273 + {'word', fmt.label}, {'def', fmt.u8}; 274 + },fmt.u16)}; 263 275 } 264 276 265 277 fmt.dict = { 266 278 {'header', fmt.dictHeader}; 267 279 {'words', fmt.map(fmt.string,fmt.word)}; 280 + {'synonyms', fmt.list(fmt.synonymSet)}; 268 
281 } 269 282 270 283 function marshal(ty, val) 271 284 if ty.encode then 272 285 return ty.encode(val) 273 286 end 274 287 local ac = {} ................................................................................ 315 328 def.part = atomizePoS(def.part) 316 329 end 317 330 end 318 331 d.header.partsOfSpeech = {} 319 332 for v,i in pairs(posMap) do 320 333 d.header.partsOfSpeech[i] = v 321 334 end 322 - return marshal(fmt.dict, d) 335 + return 'PV0\2'..marshal(fmt.dict, d) 323 336 end 324 337 325 338 local function 326 339 readDict(file) 327 - local d = parse(fmt.dict, stream(file)) 340 + local s = stream(file) 341 + local magic = s:next 'c4' 342 + if magic ~= 'PV0\2' then 343 + id10t 'not a parvan file' 344 + end 345 + local d = parse(fmt.dict, s) 328 346 -- handle atoms 329 347 for lit,w in pairs(d.words) do 330 348 for j,def in ipairs(w.defs) do 331 349 def.part = d.header.partsOfSpeech[def.part] 332 350 end 333 351 end 334 352 return d ................................................................................ 510 528 else 511 529 id10t('[lit %s %s] is not a valid filter, “%s” should be either “pfx” or “sfx”',val,op,op) 512 530 end 513 531 end; 514 532 }; 515 533 form = { 516 534 help = 'match against word\'s inflected forms'; 517 - syntax = '(<inflect> | <form> (set | is <inflect> | pfx <prefix> | sfx <suffix>))'; 535 + syntax = '(<inflect> | <form> (set | is <inflect> | (pfx|sfx|match) <affix>))'; 518 536 fn = function(e, k, op, v) 519 537 end; 520 538 }; 521 539 part = { 522 540 help = 'word has definitions for every <part> of speech'; 523 541 syntax = '<part>…'; 524 542 fn = function(e,...) ................................................................................ 
539 557 local matches = 0 540 558 for j,r in ipairs(d.branch) do 541 559 if map[r] then matches = matches + 1 end 542 560 end 543 561 if matches == tgt then return true end 544 562 end 545 563 end 564 + }; 565 + note = { 566 + help = 'word has a matching note'; 567 + syntax = '([kind <kind> [<term>]] | term <term> | (min|max|count) <n>)'; 568 + fn = function(e, op, k, t) 569 + if op == 'kind' or op == 'term' then 570 + if op == 'term' and t then 571 + id10t('too many arguments for [note term <term>]') 572 + end 573 + for _,d in ipairs(e.word.defs) do 574 + for _,m in ipairs(d.means) do 575 + for _,n in ipairs(m.notes) do 576 + if op=='term' or n.kind == k then 577 + if op=='kind' and t == nil then return true end 578 + if string.find(table.concat(n.paras,'\n'), t or k, 1, true) ~= nil then return true end 579 + end 580 + end end end 581 + elseif op == 'min' or op == 'max' or op == 'count' then 582 + if t then 583 + id10t('too many arguments for [note %s <n>]',op) 584 + end 585 + local n = math.floor(tonumber(k)) 586 + local total = 0 587 + for i,d in ipairs(e.word.defs) do 588 + for j,m in ipairs(d.means) do 589 + total = total + #m.notes 590 + if op == 'min' and total >= n then return true end 591 + if op == 'max' and total > n then return false end 592 + end end 593 + if op == 'count' then return total == n end 594 + if op == 'max' then return total <= n end 595 + return false 596 + end 597 + end; 546 598 }; 547 599 } 548 600 end 549 601 550 602 local function 551 603 safeopen(file,...) 552 604 if type(file) == 'string' then ................................................................................ 605 657 header = { 606 658 lang = lang; 607 659 meta = ""; 608 660 partsOfSpeech = {}; 609 661 branch = {}; 610 662 }; 611 663 words = {}; 664 + synonyms = {}; 612 665 } 613 666 local o = writeDict(new); 614 667 fd:write(o) 615 668 fd:close() 616 669 end; 617 670 }; 618 671 coin = { ................................................................................ 
652 705 help = "add a meaning to a definition"; 653 706 syntax = "<word> <def#> <meaning>"; 654 707 write = true; 655 708 exec = function(ctx,word,dn,m) 656 709 local _,d = safeNavWord(ctx,word,dn) 657 710 table.insert(d.means, {lit=m,notes={}}) 658 711 end; 712 + }; 713 + syn = { 714 + help = "manage synonym groups"; 715 + syntax = { 716 + "(show|purge) <path>"; 717 + "(link|drop) <word> <group#> <path>…"; 718 + "new <path> <path>…"; 719 + "clear <word> [<group#>]"; 720 + }; 721 + write = true; 722 + exec = function(ctx, op, tgtw, ...) 723 + local groups = {} 724 + local wp = parsePath(tgtw) 725 + local w,d = safeNavWord(ctx, wp.w, wp.dn) 726 + if not (op=='new' or op=='link' or op=='drop' or op=='clear' or op=='show' or op=='purge') then 727 + id10t('invalid operation “%s” for `syn`', op) 728 + end 729 + if op == 'new' then 730 + local links = {{word = wp.w, def = wp.dn or 1}} 731 + for i,l in ipairs{...} do 732 + local parsed = parsePath(l) 733 + links[i+1] = {word = parsed.w, def = parsed.dn or 1} 734 + end 735 + table.insert(ctx.dict.synonyms, { 736 + uid=math.random(0,0xffffFFFF); 737 + members=links; 738 + }) 739 + else -- assemble a list of groups 740 + for i,ss in ipairs(ctx.dict.synonyms) do 741 + for j,s in ipairs(ss.members) do 742 + if s.word == wp.w and (wp.dn == nil or s.def == wp.dn) then 743 + table.insert(groups, {set = ss, mem = s}) 744 + break 745 + end 746 + end 747 + end 748 + 749 + if op == 'show' then 750 + for i, g in ipairs(groups) do 751 + local w,d = safeNavWord(ctx, g.mem.word, g.mem.def) 752 + local function label(wd,defn) 753 + local fulldef = {} 754 + for i,v in ipairs(defn.means) do 755 + fulldef[i] = v.lit 756 + end 757 + fulldef = table.concat(fulldef, '; ') 758 + return string.format("%s(%s): %s",wd,defn.part,fulldef) 759 + end 760 + local others = {} 761 + for j, o in ipairs(g.set.members) do 762 + if not (o.word == g.mem.word and o.def == (wp.dn or 1)) then 763 + local ow, od = safeNavWord(ctx, o.word,o.def) 764 + 
table.insert(others, ' '..label(o.word,od)) 765 + end 766 + end 767 + io.stdout:write(string.format("% 4u) %s\n%s", i, label(g.mem.word,d),table.concat(others,'\n'))) 768 + end 769 + elseif op == 'link' or op == 'drop' then 770 + local tgtn, paths = (...), { select(2, ...) } 771 + end 772 + end 773 + end; 659 774 }; 660 775 mod = { 661 776 help = "move, merge, split, or delete words or definitions"; 662 777 syntax = { 663 778 "<path> (drop | [move|merge|clobber] <path> | out [<part> [<root>…]])"; 664 779 "path ::= <word>[(@<def#>[/<meaning#>[:<note#>]]|.)]"; 665 780 }; ................................................................................ 711 826 syntax = "[<command>]"; 712 827 }; 713 828 predicates = { 714 829 help = "show available filter predicates"; 715 830 nofile = true; 716 831 syntax = "[<predicate>]"; 717 832 }; 833 + export = { 834 + help = "create a text file dump compatible with source control"; 835 + }; 718 836 dump = { 719 837 exec = function(ctx) print(dump(ctx.dict)) end 720 838 }; 721 839 ls = { 722 840 help = "list all words that meet any given <filter>"; 723 841 syntax = {"[<filter>…]"; 724 842 "filter ::= (<word>|<pred> <arg>…)"; ................................................................................ 799 917 for j, def in ipairs(w.word.defs) do 800 918 d=d .. '\n' .. meanings(def,true,j) 801 919 end 802 920 end 803 921 io.stdout:write(d..'\n') 804 922 end 805 923 end 924 + 925 +function cmds.export.exec(ctx) 926 + local function san(str) 927 + local d = 0 928 + local r = {} 929 + for i,cp in utf8.codes(str) do 930 + -- insert backslashes for characters that would 931 + -- disrupt strwords() parsing 932 + if cp == 0x5b then 933 + d = d + 1 934 + elseif cp == 0x5d then 935 + if d >= 1 then 936 + d = d - 1 937 + else 938 + table.insert(r, 0x5c) 939 + end 940 + end 941 + table.insert(r, cp) 942 + end 943 + return '[' .. utf8.char(table.unpack(r)) .. ']' 944 + end 945 + local function o(...) 
io.stdout:write(string.format(...)..'\n') end 946 + local d = ctx.dict 947 + o('pv0 %s %s', san(d.header.lang), san(d.header.meta)) 948 + for lit, w in pairs(d.words) do 949 + o('w %s',san(lit)) 950 + for i,def in ipairs(w.defs) do 951 + o('d %s',san(def.part)) 952 + for _,s in ipairs(d.synonyms) do 953 + for _,sm in ipairs(s.members) do 954 + if sm.word == w and sm.def == i then 955 + o('ds %u',s.uid) 956 + break 957 + end 958 + end 959 + end 960 + for j,r in ipairs(def.branch) do 961 + o('dr %s',san(r)) 962 + end 963 + for j,m in ipairs(def.means) do 964 + o('m %s', san(m.lit)) 965 + for k,n in ipairs(m.notes) do 966 + o('n %s', san(n.kind)) 967 + for a,p in ipairs(n.paras) do 968 + o('np %s', san(p)) 969 + end 970 + end 971 + end 972 + end 973 + end 974 + for _,s in ipairs(d.synonyms) do o('s %u', s.uid) end 975 +end 806 976 807 977 function cmds.mod.exec(ctx, orig, oper, dest, ...) 808 978 if (not orig) or not oper then 809 979 id10t '`mod` requires at least an origin and an operation' 810 980 end 811 981 local op, dp = parsePath(orig) 812 982 local w,d,m,n = safeNavWord(ctx, op.w,op.dn,op.mn,op.nn)