util  Check-in [0f6a5bda23]

Overview
Comment: add syn
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA3-256: 0f6a5bda236d515918b5adcb8247c4e26f9adc3747d392bdf483a6452d9bfeae
User & Date: lexi on 2022-04-26 02:02:04
Other Links: manifest | tags
Context
2022-04-28
21:01
commit to preserve old code im about to axe, parvan is broken currently check-in: f996abb5e5 user: lexi tags: trunk
2022-04-26
02:02
add syn check-in: 0f6a5bda23 user: lexi tags: trunk
2022-04-25
21:01
add first parvan revision check-in: bf5f4fd9ca user: lexi tags: trunk
Changes

Modified parvan.lua from [760cd798bc] to [2e5da05ad6].

   180    180   local qpack = function(f) return {
   181    181   	encode = packer(f);
   182    182   	decode = unpacker(f);
   183    183   } end
   184    184   
   185    185   local parse, marshal
   186    186   fmt.string = qpack "s4"
          187  +fmt.label = qpack "s2"
          188  +fmt.tag = qpack "s1"
   187    189   fmt.u8 = qpack "I1"
   188    190   fmt.u16 = qpack "I2"
   189    191   fmt.u24 = qpack "I3"
   190    192   fmt.u32 = qpack "I4"
   191    193   fmt.list = function(t,ty) ty = ty or fmt.u32
   192    194   	return {
   193    195   		encode = function(a)
................................................................................
   228    230   			return m
   229    231   		end;
   230    232   	}
   231    233   end
   232    234   
   233    235   fmt.form = {
   234    236   	{'form', fmt.u16};
   235         -	{'text', fmt.string};
          237  +	{'text', fmt.label};
   236    238   }
   237    239   
   238    240   fmt.note = {
   239         -	{'kind', fmt.string};
          241  +	{'kind', fmt.tag};
   240    242   	{'paras', fmt.list(fmt.string)};
   241    243   }
   242    244   
   243    245   fmt.meaning = {
   244    246   	{'lit', fmt.string};
   245    247   	{'notes', fmt.list(fmt.note,fmt.u8)};
   246    248   }
   247    249   
   248    250   fmt.def = {
   249    251   	{'part', fmt.u8};
   250         -	{'branch', fmt.list(fmt.string,fmt.u8)};
          252  +	{'branch', fmt.list(fmt.label,fmt.u8)};
   251    253   	{'means', fmt.list(fmt.meaning,fmt.u8)};
   252    254   	{'forms', fmt.list(fmt.form,fmt.u16)};
   253    255   }
   254    256   
   255    257   fmt.word = {
   256    258   	{'defs', fmt.list(fmt.def,fmt.u8)};
   257    259   }
   258    260   
   259    261   fmt.dictHeader = {
   260         -	{'lang', fmt.string};
          262  +	{'lang', fmt.tag};
   261    263   	{'meta', fmt.string};
   262         -	{'partsOfSpeech', fmt.list(fmt.string,fmt.u16)};
          264  +	{'partsOfSpeech', fmt.list(fmt.tag,fmt.u16)};
          265  +}
          266  +
          267  +fmt.synonymSet = {
          268  +	{'uid', fmt.u32};
          269  +		-- IDs are persistent random values so they can be used
          270  +		-- as reliable identifiers even when merging exports in
          271  +		-- a parvan-unaware VCS
          272  +	{'members', fmt.list({
          273  +		{'word', fmt.label}, {'def', fmt.u8};
          274  +	},fmt.u16)};
   263    275   }
   264    276   
   265    277   fmt.dict = {
   266    278   	{'header', fmt.dictHeader};
   267    279   	{'words', fmt.map(fmt.string,fmt.word)};
          280  +	{'synonyms', fmt.list(fmt.synonymSet)};
   268    281   }
   269    282   
   270    283   function marshal(ty, val)
   271    284   	if ty.encode then
   272    285   		return ty.encode(val)
   273    286   	end
   274    287   	local ac = {}
................................................................................
   315    328   			def.part = atomizePoS(def.part)
   316    329   		end
   317    330   	end
   318    331   	d.header.partsOfSpeech = {}
   319    332   	for v,i in pairs(posMap) do
   320    333   		d.header.partsOfSpeech[i] = v
   321    334   	end
   322         -	return marshal(fmt.dict, d)
          335  +	return 'PV0\2'..marshal(fmt.dict, d)
   323    336   end
   324    337   
   325    338   local function
   326    339   readDict(file)
   327         -	local d = parse(fmt.dict, stream(file))
          340  +	local s = stream(file)
          341  +	local magic = s:next 'c4'
          342  +	if magic ~= 'PV0\2' then
          343  +		id10t 'not a parvan file'
          344  +	end
          345  +	local d = parse(fmt.dict, s)
   328    346   	-- handle atoms
   329    347   	for lit,w in pairs(d.words) do
   330    348   		for j,def in ipairs(w.defs) do
   331    349   			def.part = d.header.partsOfSpeech[def.part]
   332    350   		end
   333    351   	end
   334    352   	return d
................................................................................
   510    528   				else
   511    529   					id10t('[lit %s %s] is not a valid filter, “%s” should be either “pfx” or “sfx”',val,op,op)
   512    530   				end
   513    531   			end;
   514    532   		};
   515    533   		form = {
   516    534   			help = 'match against word\'s inflected forms';
   517         -			syntax = '(<inflect> | <form> (set | is <inflect> | pfx <prefix> | sfx <suffix>))';
          535  +			syntax = '(<inflect> | <form> (set | is <inflect> | (pfx|sfx|match) <affix>))';
   518    536   			fn = function(e, k, op, v)
   519    537   			end;
   520    538   		};
   521    539   		part = {
   522    540   			help = 'word has definitions for every <part> of speech';
   523    541   			syntax = '<part>…';
   524    542   			fn = function(e,...)
................................................................................
   539    557   					local matches = 0
   540    558   					for j,r in ipairs(d.branch) do
   541    559   						if map[r] then matches = matches + 1 end
   542    560   					end
   543    561   					if matches == tgt then return true end
   544    562   				end
   545    563   			end
          564  +		};
          565  +		note = {
          566  +			help = 'word has a matching note';
          567  +			syntax = '([kind <kind> [<term>]] | term <term> | (min|max|count) <n>)';
          568  +			fn = function(e, op, k, t)
          569  +				if op == 'kind' or op == 'term' then
          570  +					if op == 'term' and t then
          571  +						id10t('too many arguments for [note term <term>]')
          572  +					end
          573  +					for _,d in ipairs(e.word.defs) do
          574  +					for _,m in ipairs(d.means) do
          575  +					for _,n in ipairs(m.notes) do
          576  +						if op=='term' or n.kind == k then
          577  +							if op=='kind' and t == nil then return true end
          578  +							if string.find(table.concat(n.paras,'\n'), t or k, 1, true) ~= nil then return true end
          579  +						end
          580  +					end end end
          581  +				elseif op == 'min' or op == 'max' or op == 'count' then
          582  +					if t then
          583  +						id10t('too many arguments for [note %s <n>]',op)
          584  +					end
          585  +					local n = math.floor(tonumber(k))
          586  +					local total = 0
          587  +					for i,d in ipairs(e.word.defs) do
          588  +					for j,m in ipairs(d.means) do
          589  +						total = total + #m.notes
          590  +						if op == 'min' and total >= n then return true end
          591  +						if op == 'max' and total > n then return false end
          592  +					end end
          593  +					if op == 'count' then return total == n end
          594  +					if op == 'max'   then return total <= n end
          595  +					return false
          596  +				end
          597  +			end;
   546    598   		};
   547    599   	}
   548    600   end
   549    601   
   550    602   local function
   551    603   safeopen(file,...)
   552    604   	if type(file) == 'string' then
................................................................................
   605    657   				header = {
   606    658   					lang = lang;
   607    659   					meta = "";
   608    660   					partsOfSpeech = {};
   609    661   					branch = {};
   610    662   				};
   611    663   				words = {};
          664  +				synonyms = {};
   612    665   			}
   613    666   			local o = writeDict(new);
   614    667   			fd:write(o)
   615    668   			fd:close()
   616    669   		end;
   617    670   	};
   618    671   	coin = {
................................................................................
   652    705   		help = "add a meaning to a definition";
   653    706   		syntax = "<word> <def#> <meaning>";
   654    707   		write = true;
   655    708   		exec = function(ctx,word,dn,m)
   656    709   			local _,d = safeNavWord(ctx,word,dn)
   657    710   			table.insert(d.means, {lit=m,notes={}})
   658    711   		end;
          712  +	};
          713  +	syn = {
          714  +		help = "manage synonym groups";
          715  +		syntax = {
          716  +			"(show|purge) <path>";
          717  +			"(link|drop) <word> <group#> <path>…";
          718  +			"new <path> <path>…";
          719  +			"clear <word> [<group#>]";
          720  +		};
          721  +		write = true;
          722  +		exec = function(ctx, op, tgtw, ...)
          723  +			local groups = {}
          724  +			local wp = parsePath(tgtw)
          725  +			local w,d = safeNavWord(ctx, wp.w, wp.dn)
          726  +			if not (op=='new' or op=='link' or op=='drop' or op=='clear' or op=='show' or op=='purge') then
          727  +				id10t('invalid operation “%s” for `syn`', op)
          728  +			end
          729  +			if op == 'new' then
          730  +				local links = {{word = wp.w, def = wp.dn or 1}}
          731  +				for i,l in ipairs{...} do
          732  +					local parsed = parsePath(l)
          733  +					links[i+1] = {word = parsed.w, def = parsed.dn or 1}
          734  +				end
          735  +				table.insert(ctx.dict.synonyms, {
          736  +					uid=math.random(0,0xffffFFFF);
          737  +					members=links;
          738  +				})
          739  +			else -- assemble a list of groups
          740  +				for i,ss in ipairs(ctx.dict.synonyms) do
          741  +					for j,s in ipairs(ss.members) do
          742  +						if s.word == wp.w and (wp.dn == nil or s.def == wp.dn) then
          743  +							table.insert(groups, {set = ss, mem = s})
          744  +							break
          745  +						end
          746  +					end
          747  +				end
          748  +
          749  +				if op == 'show' then
          750  +					for i, g in ipairs(groups) do
          751  +						local w,d = safeNavWord(ctx, g.mem.word, g.mem.def)
          752  +						local function label(wd,defn)
          753  +							local fulldef = {}
          754  +							for i,v in ipairs(defn.means) do
          755  +								fulldef[i] = v.lit
          756  +							end
          757  +							fulldef = table.concat(fulldef, '; ')
          758  +							return string.format("%s(%s): %s",wd,defn.part,fulldef)
          759  +						end
          760  +						local others = {}
          761  +						for j, o in ipairs(g.set.members) do
          762  +							if not (o.word == g.mem.word and o.def == (wp.dn or 1)) then
          763  +								local ow, od = safeNavWord(ctx, o.word,o.def)
          764  +								table.insert(others, '      '..label(o.word,od))
          765  +							end
          766  +						end
          767  +						io.stdout:write(string.format("% 4u) %s\n%s", i, label(g.mem.word,d),table.concat(others,'\n')))
          768  +					end
          769  +				elseif op == 'link' or op == 'drop' then
          770  +					local tgtn, paths = (...), { select(2, ...) }
          771  +				end
          772  +			end
          773  +		end;
   659    774   	};
   660    775   	mod = {
   661    776   		help = "move, merge, split, or delete words or definitions";
   662    777   		syntax = {
   663    778   			"<path> (drop | [move|merge|clobber] <path> | out [<part> [<root>…]])";
   664    779   			"path ::= <word>[(@<def#>[/<meaning#>[:<note#>]]|.)]";
   665    780   		};
................................................................................
   711    826   		syntax = "[<command>]";
   712    827   	};
   713    828   	predicates = {
   714    829   		help = "show available filter predicates";
   715    830   		nofile = true;
   716    831   		syntax = "[<predicate>]";
   717    832   	};
          833  +	export = {
          834  +		help = "create a text file dump compatible with source control";
          835  +	};
   718    836   	dump = {
   719    837   		exec = function(ctx) print(dump(ctx.dict)) end
   720    838   	};
   721    839   	ls = {
   722    840   		help = "list all words that meet any given <filter>";
   723    841   		syntax = {"[<filter>…]";
   724    842   			"filter ::= (<word>|<pred> <arg>…)";
................................................................................
   799    917   			for j, def in ipairs(w.word.defs) do
   800    918   				d=d .. '\n' .. meanings(def,true,j)
   801    919   			end
   802    920   		end
   803    921   		io.stdout:write(d..'\n')
   804    922   	end
   805    923   end
          924  +
          925  +function cmds.export.exec(ctx)
          926  +	local function san(str)
          927  +		local d = 0
          928  +		local r = {}
          929  +		for i,cp in utf8.codes(str) do
          930  +			-- insert backslashes for characters that would
          931  +			-- disrupt strwords() parsing
          932  +			if cp == 0x5b then
          933  +				d = d + 1
          934  +			elseif cp == 0x5d then
          935  +				if d >= 1 then
          936  +					d = d - 1
          937  +				else
          938  +					table.insert(r, 0x5c)
          939  +				end
          940  +			end
          941  +			table.insert(r, cp)
          942  +		end
          943  +		return '[' .. utf8.char(table.unpack(r)) .. ']'
          944  +	end
          945  +	local function o(...) io.stdout:write(string.format(...)..'\n') end
          946  +	local d = ctx.dict
          947  +	o('pv0 %s %s', san(d.header.lang), san(d.header.meta))
          948  +	for lit, w in pairs(d.words) do
          949  +		o('w %s',san(lit))
          950  +		for i,def in ipairs(w.defs) do
          951  +			o('d %s',san(def.part))
          952  +			for _,s in ipairs(d.synonyms) do
          953  +				for _,sm in ipairs(s.members) do
          954  +					if sm.word == w and sm.def == i then
          955  +						o('ds %u',s.uid)
          956  +						break
          957  +					end
          958  +				end
          959  +			end
          960  +			for j,r in ipairs(def.branch) do
          961  +				o('dr %s',san(r))
          962  +			end
          963  +			for j,m in ipairs(def.means) do
          964  +				o('m %s', san(m.lit))
          965  +				for k,n in ipairs(m.notes) do
          966  +					o('n %s', san(n.kind))
          967  +					for a,p in ipairs(n.paras) do
          968  +						o('np %s', san(p))
          969  +					end
          970  +				end
          971  +			end
          972  +		end
          973  +	end
          974  +	for _,s in ipairs(d.synonyms) do o('s %u', s.uid) end
          975  +end
   806    976   
   807    977   function cmds.mod.exec(ctx, orig, oper, dest, ...)
   808    978   	if (not orig) or not oper then
   809    979   		id10t '`mod` requires at least an origin and an operation'
   810    980   	end
   811    981   	local op, dp = parsePath(orig)
   812    982   	local w,d,m,n = safeNavWord(ctx, op.w,op.dn,op.mn,op.nn)