Index: parvan.lua
==================================================================
--- parvan.lua
+++ parvan.lua
@@ -182,10 +182,12 @@
 	decode = unpacker(f);
 } end
 
 local parse, marshal
 fmt.string = qpack "s4"
+fmt.label = qpack "s2" -- short string; "s2" is presumably a string.pack format (2-byte length prefix) -- confirm against qpack
+fmt.tag = qpack "s1" -- very short string; presumably a 1-byte length prefix, i.e. at most 255 bytes -- confirm against qpack
 fmt.u8 = qpack "I1"
 fmt.u16 = qpack "I2"
 fmt.u24 = qpack "I3"
 fmt.u32 = qpack "I4"
 fmt.list = function(t,ty) ty = ty or fmt.u32
@@ -230,15 +232,15 @@
 	}
 end
 
 fmt.form = {
 	{'form', fmt.u16};
-	{'text', fmt.string};
+	{'text', fmt.label};
 }
 
 fmt.note = {
-	{'kind', fmt.string};
+	{'kind', fmt.tag};
 	{'paras', fmt.list(fmt.string)};
 }
 
 fmt.meaning = {
 	{'lit', fmt.string};
@@ -245,28 +247,39 @@
 	{'notes', fmt.list(fmt.note,fmt.u8)};
 }
 
 fmt.def = {
 	{'part', fmt.u8};
-	{'branch', fmt.list(fmt.string,fmt.u8)};
+	{'branch', fmt.list(fmt.label,fmt.u8)};
 	{'means', fmt.list(fmt.meaning,fmt.u8)};
 	{'forms', fmt.list(fmt.form,fmt.u16)};
 }
 
 fmt.word = {
 	{'defs', fmt.list(fmt.def,fmt.u8)};
 }
 
 fmt.dictHeader = {
-	{'lang', fmt.string};
+	{'lang', fmt.tag};
 	{'meta', fmt.string};
-	{'partsOfSpeech', fmt.list(fmt.string,fmt.u16)};
+	{'partsOfSpeech', fmt.list(fmt.tag,fmt.u16)};
+}
+
+fmt.synonymSet = { -- serialized layout of one synonym group
+	{'uid', fmt.u32};
+		-- IDs are persistent random values so they can be used
+		-- as reliable identifiers even when merging exports in
+		-- a parvan-unaware VCS
+	{'members', fmt.list({ -- each member points at one definition of one word
+		{'word', fmt.label}, {'def', fmt.u8}; -- def is the 1-based position in that word's defs list
+	},fmt.u16)};
 }
 
 fmt.dict = {
 	{'header', fmt.dictHeader};
 	{'words', fmt.map(fmt.string,fmt.word)};
+	{'synonyms', fmt.list(fmt.synonymSet)}; -- no width given, so fmt.list falls back to its fmt.u32 default
 }
 
 function marshal(ty, val)
 	if ty.encode then
 		return ty.encode(val)
@@ -317,16 +330,21 @@
 	end
 	d.header.partsOfSpeech = {}
 	for v,i in pairs(posMap) do
 		d.header.partsOfSpeech[i] = v
 	end
-	return marshal(fmt.dict, d)
+	return 'PV0\2'..marshal(fmt.dict, d)
 end
 
 local function
 readDict(file)
-	local d = parse(fmt.dict, stream(file))
+	local s = stream(file)
+	local magic = s:next 'c4'
+	if magic ~= 'PV0\2' then
+		id10t 'not a parvan file'
+	end
+	local d = parse(fmt.dict, s)
 	-- handle atoms
 	for lit,w in pairs(d.words) do
 		for j,def in ipairs(w.defs) do
 			def.part = d.header.partsOfSpeech[def.part]
 		end
@@ -512,11 +530,11 @@
 				end
 			end;
 		};
 		form = {
 			help = 'match against word\'s inflected forms';
-			syntax = '(<inflect> | <form> (set | is <inflect> | pfx <prefix> | sfx <suffix>))';
+			syntax = '(<inflect> | <form> (set | is <inflect> | (pfx|sfx|match) <affix>))';
 			fn = function(e, k, op, v)
 			end;
 		};
 		part = {
 			help = 'word has definitions for every <part> of speech';
@@ -541,10 +559,44 @@
 						if map[r] then matches = matches + 1 end
 					end
 					if matches == tgt then return true end
 				end
 			end
+		};
+		note = { -- predicate: filters words by the notes attached to their meanings
+			help = 'word has a matching note';
+			syntax = '([kind <kind> [<term>]] | term <term> | (min|max|count) <n>)';
+			fn = function(e, op, k, t) -- e.word: candidate word; op: sub-operation; k, t: its operands
+				if op == 'kind' or op == 'term' then
+					if op == 'term' and t then
+						id10t('too many arguments for [note term <term>]')
+					end
+					for _,d in ipairs(e.word.defs) do
+					for _,m in ipairs(d.means) do
+					for _,n in ipairs(m.notes) do
+						if op=='term' or n.kind == k then
+							if op=='kind' and t == nil then return true end -- [note kind <kind>]: the kind alone decides
+							if string.find(table.concat(n.paras,'\n'), t or k, 1, true) ~= nil then return true end -- plain (non-pattern) substring search
+						end
+					end end end
+				elseif op == 'min' or op == 'max' or op == 'count' then
+					if t then
+						id10t('too many arguments for [note %s <n>]',op)
+					end
+					local n = math.floor(tonumber(k) or id10t('invalid number for [note %s <n>]',op)) -- report a bad <n> instead of crashing in math.floor(nil); assumes id10t aborts
+					local total = 0 -- notes counted so far across all definitions and meanings
+					for i,d in ipairs(e.word.defs) do
+					for j,m in ipairs(d.means) do
+						total = total + #m.notes
+						if op == 'min' and total >= n then return true end -- enough notes seen, stop early
+						if op == 'max' and total > n then return false end -- limit already exceeded
+					end end
+					if op == 'count' then return total == n end
+					if op == 'max'   then return total <= n end
+					return total >= n -- only 'min' gets here; comparing (rather than returning false) makes [note min 0] hold for words with no meanings
+				end
+			end;
 		};
 	}
 end
 
 local function
@@ -607,10 +659,11 @@
 					meta = "";
 					partsOfSpeech = {};
 					branch = {};
 				};
 				words = {};
+				synonyms = {};
 			}
 			local o = writeDict(new);
 			fd:write(o)
 			fd:close()
 		end;
@@ -654,10 +707,72 @@
 		write = true;
 		exec = function(ctx,word,dn,m)
 			local _,d = safeNavWord(ctx,word,dn)
 			table.insert(d.means, {lit=m,notes={}})
 		end;
+	};
+	syn = { -- command: create and inspect synonym groups stored in ctx.dict.synonyms
+		help = "manage synonym groups";
+		syntax = {
+			"(show|purge) <path>";
+			"(link|drop) <word> <group#> <path>…";
+			"new <path> <path>…";
+			"clear <word> [<group#>]";
+		};
+		write = true;
+		exec = function(ctx, op, tgtw, ...) -- op: sub-command; tgtw: target path; ...: further paths
+			local groups = {} -- synonym sets that contain the target, paired with the matching member
+			local wp = parsePath(tgtw)
+			local w,d = safeNavWord(ctx, wp.w, wp.dn)
+			if not (op=='new' or op=='link' or op=='drop' or op=='clear' or op=='show' or op=='purge') then
+				id10t('invalid operation “%s” for `syn`', op)
+			end
+			if op == 'new' then
+				local links = {{word = wp.w, def = wp.dn or 1}} -- a path without a definition number means definition 1
+				for i,l in ipairs{...} do
+					local parsed = parsePath(l)
+					links[i+1] = {word = parsed.w, def = parsed.dn or 1}
+				end
+				table.insert(ctx.dict.synonyms, {
+					uid=math.random(0,0xffffFFFF); -- random value in the fmt.u32 range; not checked against existing uids
+					members=links;
+				})
+			else -- assemble a list of groups
+				for i,ss in ipairs(ctx.dict.synonyms) do
+					for j,s in ipairs(ss.members) do
+						if s.word == wp.w and (wp.dn == nil or s.def == wp.dn) then
+							table.insert(groups, {set = ss, mem = s})
+							break -- at most one entry per set
+						end
+					end
+				end
+
+				if op == 'show' then
+					for i, g in ipairs(groups) do
+						local w,d = safeNavWord(ctx, g.mem.word, g.mem.def)
+						local function label(wd,defn) -- renders "word(part): meaning; meaning"
+							local fulldef = {}
+							for i,v in ipairs(defn.means) do
+								fulldef[i] = v.lit
+							end
+							fulldef = table.concat(fulldef, '; ')
+							return string.format("%s(%s): %s",wd,defn.part,fulldef)
+						end
+						local lines = { string.format("%4u) %s", i, label(g.mem.word,d)) } -- header line; no space flag, which is invalid for %u
+						for j, o in ipairs(g.set.members) do
+							if not (o.word == g.mem.word and o.def == g.mem.def) then -- skip the member that matched, whatever its def number
+								local ow, od = safeNavWord(ctx, o.word,o.def)
+								table.insert(lines, '      '..label(o.word,od))
+							end
+						end
+						io.stdout:write(table.concat(lines,'\n'), '\n') -- end every group with a newline so groups do not run together
+					end
+				elseif op == 'link' or op == 'drop' then -- TODO: link/drop only unpack their arguments; clear/purge have no branch at all
+					local tgtn, paths = (...), { select(2, ...) }
+				end
+			end
+		end;
 	};
 	mod = {
 		help = "move, merge, split, or delete words or definitions";
 		syntax = {
 			"<path> (drop | [move|merge|clobber] <path> | out [<part> [<root>…]])";
@@ -713,10 +828,13 @@
 	predicates = {
 		help = "show available filter predicates";
 		nofile = true;
 		syntax = "[<predicate>]";
 	};
+	export = { -- the handler is attached further down as cmds.export.exec
+		help = "create a text file dump compatible with source control";
+	};
 	dump = {
 		exec = function(ctx) print(dump(ctx.dict)) end
 	};
 	ls = {
 		help = "list all words that meet any given <filter>";
@@ -801,10 +919,62 @@
 			end
 		end
 		io.stdout:write(d..'\n')
 	end
 end
+
+function cmds.export.exec(ctx) -- writes a line-oriented text dump of ctx.dict to stdout
+	local function san(str) -- wraps str in [ ], escaping any ']' that would end the bracket early
+		local d = 0 -- number of '[' seen that are still unclosed
+		local r = {} -- output code points
+		for i,cp in utf8.codes(str) do
+			-- insert backslashes for characters that would
+			-- disrupt strwords() parsing
+			if cp == 0x5b then -- '['
+				d = d + 1
+			elseif cp == 0x5d then -- ']'
+				if d >= 1 then
+					d = d - 1
+				else
+					table.insert(r, 0x5c) -- '\' in front of an unbalanced ']'
+				end
+			end
+			table.insert(r, cp)
+		end
+		return '[' .. utf8.char(table.unpack(r)) .. ']' -- NOTE(review): an unclosed '[' and literal backslashes pass through unescaped; confirm strwords() copes
+	end
+	local function o(...) io.stdout:write(string.format(...)..'\n') end -- formatted output, one record per line
+	local d = ctx.dict
+	o('pv0 %s %s', san(d.header.lang), san(d.header.meta)) -- header record
+	for lit, w in pairs(d.words) do -- NOTE(review): pairs() order is unspecified, so record order may differ between runs
+		o('w %s',san(lit))
+		for i,def in ipairs(w.defs) do
+			o('d %s',san(def.part))
+			for _,s in ipairs(d.synonyms) do
+				for _,sm in ipairs(s.members) do
+					if sm.word == lit and sm.def == i then -- members store the word's text, so compare with the key, not the word table
+						o('ds %u',s.uid)
+						break
+					end
+				end
+			end
+			for j,r in ipairs(def.branch) do
+				o('dr %s',san(r))
+			end
+			for j,m in ipairs(def.means) do
+				o('m %s', san(m.lit))
+				for k,n in ipairs(m.notes) do
+					o('n %s', san(n.kind))
+					for a,p in ipairs(n.paras) do
+						o('np %s', san(p))
+					end
+				end
+			end
+		end
+	end
+	for _,s in ipairs(d.synonyms) do o('s %u', s.uid) end -- finally, one record per synonym set
+end
 
 function cmds.mod.exec(ctx, orig, oper, dest, ...)
 	if (not orig) or not oper then
 		id10t '`mod` requires at least an origin and an operation'
 	end