cortav  Diff

Differences From Artifact [1f16b393f5]:

To Artifact [581e1b0127]:


    86     86   			end
    87     87   		else
    88     88   			new[k] = v
    89     89   		end
    90     90   	end
    91     91   	return new
    92     92   end
           93  +
           94  +function ss.push(tbl, ...) -- appends every vararg to tbl in order; returns tbl
           95  +	local idx = #tbl + 1 -- first slot after the current border
           96  +	local function rec(v, ...)
           97  +		tbl[idx] = v
           98  +		idx = idx + 1
           99  +		if ss.tuple.any(...) then rec(...) end -- recurse while arguments remain
          100  +	end
          101  +	rec(...) -- note: runs once even with no varargs, assigning nil to tbl[idx]
          102  +	return tbl
          103  +end
    93    104   
    94    105   function ss.delegate(tbl,tpl) -- returns a table that looks up keys it lacks from
    95    106                                 -- tbl (lightweight alternative to shallow copies)
    96    107   	tpl = tpl or {}
    97    108   	return setmetatable({}, {__index=tbl})
    98    109   end
    99    110   
   100    111   ss.str = {}
   101    112   
   102    113   function ss.str.begins(str, pfx)
   103         -	return string.sub(str, 1, #pfx) == pfx
          114  +	-- appallingly, this is actually ~2/5ths faster than either
          115  +	-- of the below. i hate scripting languages so much
          116  +	return string.find(str, pfx, 1, true) == 1
          117  +	-- to my shock, disgust, and horror, even writing my own
          118  +	-- string scanning library for lua IN C only sped this up by
          119  +	-- a tiny fraction. i am just speechless.
          120  +-- 	return string.sub(str, 1, #pfx) == pfx
          121  +
          122  +-- 	local pl = string.len(pfx)
          123  +-- 	local sl = string.len(str)
          124  +-- 	if sl < pl then return false end
          125  +-- 	for i=1,pl do
          126  +-- 		if string.byte(str,i) ~= string.byte(pfx,i) then
          127  +-- 			return false
          128  +-- 		end
          129  +-- 	end
          130  +-- 	return true
   104    131   end
   105    132   
          133  +function ss.enum(syms) -- builds a two-way lookup table from syms
          134  +	local e = {}
          135  +	for i,v in pairs(syms) do
          136  +		e[v] = i -- value -> key
          137  +		e[i] = v -- key -> value
          138  +	end
          139  +	return e
          140  +end
          141  +
          142  +function ss.bitmask_bytes(n,ofs) -- returns 1<<ofs, 1<<(ofs+1), ..., 1<<(ofs+n) as multiple values
          143  +	ofs = ofs or 0 -- bit position of the first value
          144  +	local function rec(i)
          145  +		if i > n then return end
          146  +		return 1<<(i+ofs), rec(i+1)
          147  +	end
          148  +	return 1<<ofs, rec(1) -- n+1 values in total: bit ofs plus bits ofs+1..ofs+n
          149  +end
          150  +
          151  +function ss.bitmask(tbl,ofs) -- maps each name in the list tbl to a distinct bit value and back
          152  +	local codes = {ss.bitmask_bytes(#tbl,ofs)}
          153  +	local m = {}
          154  +	local maxbit -- index of the last name seen; stays nil if tbl is empty
          155  +	for i, s in ipairs(tbl) do
          156  +		m[s] = codes[i] -- name -> bit value
          157  +		m[codes[i]] = s -- bit value -> name
          158  +		maxbit = i
          159  +	end
          160  +	m[true] = {ofs or 0,maxbit} -- stored under the key true: {offset, index of last name}
          161  +	return m
          162  +end
          163  +
          164  +ss.str.charclass = ss.enum {
          165  +	'numeral'; 'letter'; 'symbol'; 'punct';
          166  +	'space'; 'ctl'; 'glyph'; -- hanji
          167  +}
          168  +ss.str.charprop = ss.bitmask({
          169  +	'hexnumeral', -- character that can be used to write hexadecimal notation
          170  +	'upper', 'lower';
          171  +	'diac'; -- diacritic/modifier letter
          172  +	'wordbreak'; -- char causes following characters to be treated as a separate word (e.g. punctuation)
          173  +	'wordsep'; -- char causes previous and following characters to be treated as separate words; char constitutes a word of its own in between (e.g. interpunct)
          174  +	'breakokay'; -- is it okay to break words at this character? (eg hyphen)
          175  +	'mathop'; -- char is a mathematical operator
          176  +	'disallow', -- char is not allowed in narrative text
          177  +	'brack', 'right', 'left', -- brackets
          178  +	'noprint', -- character deposits no ink
          179  +	'superimpose' -- character is superimposed over previous
          180  +}, 3)
          181  +
          182  +ss.str.enc_generics = {
          183  +	pfxescape = function(ch, enc, chain) -- builds an escape parser for the prefix ch
          184  +		local bytes = #ch -- byte length of the prefix
          185  +		local codes = enc.len(ch) -- length of the prefix as counted by enc.len
          186  +		return function(s)
          187  +			if s == ch then -- input is the bare prefix with nothing after it
          188  +				return 0, 0, ch
          189  +			elseif ss.str.begins(s, ch) then
          190  +				local nc = enc.char(enc.codepoint(s, bytes + 1)) -- character following the prefix
          191  +				return bytes, codes, nc
          192  +			elseif chain then
          193  +				return chain(s) -- no match here: defer to the chained parser
          194  +			end
          195  +		end
          196  +	end;
          197  +};
          198  +
          199  +local cc,cp = ss.str.charclass, ss.str.charprop
   106    200   ss.str.enc = {
   107    201   	utf8 = {
   108    202   		char = utf8.char;
   109    203   		codepoint = utf8.codepoint;
          204  +		len = utf8.len;
          205  +		encodeUCS = function(str) return str end;
          206  +		iswhitespace = function(c)
          207  +			return (c == ' ') or (c == '\t') or (c == '\n')
          208  +				or (c == '\u{3000}')
          209  +				or (c == '\u{200B}')
          210  +      end;
          211  +	};
          212  +	ascii = { -- codec table for 7-bit ASCII text
          213  +		len = string.len; char = string.char; codepoint = string.byte;
          214  +		iswhitespace = function(c)
          215  +			return (c == ' ') or (c == '\t') or (c == '\n')
          216  +      end;
          217  +		ranges = { -- entries are {first, last, charclass, charprops...}, bounds inclusive
          218  +			{0x00,0x1a, cc.ctl};
          219  +			{0x1b,0x1b, cc.ctl, cp.disallow};
          220  +			{0x1c,0x1f, cc.ctl};
          221  +			{0x20,0x20, cc.space};
          222  +			{0x21,0x22, cc.punct};
          223  +			{0x23,0x26, cc.symbol};
          224  +			{0x27,0x29, cc.punct};
          225  +			{0x2a,0x2b, cc.symbol};
          226  +			{0x2c,0x2f, cc.punct};
          227  +			{0x30,0x39, cc.numeral, cp.hexnumeral};
          228  +			{0x3a,0x3b, cc.punct};
          229  +			{0x3c,0x3e, cc.symbol, cp.mathop};
          230  +			{0x3f,0x3f, cc.punct};
          231  +			{0x40,0x40, cc.symbol};
          232  +			{0x41,0x46, cc.letter, cp.upper, cp.hexnumeral};
          233  +			{0x47,0x5a, cc.letter, cp.upper};
          234  +			{0x5b,0x5d, cc.symbol, cp.mathop};
          235  +			{0x5e,0x5e, cc.symbol, cp.mathop};
          236  +			{0x5f,0x60, cc.symbol};
          237  +			{0x61,0x66, cc.letter, cp.lower, cp.hexnumeral};
          238  +			{0x67,0x7a, cc.letter, cp.lower};
          239  +			{0x7b,0x7e, cc.symbol};
          240  +			{0x7f,0x7f, cc.ctl, cp.disallow};
          241  +		}
   110    242   	};
   111         -	c6b = {};
   112         -	ascii = {};
          243  +	raw = {len = string.len; char = string.char; codepoint = string.byte;
          244  +		encodeUCS = function(str) return str end;
          245  +		iswhitespace = function(c)
          246  +			return (c == ' ') or (c == '\t') or (c == '\n')
          247  +      end;
          248  +   };
   113    249   }
   114    250   
   115         -function ss.str.enc.utf8.each(str, ascode)
          251  +-- unicode ranges are optionally generated from consortium data
          252  +-- files and injected through a generated source file. if this
          253  +-- part of the build process is disabled (e.g. due to lack of
          254  +-- internet access, or to keep the size of the executable as
          255  +-- small as possible), we still at least can make the ascii
          256  +-- ranges available to UTF8 (UTF8 being a superset of ascii)
          257  +ss.str.enc.utf8.ranges = ss.delegate(ss.str.enc.ascii.ranges)
          258  +
          259  +function ss.str.enc.ascii.encodeUCS(str)
          260  +	-- transcodes a UTF-8 string to ASCII and returns the result.
          261  +	-- every codepoint above 0x7F is replaced by '?'. pieces are
          262  +	-- collected in a list and joined once at the end
          263  +	local out = {}
          264  +	for c in ss.str.each(ss.str.enc.utf8, str, true) do
          265  +		out[#out+1] = (c > 0x7F) and '?' or string.char(c)
          266  +	end
          267  +	return table.concat(out)
          268  +end
          269  +
          270  +for _, v in pairs{'utf8','ascii','raw'} do
          271  +	ss.str.enc[v].parse_escape = ss.str.enc_generics.pfxescape('\\',ss.str.enc[v])
          272  +end
          273  +
          274  +function ss.str.classify(enc, ch) -- stub: the range lookup itself is not implemented yet
          275  +	if not enc.ranges then return {} end -- encodings without range data yield an empty table
          276  +	if type(ch)=='string' then ch = enc.codepoint(ch) end -- accept a character or a codepoint
          277  +	-- TODO: look ch up in enc.ranges; this path currently returns nothing
          278  +end
          279  +
          280  +
          281  +function ss.str.each(enc, str, ascode)
          282  +	if enc.each then return enc.each(enc,str,ascode) end
          283  +	local pm = {
          284  +		__index = {
          285  +			esc = function(self)
          286  +				local ba, bc, nc = enc.parse_escape(str:sub(self.byte))
          287  +				if ba then
          288  +					self.next.byte = self.next.byte + ba - 1
          289  +					self.next.code = self.next.code + bc - 1
          290  +					return nc
          291  +				end
          292  +			end;
          293  +		};
          294  +	}
   116    295   	local pos = {
   117    296   		code = 1;
   118    297   		byte = 1;
   119    298   	}
   120    299   	return function()
   121    300   		if pos.byte > #str then return nil end
   122         -		local thischar = utf8.codepoint(str, pos.byte)
   123         -		local lastpos = {
          301  +		local thischar = enc.codepoint(str, pos.byte)
          302  +		local lastpos = setmetatable({
   124    303   			code = pos.code;
   125    304   			byte = pos.byte;
   126    305   			next = pos;
   127         -		}
          306  +		},pm)
   128    307   		if not ascode then
   129         -			thischar = utf8.char(thischar)
          308  +			thischar = enc.char(thischar)
   130    309   			pos.byte = pos.byte + #thischar
   131    310   		else
   132         -			pos.byte = pos.byte + #utf8.char(thischar)
          311  +			pos.byte = pos.byte + #enc.char(thischar)
   133    312   		end
   134    313   		pos.code = pos.code + 1
   135    314   		return thischar, lastpos
   136    315   	end
   137    316   end
          317  +
          318  +function ss.str.breakwords(enc, str, max, opts) -- splits str into a list of whitespace-separated words
          319  +	if enc.breakwords then return enc.breakwords(str, max, opts) end -- encoding-specific override
          320  +	local words = {}
          321  +	opts = opts or {}
          322  +	local buf = '' -- word currently being accumulated
          323  +	local flush = function()
          324  +		if buf ~= '' then table.insert(words,buf) buf = '' end
          325  +	end
          326  +	for c, p in ss.str.each(enc,str) do
          327  +		local nc
          328  +		if opts.escape then
          329  +			nc = p:esc() -- escaped character, or nil if no escape starts here
          330  +		end
          331  +		if nc then
          332  +			buf = buf .. nc -- string concatenation, not arithmetic
          333  +		elseif enc.iswhitespace(c) then
          334  +			flush()
          335  +			if max and #words == max then
          336  +				local rs = str:sub(p.next.byte) -- word limit reached: keep the remainder whole
          337  +				if rs ~= '' then
          338  +					table.insert(words, rs)
          339  +				end
          340  +				break
          341  +			end
          342  +		else
          343  +			buf = buf .. c
          344  +		end
          345  +	end
          346  +	flush()
          347  +	return words
          348  +end
          349  +function ss.str.mergewords(enc, lst) -- joins a word list back into one string
          350  +	if enc.mergewords then return enc.mergewords(lst) end -- encoding-specific override
          351  +	return table.concat(lst, enc.wordsep or ' ') -- separator defaults to a single space
          352  +end
          353  +function ss.str.breaklines(enc, str, opts) -- splits str into lines at the encoding's newline
          354  +	if enc.breaklines then return enc.breaklines(str,opts) end -- encoding-specific override
          355  +	return ss.str.split(enc, str, enc.encodeUCS'\n', opts)
          356  +end
          357  +
          358  +function ss.str.split(enc, str, delim, opts) -- splits str on delim (a string or a matcher function); returns a list
          359  +	if enc.split then return enc.split(str,delim,opts) end
          360  +	opts = opts or {}
          361  +	local elts = {}
          362  +	local buf = ''
          363  +	local flush = function()
          364  +		if buf ~= '' or opts.keep_empties then
          365  +			table.insert(elts,buf)
          366  +			buf = ''
          367  +		end
          368  +	end
          369  +	-- tryesc consumes an escape sequence at p when opts.escape is set
          370  +	local tryesc if opts.escape then
          371  +		tryesc = function(str, p)
          372  +			local ba, ca, escd = enc.parse_escape(str:sub(p.byte))
          373  +			if ba then
          374  +				p.next.byte = p.next.byte + ba
          375  +				p.next.code = p.next.code + ca
          376  +				buf = buf .. escd
          377  +				return true
          378  +			end
          379  +		end
          380  +	else
          381  +		tryesc = function(...)  end
          382  +	end
          383  +
          384  +	if type(delim) == 'function' then
          385  +		for c, p in ss.str.each(enc,str) do
          386  +			if not tryesc(str,p) then
          387  +				local skip = delim(str:sub(p.byte)) -- bytes to skip when a delimiter starts here
          388  +				if skip then
          389  +					flush()
          390  +					p.next.byte = p.next.byte + skip - #c -- next already advanced past c
          391  +				else
          392  +					buf = buf .. c
          393  +				end
          394  +			end
          395  +		end
          396  +	elseif enc.len(delim) == 1 then
          397  +		for c, p in ss.str.each(enc,str) do
          398  +			if not tryesc(str,p) then
          399  +				if c == delim then
          400  +					flush()
          401  +				else
          402  +					buf = buf .. c
          403  +				end
          404  +			end
          405  +		end
          406  +	else
          407  +		local dlcode = enc.len(delim)
          408  +		for c, p in ss.str.each(enc,str) do
          409  +			if not tryesc(str,p) then
          410  +				if str:sub(p.byte, p.byte+#delim-1) == delim then
          411  +					flush()
          412  +					p.next.byte = p.next.byte + #delim - #c -- c may be wider than one byte
          413  +					p.next.code = p.next.code + dlcode - 1 -- one code was already counted for c
          414  +				else
          415  +					buf = buf .. c
          416  +				end
          417  +			end
          418  +		end
          419  +	end
          420  +	flush()
          421  +	return elts
          422  +end
          423  +
          424  +function ss.str.langmatch(tbl, lang, enc)
          425  +	-- this performs primitive language matching. NOTE: THIS IS NOT
          426  +	-- STANDARDS COMPLIANT. it's "good enough" for now, but in the
          427  +	-- long term it needs to be rewritten to actually understand the
          428  +	-- format, primarily so that e.g. 'en-US-Latn' and 'en-Latn-US'
          429  +	-- match -- currently order is significant. it shouldn't be
          430  +	-- ref: IETF BCP 47 (RFC 5646) https://www.ietf.org/rfc/bcp/bcp47.html
          431  +	local dash = enc.encodeUCS'-'
          432  +	local tags = ss.str.split(enc, lang, dash, {escape=true}) -- subtags of the requested language
          433  +	local bestlen = 0
          434  +	local bestmatch
          435  +	for k,v in pairs(tbl) do
          436  +		if k ~= true then -- the key true holds the fallback entry, not a language tag
          437  +			local kt = ss.str.split(enc, k, dash, {escape=true})
          438  +			for i=1,math.min(#kt,#tags) do -- compare only the subtags both sides have
          439  +				if kt[i] ~= tags[i] then goto skip end
          440  +			end
          441  +			if #kt > bestlen then
          442  +				-- match the most specific matching tag
          443  +				bestmatch = k
          444  +				bestlen = #kt
          445  +			end
          446  +		end
          447  +	::skip::end
          448  +	return tbl[bestmatch] or tbl[true], bestmatch -- second result is nil when only the fallback applied
          449  +end
   138    450   
   139    451   ss.math = {}
   140    452   
   141    453   function ss.math.lerp(t, a, b)
   142    454   	return (1-t)*a + (t*b)
   143    455   end
   144    456   
................................................................................
   239    551   				elseif to == 'int' then return math.floor(tonumber(self))
   240    552   				elseif c.cast and c.cast[to] then
   241    553   					return c.cast[to](self, ...)
   242    554   				elseif type(to) == 'table' and getmetatable(to) and getmetatable(to).cvt and getmetatable(to).cvt[cls] then
   243    555   				else error((c.ident or 'class') .. ' is not convertible to ' .. (type(to) == 'string' and to or tostring(to))) end
   244    556   			end
   245    557   		end
   246         -		if c.fns then return c.fns[k] end
          558  +		if c.fns and c.fns[k] then return c.fns[k] end
          559  +		if c.index then return c.index(self,k) end
   247    560   	end
   248    561   
   249    562   	if c.cast then
   250    563   		if c.cast.string then
   251    564   			cls.__tostring = c.cast.string
   252    565   		end
   253    566   		if c.cast.number then
................................................................................
   265    578   		if c.construct then
   266    579   			c.construct(val, ...)
   267    580   		end
   268    581   		return val
   269    582   	end
   270    583   	getmetatable(cls).__call = function(_, ...) return cls.mk(...) end
   271    584   	cls.is = function(o) return getmetatable(o) == cls end
          585  +	cls.__metatable = cls -- lock metatable
   272    586   	return cls
   273    587   end
   274    588   
   275    589   -- tidy exceptions
   276    590   
   277    591   ss.exn = ss.declare {
   278    592   	ident = 'exn';
................................................................................
   302    616   		}
   303    617   	end;
   304    618   	call = function(me, ...)
   305    619   		return ss.exn(me, ...)
   306    620   	end;
   307    621   }
   308    622   ss.str.exn = ss.exnkind 'failure while string munging'
          623  +ss.bug = ss.exnkind 'tripped over bug'
   309    624   
   310    625   function ss.str.delimit(encoding, start, stop, s)
   311    626   	local depth = 0
   312    627   	encoding = encoding or ss.str.enc.utf8
   313    628   	if not ss.str.begins(s, start) then return nil end
   314         -	for c,p in encoding.each(s) do
          629  +	for c,p in ss.str.each(encoding,s) do
   315    630   		if c == (encoding.escape or '\\') then
   316    631   			p.next.byte = p.next.byte + #encoding.char(encoding.codepoint(s, p.next.byte))
   317    632   			p.next.code = p.next.code + 1
   318    633   		elseif c == start then
   319    634   			depth = depth + 1
   320    635   		elseif c == stop then
   321    636   			depth = depth - 1
................................................................................
   384    699   		return x
   385    700   	elseif select('#', ...) == 0 then
   386    701   		return nil
   387    702   	else
   388    703   		return ss.coalesce(...)
   389    704   	end
   390    705   end
          706  +
          707  +ss.tuple = {}
          708  +function ss.tuple.any(...) -- true when at least one argument was passed (nils count)
          709  +	return select('#',...) > 0
          710  +end
          711  +
          712  +function ss.tuple.cat(...) -- returns a function that appends its arguments to the captured ones
          713  +	local a = {...}
          714  +	return function(...)
          715  +		ss.push(a, ...) -- mutates a, so values accumulate across repeated calls
          716  +		return table.unpack(a)
          717  +	end
          718  +end
          719  +
          720  +function ss.tuple.suffix(sfx,n,...) -- returns the values after sfx, up to the first nil, followed by sfx
          721  +	if n ~= nil then
          722  +		return n, ss.tuple.suffix(sfx, ...) -- carry sfx along so it ends up last
          723  +	else
          724  +		return sfx
          725  +	end
          726  +end
          727  +
          728  +function ss.tuple.cdr(x, ...) return ... end -- drops the first value, returns the rest
          729  +
          730  +ss.stack = ss.declare { -- stack class; numeric indexing reads the store (see index)
          731  +	ident = 'stack';
          732  +	mk = function() return {
          733  +		top = 0; -- number of stored values
          734  +		store = {}; -- values, oldest at index 1
          735  +	} end;
          736  +	index = function(me, i) -- fallback lookup for keys not found in fns
          737  +		-- non-numeric keys would otherwise raise on the comparison below
          738  +		if type(i) ~= 'number' then return nil end
          739  +		-- i <= 0 is relative to the top (0 = top, -1 = one below);
          740  +		-- positive i is an absolute slot counted from the bottom
          741  +		if i <= 0 then return me.store[me.top + i] else return me.store[i] end
          742  +	end;
          743  +	fns = {
          744  +		push = function(me, val, ...) -- pushes each argument in turn, stopping at the first nil
          745  +			if val~=nil then
          746  +				me.top = me.top + 1
          747  +				me.store[me.top] = val
          748  +				me:push(...)
          749  +			end
          750  +			return val, ...
          751  +		end;
          752  +		pop = function(me,n)
          753  +			-- removes up to n values (default 1) and returns them, most
          754  +			-- recently pushed first. asking for more than the stack holds
          755  +			-- simply empties it; top always tracks the remaining count
          756  +			n = math.min(n or 1, me.top)
          757  +			local r = {}
          758  +			for i = 1,n do
          759  +				r[i] = me.store[me.top]
          760  +				me.store[me.top] = nil
          761  +				me.top = me.top - 1
          762  +			end
          763  +			-- explicit bounds: unpack exactly the n values collected
          764  +			return table.unpack(r, 1, n)
          765  +		end;
          766  +		set = function(me,val) -- overwrites the top value
          767  +			if me.top == 0 then
          768  +				me.top = me.top + 1 --autopush
          769  +			end
          770  +			me.store[me.top] = val
          771  +		end;
          772  +		all = function(me) return table.unpack(me.store) end; -- every value, bottom first
          773  +		each = function(me, forward) -- iterator over (value, index); bottom-up if forward, else top-down
          774  +			if forward then
          775  +				local idx = 0
          776  +				return function()
          777  +					idx = idx + 1
          778  +					if idx > me.top
          779  +						then return nil
          780  +						else return me.store[idx], idx
          781  +					end
          782  +				end
          783  +			else
          784  +				local idx = me.top + 1
          785  +				return function()
          786  +					idx = idx - 1
          787  +					if idx == 0
          788  +						then return nil
          789  +						else return me.store[idx], idx
          790  +					end
          791  +				end
          792  +			end
          793  +		end;
          794  +	};
          795  +}
          796  +
          797  +ss.automat = ss.declare { -- automaton driven by a stack of active states
          798  +	ident = 'automat';
          799  +	mk = function() return {
          800  +		state = ss.stack(); -- entries are {id=..., mem=...}; the top entry is the current state
          801  +		states = {};
          802  +		ttns = {};
          803  +		mem = {};
          804  +		match = function(sym, ttn, mach) -- does transition ttn fire on sym?
          805  +			if ttn.pred and ttn:pred(mach, sym)~=true then
          806  +				return false
          807  +			end
          808  +			if ttn.on then
          809  +				return sym == ttn.on
          810  +			end
          811  +			return false
          812  +		end;
          813  +	} end;
          814  +
          815  +	construct = function(me, def) -- copies the recognised fields of def onto the new machine
          816  +		for _,v in ipairs{'states','ttns','mem','syms'} do
          817  +			if def and def[v] then me[v] = def[v] end
          818  +		end
          819  +	end;
          820  +
          821  +	fns = {
          822  +		react = function(me,sym) -- hands sym to the current state's react handler, if it has one
          823  +			local cur = me.state[0] -- top of the state stack; nil when empty
          824  +			local s = cur and me.states[cur.id]
          825  +			if s and s.react then s:react(me, sym) end
          826  +			-- a symbol with no handler is dropped silently
          827  +		end;
          828  +
          829  +		drop = function(me,n) -- leaves the top n states, then pops and returns their entries
          830  +			for i = 0, math.min(n-1,me.state.top-1) do
          831  +				local ent = me.state[-i]; local s = me.states[ent.id]
          832  +				if s.exit then s:exit(ent.mem, me) end -- pass the entry's mem, as enter and activate do
          833  +			end
          834  +			if n < me.state.top then
          835  +				local newtop = me.states[me.state[-n].id]
          836  +				if newtop.activate then newtop:activate(me.state[-n].mem, me, n) end
          837  +			end
          838  +			return me.state:pop(n)
          839  +		end;
          840  +		clear = function(me) return me:drop(me.state.top) end;
          841  +
          842  +		transition = function(me,ttn,sym,oldstates) -- enters ttn.to; sym and oldstates are accepted but unused
          843  +			local s = me.state:push {id = ttn.to, mem = {}}
          844  +			local to = me.states[ttn.to]
          845  +			if to.enter then
          846  +				to:enter(s.mem, me)
          847  +			end
          848  +		end;
          849  +
          850  +		input = function(me,sym) -- takes the first matching transition, else lets the state react
          851  +			local cur = me.state[0]; local ttns = me.ttns[cur and cur.id]
          852  +			local _, ttn = ss.find(ttns, function(ttn)
          853  +			                        return me.match(sym, ttn, me)
          854  +			                       end)
          855  +			if ttn then
          856  +				if ttn.pop then
          857  +					local oldstates = {me:drop(ttn.pop)} -- drop is an automat method, not a stack one
          858  +					me:transition(ttn, sym, oldstates)
          859  +				else
          860  +					me:transition(ttn, sym)
          861  +				end
          862  +			else
          863  +				me:react(sym)
          864  +			end
          865  +		end;
          866  +	};
          867  +}