cortav: Diff

Differences From Artifact [581e1b0127]:

File sirsem.lua — part of check-in [52b9bce7dd] at 2021-12-26 04:08:02 on branch trunk — all kindsa shit (user: lexi, size: 20235) [annotate] [blame] [check-ins using]

To Artifact [dc1f0ae1fb]:

File sirsem.lua — part of check-in [d1b7d2fd5f] at 2021-12-26 17:49:50 on branch trunk — get math parser working (user: lexi, size: 20724) [annotate] [blame] [check-ins using]

	-- 7-bit ascii encoding. every `ranges` entry has the form
	-- {first codepoint, last codepoint, class, properties...}
	-- NOTE(review): cc.* / cp.* are defined outside this view; presumably
	-- character classes and character properties respectively -- confirm
	ascii = {
		len = string.len; char = string.char; codepoint = string.byte;
		-- only space, tab and newline count as whitespace here
		iswhitespace = function(c)
			return (c == ' ') or (c == '\t') or (c == '\n')
		end;
		ranges = {
			{0x00,0x1a, cc.ctl};
			{0x1b,0x1b, cc.ctl, cp.disallow};
			{0x1c,0x1f, cc.ctl};
			{0x20,0x20, cc.space};
			{0x21,0x22, cc.punct};
			{0x23,0x26, cc.symbol};
			{0x27,0x29, cc.punct};
			{0x2a,0x2b, cc.symbol};
			{0x2c,0x2f, cc.punct};
			{0x30,0x39, cc.numeral, cp.hexnumeral};
			{0x3a,0x3b, cc.punct};
			{0x3c,0x3e, cc.symbol, cp.mathop};
			{0x3f,0x3f, cc.punct};
			{0x40,0x40, cc.symbol};
			{0x41,0x46, cc.letter, cp.ucase, cp.hexnumeral};
			{0x47,0x5a, cc.letter, cp.ucase};
			{0x5b,0x5d, cc.symbol, cp.mathop};
			-- was a bare `mathop`, i.e. an undefined global that
			-- evaluated to nil and silently dropped the property
			{0x5e,0x5e, cc.symbol, cp.mathop};
			{0x5f,0x60, cc.symbol};
			{0x61,0x66, cc.letter, cp.lcase, cp.hexnumeral};
			{0x67,0x7a, cc.letter, cp.lcase};
			{0x7b,0x7e, cc.symbol};
			{0x7f,0x7f, cc.ctl, cp.disallow};
		}
	};
	raw = {len = string.len; char = string.char; codepoint = string.byte;
		encodeUCS = function(str) return str end;
		iswhitespace = function(c)
................................................................................

-- unicode ranges are optionally generated from consortium data
-- files and injected through a generated source file. if that
-- part of the build process is disabled (e.g. due to lack of
-- internet access, or to keep the size of the executable as
-- small as possible), we can still at least make the ascii
-- ranges available to UTF8 (UTF8 being a superset of ascii).
-- NOTE(review): ss.delegate is defined outside this view; presumably
-- it wraps the ascii table instead of aliasing it directly -- confirm
ss.str.enc.utf8.ranges = ss.delegate(ss.str.enc.ascii.ranges)

function ss.str.enc.ascii.encodeUCS(str)
	local newstr = ''
	for c,p in ss.str.each(ss.str.enc.utf8, str, true) do
		if c > 0x7F then
			newstr = newstr .. '?'
		else
................................................................................
		end
	end
end

-- give each built-in encoding a parse_escape produced by the generic
-- pfxescape constructor, called with '\\' and the encoding table itself
for _, encname in ipairs{'utf8','ascii','raw'} do
	local enc = ss.str.enc[encname]
	enc.parse_escape = ss.str.enc_generics.pfxescape('\\', enc)
end
















--- Classify a character against an encoding's range table.
-- The range lookup itself is not written yet (see TODO below).
-- @param enc encoding table; `enc.ranges` and `enc.codepoint` are consulted
-- @param ch  a codepoint number, or a string that is converted with `enc.codepoint`
-- @return currently always an empty table
function ss.str.classify(enc, ch)
	if not enc.ranges then return {} end
	if type(ch)=='string' then ch = enc.codepoint(ch) end
	-- TODO: look ch up in enc.ranges
	-- until that exists, answer the same way the no-ranges path does
	-- instead of falling off the end and handing the caller nil
	return {}
end


function ss.str.each(enc, str, ascode)
	if enc.each then return enc.each(enc,str,ascode) end
	local pm = {
		__index = {








|







|

|


|
|
|
|

|
|







 







|







 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>




<
>
>
>
>
>
>
>
>
>
>

	-- 7-bit ascii encoding. every `ranges` entry has the form
	-- {first codepoint, last codepoint, bits}, where `bits` is one
	-- cc.* value OR-ed together with any number of cp.* flags; the
	-- classifier only ever reads that third slot
	ascii = {
		len = string.len; char = string.char; codepoint = string.byte;
		-- only space, tab and newline count as whitespace here
		iswhitespace = function(c)
			return (c == ' ') or (c == '\t') or (c == '\n')
		end;
		ranges = {
			{0x00,0x1a, cc.ctl};
			{0x1b,0x1b, cc.ctl | cp.disallow};
			{0x1c,0x1f, cc.ctl};
			{0x20,0x20, cc.space};
			{0x21,0x22, cc.punct};
			{0x23,0x26, cc.symbol};
			{0x27,0x29, cc.punct};
			{0x2a,0x2b, cc.symbol};
			{0x2c,0x2f, cc.punct};
			{0x30,0x39, cc.numeral | cp.hexnumeral};
			{0x3a,0x3b, cc.punct};
			{0x3c,0x3e, cc.symbol | cp.mathop};
			{0x3f,0x3f, cc.punct};
			{0x40,0x40, cc.symbol};
			{0x41,0x46, cc.letter | cp.upper | cp.hexnumeral};
			{0x47,0x5a, cc.letter | cp.upper};
			{0x5b,0x5d, cc.symbol | cp.mathop};
			{0x5e,0x5e, cc.symbol | cp.mathop};
			{0x5f,0x60, cc.symbol};
			{0x61,0x66, cc.letter | cp.lower | cp.hexnumeral};
			{0x67,0x7a, cc.letter | cp.lower};
			{0x7b,0x7e, cc.symbol};
			-- was `cc.ctl, cp.disallow`: the comma pushed the flag into a
			-- fourth slot nobody reads, so DEL was never marked disallowed
			{0x7f,0x7f, cc.ctl | cp.disallow};
		}
	};
	raw = {len = string.len; char = string.char; codepoint = string.byte;
		encodeUCS = function(str) return str end;
		iswhitespace = function(c)
................................................................................

-- unicode ranges are optionally generated from consortium data
-- files and injected through a generated source file. if that
-- part of the build process is disabled (e.g. due to lack of
-- internet access, or to keep the size of the executable as
-- small as possible), we can still at least make the ascii
-- ranges available to UTF8 (UTF8 being a superset of ascii).
-- the table is shared by reference, not copied: a change made
-- through either encoding is visible through the other
ss.str.enc.utf8.ranges = ss.str.enc.ascii.ranges

function ss.str.enc.ascii.encodeUCS(str)
	local newstr = ''
	for c,p in ss.str.each(ss.str.enc.utf8, str, true) do
		if c > 0x7F then
			newstr = newstr .. '?'
		else
................................................................................
		end
	end
end

-- give each built-in encoding a parse_escape produced by the generic
-- pfxescape constructor, called with '\\' and the encoding table itself
for _, encname in ipairs{'utf8','ascii','raw'} do
	local enc = ss.str.enc[encname]
	enc.parse_escape = ss.str.enc_generics.pfxescape('\\', enc)
end

--- Split a packed integer into a set of flag keys plus its low-order remainder.
-- `ty[true]` must hold `{first, last}`, the inclusive range of bit indices
-- that carry flags, and `ty[1<<j]` the key to report when bit `j` is set.
-- @param ty flag description table laid out as above
-- @param v  integer to decode
-- @return table mapping the key of every set (and named) flag bit to `true`
-- @return the bits of `v` below `first`, or nil when `first` is 0
function ss.bitmask_expand(ty, v)
	local first, last = ty[true][1], ty[true][2]
	local low
	if first ~= 0 then
		low = v & ((1 << first) - 1) -- everything beneath the flag bits
	end
	local flags = {}
	for j = first, last do
		-- probe the full value: `low` has every flag bit masked off (so no
		-- flag could ever match against it) and is nil when first == 0
		if (v & (1 << j)) ~= 0 then
			local name = ty[1 << j]
			-- a set bit with no entry in `ty` is skipped; nil cannot be a table key
			if name ~= nil then flags[name] = true end
		end
	end
	return flags, low
end

--- Look up the class and properties recorded for a character.
-- Scans `enc.ranges` for an entry `{first, last, bits}` containing the
-- codepoint and decodes `bits` with ss.bitmask_expand / ss.str.charprop;
-- the low-order remainder, if any, is translated through ss.str.charclass.
-- @param enc encoding table; `enc.ranges` and `enc.codepoint` are consulted
-- @param ch  a codepoint number, or a string that is converted with `enc.codepoint`
-- @return set-like table (key -> true); empty when the encoding has no
--         ranges, `ch` yields no codepoint, or no range contains it
function ss.str.classify(enc, ch)
	if not enc.ranges then return {} end
	if type(ch)=='string' then ch = enc.codepoint(ch) end
	-- string.byte('') returns nothing, so an empty string leaves ch nil;
	-- comparing nil against the range bounds below would raise
	if ch == nil then return {} end

	for _, r in pairs(enc.ranges) do
		if ch >= r[1] and ch <= r[2] then
			local p,b = ss.bitmask_expand(ss.str.charprop, r[3])
			if b then
				local class = ss.str.charclass[b]
				-- an unmapped class value is ignored; nil cannot be a table key
				if class ~= nil then p[class] = true end
			end
			return p
		end
	end

	return {}
end


function ss.str.each(enc, str, ascode)
	if enc.each then return enc.each(enc,str,ascode) end
	local pm = {
		__index = {

cortav Diff

Differences From Artifact [581e1b0127]:

To Artifact [dc1f0ae1fb]: