cortav  Diff

Differences From Artifact [581e1b0127]:

To Artifact [dc1f0ae1fb]:


212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
...
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
...
266
267
268
269
270
271
272















273
274
275
276
277










278
279
280
281
282
283
284
	ascii = {
		len = string.len; char = string.char; codepoint = string.byte;
		iswhitespace = function(c)
			return (c == ' ') or (c == '\t') or (c == '\n')
      end;
		ranges = {
			{0x00,0x1a, cc.ctl};
			{0x1b,0x1b, cc.ctl, cp.disallow};
			{0x1c,0x1f, cc.ctl};
			{0x20,0x20, cc.space};
			{0x21,0x22, cc.punct};
			{0x23,0x26, cc.symbol};
			{0x27,0x29, cc.punct};
			{0x2a,0x2b, cc.symbol};
			{0x2c,0x2f, cc.punct};
			{0x30,0x39, cc.numeral, cp.hexnumeral};
			{0x3a,0x3b, cc.punct};
			{0x3c,0x3e, cc.symbol, cp.mathop};
			{0x3f,0x3f, cc.punct};
			{0x40,0x40, cc.symbol};
			{0x41,0x46, cc.letter, cp.ucase, cp.hexnumeral};
			{0x47,0x5a, cc.letter, cp.ucase};
			{0x5b,0x5d, cc.symbol, cp.mathop};
			{0x5e,0x5e, cc.symbol, mathop};
			{0x5f,0x60, cc.symbol};
			{0x61,0x66, cc.letter, cp.lcase, cp.hexnumeral};
			{0x67,0x7a, cc.letter, cp.lcase};
			{0x7b,0x7e, cc.symbol};
			{0x7f,0x7f, cc.ctl, cp.disallow};
		}
	};
	raw = {len = string.len; char = string.char; codepoint = string.byte;
		encodeUCS = function(str) return str end;
		iswhitespace = function(c)
................................................................................

-- unicode ranges are optionally generated from consortium data
-- files and injected through a generated source file. if this
-- part of the build process is disabled (e.g. due to lack of
-- internet access, or to keep the size of the executable as
-- small as possible), we still at least can make the ascii
-- ranges available to UTF8 (UTF8 being a superset of ascii)
ss.str.enc.utf8.ranges = ss.delegate(ss.str.enc.ascii.ranges)

function ss.str.enc.ascii.encodeUCS(str)
	local newstr = ''
	for c,p in ss.str.each(ss.str.enc.utf8, str, true) do
		if c > 0x7F then
			newstr = newstr .. '?'
		else
................................................................................
		end
	end
end

for _, v in pairs{'utf8','ascii','raw'} do
	ss.str.enc[v].parse_escape = ss.str.enc_generics.pfxescape('\\',ss.str.enc[v])
end
















function ss.str.classify(enc, ch)
	if not enc.ranges then return {} end
	if type(ch)=='string' then ch = enc.codepoint(ch) end
	-- TODO










end


function ss.str.each(enc, str, ascode)
	if enc.each then return enc.each(enc,str,ascode) end
	local pm = {
		__index = {







|







|

|


|
|
|
|

|
|







 







|







 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>




<
>
>
>
>
>
>
>
>
>
>







212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
...
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
...
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291

292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
	ascii = {
		len = string.len; char = string.char; codepoint = string.byte;
		iswhitespace = function(c)
			return (c == ' ') or (c == '\t') or (c == '\n')
      end;
		ranges = {
			{0x00,0x1a, cc.ctl};
			{0x1b,0x1b, cc.ctl | cp.disallow};
			{0x1c,0x1f, cc.ctl};
			{0x20,0x20, cc.space};
			{0x21,0x22, cc.punct};
			{0x23,0x26, cc.symbol};
			{0x27,0x29, cc.punct};
			{0x2a,0x2b, cc.symbol};
			{0x2c,0x2f, cc.punct};
			{0x30,0x39, cc.numeral | cp.hexnumeral};
			{0x3a,0x3b, cc.punct};
			{0x3c,0x3e, cc.symbol | cp.mathop};
			{0x3f,0x3f, cc.punct};
			{0x40,0x40, cc.symbol};
			{0x41,0x46, cc.letter | cp.upper | cp.hexnumeral};
			{0x47,0x5a, cc.letter | cp.upper};
			{0x5b,0x5d, cc.symbol | cp.mathop};
			{0x5e,0x5e, cc.symbol | cp.mathop};
			{0x5f,0x60, cc.symbol};
			{0x61,0x66, cc.letter | cp.lower | cp.hexnumeral};
			{0x67,0x7a, cc.letter | cp.lower};
			{0x7b,0x7e, cc.symbol};
			{0x7f,0x7f, cc.ctl, cp.disallow};
		}
	};
	raw = {len = string.len; char = string.char; codepoint = string.byte;
		encodeUCS = function(str) return str end;
		iswhitespace = function(c)
................................................................................

-- unicode ranges are optionally generated from consortium data
-- files and injected through a generated source file. if this
-- part of the build process is disabled (e.g. due to lack of
-- internet access, or to keep the size of the executable as
-- small as possible), we still at least can make the ascii
-- ranges available to UTF8 (UTF8 being a superset of ascii)
ss.str.enc.utf8.ranges = ss.str.enc.ascii.ranges

function ss.str.enc.ascii.encodeUCS(str)
	local newstr = ''
	for c,p in ss.str.each(ss.str.enc.utf8, str, true) do
		if c > 0x7F then
			newstr = newstr .. '?'
		else
................................................................................
		end
	end
end

for _, v in pairs{'utf8','ascii','raw'} do
	ss.str.enc[v].parse_escape = ss.str.enc_generics.pfxescape('\\',ss.str.enc[v])
end

function ss.bitmask_expand(ty, v)
	local bitrange = ty[true]
	local fb
	if bitrange[1] ~= 0 then
		fb = v & ((1<<bitrange[1]) - 1) -- first N bits
	end
	local tbl = {}
	for j=bitrange[1], bitrange[2] do
		if (fb & (1<<j)) ~= 0 then
			tbl[ty[1<<j]] = true
		end
	end
	return tbl, fb
end

function ss.str.classify(enc, ch)
	if not enc.ranges then return {} end
	if type(ch)=='string' then ch = enc.codepoint(ch) end


	for _, r in pairs(enc.ranges) do
		if ch >= r[1] and ch <= r[2] then
			local p,b = ss.bitmask_expand(ss.str.charprop, r[3])
			if b then p[ss.str.charclass[b]] = true end
			return p
		end
	end

	return {}
end


function ss.str.each(enc, str, ascode)
	if enc.each then return enc.each(enc,str,ascode) end
	local pm = {
		__index = {