Differences From
Artifact [1f16b393f5]:
86 86 end
87 87 else
88 88 new[k] = v
89 89 end
90 90 end
91 91 return new
92 92 end
93 +
-- appends every value of the argument list to the end of tbl, in
-- order, and returns tbl so that calls can be chained
function ss.push(tbl, ...)
	local args = table.pack(...)
	local base = #tbl
	local i = 0
	-- the body always runs at least once, so a call without any
	-- values still performs a single store of nil past the end
	repeat
		i = i + 1
		tbl[base + i] = args[i]
	until i >= args.n
	return tbl
end
93 104
-- returns a table that looks up keys it lacks from tbl (lightweight
-- alternative to shallow copies).
--   tbl: table consulted for any key missing from the result
--   tpl: optional table to use as the result; its own keys take
--        precedence over those of tbl, and its metatable is replaced.
--        when omitted a fresh empty table is created
function ss.delegate(tbl,tpl)
	-- tpl used to be defaulted and then thrown away in favour of a
	-- brand-new table, so a supplied template never had any effect
	return setmetatable(tpl or {}, {__index=tbl})
end
99 110
100 111 ss.str = {}
101 112
-- true when str starts with the literal text pfx. pfx is searched
-- for as plain text, never interpreted as a lua pattern
function ss.str.begins(str, pfx)
	-- a plain-mode find whose hit position is compared against 1 was
	-- measured by the original author to be roughly 2/5ths faster than
	-- both alternatives that were tried:
	--   * string.sub(str, 1, #pfx) == pfx
	--   * a manual string.byte comparison loop over the prefix
	-- (a purpose-built string scanner written in C only improved on
	-- this by a tiny fraction)
	local hit = string.find(str, pfx, 1, true)
	return hit == 1
end
105 132
-- builds a two-way lookup table: for every pair (key, sym) found in
-- syms the result maps sym -> key and key -> sym
function ss.enum(syms)
	local lut = {}
	for ordinal, name in pairs(syms) do
		lut[name] = ordinal
		lut[ordinal] = name
	end
	return lut
end
141 +
-- returns single-bit masks as separate values, lowest bit first: the
-- bits at positions ofs, ofs+1, ... ofs+n (ofs defaults to 0). note
-- that this is n+1 values, not n
function ss.bitmask_bytes(n,ofs)
	local shift = ofs or 0
	local masks = { 1 << shift }
	for step = 1, n do
		masks[step + 1] = 1 << (step + shift)
	end
	return table.unpack(masks)
end
150 +
-- builds a two-way table between the names listed in tbl and
-- single-bit masks: the i-th name is paired with the i-th value
-- produced by ss.bitmask_bytes(#tbl, ofs). the key `true` holds a
-- pair { offset, index of the last name processed }
function ss.bitmask(tbl,ofs)
	local masks = table.pack(ss.bitmask_bytes(#tbl, ofs))
	local map = {}
	local last
	for pos, sym in ipairs(tbl) do
		local bit = masks[pos]
		map[sym] = bit
		map[bit] = sym
		last = pos
	end
	map[true] = { ofs or 0, last }
	return map
end
163 +
-- character classes; ss.enum makes the table usable in both
-- directions (name -> ordinal and ordinal -> name)
ss.str.charclass = ss.enum {
	'numeral';
	'letter';
	'symbol';
	'punct';
	'space';
	'ctl';
	'glyph'; -- hanji
}
-- character property flags. each name is paired with a single-bit
-- mask by ss.bitmask; the first name receives bit 3 and the rest
-- follow in declaration order, so the order below is significant
ss.str.charprop = ss.bitmask({
	'hexnumeral'; -- character that can be used to write hexadecimal notation
	'upper';
	'lower';
	'diac'; -- diacritic/modifier letter
	'wordbreak'; -- char causes following characters to be treated as a separate word (e.g. punctuation)
	'wordsep'; -- char causes previous and following characters to be treated as separate words; char constitutes a word of its own in between (e.g. interpunct)
	'breakokay'; -- is it okay to break words at this character? (eg hyphen)
	'mathop'; -- char is a mathematical operator
	'disallow'; -- char is not allowed in narrative text
	-- brackets
	'brack';
	'right';
	'left';
	'noprint'; -- character deposits no ink
	'superimpose'; -- character is superimposed over previous
}, 3)
181 +
ss.str.enc_generics = {
	-- builds an escape parser for the prefix escape ch in encoding
	-- enc. the returned function inspects the start of s:
	--  * s is exactly ch    -> 0, 0, ch
	--  * s starts with ch   -> byte length of ch, enc.len(ch), and
	--                          the character that follows ch
	--  * anything else      -> the results of chain(s), or nothing
	--                          when no chain was supplied
	pfxescape = function(ch, enc, chain)
		local pfxbytes = #ch
		local pfxcodes = enc.len(ch)
		return function(s)
			if s == ch then
				return 0, 0, ch
			end
			if ss.str.begins(s, ch) then
				local follower = enc.char(enc.codepoint(s, pfxbytes + 1))
				return pfxbytes, pfxcodes, follower
			end
			if chain then
				return chain(s)
			end
		end
	end;
};
198 +
199 +local cc,cp = ss.str.charclass, ss.str.charprop
-- table of supported string encodings. each encoding supplies
--   len(str)        length of str as counted by the encoding
--   char(code)      code -> encoded character
--   codepoint(s,i)  code of the character at byte i of s
--   iswhitespace(c) whether the character c separates words
-- and optionally
--   encodeUCS(str)  conversion of a string into the encoding
--   ranges          list of { first, last, class, props... } rows
--                   built from ss.str.charclass / ss.str.charprop
ss.str.enc = {
	utf8 = {
		char = utf8.char;
		codepoint = utf8.codepoint;
		len = utf8.len;
		encodeUCS = function(str) return str end;
		iswhitespace = function(c)
			return (c == ' ') or (c == '\t') or (c == '\n')
				or (c == '\u{3000}')
				or (c == '\u{200B}')
		end;
	};
	ascii = {
		len = string.len; char = string.char; codepoint = string.byte;
		iswhitespace = function(c)
			return (c == ' ') or (c == '\t') or (c == '\n')
		end;
		-- property names must match the ones declared in
		-- ss.str.charprop: a misspelt field (or a bare global) is nil
		-- and would silently drop the flag from the row
		ranges = {
			{0x00,0x1a, cc.ctl};
			{0x1b,0x1b, cc.ctl, cp.disallow};
			{0x1c,0x1f, cc.ctl};
			{0x20,0x20, cc.space};
			{0x21,0x22, cc.punct};
			{0x23,0x26, cc.symbol};
			{0x27,0x29, cc.punct};
			{0x2a,0x2b, cc.symbol};
			{0x2c,0x2f, cc.punct};
			{0x30,0x39, cc.numeral, cp.hexnumeral};
			{0x3a,0x3b, cc.punct};
			{0x3c,0x3e, cc.symbol, cp.mathop};
			{0x3f,0x3f, cc.punct};
			{0x40,0x40, cc.symbol};
			{0x41,0x46, cc.letter, cp.upper, cp.hexnumeral};
			{0x47,0x5a, cc.letter, cp.upper};
			{0x5b,0x5d, cc.symbol, cp.mathop};
			{0x5e,0x5e, cc.symbol, cp.mathop};
			{0x5f,0x60, cc.symbol};
			{0x61,0x66, cc.letter, cp.lower, cp.hexnumeral};
			{0x67,0x7a, cc.letter, cp.lower};
			{0x7b,0x7e, cc.symbol};
			{0x7f,0x7f, cc.ctl, cp.disallow};
		}
	};
	raw = {len = string.len; char = string.char; codepoint = string.byte;
		encodeUCS = function(str) return str end;
		iswhitespace = function(c)
			return (c == ' ') or (c == '\t') or (c == '\n')
		end;
	};
}
114 250
115 -function ss.str.enc.utf8.each(str, ascode)
251 +-- unicode ranges are optionally generated from consortium data
252 +-- files and injected through a generated source file. if this
253 +-- part of the build process is disabled (e.g. due to lack of
254 +-- internet access, or to keep the size of the executable as
255 +-- small as possible), we still at least can make the ascii
256 +-- ranges available to UTF8 (UTF8 being a superset of ascii)
257 +ss.str.enc.utf8.ranges = ss.delegate(ss.str.enc.ascii.ranges)
258 +
-- converts a UTF-8 string to ascii. every codepoint above 0x7F is
-- replaced by a question mark; everything else is copied through.
-- returns the converted string
function ss.str.enc.ascii.encodeUCS(str)
	-- pieces are collected in a table and joined once at the end
	-- instead of growing a string one character at a time
	local out = {}
	for c in ss.str.each(ss.str.enc.utf8, str, true) do
		if c > 0x7F then
			out[#out + 1] = '?'
		else
			out[#out + 1] = string.char(c)
		end
	end
	-- the result was previously built but never handed back
	return table.concat(out)
end
269 +
-- give each built-in encoding a backslash escape parser
for _, name in pairs{'utf8','ascii','raw'} do
	local enc = ss.str.enc[name]
	enc.parse_escape = ss.str.enc_generics.pfxescape('\\', enc)
end
273 +
-- character classification; not implemented yet. an encoding
-- without range data yields an empty table, otherwise nothing is
-- returned for now
function ss.str.classify(enc, ch)
	if enc.ranges then
		-- accept either a character or its code
		if type(ch)=='string' then
			ch = enc.codepoint(ch)
		end
		-- TODO
		return
	end
	return {}
end
279 +
280 +
-- iterates over the characters of str in encoding enc. an encoding
-- may take over completely by providing its own enc.each.
--   enc:    encoding table (char, codepoint, parse_escape)
--   str:    string to walk
--   ascode: when truthy the codes are yielded instead of the
--           encoded characters
-- every step yields the character (or code) and a position record
-- with the fields
--   code  ordinal of the character within str
--   byte  byte offset of the character within str
--   next  the live cursor; changing next.byte / next.code moves the
--         point the following step resumes from
-- the record also has the method esc(): it hands the text starting
-- at the current character to enc.parse_escape and, if that reports
-- an escape, advances the cursor by the reported byte and code
-- counts and returns the escaped character. it returns nothing if
-- no escape begins here
function ss.str.each(enc, str, ascode)
	if enc.each then return enc.each(enc,str,ascode) end
	local pos = {
		code = 1;
		byte = 1;
	}
	local pm = {
		__index = {
			esc = function(self)
				local ba, bc, nc = enc.parse_escape(str:sub(self.byte))
				if ba then
					-- the cursor already sits just past the current
					-- character, so the reported counts are added as they
					-- are (the same way ss.str.split consumes them).
					-- subtracting one never skipped the escaped character
					-- and, for a lone trailing escape (0, 0), moved the
					-- cursor backwards so the iteration never ended
					self.next.byte = self.next.byte + ba
					self.next.code = self.next.code + bc
					return nc
				end
			end;
		};
	}
	return function()
		if pos.byte > #str then return nil end
		local code = enc.codepoint(str, pos.byte)
		local here = setmetatable({
			code = pos.code;
			byte = pos.byte;
			next = pos;
		},pm)
		-- the encoded form is needed either way to learn how many
		-- bytes the character occupies
		local chr = enc.char(code)
		pos.byte = pos.byte + #chr
		pos.code = pos.code + 1
		if ascode then
			return code, here
		end
		return chr, here
	end
end
317 +
-- splits str into words at whitespace, as judged by
-- enc.iswhitespace. an encoding may take over completely by
-- providing its own enc.breakwords.
--   max:  optional; once this many words have been collected the
--         rest of the string is appended unsplit as one final word
--   opts: optional table; opts.escape enables escape sequences, so
--         an escaped character is copied into the current word
--         instead of being examined
-- returns the list of words
function ss.str.breakwords(enc, str, max, opts)
	-- forward every argument so an override can honour max and opts
	if enc.breakwords then return enc.breakwords(str, max, opts) end
	local words = {}
	opts = opts or {}
	local buf = ''
	local flush = function()
		if buf ~= '' then table.insert(words,buf) buf = '' end
	end
	for c, p in ss.str.each(enc,str) do
		local nc
		if opts.escape then
			nc = p:esc()
		end
		if nc then
			-- string concatenation; `+` would attempt arithmetic
			buf = buf .. nc
		elseif enc.iswhitespace(c) then
			flush()
			if max and #words == max then
				local rs = str:sub(p.next.byte)
				if rs ~= '' then
					table.insert(words, rs)
				end
				break
			end
		else
			buf = buf .. c
		end
	end
	flush()
	return words
end
-- joins a list of words into one string. an encoding may supply its
-- own mergewords; otherwise the words are joined with enc.wordsep,
-- falling back to a single space
function ss.str.mergewords(enc, lst)
	local override = enc.mergewords
	if override then
		return override(lst)
	end
	local sep = enc.wordsep
	if not sep then sep = ' ' end
	return table.concat(lst, sep)
end
-- splits str into lines. an encoding may supply its own breaklines;
-- otherwise the string is split at the encoding's form of '\n' by
-- ss.str.split, which also receives opts
function ss.str.breaklines(enc, str, opts)
	-- hand the override the string itself (it used to receive an
	-- undefined global and therefore always nil)
	if enc.breaklines then return enc.breaklines(str,opts) end
	return ss.str.split(enc, str, enc.encodeUCS'\n', opts)
end
357 +
-- splits str into the pieces found between delimiters. an encoding
-- may take over completely by providing its own enc.split.
--   delim: either a non-empty string, or a function that receives
--          the remainder of str from the current character on and
--          returns a skip count when a delimiter starts there
--   opts:  optional table
--          keep_empties  also keep pieces that are empty
--          escape        honour escape sequences: the escaped
--                        character is copied into the current piece
--                        without being tested against the delimiter
-- returns the list of pieces. raises an error for an empty string
-- delimiter
function ss.str.split(enc, str, delim, opts)
	if enc.split then return enc.split(str,delim,opts) end
	opts = opts or {}
	local elts = {}
	local buf = ''
	local flush = function()
		if buf ~= '' or opts.keep_empties then
			table.insert(elts,buf)
			buf = ''
		end
	end
	local tryesc if opts.escape then
		tryesc = function(str, p)
			local ba, ca, escd = enc.parse_escape(str:sub(p.byte))
			if ba then
				p.next.byte = p.next.byte + ba
				p.next.code = p.next.code + ca
				buf = buf .. escd
				return true
			end
		end
	else
		tryesc = function(...) end
	end

	if type(delim) == 'function' then
		for c, p in ss.str.each(enc,str) do
			if not tryesc(str,p) then
				local skip = delim(str:sub(p.byte))
				if skip then
					flush()
					-- NOTE(review): this is only exact when the current
					-- character is one byte long, and the code counter is
					-- left alone; the contract of the callback is not
					-- visible here, so the arithmetic is kept as it was
					p.next.byte = p.next.byte + skip - 1
				else
					buf = buf .. c
				end
			end
		end
	else
		local dlbytes = #delim
		if dlbytes == 0 then
			-- an empty delimiter matches everywhere without consuming
			-- anything, which used to stall the scan forever
			error('ss.str.split: delimiter must not be empty', 2)
		end
		local dlcode = enc.len(delim)
		if dlcode == 1 then
			for c, p in ss.str.each(enc,str) do
				if not tryesc(str,p) then
					if c == delim then
						flush()
					else
						buf = buf .. c
					end
				end
			end
		else
			for c, p in ss.str.each(enc,str) do
				if not tryesc(str,p) then
					if str:sub(p.byte, p.byte+dlbytes-1) == delim then
						flush()
						-- resume right behind the delimiter. this is measured
						-- from the start of the match, because the cursor has
						-- already moved past the first character, which may
						-- be wider than one byte
						p.next.byte = p.byte + dlbytes
						p.next.code = p.code + dlcode
					else
						buf = buf .. c
					end
				end
			end
		end
	end
	flush()
	return elts
end
423 +
-- picks the entry of tbl whose language-tag key fits lang best.
-- NOTE: THIS IS NOT STANDARDS COMPLIANT. it is primitive matching
-- that is "good enough" for now; in the long term it needs to be
-- rewritten to actually understand the format, primarily so that
-- e.g. 'en-US-Latn' and 'en-Latn-US' match -- currently order is
-- significant, and it shouldn't be
-- ref: IETF BCP 47 (RFC 5646) https://www.ietf.org/rfc/bcp/bcp47.html
--
-- a key fits when all subtags it shares positions with in lang are
-- equal; among fitting keys the one with the most subtags wins. the
-- key `true` is the fallback entry. returns the chosen value and
-- the key that was matched (nil when the fallback was used)
function ss.str.langmatch(tbl, lang, enc)
	local dash = enc.encodeUCS'-'
	local wanted = ss.str.split(enc, lang, dash, {escape=true})
	local bestmatch
	local bestlen = 0
	for key in pairs(tbl) do
		if key ~= true then
			local have = ss.str.split(enc, key, dash, {escape=true})
			local agrees = true
			for i = 1, math.min(#have, #wanted) do
				if have[i] ~= wanted[i] then
					agrees = false
					break
				end
			end
			-- match the most specific matching tag
			if agrees and #have > bestlen then
				bestmatch = key
				bestlen = #have
			end
		end
	end
	return tbl[bestmatch] or tbl[true], bestmatch
end
138 450
139 451 ss.math = {}
140 452
-- linear interpolation between a and b: t = 0 gives a, t = 1 gives b
function ss.math.lerp(t, a, b)
	local from = (1-t)*a
	local to = t*b
	return from + to
end
144 456
................................................................................
239 551 elseif to == 'int' then return math.floor(tonumber(self))
240 552 elseif c.cast and c.cast[to] then
241 553 return c.cast[to](self, ...)
242 554 elseif type(to) == 'table' and getmetatable(to) and getmetatable(to).cvt and getmetatable(to).cvt[cls] then
243 555 else error((c.ident or 'class') .. ' is not convertible to ' .. (type(to) == 'string' and to or tostring(to))) end
244 556 end
245 557 end
246 - if c.fns then return c.fns[k] end
558 + if c.fns and c.fns[k] then return c.fns[k] end
559 + if c.index then return c.index(self,k) end
247 560 end
248 561
249 562 if c.cast then
250 563 if c.cast.string then
251 564 cls.__tostring = c.cast.string
252 565 end
253 566 if c.cast.number then
................................................................................
265 578 if c.construct then
266 579 c.construct(val, ...)
267 580 end
268 581 return val
269 582 end
270 583 getmetatable(cls).__call = function(_, ...) return cls.mk(...) end
271 584 cls.is = function(o) return getmetatable(o) == cls end
585 + cls.__metatable = cls -- lock metatable
272 586 return cls
273 587 end
274 588
275 589 -- tidy exceptions
276 590
277 591 ss.exn = ss.declare {
278 592 ident = 'exn';
................................................................................
302 616 }
303 617 end;
304 618 call = function(me, ...)
305 619 return ss.exn(me, ...)
306 620 end;
307 621 }
308 622 ss.str.exn = ss.exnkind 'failure while string munging'
623 +ss.bug = ss.exnkind 'tripped over bug'
309 624
310 625 function ss.str.delimit(encoding, start, stop, s)
311 626 local depth = 0
312 627 encoding = encoding or ss.str.enc.utf8
313 628 if not ss.str.begins(s, start) then return nil end
314 - for c,p in encoding.each(s) do
629 + for c,p in ss.str.each(encoding,s) do
315 630 if c == (encoding.escape or '\\') then
316 631 p.next.byte = p.next.byte + #encoding.char(encoding.codepoint(s, p.next.byte))
317 632 p.next.code = p.next.code + 1
318 633 elseif c == start then
319 634 depth = depth + 1
320 635 elseif c == stop then
321 636 depth = depth - 1
................................................................................
384 699 return x
385 700 elseif select('#', ...) == 0 then
386 701 return nil
387 702 else
388 703 return ss.coalesce(...)
389 704 end
390 705 end
706 +
707 +ss.tuple = {}
-- true when at least one argument was passed (nils count)
function ss.tuple.any(...)
	local count = select('#', ...)
	return count > 0
end
711 +
-- captures a list of values and returns a function; calling that
-- function returns the captured values followed by the function's
-- own arguments
function ss.tuple.cat(...)
	local head = table.pack(...)
	return function(...)
		local tail = table.pack(...)
		-- build a fresh list on every call: appending to the captured
		-- values made each call return the arguments of all earlier
		-- calls as well. explicit counts keep nil arguments in place
		local all = {}
		for i = 1, head.n do all[i] = head[i] end
		for i = 1, tail.n do all[head.n + i] = tail[i] end
		return table.unpack(all, 1, head.n + tail.n)
	end
end
719 +
-- returns the values after sfx with sfx appended at the end:
-- suffix(s, a, b) gives a, b, s. the list ends at its first nil
function ss.tuple.suffix(sfx,n,...)
	if n ~= nil then
		-- sfx has to travel along with the recursion; without it the
		-- next value was taken for the suffix and the real one lost
		return n, ss.tuple.suffix(sfx, ...)
	else
		return sfx
	end
end
727 +
-- returns every argument except the first
function ss.tuple.cdr(x, ...)
	return select(2, x, ...)
end
729 +
-- a simple stack. `top` is the number of stored elements and
-- `store` holds them, bottom first. indexing a stack with a number
-- reads an element: positive numbers count from the bottom, while
-- zero and negative numbers count down from the top (0 is the top
-- element, -1 the one below it)
ss.stack = ss.declare {
	ident = 'stack';
	mk = function() return {
		top = 0;
		store = {};
	} end;
	index = function(me, i)
		-- keys that are not numbers cannot address an element;
		-- comparing them with 0 would raise an error
		if type(i) ~= 'number' then return nil end
		if i <= 0 then
			return me.store[me.top + i]
		else
			return me.store[i]
		end
	end;
	fns = {
		-- pushes the given values in order, stopping at the first nil;
		-- returns its arguments unchanged
		push = function(me, val, ...)
			if val~=nil then
				me.top = me.top + 1
				me.store[me.top] = val
				me:push(...)
			end
			return val, ...
		end;
		-- removes up to n elements (default 1) and returns them, the
		-- former top element first
		pop = function(me,n)
			-- never remove more than is there. the count is kept in
			-- step with the storage even when the stack is emptied,
			-- and the order of the results is the same in every case
			n = math.max(0, math.min(n or 1, me.top))
			local r = {}
			for i = 0,n-1 do
				r[i+1] = me.store[me.top - i]
				me.store[me.top - i] = nil
			end
			me.top = me.top - n
			return table.unpack(r, 1, n)
		end;
		-- replaces the top element, pushing if the stack is empty
		set = function(me,val)
			if me.top == 0 then
				me.top = me.top + 1 --autopush
			end
			me.store[me.top] = val
		end;
		-- returns all elements, bottom first
		all = function(me) return table.unpack(me.store, 1, me.top) end;
		-- iterator over element, index pairs; bottom to top when
		-- forward is truthy, top to bottom otherwise
		each = function(me, forward)
			if forward then
				local idx = 0
				return function()
					idx = idx + 1
					if idx > me.top
						then return nil
						else return me.store[idx], idx
					end
				end
			else
				local idx = me.top + 1
				return function()
					idx = idx - 1
					if idx <= 0
						then return nil
						else return me.store[idx], idx
					end
				end
			end
		end;
	};
}
796 +
-- pushdown automaton. `state` is a stack of { id, mem } entries
-- whose top is the current state; `states` maps a state id to its
-- definition and `ttns` maps a state id to the transitions leaving
-- that state
ss.automat = ss.declare {
	ident = 'automat';
	mk = function() return {
		state = ss.stack();
		states = {};
		ttns = {};
		mem = {};
		-- decides whether transition ttn fires for sym: an optional
		-- predicate must return exactly true, and the symbol must
		-- equal ttn.on
		match = function(sym, ttn, mach)
			if ttn.pred and ttn:pred(mach, sym)~=true then
				return false
			end
			if ttn.on then
				return sym == ttn.on
			end
			return false
		end;
	} end;

	-- copies the recognised fields of the definition table into the
	-- new machine
	construct = function(me, def)
		if not def then return end
		-- the list holds field names as its values; each one is looked
		-- up in def and copied under the same name
		for _,k in ipairs{'states','ttns','mem','syms'} do
			if def[k] then me[k] = def[k] end
		end
	end;

	fns = {
		-- lets the current state respond to a symbol that caused no
		-- transition
		react = function(me,sym)
			local cur = me.state[0] -- top of the state stack
			local s = cur and me.states[cur.id]
			-- NOTE(review): the guard tests `input` while the call goes
			-- to `react`; kept as found, confirm which one is meant
			if s and s.input then
				s:react(me, sym)
			end
		end;

		-- leaves the n topmost states, running their exit hooks,
		-- re-activates the state that ends up on top and returns the
		-- removed stack entries
		drop = function(me,n)
			for i = 0, math.min(n-1,me.state.top-1) do
				local s = me.states[me.state[-i].id]
				-- NOTE(review): exit receives the definition's own `mem`
				-- while activate below receives the stack entry's `mem`;
				-- kept as found
				if s.exit then s:exit(s.mem, me) end
			end
			if n < me.state.top then
				local newtop = me.states[me.state[-n].id]
				if newtop.activate then newtop:activate(me.state[-n].mem, me, n) end
			end
			return me.state:pop(n)
		end;
		clear = function(me) return me:drop(me.state.top) end;

		-- enters the target state of ttn: pushes a fresh stack entry
		-- and runs the state's enter hook
		transition = function(me,ttn,oldstates)
			local s = me.state:push {id = ttn.to, mem = {}}
			local to = me.states[ttn.to]
			if to.enter then
				to:enter(s.mem, me)
			end
		end;

		-- feeds one symbol to the machine: takes the first matching
		-- transition of the current state, or lets the state react
		input = function(me,sym)
			local cur = me.state[0] -- top of the state stack
			local ttns = cur and me.ttns[cur.id]
			local ttn
			if ttns then
				local _
				_, ttn = ss.find(ttns, function(t)
					return me.match(sym, t, me)
				end)
			end
			if ttn then
				if ttn.pop then
					-- dropping states is an operation of the machine; the
					-- stack itself has no drop method
					local oldstates = {me:drop(ttn.pop)}
					me:transition(ttn, oldstates)
				else
					me:transition(ttn)
				end
			else
				me:react(sym)
			end
		end;
	};
}