cortav  Diff

Differences From Artifact [3976f4bc78]:

To Artifact [cf6aee3c65]:


    21     21   local file = io.stdin
    22     22   local path
    23     23   if arg[1] then
    24     24   	path = arg[1]
    25     25   	file = io.open(path, 'rb')
    26     26   end
    27     27   
    28         -local bitmask_raw = function(n,ofs)
    29         -	ofs = ofs or 0
    30         -	local function rec(i)
    31         -		if i > n then return end
    32         -		return 1<<(i+ofs), rec(i+1)
    33         -	end
    34         -	return 1<<ofs, rec(1)
    35         -end
    36         -
    37         -local bitmask = function(tbl,ofs)
    38         -	local codes = {bitmask_raw(#tbl,ofs)}
    39         -	local m = {}
    40         -	local maxbit
    41         -	for i, s in ipairs(tbl) do
    42         -		m[s] = codes[i]
    43         -		m[codes[i]] = s
    44         -		maxbit = i
    45         -	end
    46         -	m[true] = {ofs or 0,maxbit}
    47         -	return m
    48         -end
    49         -
    50         -local basictype = enum {
    51         -	'numeral';
    52         -	'alpha';
    53         -	'symbol';
    54         -	'punct';
    55         -	'space';
    56         -	'ctl';
    57         -	'glyph'; -- hanji
    58         -}
    59         -local props = bitmask({
    60         -	'hex',
    61         -	'upper', 'lower', 'diac',
    62         -	'wordbreak', 'wordsep',
    63         -	'disallow',
    64         -	'brack', 'right', 'left',
    65         -	'noprint', 'superimpose'
    66         -}, 3)
    67         -
           28  +local ss = require'sirsem'
           29  +local basictype = ss.str.charclass
           30  +local props = ss.str.charprop
    68     31   local overrides = {
    69     32   	[0x200B] = basictype.space | props.wordsep; -- database entry is wrong
    70     33   }
    71     34   
    72     35   local mask = ~0 -- mask out irrelevant properties to compactify database
    73     36   
    74     37   local function parsecat(tbl)
................................................................................
    78     41   	elseif tbl.class == 'Nd' then c = b.numeral
    79     42   	elseif tbl.class == 'No' then c = b.numeral | p.diac
    80     43   	elseif tbl.class == 'Cc' then
    81     44   		if tbl.kind == 'S'
    82     45   		or tbl.kind == 'WS'
    83     46   		or tbl.kind == 'B' then c  = b.space | p.wordsep
    84     47         else c = b.ctl | p.wordbreak | p.disallow end
    85         -	elseif tbl.class == 'Lu' then c = b.alpha | p.upper
    86         -	elseif tbl.class == 'Ll' then c = b.alpha | p.lower
           48  +	elseif tbl.class == 'Lu' then c = b.letter | p.upper
           49  +	elseif tbl.class == 'Ll' then c = b.letter | p.lower
    87     50   	elseif tbl.class == 'Lo'
    88         -	    or tbl.class == 'Lt' then c = b.alpha
           51  +	    or tbl.class == 'Lt' then c = b.letter
    89     52   	elseif tbl.class == 'Po' then c = b.punct | p.wordbreak
    90     53   	elseif tbl.class == 'Sm' then c = b.symbol | p.wordsep
    91     54   	elseif tbl.class == 'Ps' then c = b.punct | p.brack | p.left
    92     55   	elseif tbl.class == 'Pe' then c = b.punct | p.brack | p.right
    93     56   	elseif tbl.class == 'Pc'
    94     57   	    or tbl.class == 'Pd'
    95     58   	    or tbl.class == 'Sk'
................................................................................
   104     67   
   105     68   local ranuirAlpha = {0xe39d, 0xe39f, 0xe3ad, 0xe3af, 0xe3b5, 0xe3b7, 0xe3b9, 0xe3bb, 0xe3bd, 0xe3be, 0xe3bf, 0xe3c5, 0xe3c7, 0xe3c9, 0xe3cb, 0xe3cc, 0xe3cd, 0xe3ce, 0xe3cf}
   106     69   local ranuirSpecial = {
   107     70   	[0xe390] = basictype.space | props.wordsep;
   108     71   }
   109     72   
   110     73   local ranuir = {}
   111         -for _,v in pairs(ranuirAlpha) do ranuir[v] = basictype.alpha end
           74  +for _,v in pairs(ranuirAlpha) do ranuir[v] = basictype.letter end
   112     75   for k,v in pairs(ranuirSpecial) do ranuir[k] = v end
   113     76   local ranuirKeys = {}
   114     77   for k in pairs(ranuir) do table.insert(ranuirKeys, k) end
   115     78   table.sort(ranuirKeys)
   116     79   
   117     80   local recs = {}
   118     81   local ranuirok = false
   119     82   for ln in file:lines() do
   120     83   	local v = {}
   121     84   	for s in ln:gmatch('[^;]*') do
   122     85   		table.insert(v, s)
   123     86   	end
   124     87   	v[1] = tonumber(v[1],0x10)
   125         -	if v[1] > 0x7f then -- discard ASCII, we already have that
           88  +-- 	if v[1] > 0x7f then -- discard ASCII, we already have that
   126     89   		local code = {
   127     90   			codepoint = v[1];
   128     91   			name = v[2];
   129     92   			class = v[3];
   130     93   			kind = v[5];
   131     94   		}
   132     95   		code.cat = parsecat(code)
................................................................................
   140    103   			end
   141    104   			ranuirok = true
   142    105   		end
   143    106   
   144    107   		if code.cat ~= 0 then
   145    108   			table.insert(recs,code)
   146    109   		end
   147         -	end
          110  +-- 	end
   148    111   end
   149    112   
   150    113   
   151    114   local ranges = {}
   152    115   local last = recs[1]
   153    116   local start = last
   154    117   local altern = false
................................................................................
   179    142   		flush()
   180    143   		start = r
   181    144   	end
   182    145   	last = r
   183    146   end
   184    147   flush()
   185    148   
   186         --- expand bitmask
   187         -	-- for k,v in pairs(ranges) do
   188         -	-- 	local basic = v[3] & ((1<<3) - 1) -- first three bits
   189         -	-- 	if basic ~= 0 then
   190         -	-- 		v[4] = basictype[basic]
   191         -	-- 	end
   192         -	-- 	local bitrange = props[true]
   193         -	-- 	for j=bitrange[1], bitrange[2] do
   194         -	-- 		if (v[3] & (1<<j)) ~= 0 then
   195         -	-- 			table.insert(v, props[1<<j])
   196         -	-- 		end
   197         -	-- 	end
   198         -	-- end
   199         -
   200    149   -- the data has been collected and formatted in the manner we
   201    150   -- need; now we just need to emit it as a lua table
   202    151   
   203    152   local tab = {}
   204    153   local top = 1
   205    154   for k,v in pairs(ranges) do
   206    155   	tab[top] = string.format('{0x%x,0x%x,%u}',table.unpack(v))
   207    156   	top = top + 1
   208    157   end
   209         -io.stdout:write(string.format(tpl, table.concat(tab,',')))
          158  +io.stdout:write(string.format(tpl, table.concat(tab,',\n')))