parsav  str.t at [d228cd7fcb]

File str.t artifact f2457b558f part of check-in d228cd7fcb


-- vim: ft=terra
-- string.t: string classes
local util = lib.util
local pstr = lib.mem.ptr(int8)
local pref = lib.mem.ref(int8)

-- Module table. The entries below are thin bindings to C library string
-- routines; the string passed to externfunction is the C symbol that gets
-- linked, the key is the short name used throughout this module.
local m = {
	sz = terralib.externfunction('strlen', rawstring -> intptr);
	cmp = terralib.externfunction('strcmp', {rawstring, rawstring} -> int);
	ncmp = terralib.externfunction('strncmp', {rawstring, rawstring, intptr} -> int);
	-- note: cpy/ncpy bind stpcpy/stpncpy, which return a pointer to the END of
	-- the copied string (the terminating NUL), not to the destination start
	cpy = terralib.externfunction('stpcpy',{rawstring, rawstring} -> rawstring);
	ncpy = terralib.externfunction('stpncpy',{rawstring, rawstring, intptr} -> rawstring);
	cat = terralib.externfunction('strcat',{rawstring, rawstring} -> rawstring);
	ncat = terralib.externfunction('strncat',{rawstring, rawstring, intptr} -> rawstring);
	dup = terralib.externfunction('strdup',rawstring -> rawstring);
	ndup = terralib.externfunction('strndup',{rawstring, intptr} -> rawstring);
	-- variadic formatters (the trailing `true` marks the signature as varargs)
	fmt = terralib.externfunction('asprintf',
		terralib.types.funcpointer({&rawstring,rawstring},{int},true));
	bfmt = terralib.externfunction('sprintf',
		terralib.types.funcpointer({rawstring,rawstring},{int},true));
	-- strspn returns a length (size_t), not a pointer; declaring it as
	-- rawstring made callers treat the count as an address
	span = terralib.externfunction('strspn',{rawstring, rawstring} -> intptr);
}

-- Cast and comparison helpers for the counted string/byte types. The three
-- aliases below are local to this `do` block; the code that follows reads the
-- `ptr` and `ct` fields of their instances.
do local strptr = (lib.mem.ptr(int8))
	local strref = (lib.mem.ref(int8))
	local byteptr = (lib.mem.ptr(uint8))
	-- User-defined conversions for strptr:
	--   &int8  -> strptr : wraps a C string, measuring its length with strlen
	--   strptr -> &int8  : yields the underlying pointer
	-- Any other pairing is rejected with an error, as Terra expects from a
	-- __cast that cannot perform the conversion.
	strptr.metamethods.__cast = function(from,to,e)
		if from == &int8 then
			-- bind the operand once: splicing `e` twice would evaluate it twice
			-- (e.g. a strdup() call would run twice and leak one copy)
			return quote
				var raw = e
			in strptr {ptr = raw, ct = m.sz(raw)} end
		elseif to == &int8 then
			-- must be a quote; a bare `e.ptr` would index the Lua quote object
			-- instead of producing a Terra field-select expression
			return `e.ptr
		end
		error(string.format('cannot cast %s to %s', tostring(from), tostring(to)))
	end

	-- Equality test for two counted strings. A NUL byte at the same index in
	-- both strings ends the comparison early with a match. Two nil pointers are
	-- equal; a nil pointer never equals a non-nil one.
	-- Returns true when the strings match, false otherwise.
	terra strptr:cmp(other: strptr)
		if self.ptr == nil and other.ptr == nil then return true end
		if self.ptr == nil or other.ptr == nil then return false end

		-- only the bytes both strings actually own may be compared pairwise;
		-- looping to the larger count read past the end of the shorter buffer
		var common = self.ct
		if other.ct < common then common = other.ct end
		for i = 0, common do
			if self.ptr[i] == 0 and other.ptr[i] == 0 then return true end
			if self.ptr[i] ~= other.ptr[i] then return false end
		end
		-- the shared prefix matched: the longer string is equal only if it
		-- terminates right where the shorter one ends
		if self.ct > common then return self.ptr[common] == 0 end
		if other.ct > common then return other.ptr[common] == 0 end
		return true
	end
	-- Equality test for two counted string references. A NUL byte at the same
	-- index in both strings ends the comparison early with a match. Two nil
	-- pointers are equal; a nil pointer never equals a non-nil one.
	-- Returns true when the strings match, false otherwise.
	terra strref:cmp(other: strref)
		if self.ptr == nil and other.ptr == nil then return true end
		if self.ptr == nil or other.ptr == nil then return false end

		-- only the bytes both strings actually own may be compared pairwise;
		-- looping to the larger count read past the end of the shorter buffer
		var common = self.ct
		if other.ct < common then common = other.ct end
		for i = 0, common do
			if self.ptr[i] == 0 and other.ptr[i] == 0 then return true end
			if self.ptr[i] ~= other.ptr[i] then return false end
		end
		-- the shared prefix matched: the longer string is equal only if it
		-- terminates right where the shorter one ends
		if self.ct > common then return self.ptr[common] == 0 end
		if other.ct > common then return other.ptr[common] == 0 end
		return true
	end

	-- Compare against a compile-time string constant: the constant is wrapped
	-- in a strptr whose count is its Lua length, then handed to cmp.
	strptr.methods.cmpl = macro(function(self,other)
		local lit = other:asvalue()
		return `self:cmp(strptr { ptr = lit, ct = [#lit] })
	end)
	-- Compare against a compile-time string constant: the constant is wrapped
	-- in a strref whose count is its Lua length, then handed to cmp.
	strref.methods.cmpl = macro(function(self,other)
		local lit = other:asvalue()
		return `self:cmp(strref { ptr = lit, ct = [#lit] })
	end)

	-- Equality test for two counted byte buffers, with the same rules as the
	-- string variants: a zero byte at the same index in both buffers ends the
	-- comparison early with a match. Two nil pointers are equal; a nil pointer
	-- never equals a non-nil one.
	-- Returns true when the buffers match, false otherwise.
	terra byteptr:cmp(other: byteptr)
		-- nil guards, matching strptr:cmp / strref:cmp
		if self.ptr == nil and other.ptr == nil then return true end
		if self.ptr == nil or other.ptr == nil then return false end

		-- only the bytes both buffers actually own may be compared pairwise;
		-- looping to the larger count read past the end of the shorter buffer
		var common = self.ct
		if other.ct < common then common = other.ct end
		for i = 0, common do
			if self.ptr[i] == 0 and other.ptr[i] == 0 then return true end
			if self.ptr[i] ~= other.ptr[i] then return false end
		end
		-- the shared prefix matched: the longer buffer is equal only if it
		-- has a zero byte right where the shorter one ends
		if self.ct > common then return self.ptr[common] == 0 end
		if other.ct > common then return other.ptr[common] == 0 end
		return true
	end
end

-- Strip NUL and carriage-return bytes from a counted string, in place.
-- Surviving bytes are compacted toward the front of the buffer; if anything
-- was removed, a NUL is written right after the last surviving byte.
--   s: the string to clean up (its buffer is modified)
-- Returns a pstr over the same buffer whose count is the number of bytes kept.
terra m.normalize(s: pstr)
	var stop: rawstring = s.ptr + s.ct -- one past the last owned byte
	var c: rawstring = s.ptr -- write cursor
	var n: rawstring = s.ptr -- read cursor
	-- every read is bounded by `stop`; the previous version tested
	-- `n > end` instead of `n >= end` and so inspected (and could copy) the
	-- byte one past the buffer, and it counted a stale byte into the result
	-- when the string ended in stripped characters
	while n < stop do
		if @n ~= 0 and @n ~= @'\r' then
			@c = @n
			c = c + 1
		end
		n = n + 1
	end
	-- terminate only when there is room inside the buffer, i.e. when at least
	-- one byte was dropped; otherwise the write would land outside s
	if c < stop then @c = 0 end
	return pstr { ptr = s.ptr, ct = c - s.ptr }
end

-- String accumulator: a heap buffer that grows as data is pushed onto it.
struct m.acc {
	-- heap buffer holding the accumulated bytes (nil when not allocated)
	buf: rawstring
	-- number of bytes currently stored in buf
	sz: intptr
	-- allocation step: the size init() allocates and the minimum amount
	-- push() grows the buffer by
	run: intptr
	-- number of bytes currently allocated for buf
	space: intptr
}

-- Returns the larger of two sizes.
local terra biggest(a: intptr, b: intptr)
	if b >= a then return b end
	return a
end

-- Set up an empty accumulator.
--   run: size of the initial allocation; also stored as the growth step
-- Returns self so calls can be chained.
terra m.acc:init(run: intptr)
	self.sz = 0
	self.run = run
	self.space = run
	self.buf = [rawstring](lib.mem.heapa_raw(run))
	return self
end;

-- Release the accumulator's buffer, if it has one. Safe to call repeatedly.
terra m.acc:free()
	if self.buf ~= nil and self.space > 0 then
		lib.mem.heapf(self.buf)
		-- forget the pointer: leaving it dangling made a second free() a
		-- double free, and a later push() would have written into freed
		-- memory instead of re-initializing (push re-inits when buf is nil)
		self.buf = nil
	end
end;

-- Shrink the buffer to fit its contents plus the terminating NUL.
-- Returns self so calls can be chained.
terra m.acc:crush()
	-- push() keeps a NUL at buf[sz]; resizing to exactly `sz` bytes dropped
	-- that terminator, so handing the buffer to C string routines read past
	-- the allocation. Keep one extra byte for it.
	-- NOTE(review): assumes lib.mem.heapr_raw has realloc semantics — confirm
	self.space = self.sz + 1
	self.buf = [rawstring](lib.mem.heapr_raw(self.buf, self.space))
	-- re-assert the terminator: it may never have been written if nothing
	-- was pushed since init()
	if self.buf ~= nil then self.buf[self.sz] = 0 end
	return self
end;

-- Hand the accumulated data over to the caller. The buffer is crushed first,
-- then detached: afterwards the accumulator holds no buffer and a size of 0.
-- Returns a counted pointer (ptr + ct) over the detached buffer.
terra m.acc:finalize()
	self:crush()
	var out = [lib.mem.ptr(int8)] { ptr = self.buf, ct = self.sz }
	-- detach so the accumulator no longer refers to the returned buffer
	self.buf = nil
	self.sz = 0
	return out
end;

-- Raise the growth step to `sz` and make sure at least that much room is free.
-- Does nothing when `sz` is not larger than the current step.
terra m.acc:cue(sz: intptr)
	if sz > self.run then
		self.run = sz
		var avail = self.space - self.sz
		if avail < sz then
			-- grow so that exactly `sz` bytes are free past the current data
			self.space = self.sz + sz
			self.buf = [rawstring](lib.mem.heapr_raw(self.buf, self.space))
		end
	end
end

-- Append bytes to the accumulator and keep the contents NUL-terminated.
--   str: data to append; a nil pointer is ignored
--   len: number of bytes to append; 0 means "measure str with strlen"
-- Allocates the buffer on first use and grows it when the data plus the
-- terminator would not fit. Returns self so calls can be chained.
terra m.acc:push(str: rawstring, len: intptr)
	if str == nil then return self end
	if self.buf == nil then self:init(self.run) end
	if len == 0 then len = m.sz(str) end

	var avail = self.space - self.sz
	if avail <= len then
		-- grow by at least one step, or by enough for the data and its NUL
		self.space = self.space + biggest(self.run, len + 1)
		self.buf = [rawstring](lib.mem.heapr_raw(self.buf, self.space))
	end

	var dest = self.buf + self.sz
	lib.mem.cpy(dest, str, len)
	self.sz = self.sz + len
	@(self.buf + self.sz) = 0
	return self
end;

-- Build a counted string reference from a compile-time constant. A constant
-- with no value yields a null reference with a count of 0.
m.lit = macro(function(str)
	local val = str:asvalue()
	local ref = lib.mem.ref(int8)
	if val == nil then
		return `ref {ptr = nil, ct = 0}
	end
	return `ref {ptr = val, ct = [#val]}
end)

-- Build a counted string pointer from a compile-time constant. A constant
-- with no value yields a null pointer with a count of 0.
m.plit = macro(function(str)
	local val = str:asvalue()
	local ptr = lib.mem.ptr(int8)
	if val == nil then
		return `ptr {ptr = nil, ct = 0}
	end
	return `ptr {ptr = val, ct = [#val]}
end)

-- Push a compile-time string constant; its length is taken from the Lua
-- string rather than measured at run time.
m.acc.methods.lpush = macro(function(self,str)
	local text = str:asvalue()
	return `self:push(text, [#text])
end)
-- Push a counted string pointer. Returns self (push returns the accumulator).
-- NOTE(review): a `ct` of 0 makes push measure the data with strlen.
m.acc.methods.ppush = terra(self: &m.acc, str: lib.mem.ptr(int8))
	return self:push(str.ptr, str.ct)
end;
-- Push a counted string reference. Returns self (push returns the accumulator).
-- NOTE(review): a `ct` of 0 makes push measure the data with strlen.
m.acc.methods.rpush = terra(self: &m.acc, str: lib.mem.ref(int8))
	return self:push(str.ptr, str.ct)
end;
-- Push a counted string pointer and then free it: the accumulator takes a
-- copy and the source string is released. Returns self.
m.acc.methods.merge = terra(self: &m.acc, str: lib.mem.ptr(int8))
	var acc = self:push(str.ptr, str.ct)
	str:free()
	return acc
end;
-- Macro: initialize the accumulator and push every argument onto it, in
-- order, as one chained expression (init(...):push(...):push(...)...).
-- Each argument is classified at compile time into a (str, len) pair:
--   * tuple-typed expression  -> str = v._0, len = v._1
--   * constant string         -> the string and its Lua length
--   * value whose type has ptr_basetype == int8 -> str = v.ptr, len = v.ct
--   * any other expression    -> passed as-is with len 0 (push treats a
--                                length of 0 as "measure with strlen")
-- A size estimate is summed over the arguments and handed to init().
m.acc.methods.compose = macro(function(self, ...)
	-- running estimate of the total size, used as the argument to init()
	local minlen = 0
	-- collected {str=, len=} pairs, one per argument
	local pstrs = {}
	for i,v in ipairs{...} do
		if type(v) == 'table' then
			local gl = 16 -- guess wildly
			if v.tree and v.tree.type.convertible == 'tuple' then
				pstrs[#pstrs+1] = {str = `v._0, len = `v._1}
			elseif v.asvalue and type(v:asvalue()) == 'string' then
				local str = v:asvalue()
				pstrs[#pstrs+1] = {str = str, len = #str}
				-- exact size is known for constants: length plus one extra byte
				gl = #str + 1
			elseif v.tree and v.tree.type.ptr_basetype == int8 then
				pstrs[#pstrs+1] = {str = `v.ptr, len = `v.ct}
			else pstrs[#pstrs+1] = {str = v, len = 0} end
			minlen = minlen + gl
		elseif type(v) == 'string' then 
			-- plain Lua string argument
			pstrs[#pstrs+1] = {str = v, len = #v}
			minlen = minlen + #v + 1
		else error('invalid type in compose expression') end
	end
	-- build the chained call, starting from init and appending one push per pair
	local call = `self:init(minlen)
	for i,v in ipairs(pstrs) do
		call = `[call]:push([v.str],[v.len])
	end
	return call
end)
-- `acc << x` appends x to the accumulator. Two overloads:
--   rawstring      -> pushed with length 0, i.e. measured by push
--   counted pointer -> forwarded to ppush
m.acc.metamethods.__lshift = terralib.overloadedfunction('(<<)', {
	terra(self: &m.acc, str: rawstring)
		return self:push(str, 0)
	end;
	terra(self: &m.acc, str: lib.mem.ptr(int8))
		return self:ppush(str)
	end;
})

-- Generate (once per type, via memoize) a "byte box" struct: an object of
-- type `ty` followed by a zero-length byte array marking where trailing
-- storage begins.
--   mk(sz): allocates a box with `sz` extra bytes after the struct
--   free(): releases the box
m.box = terralib.memoize(function(ty)
	local b = struct {
		obj: ty
		storage: int8[0]
	}
	b.name = string.format('bytebox<%s>', ty.name)
	b.methods.mk = terra(sz: intptr)
		var total = sizeof(b) + sz
		return [&b](lib.mem.heapa_raw(total))
	end
	b.methods.free = terra(self: &b)
		lib.mem.heapf(self)
	end
	return b
end)

-- Generate code that allocates a single heap block holding an object of type
-- `ty` followed by copies of the strings assigned to its fields, so the
-- object and its string data can be released together.
--   ty:   the struct type to box
--   vals: table mapping field names of `ty` to values. A value may be
--           * a Lua string or a constant-string quote,
--           * a quote of a counted pointer (ptr_basetype int8/uint8),
--           * a two-element table {pointer, size},
--           * any other quote, treated as a NUL-terminated C string.
-- Returns a quote evaluating to a counted pointer (ct = 1) to the boxed
-- object. Fields whose source is null at run time are set to null instead
-- of being copied.
m.encapsulate = function(ty, vals)
	-- bytes known at compile time vs. an expression summed at run time
	local memreq_const = sizeof(ty)
	local ptr = symbol(&int8) -- write cursor into the box's trailing storage
	local box = symbol(&m.box(ty))
	local memreq_exp = `0
	local copiers = {}
	for k,v in pairs(vals) do
		-- type of the destination field
		local ty = (`box.obj.[k]).tree.type
		local kp -- points the field at the current cursor position
		local isnull, nullify
		if ty.ptr_basetype then
			kp = quote [box].obj.[k] = [ty] { ptr = [&ty.ptr_basetype]([ptr]) } ; end
			nullify = quote [box].obj.[k] = [ty] { ptr = nil, ct = 0 } end
		else
			kp = quote [box].obj.[k] = [ty]([ptr]) ; end
			nullify = quote [box].obj.[k] = nil end
		end

		local cpy
		if type(v) ~= 'table' or #v ~= 2 then
			-- m.cpy is stpcpy, which returns the address of the terminating
			-- NUL; step past it so the next field does not overwrite this
			-- string's terminator (the space for it is already reserved)
			cpy = quote [kp] ; [ptr] = m.cpy(ptr, v) + 1 end
			isnull = `v == nil
		end
		if type(v) == 'string' then
			memreq_const = memreq_const + #v + 1
			isnull = `false
			-- counted destination fields need their length set, as the
			-- run-time string branch below already does
			if ty.ptr_basetype then
				cpy = quote [cpy]; [box].obj.[k].ct = [#v] end
			end
		elseif type(v) == 'table' and v.tree and (v.tree.type.ptr_basetype == int8 or v.tree.type.ptr_basetype == uint8) then
			-- reserve room for the copied bytes; this branch previously added
			-- nothing to the allocation size, so the copy overran the block
			memreq_exp = `[v].ct + [memreq_exp]
			cpy = quote [kp]; [ptr] = [&int8](lib.mem.cpy([ptr], [v].ptr, [v].ct)) end
			if ty.ptr_basetype then
				cpy = quote [cpy]; [box].obj.[k].ct = [v].ct end
			end
			isnull = `[v].ptr == nil
		elseif type(v) == 'table' and v.asvalue and type(v:asvalue()) == 'string' then
			local str = tostring(v:asvalue())
			memreq_const = memreq_const + #str + 1
			isnull = `false
			-- counted destination fields need their length set here too
			if ty.ptr_basetype then
				cpy = quote [cpy]; [box].obj.[k].ct = [#str] end
			end
		elseif type(v) == 'table' and #v == 2 then
			-- explicit {pointer, size} pair; the size is folded into the
			-- constant part when it is known at compile time
			local str,sz = v[1],v[2]
			if type(sz) == 'number' then
				memreq_const = memreq_const + sz
			elseif type(sz:asvalue()) == 'number' then
				memreq_const = memreq_const + sz:asvalue()
			else memreq_exp = `[sz] + [memreq_exp] end

			cpy = quote [kp] ;
				[ptr] = [&int8](lib.mem.cpy([ptr], str, sz))
			end
			if ty.ptr_basetype then
				cpy = quote [cpy]; [box].obj.[k].ct = sz end
			end
			isnull = `[str] == nil
		else
			-- run-time C string: measured with strlen
			-- NOTE(review): the size expression calls strlen before the null
			-- check in the copier runs, so a nil value here is still
			-- dereferenced — confirm callers never pass nil on this path
			memreq_exp = `(m.sz(v) + 1) + [memreq_exp] -- make room for NUL
			isnull = `v == nil
			if ty.ptr_basetype then
				cpy = quote [cpy]; [box].obj.[k].ct = m.sz(v) end
			end
		end

		copiers[#copiers + 1] = quote
			if [isnull] then [nullify]
			            else [cpy] end
		end
	end

	return quote
		var sz: intptr = memreq_const + [memreq_exp]
		var [box] = [&m.box(ty)](lib.mem.heapa_raw(sz))
		var [ptr] = [box].storage
		[copiers]
	in [lib.mem.ptr(ty)] { ct = 1, ptr = &([box].obj) } end
end

-- Scan `str` for the first byte that appears in `reject`, looking at no more
-- than `maxlen` bytes and never beyond the string's own count.
-- Returns the index of the first rejected byte; 0 if a NUL byte is reached
-- first; `maxlen` if the scan ends without finding either.
-- NOTE(review): 0 is returned both for "rejected byte at index 0" and for
-- "hit NUL", and `maxlen` may exceed str.ct — confirm callers expect this.
terra m.cspan(str: lib.mem.ptr(int8), reject: lib.mem.ref(int8), maxlen: intptr)
	var limit = lib.math.smallest(maxlen,str.ct)
	for pos = 0, limit do
		var ch = str.ptr[pos]
		if ch == 0 then return 0 end
		for r = 0, reject.ct do
			if reject.ptr[r] == ch then return pos end
		end
	end
	return maxlen
end

-- Skip leading whitespace (space, tab, newline), examining at most `maxlen`
-- bytes and stopping at a NUL.
-- Returns a pointer to the first byte that was not skipped.
terra m.ffw(str: &int8, maxlen: intptr)
	while maxlen > 0 do
		var ch = @str
		if ch == 0 then break end
		if ch ~= @' ' and ch ~= @'\t' and ch ~= @'\n' then break end
		str = str + 1
		maxlen = maxlen - 1
	end
	return str
end

-- Skip leading whitespace (space, tab, newline) with no length limit; the
-- scan stops only at a non-whitespace byte, which includes the NUL.
-- Returns a pointer to the first byte that was not skipped.
terra m.ffw_unsafe(str: &int8)
	while true do
		var ch = @str
		-- NUL is none of the whitespace bytes, so it ends the scan here too
		if ch ~= @' ' and ch ~= @'\t' and ch ~= @'\n' then break end
		str = str + 1
	end
	return str
end

return m