parsav  smackdown.t at [aa17a03321]

File smackdown.t artifact c51eb8a6b2 part of check-in aa17a03321


-- vim: ft=terra
-- smackdown is parsav's terrible, terrible custom markdown-alike parser

-- table of everything this file exports; returned at the bottom
local m = {}
-- byte-pointer type from lib.mem; m.html reads its .ptr and .ct fields
local pstr = lib.mem.ptr(int8)

-- numeric tags for the kind of block-level segment being processed
local segt = {
	none    = 0,
	para    = 1,
	head    = 2,
	listing = 3,
}

-- URI scheme names. NOTE(review): nothing in the visible code reads this
-- list yet; judging by the name it is meant for automatic link detection.
-- the order of the entries is kept exactly as it was.
local autolink_protos = {
	'https', 'http', 'ftp', 'gopher', 'gemini', 'ircs', 'irc',
	'mailto', 'about', 'sshfs', 'afp', 'smb', 'data', 'file',
	'dav', 'git', 'svn', 'cvs', 'dns', 'finger', 'pop', 'imap',
	'pops', 'imaps', 'torrent', 'magnet', 'news', 'snews', 'nfs',
	'nntp', 'sms', 'tel', 'telnet', 'vnc', 'webcal', 'ws', 'wss',
	'xmpp',
}

-- parser state for the block-level pass
local struct state {
	-- kind of the segment being processed; holds one of the segt values
	segt: uint
	-- presumably the current blockquote nesting depth -- TODO confirm,
	-- it is only ever initialized to 0 in the visible code
	bqlvl: uint
	-- current read position in the text; segend offsets are relative to it
	curpos: rawstring
	-- presumably where the current block began (starts out nil) -- TODO confirm
	blockstart: rawstring
}

-- takes a string offset (relative to self.curpos) and returns true if it
-- indexes the end of the current block.
--   ofs : offset from self.curpos of the byte to test
-- only header segments are recognized so far: a header ends at the first
-- newline. every other case answers false.
terra state:segend(ofs: uint): bool
	var s = self.curpos + ofs
	-- a block can only end on a line break
	if s[0] ~= @'\n' then return false end
	if self.segt == segt.head then return true end -- headers can only be 1 line
-- 	if s[1] == '#'

	-- TODO: rules for the remaining segment types. until they exist, answer
	-- false explicitly; previously control fell off the end of the function
	-- here and the caller received an undefined value.
	return false
end

-- true when c is one of the four whitespace bytes the parser recognizes:
-- space, tab, line feed or carriage return
local terra isws(c: int8)
	if c == @' '  then return true end
	if c == @'\n' then return true end
	if c == @'\t' then return true end
	return c == @'\r'
end

-- searches the current line of `l` for the byte sequence `n`.
--   l   : start of the text to search; nil yields nil
--   max : number of readable bytes at `l`
--   n   : the needle, `nc` bytes long (need not be NUL-terminated)
--   nc  : length of the needle in bytes
-- returns a pointer to the first occurrence of the needle that lies wholly
-- inside the first `max` bytes and before the first newline, or nil if
-- there is none.
local terra scanline(l: rawstring, max: intptr, n: rawstring, nc: intptr): rawstring
	if l == nil then return nil end
	-- the needle has to fit inside the window, so the last candidate offset
	-- is max-nc (the loop bound is exclusive). this keeps l[i+j] from ever
	-- reading at or beyond l[max].
	for i=0,(max - nc) + 1 do
		for j=0,nc do
			-- a match may not reach or cross a line break
			if l[i+j] == @'\n' then return nil end
			if l[i+j] ~= n[j] then goto nexti end
		end
		do return l+i end
	::nexti::end
	-- window exhausted without a match. this must be an explicit nil:
	-- callers compare the result against nil.
	return nil
end

-- variant of scanline that only accepts the needle when it closes a word:
-- the first occurrence found by scanline must be followed either by the end
-- of the `max`-byte window or by a whitespace byte. returns a pointer to the
-- start of that occurrence, or nil. only the first occurrence on the line is
-- considered; if it does not qualify the result is nil.
local terra scanline_wordend(l: rawstring, max: intptr, n: rawstring, nc: intptr)
	var hit = scanline(l,max,n,nc)
	if hit == nil then return nil end
	-- first byte past the needle
	var after = hit + nc
	if after >= l+max then return hit end
	if isws(@after) then return hit end
	return nil
end

-- renders smackdown source text to HTML.
--   input : the text to render. if input.ct is 0 the length is measured
--           with lib.str.sz(input.ptr).
-- returns the result of styled:finalize(), i.e. the text after the inline
-- pass. the input is first run through lib.html.sanitize.
--
-- inline syntax handled:
--   [label](target)  -> <a href="target" rel="nofollow">label</a>
--   ***text***       -> <strong><em>text</em></strong>
--   **text**         -> <strong>text</strong>
--   *text*           -> <em>text</em>
-- emphasis openers must start a word and closers must end one; links and
-- emphasis never span a line break (see scanline).
terra m.html(input: pstr)
	if input.ct == 0 then input.ct = lib.str.sz(input.ptr) end

	var md = lib.html.sanitize(input,false)

	var styled: lib.str.acc styled:init(md.ct)

	do var i = 0 while i < md.ct do
		-- emphasis may only open at the very start or right after whitespace
		var wordstart = (i == 0 or isws(md.ptr[i-1]))

		var here = md.ptr + i
		var rem = md.ct - i
		if @here == @'[' then
			var sep = scanline(here,rem, '](', 2)
			-- only look for the closing paren once the separator is known to
			-- exist: sep+2 computed from a nil sep is a bogus non-nil address
			-- that scanline would go on to dereference
			if sep ~= nil then
				var target = sep + 2
				var term = scanline(target,rem - (target-here), ')', 1)
				if term ~= nil then
					-- NOTE(review): the link target is emitted without any check
					-- of its URI scheme; consider validating it (autolink_protos
					-- looks like the intended whitelist) before trusting input
					styled:lpush('<a href="')
						:push(target, term-target)
						:lpush('" rel="nofollow">')
						:push(here+1,(sep-here) - 1)
						:lpush('</a>')
					i = (term - md.ptr) + 1
					goto skip
				end
			end
			-- not a well-formed link; emit the bracket literally
			goto fallback
		end

		-- shortest possible span is one character between the markers,
		-- hence the minimum lengths 7, 5 and 3 below
		if wordstart and rem >= 7 and lib.str.ncmp('***',here,3)==0 then
			var term = scanline_wordend(here+4,rem-4,'***',3)
			if term ~= nil then
				-- close the tags in the reverse of the order they were opened
				styled:lpush('<strong><em>')
					:push(here+3, (term-here) - 3)
					:lpush('</em></strong>')
				i = (term - md.ptr) + 3
				goto skip
			end
		end

		if wordstart and rem >= 5 and lib.str.ncmp('**',here,2)==0 then
			var term = scanline_wordend(here+3,rem-3,'**',2)
			if term ~= nil then
				styled:lpush('<strong>')
					:push(here+2, (term-here) - 2)
					:lpush('</strong>')
				i = (term - md.ptr) + 2
				goto skip
			end
		end

		if wordstart and rem >= 3 and @here == @'*' then
			var term = scanline_wordend(here+2,rem-2,'*',1)
			if term ~= nil then
				styled:lpush('<em>')
					:push(here+1, (term-here) - 1)
					:lpush('</em>')
				i = (term - md.ptr) + 1
				goto skip
			end
		end

		-- nothing matched at this position: copy the byte through unchanged
		::fallback::styled:push(here,1) -- :/
		i = i + 1
	::skip::end end

	-- we make two passes: the first detects and transforms inline elements,
	-- the second carries out block-level organization

	-- the second pass is not implemented yet; the loop below is scaffolding
	-- that only advances the cursor and produces no output
	var html: lib.str.acc html:init(styled.sz)
	var s = state {
		segt = segt.none;
		bqlvl = 0;
		curpos = md.ptr;
		blockstart = nil;
	}
	while s.curpos < md.ptr + md.ct do
		s.curpos = s.curpos + 1
	end 

	-- release the sanitized copy only after its last use above
	md:free()

		html:free() -- JUST FOR NOW
	return styled:finalize()
end

-- hand the module table (with m.html on it) to whoever loads this file
return m