cortav  transmogrify.lua at [e8ab2a68d8]

File ext/transmogrify.lua artifact ad59e4c740 part of check-in e8ab2a68d8


local ct = require 'cortav'
local ss = require 'sirsem'

-- Plain-text sequences and their typographic replacements, keyed by the
-- document encoding object. Each encoding maps to an ordered list of
-- groups. `meddle` walks the groups in list order (ipairs) but visits the
-- entries inside one group in unspecified order (pairs), so a sequence
-- that is a prefix of a longer one must live in a LATER group than the
-- longer one, otherwise it could pre-empt it.
local patterns = {
	[ss.str.enc.utf8] = {
		-- four-character single-line arrows and the three-em dash
		{
			['<-->'] = '⟷';
			['--->'] = '⟶';
			['<---'] = '⟵';
			['----'] = '⸻';
		};

		-- four-character double-line arrows
		{
			['<==>'] = '⟺';
			['===>'] = '⟹'; -- long RIGHTWARDS double arrow, mirroring '<===' below
			['<==='] = '⟸';
		};

		-- three-character arrows, not-equal sign and the two-em dash
		{
			['<->'] = '↔';
			['-->'] = '→';
			['<--'] = '←';
			['==>'] = '⇒';
			['<=>'] = '⇔';
			['<=='] = '⇐';
			['=/='] = '≠';
			['---'] = '⸺';
		};

		-- remaining symbols; '--' has to come after every longer dash run
		{
			['-:-'] = '÷';
			['--'] = '—';
			['(C)'] = '©';
			['(>)'] = '🄯';
			['(R)'] = '®';
			['(TM)'] = '™';
			['(SM)'] = '℠';
		};
	};
}

-- Typographic quotation marks per language, keyed first by the document
-- encoding object and then by language tag. Every entry is a five-element
-- list:
--   [1] opening double quote   [2] closing double quote
--   [3] opening single quote   [4] closing single quote
--   [5] elision character
-- The entry under the key `true` is the fallback used when no language is
-- set or no language-specific entry matches.
-- NOTE(review): 'sp' is not the ISO 639-1 code for Spanish ('es'); confirm
-- which tags ss.str.langmatch is expected to resolve here.
local quotes = {
	[ss.str.enc.utf8] = {
		en     = { '“', '”', '‘', '’', '’' },
		de     = { '„', '“', '‚', '‘', '’' },
		sp     = { '«', '»', '‹', '›', '’' },
		ja     = { '「', '」', '『', '』', "'" },
		fr     = { '« ', ' »', '‹ ', ' ›', '’' },
		[true] = { '“', '”', '‘', '’', '’' },
	},
}

--- Apply typographic substitutions to one run of text.
-- Straight quotes are replaced by the language-appropriate curly quotes
-- from `quotes`, and ASCII sequences listed in `patterns` are replaced by
-- their Unicode counterparts. Anything `parse_escape` recognises as an
-- escape is copied through as the escape's result and never substituted.
--
-- @param ctx  origin record; `ctx.doc.enc` (the encoding object) is
--             required, `ctx.lang` is optional and selects the quote style
-- @param t    the text to transform (string in the document encoding)
-- @return     the transformed string, or `t` itself when no pattern table
--             exists for the document's encoding
local function meddle(ctx, t)
	local enc = ctx.doc.enc
	local ptns = patterns[enc]
	if not ptns then return t end

	local dquo = enc.encodeUCS'"'
	local squo = enc.encodeUCS"'"
	-- a backtick directly before a quote forces the closing form
	local forceRight = enc.encodeUCS'`'

	-- Classify a character: 1 for a double quote, 2 for a single quote,
	-- nil for anything else.
	local function quo(c)
		if c == dquo then
			return 1
		elseif c == squo then
			return 2
		end
	end

	-- Return the first (sequence, replacement) pair whose sequence starts
	-- the string `n`, trying the pattern groups in their listed order.
	local function findpattern(n)
		for _, order in ipairs(ptns) do
			for k, v in pairs(order) do
				if ss.str.begins(n, k) then
					return k, v
				end
			end
		end
	end

	-- pick the quote set for the span's language, else the default set
	local qtbl
	local qset = quotes[enc]
	if qset then
		if ctx.lang then
			qtbl = ss.str.langmatch(qset, ctx.lang, enc) or qset[true]
		else
			qtbl = qset[true]
		end
	end

	-- collect output pieces and join once, instead of re-copying the whole
	-- result string on every character
	local out = {}
	local lastchar
	for c, p in ss.str.each(enc, t) do
		local n = t:sub(p.byte)
		local ba, ca, nt = enc.parse_escape(n)
		if ba then
			-- escape: emit its result and skip what the parser consumed
			p.next.byte = p.next.byte + ba
			p.next.code = p.next.code + ca
			out[#out + 1] = nt
			lastchar = nt
		else
			local piece = c
			local quote, force
			-- Quote handling only applies when a quote table exists. In
			-- particular the lookahead below must not swallow the quote
			-- that follows a backtick if nothing will be emitted for it.
			if qtbl then
				quote = quo(c)
				if not quote and c == forceRight and #t >= p.next.byte then
					quote = quo(enc.char(enc.codepoint(t, p.next.byte)))
					if quote then
						force = true
						-- consume the quote character as well
						p.next.byte = p.next.byte + #forceRight
						p.next.code = p.next.code + enc.len(forceRight)
					end
				end
			end

			if quote then
				-- Quote-table layout is {dopen, dclose, sopen, sclose, ...}:
				-- kind 1 (double) uses slots 1/2, kind 2 (single) uses 3/4.
				local opening = false
				if not force then
					opening = lastchar == nil or enc.iswhitespace(lastchar)
				end
				if opening then
					piece = qtbl[quote*2 - 1]
				else
					piece = qtbl[quote*2]
				end
			else
				local k, v = findpattern(n)
				if k then
					piece = v
					-- the iterator already accounts for the current
					-- character, hence the -1
					p.next.byte = p.next.byte + string.len(k) - 1
					p.next.code = p.next.code + utf8.len(k) - 1
				end
			end

			out[#out + 1] = piece
			lastchar = c
		end
	end
	return table.concat(out)
end

--- Recursively run `meddle` over every plain-string entry of a span list.
-- String entries are replaced in place. Any other entry is descended into
-- when it carries a `spans` field and its `kind` is not 'raw', using that
-- entry's own `origin` as the context for its children.
--
-- @param origin  context handed to `meddle` for strings found at this level
-- @param spans   table of spans; modified in place
local function enterspan(origin, spans)
	for key, node in pairs(spans) do
		if type(node) ~= 'string' then
			-- raw spans are left untouched
			local descend = node.kind ~= 'raw' and node.spans
			if descend then
				enterspan(node.origin, node.spans)
			end
		else
			spans[key] = meddle(origin, node)
		end
	end
end

-- Register the extension with cortav. Its single hook rewrites the text of
-- the parsed document in place.
ct.ext.install {
	id = 'transmogrify';
	version = ss.version {0,1; 'devel'};
	contributors = {
		{
			name = 'lexi hale';
			handle = 'velartrill';
			mail = 'lexi@hale.su';
			homepage = 'https://hale.su';
		};
	};
	default = true; -- on unless inhibited
	slow = true;
	hook = {
		-- Visit every block of the 'ordinary', 'quote' and 'footnote'
		-- sections and apply the substitutions to its spans. A block's
		-- `spans` may be a table of spans or a single string; any other
		-- type is left alone.
		doc_meddle_ast = function(job)
			for _, sec in pairs(job.doc.secorder) do
				local kind = sec.kind
				local eligible = kind == 'ordinary'
					or kind == 'quote'
					or kind == 'footnote'
				if eligible then
					for _, block in pairs(sec.blocks) do
						local spans = block.spans
						local ty = type(spans)
						if ty == 'table' then
							enterspan(block.origin, spans)
						elseif ty == 'string' then
							block.spans = meddle(block.origin, spans)
						end
					end
				end
			end
		end;
	};
}