@@ -1,8 +1,18 @@ -- [ʞ] cortav.lua -- ~ lexi hale -- © AGPLv3 -- ? reference implementation of the cortav document language +-- +-- ! TODO refactor encoding logic. it's a complete +-- mess and i seem to have repeatedly gotten +-- confused about how it's supposed to work. +-- the whole shitshow needs to be replaced +-- with a clean, simple paradigm: documents +-- are translated to UTF-8 on the way in, and +-- translated back out on the way out. trying +-- to cope with multiple simultaneous +-- encodings in memory is a disaster zone. local ss = require 'sirsem' -- aliases for commonly used sirsem funcs local startswith = ss.str.begins @@ -735,8 +745,15 @@ origin = o; }}; origin = o; } + end + local function unicodepoint(s,c) + local cp = tonumber(s, 16) + return { + kind = 'codepoint'; + code = cp; + } end ct.spanctls = { {seq = '!', parse = formatter 'emph'}; {seq = '*', parse = formatter 'strong'}; @@ -797,8 +814,14 @@ {seq = '→', parse = insert_link}; {seq = '🔗', parse = insert_link}; {seq = '##', parse = insert_var_ref(true)}; {seq = '#', parse = insert_var_ref(false)}; + + {seq = 'U+', parse = unicodepoint}; + {seq = 'u+', parse = unicodepoint}; + {seq = 'U', parse = unicodepoint}; + {seq = 'u', parse = unicodepoint}; + {seq = '%%', parse = function (s,c) local com = s:match '^%%%%%s*(.*)$' return { kind = 'comment';