util  Check-in [fc9b35c962]

Overview
Comment:add rndcbytes and richascii spec; add notes to soda
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA3-256: fc9b35c96229c0309691fa1c036bea53cf006eef77e46c5bfa855b62a74213dd
User & Date: lexi on 2022-12-31 03:22:51
Other Links: manifest | tags
Context
2023-01-29
19:21
add rasuir spec, other updates check-in: 6bda93a905 user: lexi tags: trunk
2022-12-31
03:22
add rndcbytes and richascii spec; add notes to soda check-in: fc9b35c962 user: lexi tags: trunk
2022-12-30
23:55
make language more offensive check-in: b4fe00021c user: lexi tags: trunk
Changes

Added rich-ascii.ct version [67dad5f4d3].





































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# rich ascii
[*rich ascii] is a file format that adds very basic organizational and structural data to a text file using only ASCII control codes. even though it is called [*rich ascii], it is fully compatible with UTF-8.

##s document structure
	def: : [*[#1]] ::= [#2]
	ctl: [#1][,[#2]]
	ref: [U+27E8][*[#1]][U+27E9]
%% nested macro ident interpolation is currently broken; it seems to always use the arguments of the top(?)-level invocation, so we have to use this ugly fucking hack instead. fuck me
	SOH: {ctl [&q-SOH SOH]|1}
	STX: {ctl [&q-STX STX]|2}
	ETX: {ctl [&q-ETX ETX]|3}
	HT: {ctl [&q-HT HT]|9}
	NL: {ctl [&q-NL NL]|10}
	FF: {ctl [&q-FF FF]|12}
	DLE: {ctl [&q-DLE DLE]|16}

	q-SOH: start of heading
	q-STX: start of text
	q-ETX: end of text
	q-FF: form feed
	q-HT: horizontal tab
	q-NL: newline
	q-DLE: data link escape

a rich ascii document consists of a series of UTF-8 or ASCII codepoints.

$def doc|{ref title} {ref block}*
$def title|{SOH} {ref span} {ETX}
$def block|{SOH} {ref span} {STX} {ref body} {ETX}
$def block|{STX} {ref body} {ETX}
$def block|{ref table-row}
$def block|{ref metadata}
$def table-row|{ref table-cell}*
$def table-cell|{HT}+ {STX} {ref span} {ETX}
$def table-cell|{HT}+ {SOH} {ref span} {ETX}
$def metadata|{DLE} {ref text} {STX} {ref body} {ETX}
$def text|[!(any valid sequence of ASCII or UTF-8 characters of codepoint [=0x20] or higher)]
$def span|{ref text}
$def body|{ref span}*
$def body|{ref block}*

here is a structurally annotated document with several levels of headings.

> {SOH} worldgov expropriation notice {ETX}
> {s.DLE} author {s.STX} eudavia wobblebotch, esq. {s.ETX}
> {s.STX} please read this missive with the utmost care and attention to detail. failure to comply may result in liquidation without prior notice. {s.ETX}
> {s.SOH} demands {s.STX}
>> {s.STX} you are by the splendid munificence of EUDAVIA MILLWEED WOBBLEBOTCH, ESQ. hereby ordered, commanded, and impelled to collect posthaste the following objects, thought processes, bureaucratic minutia, and/or thumotic residuals, immediately thereupon to deliver them to the WORLDGOV OFFICE OF REQUISITIONS, IMPOSITIONS, & INQUISITIONS. failure to comply may result in liquidation without prior notice. {s.ETX}
>> {s.SOH} carapace of a WALLOWING SLIMETOOTLER {s.STX}
>>> {s.STX} easily located by application of a rigorously dour pataphor when dawns the witching hour {s.ETX}
>>> {s.SOH} mandatory qualia {s.STX}
>>>> {STX} pulchritude {ETX}
>>> {s.ETX}
>> {s.ETX}
>> {s.SOH} verse of cultivation {s.STX}
>>> woe on thee, unit {NL}
>>> of fungible labor {NL}
>>> your productivity {NL}
>>> has diminished {NL}
>>> and your soul {NL}
>>> has been requisitioned {NL}
>>> by THE SMILING MAN {NL}
>>> for Project {NL}
>>> Scowling Lizard.
>> {s.ETX}
> {s.ETX}

Added rndcbytes.lua version [b0df5b89e1].



































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
-- [ʞ] rndcbytes.lua
--  ~ lexi hale <lexi@hale.su>
--  © CC0/public domain
--  # requires lua 5.4
--  ? generates random bytes in the form of a maximally
--    compact C string. note that this only works for
--    values small enough to fit into a string literal.
--  > lua rndcbytes.lua <n> <max>
--    <n> is the number of bytes to generate 
--    <max> is the number of characters (NOT bytes) to
--          wrap after.

-- entropy devices to try, in order of preference
local rnd_srcs = {
	'/dev/urandom';
	'/dev/random';
}

-- rng(n) hands back a string of random bytes. it is bound to the
-- first device in rnd_srcs that can be opened for binary reading.
local rng
for idx = 1, #rnd_srcs do
	local dev = io.open(rnd_srcs[idx], 'rb')
	if dev then
		rng = function(n) return dev:read(n) end
		break
	end
end

-- no device could be opened: fall back on lua's own generator and
-- warn the user on stderr that the result is not suitable for
-- cryptographic use
if not rng then
	math.randomseed()
	io.stderr:write("WARNING: relying on internal lua RNG. bytes generated are NOT cryptographically reliable!\n")
	rng = function(n)
		local buf = {}
		for k = 1, n do
			buf[k] = string.char(math.random(0, 0xff))
		end
		return table.concat(buf)
	end
end

-- read one optional numeric command line argument.
--   idx     position in the arg table
--   name    placeholder name used in the error message
--   default value returned when the argument is absent
-- anything that is not a non-negative integer is rejected right here
-- with a readable message; left unchecked, tonumber() would yield nil
-- and the script would die later with an unrelated-looking error in
-- the reader or in the line wrap comparison.
local function count_arg(idx, name, default)
	local raw = arg[idx]
	if raw == nil then return default end
	local num = tonumber(raw)
	local v = num and math.tointeger(num)
	if v == nil or v < 0 then
		io.stderr:write(string.format(
			"rndcbytes: <%s> must be a non-negative integer (got '%s')\n",
			name, raw))
		os.exit(1)
	end
	return v
end

local n = count_arg(1, 'n', 48)
local max = count_arg(2, 'max', 48)

local bytes = rng(n)
-- a device read can come back nil (end of file) or short; refuse to
-- silently emit fewer bytes than were asked for
if bytes == nil or #bytes ~= n then
	io.stderr:write("rndcbytes: could not obtain the requested number of random bytes\n")
	os.exit(1)
end

-- lns collects the finished output lines. cur holds the pieces of
-- the line currently being assembled, chc its running character count.
local lns = {}
local cur, chc = {}, 0

-- move the pending line onto lns and start a fresh, empty one.
-- does nothing when no pieces are pending.
local function flush()
	if next(cur) == nil then return end
	lns[#lns + 1] = cur
	cur, chc = {}, 0
end

-- bytes that have a dedicated two-character C escape. the backslash
-- has to be listed: emitted raw it would start an escape sequence in
-- the generated literal and corrupt the data.
local escapes <const> = {
	[0x09] = "t";
	[0x0a] = "n";
	[0x22] = '"';
	[0x5c] = "\\";
}

-- true while the last piece placed in the current literal was a \x
-- escape. C sets no upper bound on the number of hex digits a \x
-- escape consumes, so a plain hex digit character written directly
-- after one would be swallowed by it.
local after_hex = false
for i=1, #bytes do
	local val = bytes:byte(i)
	local r
	local is_hex = false
	if escapes[val] then
		r = '\\' .. escapes[val]
	elseif val >= 0x20 and val < 0x7f then
		r = string.char(val)
	else
		r = string.format("\\x%02X", val)
		is_hex = true
	end
	if chc + #r > max then
		flush()
		-- the boundary between two literals terminates the escape
		after_hex = false
	end
	if after_hex and r:find('^%x$') then
		if chc + #r + 2 <= max then
			-- close and reopen the literal in place; adjacent
			-- literals are joined only after escapes are decoded
			r = '""' .. r
		else
			-- no room for the two extra quotes: wrap instead, which
			-- separates the escape from the digit just as well
			flush()
		end
	end
	chc = chc + #r
	table.insert(cur, r)
	after_hex = is_hex
end
flush()

-- write each collected line to stdout as one quoted C string literal
for idx = 1, #lns do
	print('"' .. table.concat(lns[idx]) .. '"')
end

Modified soda.c from [0502e2ccfd] to [20cdbef471].

1
2
3
4



5
6
7
8
9
10
11
...
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
...
521
522
523
524
525
526
527

528
529






530
531
532
533
534
535
536
...
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
....
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
/* [ʞ] soda - libsodium front end
 *  ~ lexi hale <lexi@hale.su>
 *  © AGPLv3
 *  @ vim: ft=c



 */

#define _POSIX_C_SOURCE 200809L
#include <unistd.h>
#include <stdio.h>
#include <errno.h> /* boo hiss */
#include <stdint.h>
................................................................................
#undef _declflag_raw

/* the casts here are a bit wacky. this is unfortunately necessary as C
 * insists on evaluating each branch of the generic expression regardless
 * of which type is picked. why anyone thought this was a good idea is
 * beyond me. */
#define _t(x) (_Generic((x), \
	      const char**: (struct sayp){say_string_list, .strlist  = (const char**)(size_t)(x)}, \
	              char: (struct sayp){say_char,   .ch   = (char)(size_t)(x)}, \
	             char*: (struct sayp){say_string, .str  = (const char*)(size_t)(x)}, \
	       const char*: (struct sayp){say_string, .str  = (const char*)(size_t)(x)}, \
				   int: (struct sayp){say_sint,   .sint = (long long)(x)}, \
	             short: (struct sayp){say_sint,   .sint = (long long)(x)}, \
	              long: (struct sayp){say_sint,   .sint = (long long)(x)}, \
	         long long: (struct sayp){say_sint,   .sint = (long long)(x)}, \
		      unsigned: (struct sayp){say_uint,   .uint = (long long unsigned)(x)}, \
	    unsigned short: (struct sayp){say_uint,   .uint = (long long unsigned)(x)}, \
	     long unsigned: (struct sayp){say_uint,   .uint = (long long unsigned)(x)}, \
	long long unsigned: (struct sayp){say_uint,   .uint = (long long unsigned)(x)}, \
	default: (struct sayp){say_hex, .uint = (size_t)(x)})),
#define _say(k,...)  say(c, noise_##k, ((struct sayp[]){__VA_ARGS__ {say_stop}}))
#define _report(k,s) _say(k, _t(s))

void
usage(struct ctx c, const struct cmd_spec* tree) {
	const char* bin = c.bin;
	const char* fmt_flag,* fmt_usage,* fmt_enum,* head_start,* head_fin;
................................................................................
	"\x31\x32\00\x33\x34\x35\x36\x37\x38\x39\x3a\x3b\x3c\x3d\x3e"
	"\00\00\00\00\x3f\00\x0b\x0c\x0d\x0e\x0f\x10\x11\x12\x13"
	"\x14\x15\x16\x17\00\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f\x20\x21"
	"\x22";
	/* generated by a script. do not touch. */

enum { armor_split_every = 48 };

char*
armor(const unsigned char* src, char* dest, size_t sz, size_t brstart, bool format) {






	char* bufptr = dest;
	unsigned char carry = 0;
	for(size_t i = 0; i<sz; ++i) {
		/* insert line breaks every so many chars */
		*(bufptr++)=ascii_armory[src[i] % 64];
		if (format) if ((brstart + (bufptr - dest)) % armor_split_every == 0)
			*(bufptr++)='\n', *(bufptr++)=' ';
................................................................................
		if (format) if ((brstart + (bufptr - dest)) % armor_split_every == 0)
			*(bufptr++)='\n', *(bufptr++)=' ';
	}
	/* if(carry != 0) */ *(bufptr++)=ascii_armory[carry];
	return bufptr;
}

char* disarmor(const unsigned char* src, char* dest, size_t sz) {
	/* transforms ascii armor into binary. can transform in place. */
	for(size_t i = 0; i<sz; i += 3) {
		while (isws(src[i])) ++i;
		const char* s = src + i;
		unsigned char carry = ascii_value[s[2] - '-'],
			          b1    = ascii_value[s[0] - '-'] + (64 * ((carry & 12) >> 2)),
		              b2    = ascii_value[s[1] - '-'] + (64 * ( carry &  3));
................................................................................
		++ c.cmdc; ++ paramcount;
	}
	
	parse_done: _say(debug, _t("stored") _t(c.cmdc) _t("arguments and") _t(paramcount) _t("parameters"));
	
	if (c.flags.noise <= noise_debug) for (size_t i = 0; i<c.cmdc; ++i) {
		if (cmdlist[i] < _atom_n) {
			dprintf(2, "\t%llu. atom %u (normal form “%s”)\n", i, cmdlist[i], reconstitute(cmdlist[i]));
		} else {
			dprintf(2,"\t%llu. parameter “%s”\n", i, paramlist[cmdlist[i] - _atom_n]);
		}
	}

	if (c.flags.fmtv >= sz(write_privkey)) {
		_say(fatal, _t("data format version") _t(c.flags.fmtv) _t("is not known. the highest available version is") _t(sz(write_privkey) - 1) _t("- are you sure you're using the latest release of " _self_name "?"));
		return -2;
	}




>
>
>







 







|
|
|
|








|







 







>
|
<
>
>
>
>
>
>







 







|







 







|

|







1
2
3
4
5
6
7
8
9
10
11
12
13
14
...
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
...
524
525
526
527
528
529
530
531
532

533
534
535
536
537
538
539
540
541
542
543
544
545
...
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
....
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
/* [ʞ] soda - libsodium front end
 *  ~ lexi hale <lexi@hale.su>
 *  © AGPLv3
 *  @ vim: ft=c
 *  ! ascii armor decoding is completely broken;
 *    needs to be fixed before this program can
 *    become usable
 */

#define _POSIX_C_SOURCE 200809L
#include <unistd.h>
#include <stdio.h>
#include <errno.h> /* boo hiss */
#include <stdint.h>
................................................................................
#undef _declflag_raw

/* the casts here are a bit wacky. this is unfortunately necessary as C
 * insists on evaluating each branch of the generic expression regardless
 * of which type is picked. why anyone thought this was a good idea is
 * beyond me.
 *
 * _t(x) wraps a single value in a (struct sayp) compound literal: the
 * first member is initialised with the say_* tag chosen for the static
 * type of x, and the designated member carries the value itself. types
 * that are not listed fall through to say_hex. the expansion ends in a
 * comma, so several _t(...) invocations can be written back to back
 * without separators.
 * NOTE(review): strictly speaking the unselected associations of a
 * _Generic are not evaluated, but each one must still be a valid
 * expression for the actual type of x -- presumably that is why the
 * pointer and char branches are routed through intptr_t. */
#define _t(x) (_Generic((x), \
	      const char**: (struct sayp){say_string_list, .strlist  = (const char**)(intptr_t)(x)}, \
	              char: (struct sayp){say_char,   .ch   = (char)(intptr_t)(x)}, \
	             char*: (struct sayp){say_string, .str  = (const char*)(intptr_t)(x)}, \
	       const char*: (struct sayp){say_string, .str  = (const char*)(intptr_t)(x)}, \
				   int: (struct sayp){say_sint,   .sint = (long long)(x)}, \
	             short: (struct sayp){say_sint,   .sint = (long long)(x)}, \
	              long: (struct sayp){say_sint,   .sint = (long long)(x)}, \
	         long long: (struct sayp){say_sint,   .sint = (long long)(x)}, \
		      unsigned: (struct sayp){say_uint,   .uint = (long long unsigned)(x)}, \
	    unsigned short: (struct sayp){say_uint,   .uint = (long long unsigned)(x)}, \
	     long unsigned: (struct sayp){say_uint,   .uint = (long long unsigned)(x)}, \
	long long unsigned: (struct sayp){say_uint,   .uint = (long long unsigned)(x)}, \
	default: (struct sayp){say_hex, .uint = (intptr_t)(x)})),
/* _say(k, ...) passes say() three things: the variable c, which must be
 * in scope at the point of use; the level noise_<k>; and an array made
 * of the given _t(...) items followed by a closing {say_stop} entry.
 * _report(k,s) is shorthand for saying the single value s. */
#define _say(k,...)  say(c, noise_##k, ((struct sayp[]){__VA_ARGS__ {say_stop}}))
#define _report(k,s) _say(k, _t(s))

void
usage(struct ctx c, const struct cmd_spec* tree) {
	const char* bin = c.bin;
	const char* fmt_flag,* fmt_usage,* fmt_enum,* head_start,* head_fin;
................................................................................
	"\x31\x32\00\x33\x34\x35\x36\x37\x38\x39\x3a\x3b\x3c\x3d\x3e"
	"\00\00\00\00\x3f\00\x0b\x0c\x0d\x0e\x0f\x10\x11\x12\x13"
	"\x14\x15\x16\x17\00\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f\x20\x21"
	"\x22";
	/* generated by a script. do not touch. */

enum { armor_split_every = 48 };

char* armor

(	char unsigned const* const src,
	char               * const dest,
	size_t               const sz,
	size_t               const brstart,
	bool                 const format
) {
	char* bufptr = dest;
	unsigned char carry = 0;
	for(size_t i = 0; i<sz; ++i) {
		/* insert line breaks every so many chars */
		*(bufptr++)=ascii_armory[src[i] % 64];
		if (format) if ((brstart + (bufptr - dest)) % armor_split_every == 0)
			*(bufptr++)='\n', *(bufptr++)=' ';
................................................................................
		if (format) if ((brstart + (bufptr - dest)) % armor_split_every == 0)
			*(bufptr++)='\n', *(bufptr++)=' ';
	}
	/* if(carry != 0) */ *(bufptr++)=ascii_armory[carry];
	return bufptr;
}

char* disarmor(char unsigned const* const src, char* dest, size_t const sz) {
	/* transforms ascii armor into binary. can transform in place. */
	for(size_t i = 0; i<sz; i += 3) {
		while (isws(src[i])) ++i;
		const char* s = src + i;
		unsigned char carry = ascii_value[s[2] - '-'],
			          b1    = ascii_value[s[0] - '-'] + (64 * ((carry & 12) >> 2)),
		              b2    = ascii_value[s[1] - '-'] + (64 * ( carry &  3));
................................................................................
		++ c.cmdc; ++ paramcount;
	}
	
	parse_done: _say(debug, _t("stored") _t(c.cmdc) _t("arguments and") _t(paramcount) _t("parameters"));
	
	if (c.flags.noise <= noise_debug) for (size_t i = 0; i<c.cmdc; ++i) {
		if (cmdlist[i] < _atom_n) {
			dprintf(2, "\t%zu. atom %u (normal form “%s”)\n", i, cmdlist[i], reconstitute(cmdlist[i]));
		} else {
			dprintf(2,"\t%zu. parameter “%s”\n", i, paramlist[cmdlist[i] - _atom_n]);
		}
	}

	if (c.flags.fmtv >= sz(write_privkey)) {
		_say(fatal, _t("data format version") _t(c.flags.fmtv) _t("is not known. the highest available version is") _t(sz(write_privkey) - 1) _t("- are you sure you're using the latest release of " _self_name "?"));
		return -2;
	}