util  Check-in [72068307da]

Overview
Comment:add ord.c
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA3-256: 72068307daf49bd5c01a31514ea5b644819726488935f3266f6a70b39245312b
User & Date: lexi on 2019-07-19 06:00:34
Other Links: manifest | tags
Context
2019-07-19
06:06
fix typo check-in: a38de374f1 user: lexi tags: trunk
06:00
add ord.c check-in: 72068307da user: lexi tags: trunk
2019-07-13
09:05
PUTTING COMPUTER AWAY check-in: 95bd59918c user: lexi tags: trunk
Changes

Added ord.c version [478119c506].







































































































































































































































































































































































































































































































































































































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
/* [ʞ] ord.c - integer converter
 *  ~ lexi hale <lexi@hale.su>
 *  © AGPLv3
 *  * ord has no dependencies except for libc.
 *  ? ord converts integers to ascii characters
 *    and back. written because the only fucking
 *    way to do this in shell is FUCKING PRINTF.
 *  $ cc ord.c -o ord [-D_IO=(LIBC|POSIX)]
 *  	- the flag D_IO will instruct ord.c whether
 *  	  to use POSIX io primitives (write and read)
 *  	  instead of libc primitives (printf). if
 *  	  you're on a UNIX system, POSIX primitives
 *  	  will be used by default, but you can block
 *  	  them with LIBC or force them with POSIX.
 *  	  if you are on a POSIX-compliant system,
 *  	  you *should* use POSIX IO, for improved
 *  	  performance and safety. */

/* choose the IO backend: when _POSIX_IO is defined the write()-based
 * macros are used, otherwise the stdio ones.
 * NOTE(review): nothing visible in this file defines LIBC or POSIX as
 * macros, and an identifier that is not a macro evaluates to 0 inside
 * #if. with _IO undefined, or defined as either of those names, the
 * last clause reads 0 == 0, so _POSIX_IO ends up defined on every
 * platform and the stdio branch is never selected. giving LIBC and
 * POSIX distinct nonzero values would repair the selection - TODO
 * confirm that the stdio branch builds before doing so. */
#if (defined(__unix__) && _IO != LIBC) || (_IO == POSIX)
#	define _POSIX_IO
#endif

/* output primitives for the selected backend.
 *   say(x)      - write x to stderr. x must be a string literal or a
 *                 char array: the POSIX form takes its length from
 *                 sizeof.
 *   print(sz,x) - write x to stdout. the POSIX form writes exactly sz
 *                 bytes; the stdio form ignores sz and prints x up to
 *                 its terminating NUL.
 *   forposix(x) / forlibc(x) - keep x only when building for that
 *                 backend and expand to nothing otherwise. */
#ifdef _POSIX_IO
#	include <unistd.h>
	/* sizeof counts the terminating NUL of the literal; subtract it
	 * so the NUL byte is not written out along with the text */
#	define say(x) (write(2, (x), (sizeof (x)) - 1))
#	define print(sz,x) (write(1, (x), (sz)))
#	define forposix(x) x
#	define forlibc(x)
#else
#	include <stdio.h>
	/* explicit "%s" so the text is never interpreted as a format */
#	define say(x) (fprintf(stderr, "%s", (x)))
	/* takes the same two arguments as the POSIX form so that call
	 * sites compile under both backends; sz is never expanded */
#	define print(sz,x) (printf("%s",(x)))
#	define forposix(x)
#	define forlibc(x) x
#endif
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <limits.h>
/* element count of a true array (never a pointer): the size of the
 * whole array divided by the size of its first element */
#define sz(x) ( sizeof (x) / sizeof ((x)[0]) )

/* compile-time constants shared by the converters */
enum {
	/* used both as the null pointer and as the string terminator */
	null = 0,

	/* sizes of the three ascii ranges that serve as digits */
	numspace        = 1 + (0x39 - 0x30), /* '0'..'9': 10 */
	alphaspace      = 1 + (0x5a - 0x41), /* 'A'..'Z': 26 */
	smallalphaspace = 1 + (0x7a - 0x61), /* 'a'..'z': 26 */

	/* largest base written with numerals and one set of letters */
	imaxbase = alphaspace + numspace,     /* 36 */
	/* largest base overall, once small letters are digits too */
	maxbase = smallalphaspace + imaxbase  /* 62 */
};

/* the integer type every converted value is carried in */
typedef unsigned long long word;

/* home-grown boolean: the builtin _Bool plus its two constants
 * (false is 0 and true is 1 by enumeration order) */
typedef _Bool bool;
enum { false, true };

/* one lookup-table entry: the string `str` stands for the code `val` */
struct pair {
	uint8_t val;
	const char* str;
};
typedef struct pair pair;

/* master list of error kinds, written as an x-macro: define
 * e(name, desc) and then expand error_list to have it applied once per
 * error, with the identifier suffix and the message text. */
#define error_list \
	e(domain,   "bad argument passed for domain") \
	e(find,     "could not find key in table") \
	e(syntax,   "invalid syntax") \
	e(base,     "that base is out of range") \
	e(overflow, "a memory overflow has occurred") \
	e(ebcdic,   "nice try, mr ibm-san")

/* status code returned by the fallible functions. ok is 0 and fail is
 * 1; each error_list entry follows in order as bad_<name>. */
typedef enum bad {
	ok,
	fail,
#	define e(name, desc) bad_##name,
		error_list
#	undef e
} bad;

/* look `needle` up in the table `haystack`, which holds `stacksz`
 * entries. on an exact string match the entry's code is stored in *val
 * and ok is returned; the first matching entry wins. without a match
 * *val is left untouched and the result is bad_find. */
bad tblget(size_t stacksz, const pair* haystack, const char* needle, uint8_t* val) {
	size_t i = 0;
	while (i < stacksz) {
		const pair* entry = &haystack[i++];
		if (strcmp(entry->str, needle) != 0) continue;

		*val = entry->val;
		return ok;
	}
	return bad_find;
}

/* every symbol a command-line word can resolve to. the order matters:
 * the values are stored as uint8_t codes in argtbl, and the base
 * shorthands double as indices into bases[]. */
enum argument {
	/* structural keywords */
	arg_to,
	arg_set,
	arg_base,

	/* the ascii pseudo-base */
	arg_asc,

	/* base shorthands */
	arg_bin,
	arg_trn,
	arg_oct,
	arg_dec,
	arg_duo,
	arg_hex,
	arg_b32,
	arg_b64,

	/* option switches and parameters */
	switch_prefix,
	param_prefix,
	switch_lowercase,

	/* always rejected with bad_ebcdic */
	arg_ebcdic,
};

/* the base each shorthand selects, indexed by enum argument. 0 stands
 * for the ascii pseudo-base. the array ends at its highest designator
 * (arg_b32); slots without a designator are zero-initialized. */
word bases[] = {
	[arg_b32] = 32,
	[arg_hex] = 16,
	[arg_duo] = 12,
	[arg_dec] = 10,
	[arg_oct] =  8,
	[arg_trn] =  3,
	[arg_bin] =  2,
	[arg_asc] =  0,
};

/* literal prefix for each output base, indexed by the base itself
 * (0 = ascii). every entry is a one-byte length followed by the prefix
 * text; the length byte is kept in its own string literal so that the
 * escape cannot swallow a following digit. bases without a designator
 * are null pointers, so look-ups have to check for that.
 * NOTE(review): base 12 is given "0d" here, while atoi reads a leading
 * "0d" as decimal - confirm which of the two is intended. */
const char* prefixes [] = {
	[16] = "\2" "0x",
	[12] = "\2" "0d",
	[ 8] = "\1" "0",
	[ 3] = "\2" "0t",
	[ 2] = "\2" "0b",
	[ 0] = "\1" "@",
};

/* command-line vocabulary: maps each keyword, base name and option
 * spelling to its enum argument code. searched linearly by tblget, so
 * several spellings may share one code. */
const pair argtbl[] = {
	/* structure */
	{arg_to, "to"},
	{arg_base, "base"},

	/* everything after one of these is taken as a number */
	{arg_set, "--"}, {arg_set, "raw"},

	{arg_asc, "asc"}, {arg_asc, "ascii"},

	/* base shorthands */
	{arg_bin, "bin"}, {arg_bin, "binary"},
	{arg_trn, "trn"}, {arg_trn, "tern"}, {arg_trn, "ternary"}, {arg_trn, "trinary"},
	{arg_oct, "oct"}, {arg_oct, "octal"},
	{arg_dec, "dec"}, {arg_dec, "decimal"},
	{arg_duo, "duo"}, {arg_duo, "duodecimal"},
	{arg_hex, "hex"}, {arg_hex, "hexadecimal"},

	/* these must select arg_b32 (base 32), not the hex code */
	{arg_b32, "b32"}, {arg_b32, "base32"}, /* not padded! */

	/* options */
	{switch_prefix, "-p"}, {switch_prefix, "--prefix"},
	{switch_lowercase, "-l"}, {switch_lowercase, "--lowercase"},
	{param_prefix, "-m"}, {param_prefix, "--manual-prefix"},

	{arg_ebcdic, "ebcdic"},
};

/* read `s` as a number written in base 128, one ascii character per
 * digit, most significant first, and store it in *ret.
 * returns ok on success (an empty string yields 0), bad_domain if a
 * byte lies outside the 7-bit range 0..127, and bad_overflow if the
 * value does not fit in a word. *ret is only written on success. */
bad asctoi(const char* s, word* ret) {
	enum { base = 128 };
	const word limit = (word) -1;
	word val = 0;

	for (; *s != null; ++s) {
		const uint8_t v = (uint8_t) *s;
		/* valid digits are 0 .. base-1 */
		if (v >= base) return bad_domain;
		/* val * base + v would wrap around: refuse instead of
		 * silently producing a wrong number */
		if (val > (limit - v) / base) return bad_overflow;

		val = val * base + v;
	}

	*ret = val;
	return ok;
}

/* parse the null-terminated ascii numeral `s` in base `base` and store
 * the value in *ret.
 *
 * a literal prefix overrides `base`:
 *   "@"  - the rest is an ascii string (handed to asctoi)
 *   "0x" - hexadecimal      "0d" - decimal
 *   "0b" - binary           "0t" - ternary
 *   "0"  - octal (any other leading zero)
 * base 0 also means "ascii string". digits are 0-9, then A-Z, then
 * a-z; up to base imaxbase small letters count the same as capitals.
 *
 * returns ok on success, bad_base if `base` exceeds maxbase,
 * bad_domain if a character is not a digit of the base in effect,
 * bad_overflow if the value does not fit in a word, or whatever asctoi
 * reports. *ret is only written on success. */
bad atoi(word base, const char* s, word* ret) {
	if (base > maxbase) return bad_base;

	/* literal prefixes take precedence over the requested base */
	if (s[0] == '@' || base == 0) return asctoi(s + (s[0]=='@'),ret);
	else if (s[0] == '0' && s[1] == 'x') base = 16, s += 2;
	else if (s[0] == '0' && s[1] == 'd') base = 10, s += 2;
	else if (s[0] == '0' && s[1] == 'b') base =  2, s += 2;
	else if (s[0] == '0' && s[1] == 't') base =  3, s += 2;
	else if (s[0] == '0')                base =  8, s += 1;

	const bool insens = (base <= imaxbase);
	const word limit = (word) -1;
	word val = 0;

	for (; *s != null; ++s) {
		const uint8_t c = (uint8_t) *s;
		word digit;

		if (c >= 0x30 && c <= 0x39) {
			digit = c - 0x30;                 /* '0'..'9' */
		} else if (c >= 0x41 && c <= 0x5a) {
			digit = numspace + (c - 0x41);    /* 'A'..'Z' */
		} else if (c >= 0x61 && c <= 0x7a) {
			/* 'a'..'z': same value as the capital while the base is
			 * case-insensitive, otherwise a digit range of its own */
			digit = (insens ? numspace : numspace + alphaspace)
				+ (c - 0x61);
		} else return bad_domain;

		if (digit >= base) return bad_domain;
		/* val * base + digit would wrap around: refuse instead of
		 * silently producing a wrong number */
		if (val > (limit - digit) / base) return bad_overflow;

		val = val * base + digit;
	}

	*ret = val;
	return ok;
}

/* digit characters in value order: baseref[d] is the character itoa
 * emits for the digit value d. the layout (numerals, then capitals,
 * then small letters) has to mirror the ranges atoi decodes; C offers
 * no way to generate one from the other, so the two must be kept in
 * sync by hand. */
const char baseref[] = /* numerals[10] */ "0123456789"
	/* bigalpha[26] */ "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	/* smallalpha[26] */ "abcdefghijklmnopqrstuvwxyz";
/* one character per digit value, not counting the terminating NUL.
 * C11 requires the message argument. */
_Static_assert (sizeof baseref - 1 == maxbase,
	"baseref must provide exactly maxbase digit characters");

/* write `val` as an ascii string (base 128, one character per digit)
 * ending at `buf_end`.
 * the terminating NUL is stored at *buf_end and the characters are
 * laid down backwards from there, never below `buf_start`. on success
 * *newbuf (if newbuf is not null) receives the address of the first
 * character and ok is returned; a value of 0 yields the empty string.
 * if the characters do not fit, bad_overflow is returned and *newbuf
 * is left untouched. */
bad itoasc(word val, const char* buf_start, char* buf_end, char** newbuf) {
	char* ptr = buf_end; /* always the lowest byte written so far */

	*ptr = 0;
	while (val > 0) {
		/* test before stepping down, so that no pointer below the
		 * start of the buffer is ever formed */
		if (ptr == buf_start) return bad_overflow;
		*--ptr = (char)(val % 128);
		val /= 128;
	}

	if (newbuf != null) *newbuf = ptr;
	return ok;
}

/* when set, itoa prints letter digits in lowercase for every base in
 * which case carries no meaning (up to imaxbase) */
bool lowercase = false;

/* write `val` as a numeral in base `base`, ending at `buf_end`.
 * the terminating NUL is stored at *buf_end and the digits are laid
 * down backwards from there, never below `buf_start`. on success
 * *newbuf (if newbuf is not null) receives the address of the first
 * digit and ok is returned. at least one digit is always produced, so
 * the value 0 comes out as "0".
 * base 0 selects the ascii representation (see itoasc). returns
 * bad_base if `base` exceeds maxbase and bad_overflow if the digits do
 * not fit; *newbuf is left untouched in both cases. base 1 cannot
 * represent a nonzero value and always ends in bad_overflow. */
bad itoa(word base, word val, const char* buf_start,
		char* buf_end, char** newbuf) {

	char* ptr = buf_end; /* always the lowest byte written so far */

	if (base > maxbase) return bad_base;
	if (base == 0) return itoasc(val, buf_start, buf_end, newbuf);

	/* same bound atoi uses to decide that case is insignificant, so
	 * lowercase output can always be read back */
	const bool fold = lowercase && base <= imaxbase;

	*ptr = 0;
	do {
		/* test before stepping down, so that no pointer below the
		 * start of the buffer is ever formed */
		if (ptr == buf_start) return bad_overflow;

		char out = baseref[val % base];
		val /= base;
		if (fold && out >= 'A' && out <= 'Z')
			out += ('a' - 'A');
		*--ptr = out;
	} while (val > 0);

	if (newbuf != null) *newbuf = ptr;
	return ok;
}

bad run(const int argc, const char** argv) {
#	ifndef _POSIX_IO
		/* fuck your buffering, it only ever makes
		 * things worse */
		setvbuf(stdout,null,_IONBF);
#	endif
	word rv;
	
	enum { set_in, set_out, _set_sz } curset = set_in;
	word base[_set_sz] = { 10, 0 };

	const char* in_vals[argc]; *in_vals = null; /* null-terminated! */
	const char** invalp = in_vals;
	const char* pfxstr;
	forposix(size_t pfxstrlen);

	
	bool raw = false;
	bool prefix = false;

	for (const char** arg = argv + 1; *arg != null; ++arg) {
		uint8_t tblval;
		if (*arg[0] == '%') { ++ *arg; goto number; } else
		if (!raw && (tblget(sz(argtbl),argtbl, *arg, &tblval) == ok)) {
			enum argument symbol = (enum argument) tblval;
			switch (symbol) {
				case arg_to: {
					if (curset == set_out) return bad_syntax;
					else curset = set_out;
				} break;

				/* treat all further arguments as numbers */
				case arg_set: { raw = true; } break;
				case arg_ebcdic: { return bad_ebcdic; } break;

				/* specify base with numeral */
				case arg_base: {
					if (arg[1] == null) return bad_syntax;
					word basekind;
					bad e = atoi(10, arg[1], &basekind);
					if (e == ok) {
						if (basekind > maxbase) return bad_base;
						base[curset] = basekind;
					} else return e;
					++arg;
				} break;

				/* specify an output prefix */
				case param_prefix: {
					if (arg[1] == null) return bad_syntax;
					prefix = true; pfxstr = arg[1];
					forposix(pfxstrlen = strlen(pfxstr));
					++arg;
				} break;

				/* specify an automatic output prefix */
				case switch_prefix: { prefix = true; pfxstr = null; } break;
				case switch_lowercase: { lowercase = true; } break;

				default: {
					/* assume base shorthand */
					base[curset] = bases[symbol];
				}
			}
		} else /* bad_find */ number: {
			/* we assume it's a number - error checking will
			 * happen once we know how to interpret it */
			*invalp++=*arg; *invalp=null;
		}
	}

	/* if an ascii string was passed, change to hexadecimal output */
	if (base[set_in] == 0 && curset != set_out) base[set_out] = 16;

	size_t max_numeral_len = 0;
	/* 0 = ascii rep (0 .. 127); one char = 7 bits */
	if (base[set_out] ==  0) max_numeral_len = (sizeof(word) * CHAR_BIT) / 7; else
	if (base[set_out] ==  1) max_numeral_len = 1024; /* pls don't */ else
	/* note for unary: actual max is ((word) -1) but we cannot actually allocate
	 * that much fucking memory, so we limit to 1KiB and crash if it needs more */
	if (base[set_out] <=  2) max_numeral_len = (sizeof(word) * CHAR_BIT); else
	if (base[set_out] <=  8) max_numeral_len = (sizeof(word) * CHAR_BIT) / 3; else
	if (base[set_out] <= 16) max_numeral_len = (sizeof(word) * CHAR_BIT) / 4; else
	/* (base[set_out] <= 32) */ max_numeral_len = (sizeof(word) * CHAR_BIT) / 5;

	/* this is i think the only sane-ish way to do it that
	 * doesn't involve *shudder* logarithms
		TODO: find a better way to do this??? */

	size_t bufmax = (invalp - in_vals) * max_numeral_len;
	char buf [bufmax];
	char* ptr = (buf + bufmax) - 1;
	forposix(char* lastptr = ptr);

	for (const char** s = in_vals; *s != null; ++s) {
		word val;
		bad e = atoi(base[set_in], *s, &val);
		if (e == ok) {
			bad e = itoa(base[set_out], val, buf, ptr, &ptr);

			if (prefix) {
				if (pfxstr != null) { print(pfxstrlen, pfxstr); }
				else if (base[set_out] < sz(prefixes)) {
					print((size_t)prefixes[base[set_out]][0],
							prefixes[base[set_out]] + 1);
				}
			}
			print(lastptr-ptr, ptr);
			print(1, "\n");
			forposix(lastptr = ptr);
		} else {
			return e;
		}
	}
}

/* print the usage summary: the two invocation forms (each preceded by
 * the program name `name`), the base specifiers, the integer literal
 * syntaxes and the options, decorated with ansi escape sequences.
 * with _POSIX_IO everything is written to file descriptor 2; the
 * stdio variant prints through printf, except for the colour codes
 * around the program name, which go through say(). */
void usage(const char* name) {
#	ifdef _POSIX_IO
		/* counted strings; the length leaves out the terminating NUL
		 * so that it is not written along with the text */
		typedef struct pstr { size_t len; const char* str; } pstr;
#		define p(x) {sizeof (x "\n") - 1, (x "\n")}
		size_t namelen = strlen(name);
#	else
		typedef const char* pstr;
#		define p(x) (x "\n")
#	endif
	/* markup helpers: alternation bar, repetition mark, <parameter>,
	 * italic literal and [optional] brackets */
#	define OR "\x1b[34m|\x1b[93m" 
#	define plus "\x1b[94m+\x1b[m"
#	define par(s) "<\x1b[4m" s "\x1b[24m>"
#	define lit(l) "\x1b[3m" l "\x1b[23m"
#	define box(s) "\x1b[94m[\x1b[93m" s "\x1b[94m]\x1b[m"
		const pstr forms[] = {
			p(box(box("options") " " par("in:spec")) " " par("value:int") plus " "
					box(lit("to") " " box(par("out:spec")))),
			p(box(box("options") par("in:spec")) " " box(lit("to") " " box(par("out:spec")))
					" " lit("--") " " par("value:int") plus),
		}, specs[] = {
			p(box(lit("bin") OR lit("tern") OR lit("oct")
					OR lit("dec") OR lit("hex") OR
					lit("base") " " box("0-9") plus OR "asc")),
		}, ints[] = {
			p("default base: \x1b[94m.+\x1b[m"),
			p("binary literal: "lit("0b") box("01") plus),
			p("ternary literal: "lit("0t") box("012") plus),
			p("hex literal: "lit("0x") box("0-9A-Fa-f") plus),
			p("ascii literal: "lit("@") "\x1b[94m.+\x1b[m"),
			p("interpret any string (e.g. a keyword) as integer: " lit("%") box("0-9A-Za-z") plus),
		}, opts[] = {
			p("-p --prefix       : print known prefix codes on output strings"),
			p("-m --manual-prefix: specify a manual prefix to print before each number"),
			p("-l --lowercase    : prefer lowercase for case-insensitive bases"),
		};
#	undef p
#	undef OR
#	undef plus

	/* bold on / normal intensity. SGR 22 ends bold; SGR 21 selects
	 * double underline on ECMA-48 terminals. both sequences are five
	 * bytes long, so ansilen is unaffected. */
#	define hl_on  "\x1b[1m" 
#	define hl_off "\x1b[22m"
	/* size of the escape sequences wrapped around a heading, counted
	 * with the terminating NUL like the sizeof of the heading itself;
	 * the difference of the two is the visible width of the heading */
	enum { ansilen = sizeof (hl_on hl_off) };
#	define hl(x) (hl_on x hl_off)
		const char form_head []= hl("usage: ");
		const char spec_head []= hl("- spec: ");
		const char int_head  []= hl("- int: ");
		const char opt_head  []= hl("- options: ");
		const char space     []=    "           "; /* sigh */
#	undef hl
#	undef hl_on
#	undef hl_off

	/* output helpers:
	 *   _say(sz, s) - write sz bytes of s
	 *   vsay(sz, s) - write a string whose length is only known at
	 *                 run time
	 *   display(hd) - write a whole char array, without its NUL
	 *   pline(l)    - write one pstr table entry */
#	ifdef _POSIX_IO
#		define _say(sz, s) write(2, (s), (sz));
#		define vsay _say
#		define display(hd) _say(sizeof (hd) - 1, (hd));
#		define pline(l)    _say((l).len, (l).str);
#	else
		/* the precision of %.*s has to be an int */
#		define _say(sz, s) printf("%.*s", (int)(sz), s);
#		define display(hd) printf("%s",(hd));
#		define vsay(sz, s) display(s)
#		define pline(l)    display(l);
#	endif

	/* space(x) pads with x blanks; glow(x) runs x wrapped in colour
	 * codes; section(x,prefix) prints the heading x_head and then every
	 * line of the table xs, indenting continuation lines to the width
	 * of the heading and running `prefix` before each line */
#	define space(x) _say(x, space);
#	define glow(x) say("\x1b[95m"); { x }; say("\x1b[m");
#	define section(x,prefix) display(x##_head); \
		for(size_t i = 0; i < sz(x##s); ++ i) { \
		if (i>0) space(sizeof x##_head - ansilen); \
			{ prefix; }; pline(x##s[i]); }

	section(form,glow(vsay(namelen, name)); space(1));
	section(spec,);
	section(int,);
	section(opt,);
}

int main(int argc, const char** argv) {
	if (argc == 0) return -1;
	if (argc == 1) usage(argv[0]);
	bad e = run(argc, argv);
	switch (e) {
		case ok: return 0;
#		define e(kind, desc) case bad_##kind:\
				 say("\x1b[31;1merror:\x1b[m "); say(desc "\n"); break;
			error_list
#		undef e
	}
}