util  ord.c at [10946f3ca5]

File ord.c artifact 9504e2c85c part of check-in 10946f3ca5


/* [ʞ] ord.c - integer converter
 *  ~ lexi hale <lexi@hale.su>
 *  © AGPLv3
 *  * ord has no dependencies except for libc.
 *  ? ord converts integers to ascii characters
 *    and back. written because the only fucking
 *    way to do this in shell is FUCKING PRINTF.
 *  $ cc ord.c -o ord [-D_IO=(LIBC|POSIX)]
 *  	- the flag D_IO will instruct ord.c whether
 *  	  to use POSIX io primitives (write and read)
 *  	  instead of libc primitives (printf). if
 *  	  you're on a UNIX system, POSIX primitives
 *  	  will be used by default, but you can block
 *  	  them with LIBC or force them with POSIX.
 *  	  if you are on a POSIX-compliant system,
 *  	  you *should* use POSIX IO, for improved
 *  	  performance and safety.

 	TODO: take full advantage of write(2) by storing
	      output in single string & making single
		  write call */

/* io backend selection.
 * the build flag is spelled -D_IO=LIBC or -D_IO=POSIX. the preprocessor
 * treats any identifier that is not a macro as 0, so LIBC and POSIX need
 * distinct non-zero values while the condition is evaluated; otherwise
 * `_IO == POSIX` is `0 == 0` and the POSIX backend is always chosen.
 * the two helper names are dropped again right after the test. */
#define LIBC  1
#define POSIX 2
#if (defined(__unix__) && _IO != LIBC) || (_IO == POSIX)
#	define _POSIX_IO
#endif
#undef LIBC
#undef POSIX

/* say(x)      - emit the string *literal* x on stderr
 * print(sz,x) - emit the string x on stdout; the POSIX backend writes
 *               exactly sz bytes, the libc backend ignores sz and
 *               prints up to the terminating NUL
 * forposix(x) - x only when the POSIX backend is active
 * forlibc(x)  - x only when the libc backend is active */
#ifdef _POSIX_IO
#	include <unistd.h>
	/* sizeof a literal counts its NUL terminator; do not write it */
#	define say(x) (write(2, (x), (sizeof (x)) - 1))
#	define print(sz,x) (write(1, (x), (sz)))
#	define forposix(x) x
#	define forlibc(x)
#else
#	include <stdio.h>
	/* fputs, so the text is never interpreted as a format string */
#	define say(x) (fputs((x), stderr))
	/* takes the same two arguments as the POSIX form; sz is dropped
	 * unevaluated, so callers may pass POSIX-only variables there */
#	define print(sz,x) (printf("%s",(x)))
#	define forposix(x)
#	define forlibc(x) x
#endif
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <limits.h>
#define sz(x) ( sizeof (x) / sizeof (x) [0] )


/* the unsigned integer type every converted value is carried in */
typedef unsigned long long word;

/* local boolean type and its two constants */
typedef _Bool bool;
enum { false, true };

/* lowercase spelling of the zero constant; being an integer constant
 * with value 0 it can also be compared against and assigned to pointers */
enum { null = 0 };


/* table of reportable errors: one e(name, description) row each.
 * define e() before expanding error_list and undefine it afterwards. */
#define error_list \
	e(domain,   "bad argument passed for domain") \
	e(find,     "could not find key in table") \
	e(syntax,   "invalid syntax") \
	e(base,     "that base is out of range") \
	e(overflow, "a memory overflow has occurred") \
	e(ebcdic,   "nice try, mr ibm-san")

/* status code used throughout the program. ok and fail come first;
 * every error_list row then contributes a bad_<name> constant, in
 * table order. */
typedef enum bad {
	ok   = 0,
	fail = 1,
#	define e(tag, text) bad_##tag,
	error_list
#	undef e
} bad;

/* types declared ahead of the clib/tbl.c include: tbl_error_type maps
 * the table's ok/error results onto this program's status codes (ok and
 * bad_find) and tbl_word_type is an unsigned char.
 * NOTE(review): presumably tbl.c expects the includer to provide these
 * two names and uses them for tblget's result and value type - confirm
 * against clib/tbl.c. */
typedef enum {
	tbl_ok = ok, tbl_error = bad_find
} tbl_error_type;
typedef unsigned char tbl_word_type;
#include "clib/tbl.c"
/* short local name for the table row struct (struct tblrow is not
 * declared in this file; presumably it comes from clib/tbl.c) */
typedef struct tblrow pair;

/* every keyword the command line understands. the arg_asc .. arg_b32
 * values double as indices into bases[], so their order matters. */
enum argument {
	/* structural keywords */
	arg_to,
	arg_set,
	arg_base,

	/* ascii pseudo-base */
	arg_asc,

	/* base shorthands */
	arg_bin,
	arg_trn,
	arg_oct,
	arg_dec,
	arg_duo,
	arg_hex,
	arg_b32,
	arg_b64,

	/* switches and parameters */
	switch_prefix,
	param_prefix,
	switch_lowercase,
	switch_7bit,

	arg_ebcdic,
};

/* numeric base selected by each base-shorthand keyword, indexed by
 * enum argument. 0 stands for the ascii representation. */
word bases[] = {
	[arg_asc] = 0,

	[arg_bin] = 2,
	[arg_trn] = 3,
	[arg_oct] = 8,
	[arg_dec] = 10,
	[arg_duo] = 12,
	[arg_hex] = 16,
	[arg_b32] = 32,
};

/* automatic output prefixes, indexed by numeric base. each entry is
 * length-prefixed: its first byte is the number of prefix characters
 * that follow. bases without an entry are null pointers. */
const char* prefixes [] = {
	[ 0] = "\1" "@",

	[ 2] = "\2" "0b",
	[ 3] = "\2" "0t",
	[ 8] = "\1" "0",

	[12] = "\2" "0d",
	[16] = "\2" "0x",
};

/* keyword table: maps each word accepted on the command line to its
 * enum argument value. run() looks arguments up here with tblget();
 * words that are not found are treated as values to convert. */
const pair argtbl[] = {
	{arg_to, "to"},
	{arg_base, "base"},

	{arg_set, "--"}, {arg_set, "raw"},

	{arg_asc, "asc"}, {arg_asc, "ascii"},

	{arg_bin, "bin"}, {arg_bin, "binary"},
	{arg_trn, "trn"}, {arg_trn, "tern"}, {arg_trn, "ternary"}, {arg_trn, "trinary"},
	{arg_oct, "oct"}, {arg_oct, "octal"},
	{arg_dec, "dec"}, {arg_dec, "decimal"},
	{arg_duo, "duo"}, {arg_duo, "duodecimal"},
	{arg_hex, "hex"}, {arg_hex, "hexadecimal"},

	/* these used to map to arg_hex, which silently selected base 16 */
	{arg_b32, "b32"}, {arg_b32, "base32"}, /* not padded! */

	{switch_prefix, "-p"}, {switch_prefix, "--prefix"},
	{switch_lowercase, "-l"}, {switch_lowercase, "--lowercase"},
	{switch_7bit, "-7"}, {switch_7bit, "--7bit"},
	{param_prefix, "-m"}, {param_prefix, "--manual-prefix"},

	{arg_ebcdic, "ebcdic"},
};

/* import the conversion utilities.
 * the declarations below are set up before clib/iaia.c is included:
 * iaia's error and word types are aliased to this program's `bad` and
 * `word`, and its error names are made synonyms of the local ones.
 * NOTE(review): presumably _IAIA_EXTERNAL_TYPES tells iaia.c to use
 * these typedefs instead of its own, and iaia.c is what provides the
 * atoi/itoa/maxbase used by run() - confirm against clib/iaia.c. */
typedef bad iaia_error_type;
typedef word iaia_word_type;
enum /* iaia synonyms */ {
	iaia_e_ok = ok,
	iaia_e_domain = bad_domain,
	iaia_e_base = bad_base,
	iaia_e_overflow = bad_overflow,
};
/* set by the -7 / --7bit switch in run(); handed to iaia.c through
 * the _IAIA_EXP_ASCFORM macro */
bool ascii_7bit = false;
#define _IAIA_EXP_ASCFORM ascii_7bit
#define _IAIA_EXTERNAL_TYPES
#include "clib/iaia.c"

bad run(const int argc, const char** argv) {
#	ifndef _POSIX_IO
		/* fuck your buffering, it only ever makes
		 * things worse */
		setvbuf(stdout,null,_IONBF);
#	endif
	
	enum { set_in, set_out, _set_sz } curset = set_in;
	word base[_set_sz] = { 10, 0 };

	const char* in_vals[argc]; *in_vals = null; /* null-terminated! */
	const char** invalp = in_vals;
	const char* pfxstr;
	forposix(size_t pfxstrlen);
	
	bool raw = false;
	bool prefix = false;
	bool lowercase = false;

	for (const char** arg = argv + 1; *arg != null; ++arg) {
		uint8_t tblval;
		if (*arg[0] == '`') { ++ *arg; goto number; } else
		if (!raw && (tblget(sz(argtbl),argtbl, *arg, &tblval) == ok)) {
			enum argument symbol = (enum argument) tblval;
			switch (symbol) {
				case arg_to: {
					if (curset == set_out) return bad_syntax;
					else curset = set_out;
				} break;

				/* treat all further arguments as numbers */
				case arg_set: { raw = true; } break;
				case arg_ebcdic: { return bad_ebcdic; } break;

				/* specify base with numeral */
				case arg_base: {
					if (arg[1] == null) return bad_syntax;
					word basekind;
					bad e = atoi(10, arg[1], &basekind);
					if (e == ok) {
						if (basekind > maxbase) return bad_base;
						base[curset] = basekind;
					} else return e;
					++arg;
				} break;

				/* specify an output prefix */
				case param_prefix: {
					if (arg[1] == null) return bad_syntax;
					prefix = true; pfxstr = arg[1];
					forposix(pfxstrlen = strlen(pfxstr));
					++arg;
				} break;

				/* specify an automatic output prefix */
				case switch_prefix: { prefix = true; pfxstr = null; } break;
				case switch_lowercase: { lowercase = true; } break;
				case switch_7bit: { ascii_7bit = true; } break;

				default: {
					/* assume base shorthand */
					base[curset] = bases[symbol];
				}
			}
		} else /* bad_find */ number: {
			/* we assume it's a number - error checking will
			 * happen once we know how to interpret it */
			*invalp++=*arg; *invalp=null;
		}
	}

	/* if an ascii string was passed, change to hexadecimal output */
	if (base[set_in] == 0 && curset != set_out) base[set_out] = 16;

	size_t max_numeral_len = 0;
	/* 0 = ascii rep (0 .. 127); one char = 7 bits */
	if (base[set_out] ==  0) max_numeral_len = (sizeof(word) * CHAR_BIT) / 7; else
	if (base[set_out] ==  1) max_numeral_len = 1024; /* pls don't */ else
	/* note for unary: actual max is ((word) -1) but we cannot actually allocate
	 * that much fucking memory, so we limit to 1KiB and crash if it needs more */
	if (base[set_out] <=  2) max_numeral_len = (sizeof(word) * CHAR_BIT); else
	if (base[set_out] <=  8) max_numeral_len = (sizeof(word) * CHAR_BIT) / 3; else
	if (base[set_out] <= 16) max_numeral_len = (sizeof(word) * CHAR_BIT) / 4; else
	/* (base[set_out] <= 32) */ max_numeral_len = (sizeof(word) * CHAR_BIT) / 5;

	/* this is i think the only sane-ish way to do it that
	 * doesn't involve *shudder* logarithms
		TODO: find a better way to do this??? */

	size_t bufmax = (invalp - in_vals) * max_numeral_len;
	char buf [bufmax];
	char* ptr = (buf + bufmax) - 1;
	forposix(char* lastptr = ptr);

	for (const char** s = in_vals; *s != null; ++s) {
		word val;
		bad e = atoi(base[set_in], *s, &val);
		if (e == ok) {
			bad e = itoa(base[set_out], val, buf, ptr, &ptr, lowercase);
			if (e != ok) return e;

			if (prefix) {
				if (pfxstr != null) { print(pfxstrlen, pfxstr); }
				else if (base[set_out] < sz(prefixes)) {
					print((size_t)prefixes[base[set_out]][0],
							prefixes[base[set_out]] + 1);
				}
			}
			print(lastptr-ptr, ptr);
			print(1, "\n");
			forposix(lastptr = ptr);
		} else {
			return e;
		}
	}
	return ok;
}

/* usage - print the coloured help text on stderr.
 *  name: the program name as invoked (argv[0]); it is printed in front
 *        of each usage form.
 *  the text is laid out in four sections (usage forms, base specs,
 *  integer literal forms, options). each section prints its heading
 *  once and pads the following lines to the heading's visible width. */
void usage(const char* name) {
#	ifdef _POSIX_IO
		typedef struct pstr { size_t len; const char* str; } pstr;
		/* length excludes the literal's NUL so it is never written */
#		define p(x) {sizeof (x "\n") - 1, (x "\n")}
		size_t namelen = strlen(name);
#	else
		typedef const char* pstr;
#		define p(x) (x "\n")
#	endif
#	define OR "\x1b[34m|\x1b[93m" 
#	define plus "\x1b[94m+\x1b[m"
#	define par(s) "<\x1b[4m" s "\x1b[24m>"
#	define lit(l) "\x1b[3m" l "\x1b[23m"
#	define box(s) "\x1b[94m[\x1b[93m" s "\x1b[94m]\x1b[m"
		const pstr forms[] = {
			p(box("options") " " box(par("in:spec")) " " par("value:int") plus " "
					box(lit("to") " " box(par("out:spec")))),
			p(box("options") " " box(par("in:spec")) " " box(lit("to") " " box(par("out:spec")))
					" " lit("--") " " par("value:int") plus),
		}, specs[] = {
			p(box(lit("bin") OR lit("tern") OR lit("oct")
					OR lit("dec") OR lit("hex") OR
					lit("base") " " box("0-9") plus OR "asc")),
		}, ints[] = {
			p("default base: \x1b[94m.+\x1b[m"),
			p("binary literal: "lit("0b") box("01") plus),
			p("ternary literal: "lit("0t") box("012") plus),
			p("hex literal: "lit("0x") box("0-9A-Fa-f") plus),
			p("ascii literal: "lit("@") "\x1b[94m.+\x1b[m"),
			p("interpret any string (e.g. a keyword) as integer: " lit("`") box("0-9A-Za-z") plus),
		}, opts[] = {
			p("-p --prefix       : print known prefix codes on output strings"),
			p("-m --manual-prefix: specify a manual prefix to print before each number"),
			p("-l --lowercase    : prefer lowercase for case-insensitive bases"),
			p("-7 --7bit         : encode ascii in 7-bit space instead of keeping the"),
			p("                    eighth empty bit. if this option is used, ascii"),
			p("                    strings will be maximally compact, but will not match"),
			p("                    the way they are stored in computer memory!"),
			/* p("-u --utf           : allow non-ascii input"), */
		};
#	undef p
#	undef OR
#	undef plus

#	define hl_on  "\x1b[;1m" 
#	define hl_off "\x1b[m"
	/* size of the escape codes wrapped around a heading, NUL included;
	 * subtracting it from sizeof a heading array (which also counts a
	 * NUL) leaves the number of visible characters */
	enum { ansilen = sizeof (hl_on hl_off) };
#	define hl(x) (hl_on x hl_off)
		const char form_head []= hl("usage: ");
		const char spec_head []= hl("- spec: ");
		const char int_head  []= hl("- int: ");
		const char opt_head  []= hl("- options: ");
		const char space     []=    "           "; /* sigh */
#	undef hl
#	undef hl_on
#	undef hl_off

	/* _say(n, s)  - emit the first n bytes of s
	 * vsay(n, s)  - emit a string whose length is only known at runtime
	 * display(hd) - emit a whole char array, without its NUL
	 * pline(l)    - emit one pstr line
	 * everything goes to stderr in both backends */
#	ifdef _POSIX_IO
#		define _say(n, s)  write(2, (s), (n))
#		define vsay(n, s)  _say(n, s)
#		define display(hd) _say(sizeof (hd) - 1, (hd))
#		define pline(l)    _say((l).len, (l).str)
#	else
		/* the precision of %.*s must be an int */
#		define _say(n, s)  fprintf(stderr, "%.*s", (int) (n), (s))
#		define display(hd) fprintf(stderr, "%s", (hd))
		/* the length is dropped unevaluated (namelen is POSIX-only) */
#		define vsay(n, s)  display(s)
#		define pline(l)    display(l)
#	endif

	/* space(x): x blanks; glow(x): run x between colour-on/colour-off;
	 * section(x,prefix): heading x_head, then each line of the array
	 * xs, with `prefix` executed in front of every line */
#	define space(x) _say((x), space)
#	define glow(x) say("\x1b[95m"); { x; } say("\x1b[m")
#	define section(x,prefix) display(x##_head); \
		for (size_t i = 0; i < sz(x##s); ++ i) { \
			if (i > 0) space(sizeof x##_head - ansilen); \
			{ prefix; } \
			pline(x##s[i]); \
		}

	section(form, glow(vsay(namelen, name)); space(1));
	section(spec,);
	section(int,);
	section(opt,);
}

/* main - entry point.
 *  with no arguments at all the usage text is printed and the program
 *  exits with status 0. otherwise the arguments are handed to run();
 *  on failure the matching error_list description is printed on
 *  stderr via say() and the error code becomes the exit status. */
int main(int argc, const char** argv) {
	if (argc == 0) return -1;
	if (argc == 1) {
		/* stop here: there are no values for run() to convert */
		usage(argv[0]);
		return 0;
	}

	bad status = run(argc, argv);
	switch (status) {
		case ok: return 0;
		case fail: return -1;
		/* one case per error_list row: report it, then exit with it */
#		define e(kind, desc) case bad_##kind:\
				 say("\x1b[31;1merror:\x1b[m "); say(desc "\n"); return status;
			error_list
#		undef e
	}
	/* only reachable with a status outside enum bad */
	return -1;
}