util  Check-in [72068307da]

Overview
Comment:add ord.c
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA3-256: 72068307daf49bd5c01a31514ea5b644819726488935f3266f6a70b39245312b
User & Date: lexi on 2019-07-19 06:00:34
Other Links: manifest | tags
Context
2019-07-19
06:06
fix typo check-in: a38de374f1 user: lexi tags: trunk
06:00
add ord.c check-in: 72068307da user: lexi tags: trunk
2019-07-13
09:05
PUTTING COMPUTER AWAY check-in: 95bd59918c user: lexi tags: trunk
Changes

Added ord.c version [478119c506].

            1  +/* [ʞ] ord.c - integer converter
            2  + *  ~ lexi hale <lexi@hale.su>
            3  + *  © AGPLv3
            4  + *  * ord has no dependencies except for libc.
            5  + *  ? ord converts integers to ascii characters
            6  + *    and back. written because the only fucking
            7  + *    way to do this in shell is FUCKING PRINTF.
            8  + *  $ cc ord.c -o ord [-D_IO=(LIBC|POSIX)]
            9  + *  	- the flag D_IO will instruct ord.c whether
           10  + *  	  to use POSIX io primitives (write and read)
           11  + *  	  instead of libc primitives (printf). if
           12  + *  	  you're on a UNIX system, POSIX primitives
           13  + *  	  will be used by default, but you can block
           14  + *  	  them with LIBC or force them with POSIX.
           15  + *  	  if you are on a POSIX-compliant system,
           16  + *  	  you *should* use POSIX IO, for improved
           17  + *  	  performance and safety. */
           18  +
           19  +#if (defined(__unix__) && _IO != LIBC) || (_IO == POSIX)
           20  +#	define _POSIX_IO
           21  +#endif
           22  +
           23  +#ifdef _POSIX_IO
           24  +#	include <unistd.h>
           25  +#	define say(x) (write(2, (x), (sizeof (x))))
           26  +#	define print(sz,x) (write(1, (x), (sz)))
           27  +#	define forposix(x) x
           28  +#	define forlibc(x)
           29  +#else
           30  +#	include <stdio.h>
           31  +#	define say(x) (fprintf(stderr, (x)))
           32  +#	define print(x) (printf("%s",(x)))
           33  +#	define forposix(x)
           34  +#	define forlibc(x) x
           35  +#endif
           36  +#include <stddef.h>
           37  +#include <stdint.h>
           38  +#include <string.h>
           39  +#include <limits.h>
           40  +#define sz(x) ( sizeof (x) / sizeof (x) [0] )
           41  +
           42  +enum /* constants */ {
           43  +	null = 0,
           44  +
           45  +	/* ascii address space */
           46  +	numspace        = (0x39 - 0x30) + 1, /* 10 */
           47  +	alphaspace      = (0x5a - 0x41) + 1, /* 26 */
           48  +	smallalphaspace = (0x7a - 0x61) + 1, /* 26 */
           49  +
           50  +	/* base representations */
           51  +	imaxbase = numspace + alphaspace,    /* 36 */
           52  +	maxbase = imaxbase + smallalphaspace /* 62 */
           53  +};
           54  +
           55  +typedef unsigned long long word;
           56  +typedef _Bool bool;
           57  +enum { false = 0, true = 1 };
           58  +
           59  +typedef struct pair { uint8_t val; const char* str; } pair;
           60  +
           61  +#define error_list \
           62  +	e(domain, "bad argument passed for domain") \
           63  +	e(find, "could not find key in table") \
           64  +	e(syntax, "invalid syntax") \
           65  +	e(base, "that base is out of range") \
           66  +	e(overflow, "a memory overflow has occurred") \
           67  +	e(ebcdic, "nice try, mr ibm-san")
           68  +
           69  +typedef enum bad {
           70  +	ok = 0, fail = 1,
           71  +#	define e(name, desc) bad_##name,
           72  +		error_list
           73  +#	undef e
           74  +} bad;
           75  +
           76  +bad tblget(size_t stacksz, const pair* haystack, const char* needle, uint8_t* val) {
           77  +	for (size_t i = 0; i<stacksz; ++i) {
           78  +		if (strcmp(haystack[i].str, needle) == ok) {
           79  +			*val = haystack[i].val;
           80  +			return ok;
           81  +		}
           82  +	}
           83  +	return bad_find;
           84  +}
           85  +
           86  +enum argument {
           87  +	arg_to, arg_set, arg_base,
           88  +
           89  +	arg_asc,
           90  +
           91  +	arg_bin, arg_trn, arg_oct, arg_dec,
           92  +	arg_duo, arg_hex, arg_b32, arg_b64,
           93  +
           94  +	switch_prefix, param_prefix,
           95  +	switch_lowercase,
           96  +
           97  +	arg_ebcdic,
           98  +};
           99  +
          100  +word bases[] = {
          101  +	[arg_asc] =  0,
          102  +	[arg_bin] =  2,
          103  +	[arg_trn] =  3,
          104  +	[arg_oct] =  8,
          105  +	[arg_dec] = 10,
          106  +	[arg_duo] = 12,
          107  +	[arg_hex] = 16,
          108  +	[arg_b32] = 32,
          109  +};
          110  +
          111  +const char* prefixes [] = { null,
          112  +	[ 0] = "\1" "@",
          113  +	[ 2] = "\2" "0b",
          114  +	[ 3] = "\2" "0t",
          115  +	[ 8] = "\1" "0",
          116  +	[12] = "\2" "0d",
          117  +	[16] = "\2" "0x",
          118  +};
          119  +
          120  +const pair argtbl[] = {
          121  +	{arg_to, "to"},
          122  +	{arg_base, "base"},
          123  +
          124  +	{arg_set, "--"}, {arg_set, "raw"},
          125  +
          126  +	{arg_asc, "asc"}, {arg_asc, "ascii"},
          127  +
          128  +	{arg_bin, "bin"}, {arg_bin, "binary"},
          129  +	{arg_trn, "trn"}, {arg_trn, "tern"}, {arg_trn, "ternary"}, {arg_trn, "trinary"},
          130  +	{arg_oct, "oct"}, {arg_oct, "octal"},
          131  +	{arg_dec, "dec"}, {arg_dec, "decimal"},
          132  +	{arg_duo, "duo"}, {arg_duo, "duodecimal"},
          133  +	{arg_hex, "hex"}, {arg_hex, "hexadecimal"},
          134  +
          135  +	{arg_hex, "b32"}, {arg_hex, "base32"}, /* not padded! */
          136  +
          137  +	{switch_prefix, "-p"}, {switch_prefix, "--prefix"},
          138  +	{switch_lowercase, "-l"}, {switch_lowercase, "--lowercase"},
          139  +	{param_prefix, "-m"}, {param_prefix, "--manual-prefix"},
          140  +
          141  +	{arg_ebcdic, "ebcdic"},
          142  +};
          143  +
          144  +bad asctoi(const char* s, word* ret) {
          145  +	word val = 0;
          146  +	enum { base = 128 };
          147  +
          148  +	for (;*s!=null;++s) {
          149  +		uint8_t v = *s;
          150  +		if (v > base) return bad_domain;
          151  +
          152  +		val *= base;
          153  +		val += v;
          154  +	}
          155  +
          156  +	*ret = val;
          157  +	return ok;
          158  +}
          159  +
          160  +bad atoi(word base, const char* s, word* ret) {
          161  +	/* s must be a null-terminated ASCII numeral string */
          162  +	if (base > maxbase) return bad_base;
          163  +
          164  +	/* override the default base if it's a basèd literal */
          165  +	if (s[0] == '@' || base == 0) return asctoi(s + (s[0]=='@'),ret);
          166  +	else if (s[0] == '0' && s[1] == 'x') base = 16, s += 2;
          167  +	else if (s[0] == '0' && s[1] == 'd') base = 10, s += 2;
          168  +	else if (s[0] == '0' && s[1] == 'b') base =  2, s += 2;
          169  +	else if (s[0] == '0' && s[1] == 't') base =  3, s += 2;
          170  +	else if (s[0] == '0')                base =  8, s += 1;
          171  +
          172  +	bool insens = (base <= imaxbase);
          173  +	word val = 0;
          174  +
          175  +	for (;*s!=null;++s) {
          176  +		uint8_t v = *s;
          177  +		if(v >= 0x30 && v <= 0x39) v -= 0x30; else {
          178  +			if(v >= 0x61 && v <= 0x7a) {
          179  +				if (insens) v -= 0x20; else {
          180  +					v = numspace + alphaspace + (v - 0x61);
          181  +					goto checkval;
          182  +				}
          183  +			}
          184  +			if(v >= 0x41 && v <= 0x5a) v = numspace + (v - 0x41);
          185  +				else return bad_domain;
          186  +		}
          187  +		checkval: if (v >= base) return bad_domain;
          188  +
          189  +		val *= base;
          190  +		val += v;
          191  +	}
          192  +
          193  +	*ret = val;
          194  +	return ok;
          195  +}
          196  +
          197  +/* needed for efficiency's sake, but really sucky -
          198  + * this table needs to be kept in sync with the
          199  + * itoa algorithm by hand. unfortunately, given C's
          200  + * abject lack of metaprogramming, we have to do this
          201  + * by hand. */
          202  +const char baseref[] = /* numerals[10] */ "0123456789"
          203  +	/* bigalpha[26] */ "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
          204  +	/* smallalpha[26] */ "abcdefghijklmnopqrstuvwxyz";
          205  +_Static_assert (sizeof baseref - 1 == maxbase);
          206  +
          207  +bad itoasc(word val, const char* buf_start, char* buf_end, char** newbuf) {
          208  +	char* ptr = buf_end;
          209  +
          210  +	*ptr-- = 0;
          211  +	while(val > 0) {
          212  +		if (ptr < buf_start) return bad_overflow;
          213  +		word rem = val % 128;
          214  +		val /= 128;
          215  +		*ptr-- = (char)rem;
          216  +	}
          217  +
          218  +	if (newbuf != null) *newbuf = ptr + 1;
          219  +	return ok;
          220  +}
          221  +
          222  +bool lowercase = false;
          223  +bad itoa(word base, word val, const char* buf_start,
          224  +		char* buf_end, char** newbuf) {
          225  +
          226  +	char* ptr = buf_end;
          227  +
          228  +	if (base > maxbase) return bad_base;
          229  +	if (base == 0) return itoasc(val, buf_start, buf_end, newbuf);
          230  +
          231  +	*ptr-- = 0;
          232  +	while(val > 0) {
          233  +		if (ptr < buf_start) return bad_overflow;
          234  +		word rem = val % base;
          235  +		val /= base;
          236  +		char out = baseref[rem];
          237  +		if (lowercase && base < imaxbase)
          238  +			if (out >= 'A' && out <= 'Z')
          239  +				out += ('a' - 'A');
          240  +		*ptr-- = out;
          241  +	}
          242  +
          243  +	if (newbuf != null) *newbuf = ptr + 1;
          244  +	return ok;
          245  +}
          246  +
          247  +bad run(const int argc, const char** argv) {
          248  +#	ifndef _POSIX_IO
          249  +		/* fuck your buffering, it only ever makes
          250  +		 * things worse */
          251  +		setvbuf(stdout,null,_IONBF);
          252  +#	endif
          253  +	word rv;
          254  +	
          255  +	enum { set_in, set_out, _set_sz } curset = set_in;
          256  +	word base[_set_sz] = { 10, 0 };
          257  +
          258  +	const char* in_vals[argc]; *in_vals = null; /* null-terminated! */
          259  +	const char** invalp = in_vals;
          260  +	const char* pfxstr;
          261  +	forposix(size_t pfxstrlen);
          262  +
          263  +	
          264  +	bool raw = false;
          265  +	bool prefix = false;
          266  +
          267  +	for (const char** arg = argv + 1; *arg != null; ++arg) {
          268  +		uint8_t tblval;
          269  +		if (*arg[0] == '%') { ++ *arg; goto number; } else
          270  +		if (!raw && (tblget(sz(argtbl),argtbl, *arg, &tblval) == ok)) {
          271  +			enum argument symbol = (enum argument) tblval;
          272  +			switch (symbol) {
          273  +				case arg_to: {
          274  +					if (curset == set_out) return bad_syntax;
          275  +					else curset = set_out;
          276  +				} break;
          277  +
          278  +				/* treat all further arguments as numbers */
          279  +				case arg_set: { raw = true; } break;
          280  +				case arg_ebcdic: { return bad_ebcdic; } break;
          281  +
          282  +				/* specify base with numeral */
          283  +				case arg_base: {
          284  +					if (arg[1] == null) return bad_syntax;
          285  +					word basekind;
          286  +					bad e = atoi(10, arg[1], &basekind);
          287  +					if (e == ok) {
          288  +						if (basekind > maxbase) return bad_base;
          289  +						base[curset] = basekind;
          290  +					} else return e;
          291  +					++arg;
          292  +				} break;
          293  +
          294  +				/* specify an output prefix */
          295  +				case param_prefix: {
          296  +					if (arg[1] == null) return bad_syntax;
          297  +					prefix = true; pfxstr = arg[1];
          298  +					forposix(pfxstrlen = strlen(pfxstr));
          299  +					++arg;
          300  +				} break;
          301  +
          302  +				/* specify an automatic output prefix */
          303  +				case switch_prefix: { prefix = true; pfxstr = null; } break;
          304  +				case switch_lowercase: { lowercase = true; } break;
          305  +
          306  +				default: {
          307  +					/* assume base shorthand */
          308  +					base[curset] = bases[symbol];
          309  +				}
          310  +			}
          311  +		} else /* bad_find */ number: {
          312  +			/* we assume it's a number - error checking will
          313  +			 * happen once we know how to interpret it */
          314  +			*invalp++=*arg; *invalp=null;
          315  +		}
          316  +	}
          317  +
          318  +	/* if an ascii string was passed, change to hexadecimal output */
          319  +	if (base[set_in] == 0 && curset != set_out) base[set_out] = 16;
          320  +
          321  +	size_t max_numeral_len = 0;
          322  +	/* 0 = ascii rep (0 .. 127); one char = 7 bits */
          323  +	if (base[set_out] ==  0) max_numeral_len = (sizeof(word) * CHAR_BIT) / 7; else
          324  +	if (base[set_out] ==  1) max_numeral_len = 1024; /* pls don't */ else
          325  +	/* note for unary: actual max is ((word) -1) but we cannot actually allocate
          326  +	 * that much fucking memory, so we limit to 1KiB and crash if it needs more */
          327  +	if (base[set_out] <=  2) max_numeral_len = (sizeof(word) * CHAR_BIT); else
          328  +	if (base[set_out] <=  8) max_numeral_len = (sizeof(word) * CHAR_BIT) / 3; else
          329  +	if (base[set_out] <= 16) max_numeral_len = (sizeof(word) * CHAR_BIT) / 4; else
          330  +	/* (base[set_out] <= 32) */ max_numeral_len = (sizeof(word) * CHAR_BIT) / 5;
          331  +
          332  +	/* this is i think the only sane-ish way to do it that
          333  +	 * doesn't involve *shudder* logarithms
          334  +		TODO: find a better way to do this??? */
          335  +
          336  +	size_t bufmax = (invalp - in_vals) * max_numeral_len;
          337  +	char buf [bufmax];
          338  +	char* ptr = (buf + bufmax) - 1;
          339  +	forposix(char* lastptr = ptr);
          340  +
          341  +	for (const char** s = in_vals; *s != null; ++s) {
          342  +		word val;
          343  +		bad e = atoi(base[set_in], *s, &val);
          344  +		if (e == ok) {
          345  +			bad e = itoa(base[set_out], val, buf, ptr, &ptr);
          346  +
          347  +			if (prefix) {
          348  +				if (pfxstr != null) { print(pfxstrlen, pfxstr); }
          349  +				else if (base[set_out] < sz(prefixes)) {
          350  +					print((size_t)prefixes[base[set_out]][0],
          351  +							prefixes[base[set_out]] + 1);
          352  +				}
          353  +			}
          354  +			print(lastptr-ptr, ptr);
          355  +			print(1, "\n");
          356  +			forposix(lastptr = ptr);
          357  +		} else {
          358  +			return e;
          359  +		}
          360  +	}
          361  +}
          362  +
          363  +void usage(const char* name) {
          364  +#	ifdef _POSIX_IO
          365  +		typedef struct pstr { size_t len; const char* str; } pstr;
          366  +#		define p(x) {sizeof (x "\n"), (x "\n")}
          367  +		size_t namelen = strlen(name);
          368  +#	else
          369  +		typedef const char* pstr;
          370  +#		define p(x) (x "\n")
          371  +#	endif
          372  +#	define OR "\x1b[34m|\x1b[93m" 
          373  +#	define plus "\x1b[94m+\x1b[m"
          374  +#	define par(s) "<\x1b[4m" s "\x1b[24m>"
          375  +#	define lit(l) "\x1b[3m" l "\x1b[23m"
          376  +#	define box(s) "\x1b[94m[\x1b[93m" s "\x1b[94m]\x1b[m"
          377  +		const pstr forms[] = {
          378  +			p(box(box("options") " " par("in:spec")) " " par("value:int") plus " "
          379  +					box(lit("to") " " box(par("out:spec")))),
          380  +			p(box(box("options") par("in:spec")) " " box(lit("to") " " box(par("out:spec")))
          381  +					" " lit("--") " " par("value:int") plus),
          382  +		}, specs[] = {
          383  +			p(box(lit("bin") OR lit("tern") OR lit("oct")
          384  +					OR lit("dec") OR lit("hex") OR
          385  +					lit("base") " " box("0-9") plus OR "asc")),
          386  +		}, ints[] = {
          387  +			p("default base: \x1b[94m.+\x1b[m"),
          388  +			p("binary literal: "lit("0b") box("01") plus),
          389  +			p("ternary literal: "lit("0t") box("012") plus),
          390  +			p("hex literal: "lit("0x") box("0-9A-Fa-f") plus),
          391  +			p("ascii literal: "lit("@") "\x1b[94m.+\x1b[m"),
          392  +			p("interpret any string (e.g. a keyword) as integer: " lit("%") box("0-9A-Za-z") plus),
          393  +		}, opts[] = {
          394  +			p("-p --prefix       : print known prefix codes on output strings"),
          395  +			p("-m --manual-prefix: specify a manual prefix to print before each number"),
          396  +			p("-l --lowercase    : prefer lowercase for case-insensitive bases"),
          397  +		};
          398  +#	undef p
          399  +#	undef OR
          400  +#	undef plus
          401  +
          402  +#	define hl_on  "\x1b[1m" 
          403  +#	define hl_off "\x1b[21m"
          404  +	enum { ansilen = sizeof (hl_on hl_off) };
          405  +#	define hl(x) (hl_on x hl_off)
          406  +		const char form_head []= hl("usage: ");
          407  +		const char spec_head []= hl("- spec: ");
          408  +		const char int_head  []= hl("- int: ");
          409  +		const char opt_head  []= hl("- options: ");
          410  +		const char space     []=    "           "; /* sigh */
          411  +#	undef hl
          412  +#	undef hl_on
          413  +#	undef hl_off
          414  +
          415  +#	ifdef _POSIX_IO
          416  +#		define _say(sz, s) write(2, (s), (sz));
          417  +#		define vsay _say
          418  +#		define display(hd) _say(sizeof (hd), (hd));
          419  +#		define pline(l)    _say((l).len, (l).str);
          420  +#	else
          421  +#		define _say(sz, s) printf("%.*s", sz, s);
          422  +#		define display(hd) printf("%s",(hd));
          423  +#		define vsay(sz, s) display(s)
          424  +#		define pline(l)    display(l);
          425  +#	endif
          426  +
          427  +#	define space(x) _say(x, space);
          428  +#	define glow(x) say("\x1b[95m"); { x }; say("\x1b[m");
          429  +#	define section(x,prefix) display(x##_head); \
          430  +		for(size_t i = 0; i < sz(x##s); ++ i) { \
          431  +		if (i>0) space(sizeof x##_head - ansilen); \
          432  +			{ prefix; }; pline(x##s[i]); }
          433  +
          434  +	section(form,glow(vsay(namelen, name)); space(1));
          435  +	section(spec,);
          436  +	section(int,);
          437  +	section(opt,);
          438  +}
          439  +
          440  +int main(int argc, const char** argv) {
          441  +	if (argc == 0) return -1;
          442  +	if (argc == 1) usage(argv[0]);
          443  +	bad e = run(argc, argv);
          444  +	switch (e) {
          445  +		case ok: return 0;
          446  +#		define e(kind, desc) case bad_##kind:\
          447  +				 say("\x1b[31;1merror:\x1b[m "); say(desc "\n"); break;
          448  +			error_list
          449  +#		undef e
          450  +	}
          451  +}