ADDED ord.c
Index: ord.c
==================================================================
--- ord.c
+++ ord.c
@@ -0,0 +1,451 @@
+/* [ʞ] ord.c - integer converter
+ *  ~ lexi hale
+ *  © AGPLv3
+ *  * ord has no dependencies except for libc.
+ *  ? ord converts integers to ascii characters
+ *    and back. written because the only fucking
+ *    way to do this in shell is FUCKING PRINTF.
+ *  $ cc ord.c -o ord [-D_IO=(LIBC|POSIX)]
+ *  - the flag D_IO will instruct ord.c whether
+ *    to use POSIX io primitives (write and read)
+ *    instead of libc primitives (printf). if
+ *    you're on a UNIX system, POSIX primitives
+ *    will be used by default, but you can block
+ *    them with LIBC or force them with POSIX.
+ *    if you are on a POSIX-compliant system,
+ *    you *should* use POSIX IO, for improved
+ *    performance and safety. */
+/* I/O backend selection: defines _POSIX_IO when the POSIX primitives
+ * are to be used.
+ * NOTE(review): identifiers that are not macros evaluate to 0 inside
+ * an #if expression, so unless LIBC and POSIX are themselves defined
+ * to distinct values the `_IO == POSIX` term reads 0 == 0 and
+ * _POSIX_IO ends up defined on every build - confirm how the -D_IO
+ * value is meant to be compared. */
+#if (defined(__unix__) && _IO != LIBC) || (_IO == POSIX)
+# define _POSIX_IO
+#endif
+/* Output helpers, one set per backend:
+ *   say(x)      - send the string x to stderr
+ *   print(...)  - send a string to stdout
+ *   forposix(x) - keep x only in the POSIX build
+ *   forlibc(x)  - keep x only in the libc build
+ * NOTE(review): the POSIX say() passes sizeof (x), which for a string
+ * literal also counts the terminating NUL; the libc say() uses x as a
+ * format string; and print takes (sz, x) in the POSIX build but a
+ * single argument in the libc build. The header names after each
+ * include directive are missing from this copy of the file. */
+#ifdef _POSIX_IO
+# include
+# define say(x) (write(2, (x), (sizeof (x))))
+# define print(sz,x) (write(1, (x), (sz)))
+# define forposix(x) x
+# define forlibc(x)
+#else
+# include
+# define say(x) (fprintf(stderr, (x)))
+# define print(x) (printf("%s",(x)))
+# define forposix(x)
+# define forlibc(x) x
+#endif
+#include
+#include
+#include
+#include
+#define sz(x) ( sizeof (x) / sizeof (x) [0] ) /* element count of x; x must be a real array, not a pointer */
+/* Character-range sizes and numeral-base limits. The hex literals are
+ * the ASCII codes of 0 and 9, A and Z, a and z respectively. */
+enum /* constants */ {
+	null = 0, /* used throughout as both the null pointer and the string terminator */
+
+	/* ascii address space */
+	numspace = (0x39 - 0x30) + 1, /* 10 */
+	alphaspace = (0x5a - 0x41) + 1, /* 26 */
+	smallalphaspace = (0x7a - 0x61) + 1, /* 26 */
+
+	/* base representations */
+	imaxbase = numspace + alphaspace, /* 36 */
+	maxbase = imaxbase + smallalphaspace /* 62 */
+};
+/* word: the unsigned integer type every conversion works on.
+ * bool, false, true: local definitions of the usual boolean names. */
+typedef unsigned long long word;
+typedef _Bool bool;
+enum { false = 0, true = 1 };
+/* one entry of a string-keyed lookup table: the key str and its byte value */
+typedef struct pair { uint8_t val; const char* str; } pair;
+/* X-macro list of error kinds. Each e(name, description) row is expanded
+ * by whatever definition of e() is in force where error_list is used.
+ * NOTE(review): in this copy the overflow description is broken across
+ * two physical lines inside its string literal. */
+#define error_list \
+	e(domain, "bad argument passed for domain") \
+	e(find, "could not find key in table") \
+	e(syntax, "invalid syntax") \
+	e(base, "that base is out of range") \
+	e(overflow, "a 
memory overflow has occurred") \
+	e(ebcdic, "nice try, mr ibm-san")
+/* Status code shared by the routines in this file: ok, fail, then one
+ * bad_<name> value per error_list row, in list order. */
+typedef enum bad {
+	ok = 0, fail = 1,
+#	define e(name, desc) bad_##name,
+		error_list
+#	undef e
+} bad;
+/* Table lookup taking a table length (stacksz), the table (haystack), a
+ * key (needle) and an output byte (val).
+ * NOTE(review): this copy of the file is damaged from the loop condition
+ * below up to the tail of a later digit-accumulation loop. The rest of
+ * tblget and whatever followed it are not visible here, so nothing more
+ * is documented for them. */
+bad tblget(size_t stacksz, const pair* haystack, const char* needle, uint8_t* val) {
+	for (size_t i = 0; i base) return bad_domain; /* NOTE(review): text is missing after this i */
+
+		val *= base;
+		val += v;
+	}
+
+	*ret = val;
+	return ok;
+}
+/* Parse the numeral s and store its value in *ret.
+ *   base - radix to assume, at most maxbase; 0 hands s to asctoi
+ *   s    - NUL-terminated numeral, optionally carrying a literal prefix
+ *   ret  - receives the value at the end of a successful parse
+ * Returns ok, bad_base when base exceeds maxbase, bad_domain for a
+ * character that is not a digit of the effective base, or the result
+ * of asctoi for ASCII input.
+ * A prefix overrides base: @ hands the rest of s to asctoi, 0x 0d 0b 0t
+ * select 16 10 2 3, and any other leading 0 selects 8. Bases up to
+ * imaxbase read letters case-insensitively; above that, a to z are the
+ * digits that follow Z.
+ * The accumulation is unchecked, so an oversized numeral wraps around. */
+bad atoi(word base, const char* s, word* ret) {
+	/* s must be a null-terminated ASCII numeral string */
+	if (base > maxbase) return bad_base;
+
+	/* override the default base if it's a basèd literal */
+	if (s[0] == '@' || base == 0) return asctoi(s + (s[0]=='@'),ret);
+	else if (s[0] == '0' && s[1] == 'x') base = 16, s += 2;
+	else if (s[0] == '0' && s[1] == 'd') base = 10, s += 2;
+	else if (s[0] == '0' && s[1] == 'b') base = 2, s += 2;
+	else if (s[0] == '0' && s[1] == 't') base = 3, s += 2;
+	else if (s[0] == '0') base = 8, s += 1;
+
+	bool insens = (base <= imaxbase); /* letters carry no case distinction in these bases */
+	word val = 0;
+
+	for (;*s!=null;++s) {
+		uint8_t v = *s;
+		if(v >= 0x30 && v <= 0x39) v -= 0x30; else { /* decimal digit */
+			if(v >= 0x61 && v <= 0x7a) { /* lower-case letter */
+				if (insens) v -= 0x20; else { /* fold to upper case and fall through */
+					v = numspace + alphaspace + (v - 0x61); /* digit values 36 to 61 */
+					goto checkval;
+				}
+			}
+			if(v >= 0x41 && v <= 0x5a) v = numspace + (v - 0x41); /* digit values 10 to 35 */
+			else return bad_domain;
+		}
+		checkval: if (v >= base) return bad_domain;
+
+		val *= base;
+		val += v;
+	}
+
+	*ret = val;
+	return ok;
+}
+
+/* needed for efficiency's sake, but really sucky -
+ * this table needs to be kept in sync with the
+ * itoa algorithm by hand. unfortunately, given C's
+ * abject lack of metaprogramming, we have to do this
+ * by hand. 
*/ +const char baseref[] = /* numerals[10] */ "0123456789" + /* bigalpha[26] */ "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + /* smallalpha[26] */ "abcdefghijklmnopqrstuvwxyz"; +_Static_assert (sizeof baseref - 1 == maxbase); + +bad itoasc(word val, const char* buf_start, char* buf_end, char** newbuf) { + char* ptr = buf_end; + + *ptr-- = 0; + while(val > 0) { + if (ptr < buf_start) return bad_overflow; + word rem = val % 128; + val /= 128; + *ptr-- = (char)rem; + } + + if (newbuf != null) *newbuf = ptr + 1; + return ok; +} + +bool lowercase = false; +bad itoa(word base, word val, const char* buf_start, + char* buf_end, char** newbuf) { + + char* ptr = buf_end; + + if (base > maxbase) return bad_base; + if (base == 0) return itoasc(val, buf_start, buf_end, newbuf); + + *ptr-- = 0; + while(val > 0) { + if (ptr < buf_start) return bad_overflow; + word rem = val % base; + val /= base; + char out = baseref[rem]; + if (lowercase && base < imaxbase) + if (out >= 'A' && out <= 'Z') + out += ('a' - 'A'); + *ptr-- = out; + } + + if (newbuf != null) *newbuf = ptr + 1; + return ok; +} + +bad run(const int argc, const char** argv) { +# ifndef _POSIX_IO + /* fuck your buffering, it only ever makes + * things worse */ + setvbuf(stdout,null,_IONBF); +# endif + word rv; + + enum { set_in, set_out, _set_sz } curset = set_in; + word base[_set_sz] = { 10, 0 }; + + const char* in_vals[argc]; *in_vals = null; /* null-terminated! 
*/ + const char** invalp = in_vals; + const char* pfxstr; + forposix(size_t pfxstrlen); + + + bool raw = false; + bool prefix = false; + + for (const char** arg = argv + 1; *arg != null; ++arg) { + uint8_t tblval; + if (*arg[0] == '%') { ++ *arg; goto number; } else + if (!raw && (tblget(sz(argtbl),argtbl, *arg, &tblval) == ok)) { + enum argument symbol = (enum argument) tblval; + switch (symbol) { + case arg_to: { + if (curset == set_out) return bad_syntax; + else curset = set_out; + } break; + + /* treat all further arguments as numbers */ + case arg_set: { raw = true; } break; + case arg_ebcdic: { return bad_ebcdic; } break; + + /* specify base with numeral */ + case arg_base: { + if (arg[1] == null) return bad_syntax; + word basekind; + bad e = atoi(10, arg[1], &basekind); + if (e == ok) { + if (basekind > maxbase) return bad_base; + base[curset] = basekind; + } else return e; + ++arg; + } break; + + /* specify an output prefix */ + case param_prefix: { + if (arg[1] == null) return bad_syntax; + prefix = true; pfxstr = arg[1]; + forposix(pfxstrlen = strlen(pfxstr)); + ++arg; + } break; + + /* specify an automatic output prefix */ + case switch_prefix: { prefix = true; pfxstr = null; } break; + case switch_lowercase: { lowercase = true; } break; + + default: { + /* assume base shorthand */ + base[curset] = bases[symbol]; + } + } + } else /* bad_find */ number: { + /* we assume it's a number - error checking will + * happen once we know how to interpret it */ + *invalp++=*arg; *invalp=null; + } + } + + /* if an ascii string was passed, change to hexadecimal output */ + if (base[set_in] == 0 && curset != set_out) base[set_out] = 16; + + size_t max_numeral_len = 0; + /* 0 = ascii rep (0 .. 
127); one char = 7 bits */ + if (base[set_out] == 0) max_numeral_len = (sizeof(word) * CHAR_BIT) / 7; else + if (base[set_out] == 1) max_numeral_len = 1024; /* pls don't */ else + /* note for unary: actual max is ((word) -1) but we cannot actually allocate + * that much fucking memory, so we limit to 1KiB and crash if it needs more */ + if (base[set_out] <= 2) max_numeral_len = (sizeof(word) * CHAR_BIT); else + if (base[set_out] <= 8) max_numeral_len = (sizeof(word) * CHAR_BIT) / 3; else + if (base[set_out] <= 16) max_numeral_len = (sizeof(word) * CHAR_BIT) / 4; else + /* (base[set_out] <= 32) */ max_numeral_len = (sizeof(word) * CHAR_BIT) / 5; + + /* this is i think the only sane-ish way to do it that + * doesn't involve *shudder* logarithms + TODO: find a better way to do this??? */ + + size_t bufmax = (invalp - in_vals) * max_numeral_len; + char buf [bufmax]; + char* ptr = (buf + bufmax) - 1; + forposix(char* lastptr = ptr); + + for (const char** s = in_vals; *s != null; ++s) { + word val; + bad e = atoi(base[set_in], *s, &val); + if (e == ok) { + bad e = itoa(base[set_out], val, buf, ptr, &ptr); + + if (prefix) { + if (pfxstr != null) { print(pfxstrlen, pfxstr); } + else if (base[set_out] < sz(prefixes)) { + print((size_t)prefixes[base[set_out]][0], + prefixes[base[set_out]] + 1); + } + } + print(lastptr-ptr, ptr); + print(1, "\n"); + forposix(lastptr = ptr); + } else { + return e; + } + } +} + +void usage(const char* name) { +# ifdef _POSIX_IO + typedef struct pstr { size_t len; const char* str; } pstr; +# define p(x) {sizeof (x "\n"), (x "\n")} + size_t namelen = strlen(name); +# else + typedef const char* pstr; +# define p(x) (x "\n") +# endif +# define OR "\x1b[34m|\x1b[93m" +# define plus "\x1b[94m+\x1b[m" +# define par(s) "<\x1b[4m" s "\x1b[24m>" +# define lit(l) "\x1b[3m" l "\x1b[23m" +# define box(s) "\x1b[94m[\x1b[93m" s "\x1b[94m]\x1b[m" + const pstr forms[] = { + p(box(box("options") " " par("in:spec")) " " par("value:int") plus " " + box(lit("to") " 
" box(par("out:spec")))), + p(box(box("options") par("in:spec")) " " box(lit("to") " " box(par("out:spec"))) + " " lit("--") " " par("value:int") plus), + }, specs[] = { + p(box(lit("bin") OR lit("tern") OR lit("oct") + OR lit("dec") OR lit("hex") OR + lit("base") " " box("0-9") plus OR "asc")), + }, ints[] = { + p("default base: \x1b[94m.+\x1b[m"), + p("binary literal: "lit("0b") box("01") plus), + p("ternary literal: "lit("0t") box("012") plus), + p("hex literal: "lit("0x") box("0-9A-Fa-f") plus), + p("ascii literal: "lit("@") "\x1b[94m.+\x1b[m"), + p("interpret any string (e.g. a keyword) as integer: " lit("%") box("0-9A-Za-z") plus), + }, opts[] = { + p("-p --prefix : print known prefix codes on output strings"), + p("-m --manual-prefix: specify a manual prefix to print before each number"), + p("-l --lowercase : prefer lowercase for case-insensitive bases"), + }; +# undef p +# undef OR +# undef plus + +# define hl_on "\x1b[1m" +# define hl_off "\x1b[21m" + enum { ansilen = sizeof (hl_on hl_off) }; +# define hl(x) (hl_on x hl_off) + const char form_head []= hl("usage: "); + const char spec_head []= hl("- spec: "); + const char int_head []= hl("- int: "); + const char opt_head []= hl("- options: "); + const char space []= " "; /* sigh */ +# undef hl +# undef hl_on +# undef hl_off + +# ifdef _POSIX_IO +# define _say(sz, s) write(2, (s), (sz)); +# define vsay _say +# define display(hd) _say(sizeof (hd), (hd)); +# define pline(l) _say((l).len, (l).str); +# else +# define _say(sz, s) printf("%.*s", sz, s); +# define display(hd) printf("%s",(hd)); +# define vsay(sz, s) display(s) +# define pline(l) display(l); +# endif + +# define space(x) _say(x, space); +# define glow(x) say("\x1b[95m"); { x }; say("\x1b[m"); +# define section(x,prefix) display(x##_head); \ + for(size_t i = 0; i < sz(x##s); ++ i) { \ + if (i>0) space(sizeof x##_head - ansilen); \ + { prefix; }; pline(x##s[i]); } + + section(form,glow(vsay(namelen, name)); space(1)); + section(spec,); + 
section(int,); + section(opt,); +} + +int main(int argc, const char** argv) { + if (argc == 0) return -1; + if (argc == 1) usage(argv[0]); + bad e = run(argc, argv); + switch (e) { + case ok: return 0; +# define e(kind, desc) case bad_##kind:\ + say("\x1b[31;1merror:\x1b[m "); say(desc "\n"); break; + error_list +# undef e + } +}