/* [ʞ] ord.c - integer converter
 *  ~ lexi hale
 *  © AGPLv3
 *  * ord has no dependencies except for libc.
 *  ? ord converts integers to ascii characters
 *    and back. written because the only fucking
 *    way to do this in shell is FUCKING PRINTF.
 *  $ cc ord.c -o ord [-D_IO=(LIBC|POSIX)]
 *  - the flag D_IO will instruct ord.c whether
 *    to use POSIX io primitives (write and read)
 *    instead of libc primitives (printf). if
 *    you're on a UNIX system, POSIX primitives
 *    will be used by default, but you can block
 *    them with LIBC or force them with POSIX.
 *    if you are on a POSIX- compliant system,
 *    you *should* use POSIX IO, for improved
 *    performance and safety. */

/* Backend selection. LIBC and POSIX must be real macros while the #if is
 * evaluated: an identifier that is still undefined inside #if evaluates to
 * 0, which would make `_IO == POSIX` true for every build. */
#define LIBC  1
#define POSIX 2
#if (_IO == POSIX) || (defined(__unix__) && _IO != LIBC)
#	define _POSIX_IO
#endif
#undef LIBC
#undef POSIX

/* NOTE(review): the header names below were missing from the text under
 * review (angle-bracketed text had been stripped); they are reconstructed
 * from the identifiers the file uses. */
#ifdef _POSIX_IO
#	include <unistd.h>
	/* say: write a string LITERAL to stderr (sizeof minus the NUL) */
#	define say(x) (write(2, (x), (sizeof (x)) - 1))
	/* print: write sz bytes starting at x to stdout */
#	define print(sz,x) (write(1, (x), (sz)))
#	define forposix(x) x
#	define forlibc(x)
#else
#	include <stdio.h>
#	define say(x) (fputs((x), stderr))
#	define print(sz,x) (fwrite((x), 1, (sz), stdout))
#	define forposix(x)
#	define forlibc(x) x
#endif

#include <stdint.h>
#include <stddef.h>
#include <string.h>
#include <limits.h>

/* number of elements in a true array (not a pointer) */
#define sz(x) ( sizeof (x) / sizeof (x) [0] )

enum /* constants */ {
	null = 0,

	/* ascii address space */
	numspace = (0x39 - 0x30) + 1, /* 10 */
	alphaspace = (0x5a - 0x41) + 1, /* 26 */
	smallalphaspace = (0x7a - 0x61) + 1, /* 26 */

	/* base representations */
	imaxbase = numspace + alphaspace, /* 36 */
	maxbase = imaxbase + smallalphaspace /* 62 */
};

typedef unsigned long long word;

/* bool, true and false are keywords from C23 onward and cannot be redeclared */
#if !defined(__STDC_VERSION__) || __STDC_VERSION__ < 202311L
typedef _Bool bool;
enum { false = 0, true = 1 };
#endif

/* one keyword-table entry: the keyword text and the value it maps to */
typedef struct pair { uint8_t val; const char* str; } pair;

#define error_list \
	e(domain, "bad argument passed for domain") \
	e(find, "could not find key in table") \
	e(syntax, "invalid syntax") \
	e(base, "that base is out of range") \
	e(overflow, "a memory overflow has occurred") \
	e(ebcdic, "nice try, mr ibm-san")

/* status code returned by every fallible routine; ok is zero */
typedef enum bad {
	ok = 0,
	fail = 1,
#	define e(name, desc) bad_##name,
	error_list
#	undef e
} bad;

/* Look needle up in the first stacksz entries of haystack.
 * On a match stores the entry's value in *val and returns ok;
 * otherwise returns bad_find and leaves *val untouched.
 * NOTE(review): only the signature and the start of the loop survived in
 * the text under review; the body is reconstructed from the call in run()
 * and the bad_find error description. */
bad tblget(size_t stacksz, const pair* haystack, const char* needle, uint8_t* val) {
	for (size_t i = 0; i < stacksz; ++i) {
		if (strcmp(haystack[i].str, needle) == 0) {
			*val = haystack[i].val;
			return ok;
		}
	}
	return bad_find;
}

/* NOTE(review): enum argument, argtbl, bases and prefixes were missing from
 * the text under review. They are reconstructed from the names run() uses
 * and the keywords usage() documents. The spelling of the "ebcdic" keyword
 * and the exact set of base shorthands are assumptions; confirm against the
 * pristine file. */
enum argument {
	/* base shorthands; each one indexes bases[] */
	arg_asc, arg_bin, arg_tern, arg_oct, arg_dec, arg_hex,
	/* syntax keywords */
	arg_to, arg_set, arg_base, arg_ebcdic,
	/* options */
	switch_prefix, param_prefix, switch_lowercase
};

const pair argtbl[] = {
	{ arg_asc,  "asc"  }, { arg_bin, "bin" }, { arg_tern, "tern" },
	{ arg_oct,  "oct"  }, { arg_dec, "dec" }, { arg_hex,  "hex"  },
	{ arg_to,   "to"   }, { arg_set, "--"  }, { arg_base, "base" },
	{ arg_ebcdic, "ebcdic" },
	{ switch_prefix,    "-p" }, { switch_prefix,    "--prefix"        },
	{ param_prefix,     "-m" }, { param_prefix,     "--manual-prefix" },
	{ switch_lowercase, "-l" }, { switch_lowercase, "--lowercase"     },
};

/* radix selected by each base shorthand; 0 means the ascii representation */
const word bases[] = {
	[arg_asc] = 0, [arg_bin] = 2,  [arg_tern] = 3,
	[arg_oct] = 8, [arg_dec] = 10, [arg_hex]  = 16,
};

/* literal prefix for each radix that atoi() recognises one for, stored as
 * a length byte followed by the text; radixes without a prefix are null.
 * the literals are split so the length byte is not fused with a following
 * octal digit. */
const char* const prefixes[] = {
	[0]  = "\1" "@",
	[2]  = "\2" "0b",
	[3]  = "\2" "0t",
	[8]  = "\1" "0",
	[10] = "\2" "0d",
	[16] = "\2" "0x",
};

/* Pack the characters of s into an integer, 7 bits per character, first
 * character most significant. Returns bad_domain if a byte is outside
 * 0..127, since itoasc() only ever emits 7-bit digits. Arithmetic is
 * unsigned, so strings too long for a word wrap silently. */
bad asctoi(const char* s, word* ret) {
	enum { base = 128 };
	word val = 0;
	for (; *s != null; ++s) {
		uint8_t v = (uint8_t) *s;
		if (v >= base) return bad_domain;
		val *= base;
		val += v;
	}
	*ret = val;
	return ok;
}

/* Translate one numeral character into its digit value.
 * '0'-'9' are 0-9 and 'A'-'Z' are 10-35. 'a'-'z' are 10-35 when insens is
 * set and 36-61 otherwise. Returns false for any other character. */
static bool digit_of(uint8_t c, bool insens, uint8_t* digit) {
	if (c >= 0x30 && c <= 0x39) {
		*digit = (uint8_t) (c - 0x30);
		return true;
	}
	if (c >= 0x61 && c <= 0x7a) {
		if (!insens) {
			*digit = (uint8_t) (imaxbase + (c - 0x61));
			return true;
		}
		c -= 0x20; /* fold to upper case and fall through */
	}
	if (c >= 0x41 && c <= 0x5a) {
		*digit = (uint8_t) (numspace + (c - 0x41));
		return true;
	}
	return false;
}

/* Parse the null-terminated ASCII numeral s into *ret.
 *   base - default radix, 0..maxbase; 0 means "ascii literal"
 * A leading '@' (or base 0) hands the rest of the string to asctoi().
 * A leading 0x / 0d / 0b / 0t, or a bare leading 0, overrides the base
 * with 16 / 10 / 2 / 3 / 8, whatever default was requested.
 * Bases up to imaxbase are case-insensitive.
 * Returns bad_base for an out-of-range base and bad_domain for a character
 * that is not a digit of the effective base. Values too large for a word
 * wrap silently (unsigned arithmetic). */
bad atoi(word base, const char* s, word* ret) {
	if (base > maxbase) return bad_base;

	if (s[0] == '@' || base == 0)
		return asctoi(s + (s[0] == '@'), ret);

	/* override the default base if it's a based literal */
	if (s[0] == '0') {
		switch (s[1]) {
			case 'x': base = 16; s += 2; break;
			case 'd': base = 10; s += 2; break;
			case 'b': base = 2;  s += 2; break;
			case 't': base = 3;  s += 2; break;
			default:  base = 8;  s += 1; break;
		}
	}

	const bool insens = (base <= imaxbase);
	word val = 0;
	for (; *s != null; ++s) {
		uint8_t v;
		if (!digit_of((uint8_t) *s, insens, &v)) return bad_domain;
		if (v >= base) return bad_domain;
		val *= base;
		val += v;
	}

	*ret = val;
	return ok;
}

/* digit alphabet used by itoa(); it must list digits in the same order
 * atoi() assigns their values, which the assertion below partly guards */
const char baseref[] =
	/* numerals[10] */ "0123456789"
	/* bigalpha[26] */ "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	/* smallalpha[26] */ "abcdefghijklmnopqrstuvwxyz";
_Static_assert (sizeof baseref - 1 == maxbase,
	"baseref must hold exactly one character per digit value");

/* Unpack val into 7-bit characters, the inverse of asctoi().
 * Output is written right to left: a NUL goes at buf_end and the
 * characters directly before it, never below buf_start. On success
 * *newbuf (if not null) points at the first character. Returns
 * bad_overflow when the characters do not fit. A val of 0 produces the
 * empty string. */
bad itoasc(word val, const char* buf_start, char* buf_end, char** newbuf) {
	char* ptr = buf_end;
	*ptr = 0;
	while (val > 0) {
		if (ptr <= buf_start) return bad_overflow;
		*--ptr = (char) (val % 128);
		val /= 128;
	}
	if (newbuf != null) *newbuf = ptr;
	return ok;
}

/* when set, itoa() emits a-z instead of A-Z for case-insensitive bases */
bool lowercase = false;

/* Render val as a numeral in the given base.
 *   base - 0 delegates to itoasc(); 1 is unary (val tally marks);
 *          2..maxbase use the baseref alphabet
 * Output is written right to left: a NUL goes at buf_end and the digits
 * directly before it, never below buf_start. On success *newbuf (if not
 * null) points at the first digit. Zero renders as "0", except in unary
 * where it is the empty string.
 * Returns bad_base for base > maxbase and bad_overflow when the numeral
 * does not fit between buf_start and buf_end. */
bad itoa(word base, word val, const char* buf_start, char* buf_end, char** newbuf) {
	if (base > maxbase) return bad_base;
	if (base == 0) return itoasc(val, buf_start, buf_end, newbuf);

	char* ptr = buf_end;
	*ptr = 0;

	if (base == 1) {
		/* dividing by 1 never reaches zero, so count down instead */
		for (; val > 0; --val) {
			if (ptr <= buf_start) return bad_overflow;
			*--ptr = baseref[0];
		}
	} else {
		/* do-while so that zero still yields one digit */
		do {
			if (ptr <= buf_start) return bad_overflow;
			char out = baseref[val % base];
			val /= base;
			/* same case-insensitivity bound as atoi() */
			if (lowercase && base <= imaxbase && out >= 'A' && out <= 'Z')
				out += ('a' - 'A');
			*--ptr = out;
		} while (val > 0);
	}

	if (newbuf != null) *newbuf = ptr;
	return ok;
}

/* Longest numeral itoa() can produce for any word in the given base,
 * excluding the NUL. Unary is capped at 1024 marks because the true
 * maximum, (word) -1, cannot be allocated; larger values are reported as
 * bad_overflow by itoa(). */
static size_t numeral_capacity(word base) {
	if (base == 1) return 1024;
	const word radix = (base == 0) ? 128 : base;
	size_t digits = 0;
	for (word v = (word) -1; v > 0; v /= radix) ++digits;
	return digits;
}

/* Interpret the command line and print every converted value on its own
 * line on stdout.
 *   argc, argv - as passed to main(); argv[argc] must be null
 * Arguments found in argtbl are keywords; everything else is collected as
 * a value. A leading '%' forces an argument to be a value, and "--" turns
 * all later arguments into values. Keywords seen before "to" configure the
 * input base, those after it the output base.
 * Returns ok, or the first error raised while parsing or converting. */
bad run(const int argc, const char** argv) {
#	ifndef _POSIX_IO
		/* keep libc output unbuffered, like the POSIX backend */
		setvbuf(stdout, null, _IONBF, 0);
#	endif

	enum { set_in, set_out, set_count } curset = set_in;
	word base[set_count] = { 10, 0 };

	/* at most argc - 1 values plus the terminating null */
	const char* in_vals[argc];
	const char** invalp = in_vals;
	*invalp = null;

	const char* pfxstr = null;
	size_t pfxstrlen = 0;
	bool raw = false;
	bool prefix = false;

	for (const char** arg = argv + 1; *arg != null; ++arg) {
		uint8_t tblval;
		if ((*arg)[0] == '%') {
			++*arg; /* drop the escape; what follows is a value */
		} else if (!raw && tblget(sz(argtbl), argtbl, *arg, &tblval) == ok) {
			const enum argument symbol = (enum argument) tblval;
			switch (symbol) {
				case arg_to:
					if (curset == set_out) return bad_syntax;
					curset = set_out;
					break;

				/* treat all further arguments as numbers */
				case arg_set:
					raw = true;
					break;

				case arg_ebcdic:
					return bad_ebcdic;

				/* specify base with numeral */
				case arg_base: {
					if (arg[1] == null) return bad_syntax;
					word basekind;
					const bad err = atoi(10, arg[1], &basekind);
					if (err != ok) return err;
					if (basekind > maxbase) return bad_base;
					base[curset] = basekind;
					++arg; /* consume the numeral */
				} break;

				/* specify an output prefix */
				case param_prefix:
					if (arg[1] == null) return bad_syntax;
					prefix = true;
					pfxstr = arg[1];
					pfxstrlen = strlen(pfxstr);
					++arg; /* consume the prefix text */
					break;

				/* specify an automatic output prefix */
				case switch_prefix:
					prefix = true;
					pfxstr = null;
					break;

				case switch_lowercase:
					lowercase = true;
					break;

				/* everything else is a base shorthand */
				default:
					base[curset] = bases[symbol];
			}
			continue;
		}

		/* not a keyword, so assume it's a number. error checking
		 * happens once we know how to interpret it */
		*invalp++ = *arg;
		*invalp = null;
	}

	/* if an ascii string was passed and no output spec was given,
	 * change to hexadecimal output */
	if (base[set_in] == 0 && curset != set_out) base[set_out] = 16;

	/* every value is printed as soon as it is converted, so a single
	 * buffer sized for the longest possible numeral (plus NUL) is enough */
	const word outbase = base[set_out];
	const size_t cap = numeral_capacity(outbase);
	char buf[cap + 1];
	char* const buf_end = buf + cap;

	for (const char** s = in_vals; *s != null; ++s) {
		word val;
		bad err = atoi(base[set_in], *s, &val);
		if (err != ok) return err;

		char* start;
		err = itoa(outbase, val, buf, buf_end, &start);
		if (err != ok) return err;

		if (prefix) {
			if (pfxstr != null) {
				print(pfxstrlen, pfxstr);
			} else if (outbase < sz(prefixes) && prefixes[outbase] != null) {
				/* first byte is the length, the text follows it */
				print((size_t) prefixes[outbase][0], prefixes[outbase] + 1);
			}
		}
		print((size_t) (buf_end - start), start);
		print(1, "\n");
	}

	return ok;
}

/* write len bytes of s to stderr through the selected backend */
static void put_err(const char* s, size_t len) {
#	ifdef _POSIX_IO
		ssize_t written = write(2, s, len);
		(void) written; /* best effort: nothing useful to do on failure */
#	else
		fwrite(s, 1, len, stderr);
#	endif
}

/* write count spaces to stderr */
static void pad_err(size_t count) {
	static const char blanks[] = "                ";
	while (count > 0) {
		const size_t chunk = count < sizeof blanks - 1 ? count : sizeof blanks - 1;
		put_err(blanks, chunk);
		count -= chunk;
	}
}

/* Print one section of the usage text to stderr: a bold heading followed
 * by count lines, with continuation lines indented to the width of the
 * heading. When name is not null it is printed, highlighted and followed
 * by a space, in front of every line. */
static void usage_section(const char* head, const char* const* lines,
                          size_t count, const char* name) {
	const size_t headlen = strlen(head);
	say("\x1b[1m");
	put_err(head, headlen);
	say("\x1b[22m"); /* SGR 22 ends bold; SGR 21 is double underline */

	for (size_t i = 0; i < count; ++i) {
		if (i > 0) pad_err(headlen);
		if (name != null) {
			say("\x1b[95m");
			put_err(name, strlen(name));
			say("\x1b[m");
			pad_err(1);
		}
		put_err(lines[i], strlen(lines[i]));
	}
}

/* Print the command synopsis to stderr.
 *   name - the program name to show in the usage forms (argv[0]) */
void usage(const char* name) {
#	define p(x) (x "\n")
#	define OR "\x1b[34m|\x1b[93m"
#	define plus "\x1b[94m+\x1b[m"
#	define par(s) "<\x1b[4m" s "\x1b[24m>"
#	define lit(l) "\x1b[3m" l "\x1b[23m"
#	define box(s) "\x1b[94m[\x1b[93m" s "\x1b[94m]\x1b[m"

	static const char* const forms[] = {
		p(box(box("options") " " par("in:spec")) " " par("value:int") plus " " box(lit("to") " " box(par("out:spec")))),
		p(box(box("options") par("in:spec")) " " box(lit("to") " " box(par("out:spec"))) " " lit("--") " " par("value:int") plus),
	};
	static const char* const specs[] = {
		p(box(lit("bin") OR lit("tern") OR lit("oct") OR lit("dec") OR lit("hex") OR lit("base") " " box("0-9") plus OR "asc")),
	};
	static const char* const ints[] = {
		p("default base: \x1b[94m.+\x1b[m"),
		p("binary literal: "lit("0b") box("01") plus),
		p("ternary literal: "lit("0t") box("012") plus),
		p("hex literal: "lit("0x") box("0-9A-Fa-f") plus),
		p("ascii literal: "lit("@") "\x1b[94m.+\x1b[m"),
		p("interpret any string (e.g. a keyword) as integer: " lit("%") box("0-9A-Za-z") plus),
	};
	static const char* const opts[] = {
		p("-p --prefix : print known prefix codes on output strings"),
		p("-m --manual-prefix: specify a manual prefix to print before each number"),
		p("-l --lowercase : prefer lowercase for case-insensitive bases"),
	};

#	undef p
#	undef OR
#	undef plus
#	undef par
#	undef lit
#	undef box

	usage_section("usage: ", forms, sz(forms), name);
	usage_section("- spec: ", specs, sz(specs), null);
	usage_section("- int: ", ints, sz(ints), null);
	usage_section("- options: ", opts, sz(opts), null);
}

/* Entry point. With no arguments prints the usage text and exits 0.
 * Otherwise runs the conversion and exits 0 on success; on failure prints
 * the error description to stderr and exits with the error code. */
int main(int argc, const char** argv) {
	if (argc == 0) return -1;
	if (argc == 1) {
		usage(argv[0]);
		return 0;
	}

	const bad err = run(argc, argv);
	switch (err) {
		case ok: return 0;
#		define e(kind, desc) case bad_##kind: \
			say("\x1b[31;1merror:\x1b[m "); say(desc "\n"); break;
		error_list
#		undef e
		default: break;
	}
	return (int) err;
}