/* [ʞ] ord.c - integer converter
* ~ lexi hale <lexi@hale.su>
* © AGPLv3
* * ord has no dependencies except for libc.
* ? ord converts integers to ascii characters
* and back. written because the only fucking
* way to do this in shell is FUCKING PRINTF.
* $ cc ord.c -o ord [-D_IO=(LIBC|POSIX)]
* - the flag D_IO will instruct ord.c whether
* to use POSIX io primitives (write and read)
* instead of libc primitives (printf). if
* you're on a UNIX system, POSIX primitives
* will be used by default, but you can block
* them with LIBC or force them with POSIX.
* if you are on a POSIX-compliant system,
* you *should* use POSIX IO, for improved
* performance and safety. */
/* io backend selection.
 * _IO may be set on the command line to LIBC or POSIX. the two names
 * must be given distinct values before they are compared: inside #if
 * any identifier that is not a macro evaluates to 0, so without these
 * definitions `_IO == POSIX` would always be true and the flag would
 * be ignored. */
#define LIBC  1
#define POSIX 2
#ifndef _IO
# define _IO 0 /* no preference given: decide by platform */
#endif
#if (defined(__unix__) && _IO != LIBC) || (_IO == POSIX)
# define _POSIX_IO
#endif
#undef LIBC
#undef POSIX
/* say(x)      - write the string literal x to stderr
 * print(sz,x) - write string x to stdout; sz is its length in bytes and
 *               is only evaluated by the POSIX backend
 * forposix(x) - expands to x only under the POSIX backend
 * forlibc(x)  - expands to x only under the libc backend */
#ifdef _POSIX_IO
# include <unistd.h>
  /* sizeof counts the literal's NUL terminator, which must not be written */
# define say(x) (write(2, (x), (sizeof (x)) - 1))
# define print(sz,x) (write(1, (x), (sz)))
# define forposix(x) x
# define forlibc(x)
#else
# include <stdio.h>
  /* never pass the message itself as the format string */
# define say(x) (fprintf(stderr, "%s", (x)))
  /* sz is deliberately unused: call sites pass expressions that only
   * exist under the POSIX backend */
# define print(sz,x) (printf("%s",(x)))
# define forposix(x)
# define forlibc(x) x
#endif
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <limits.h>
/* number of elements in a true array (not a pointer) */
#define sz(x) (sizeof (x) / sizeof ((x)[0]))
enum /* constants */ {
    /* used throughout both as the null pointer constant and as NUL */
    null = 0,

    /* sizes of the three ascii digit ranges */
    numspace        = 1 + (0x39 - 0x30), /* '0'..'9' : 10 */
    alphaspace      = 1 + (0x5a - 0x41), /* 'A'..'Z' : 26 */
    smallalphaspace = 1 + (0x7a - 0x61), /* 'a'..'z' : 26 */

    /* largest case-insensitive base: digits + one alphabet (36) */
    imaxbase = numspace + alphaspace,
    /* largest base overall: digits + both alphabets (62) */
    maxbase  = imaxbase + smallalphaspace
};
/* the integer type every conversion works in */
typedef unsigned long long word;

/* hand-rolled boolean, so <stdbool.h> is not needed */
typedef _Bool bool;
enum { false = 0, true = 1 };

/* one row of a keyword table: the text and the small value it maps to */
typedef struct pair {
    uint8_t     val;
    const char* str;
} pair;

/* x-macro listing every error kind with its user-facing message;
 * expand it with a local definition of e(name, desc) */
#define error_list \
    e(domain,   "bad argument passed for domain") \
    e(find,     "could not find key in table") \
    e(syntax,   "invalid syntax") \
    e(base,     "that base is out of range") \
    e(overflow, "a memory overflow has occurred") \
    e(ebcdic,   "nice try, mr ibm-san")

/* status code returned by the conversion routines: ok, the generic
 * fail, then one bad_<name> per error_list entry, in list order */
typedef enum bad {
    ok   = 0,
    fail = 1,
#   define e(name, desc) bad_##name,
    error_list
#   undef e
} bad;
/* tblget: linear search of a keyword table.
 *  stacksz  - number of rows in haystack
 *  haystack - the table to search
 *  needle   - NUL-terminated key, compared with strcmp
 *  val      - receives the value of the first matching row
 * returns ok on a match; bad_find (leaving *val untouched) otherwise. */
bad tblget(size_t stacksz, const pair* haystack, const char* needle, uint8_t* val) {
    const pair* const end = haystack + stacksz;
    for (const pair* row = haystack; row != end; ++row) {
        if (strcmp(row->str, needle) != 0) continue;
        *val = row->val;
        return ok;
    }
    return bad_find;
}
/* every keyword the command line understands. the values are stored
 * in the uint8_t field of a pair, and the arg_asc..arg_b32 range is
 * also used to index the bases table. */
enum argument {
    /* structural keywords */
    arg_to,
    arg_set,
    arg_base,
    /* base shorthands */
    arg_asc,
    arg_bin,
    arg_trn,
    arg_oct,
    arg_dec,
    arg_duo,
    arg_hex,
    arg_b32,
    arg_b64,
    /* output options */
    switch_prefix,
    param_prefix,
    switch_lowercase,
    /* easter egg */
    arg_ebcdic,
};
/* numeric base for each base-shorthand keyword, indexed by
 * enum argument. base 0 stands for the ascii (7 bits per character)
 * representation. the table ends at arg_b32; indices below arg_asc
 * are implicitly zero. */
word bases[] = {
    [arg_asc] =  0, /* ascii */
    [arg_bin] =  2, /* binary */
    [arg_trn] =  3, /* ternary */
    [arg_oct] =  8, /* octal */
    [arg_dec] = 10, /* decimal */
    [arg_duo] = 12, /* duodecimal */
    [arg_hex] = 16, /* hexadecimal */
    [arg_b32] = 32, /* base 32 */
};
/* automatic output prefixes, indexed by numeric base.
 * each entry is length-prefixed: its first byte holds the length of
 * the prefix text that follows. bases with no entry here are left as
 * null pointers, so a lookup has to be checked before it is used.
 * NOTE(review): "0d" is listed under base 12, while the input parser
 * reads a leading "0d" as base 10 - confirm which one is intended. */
const char* prefixes [] = {
    [ 0] = "\1" "@",  /* ascii */
    [ 2] = "\2" "0b", /* binary */
    [ 3] = "\2" "0t", /* ternary */
    [ 8] = "\1" "0",  /* octal */
    [12] = "\2" "0d", /* duodecimal */
    [16] = "\2" "0x", /* hexadecimal */
};
/* keyword table: maps each command-line word to its enum argument.
 * several spellings may share one value. searched with tblget. */
const pair argtbl[] = {
    {arg_to, "to"},
    {arg_base, "base"},
    {arg_set, "--"}, {arg_set, "raw"},
    {arg_asc, "asc"}, {arg_asc, "ascii"},
    {arg_bin, "bin"}, {arg_bin, "binary"},
    {arg_trn, "trn"}, {arg_trn, "tern"}, {arg_trn, "ternary"}, {arg_trn, "trinary"},
    {arg_oct, "oct"}, {arg_oct, "octal"},
    {arg_dec, "dec"}, {arg_dec, "decimal"},
    {arg_duo, "duo"}, {arg_duo, "duodecimal"},
    {arg_hex, "hex"}, {arg_hex, "hexadecimal"},
    /* these two used to be tagged arg_hex, which silently selected
     * base 16 instead of base 32 */
    {arg_b32, "b32"}, {arg_b32, "base32"}, /* not padded! */
    {switch_prefix, "-p"}, {switch_prefix, "--prefix"},
    {switch_lowercase, "-l"}, {switch_lowercase, "--lowercase"},
    {param_prefix, "-m"}, {param_prefix, "--manual-prefix"},
    {arg_ebcdic, "ebcdic"},
};
/* asctoi: read a string as a base-128 numeral, one ascii character
 * per digit, most significant first.
 *  s   - NUL-terminated input
 *  ret - receives the value on success
 * returns ok; bad_domain if a byte is outside 0..127; bad_overflow if
 * the value does not fit in a word. *ret is untouched on failure. */
bad asctoi(const char* s, word* ret) {
    enum { base = 128 };
    const word limit = (word)-1;
    word val = 0;
    for (; *s != null; ++s) {
        uint8_t v = (uint8_t)*s;
        /* >= rather than >: 128 itself is not a valid 7-bit digit */
        if (v >= base) return bad_domain;
        /* val * base + v must not wrap around */
        if (val > (limit - v) / base) return bad_overflow;
        val = val * base + v;
    }
    *ret = val;
    return ok;
}
/* atoi: parse a numeral string into a word.
 *  base - default base (0 selects the ascii reading, see asctoi);
 *         must not exceed maxbase
 *  s    - NUL-terminated ascii numeral. a leading '@' forces the ascii
 *         reading; a leading 0x / 0d / 0b / 0t, or a bare leading 0,
 *         overrides base with 16 / 10 / 2 / 3 / 8 respectively
 *  ret  - receives the value on success
 * digits are 0-9, then A-Z, then a-z. for bases up to imaxbase letters
 * are case-insensitive; above that, lowercase letters are the digits
 * that follow 'Z'.
 * returns ok, bad_base, bad_domain (character is not a digit of the
 * base) or bad_overflow (value does not fit in a word). */
bad atoi(word base, const char* s, word* ret) {
    if (base > maxbase) return bad_base;

    /* ascii reading, either requested by base or forced by '@' */
    if (s[0] == '@' || base == 0) return asctoi(s + (s[0]=='@'),ret);

    /* a literal prefix overrides the default base */
    if (s[0] == '0') {
        switch (s[1]) {
            case 'x': base = 16; s += 2; break;
            case 'd': base = 10; s += 2; break;
            case 'b': base =  2; s += 2; break;
            case 't': base =  3; s += 2; break;
            default:  base =  8; s += 1; break;
        }
    }

    const bool insens = (base <= imaxbase);
    const word limit = (word)-1;
    word val = 0;
    for (; *s != null; ++s) {
        const uint8_t c = (uint8_t)*s;
        word digit;
        if (c >= 0x30 && c <= 0x39) {          /* '0'..'9' */
            digit = c - 0x30;
        } else if (c >= 0x41 && c <= 0x5a) {   /* 'A'..'Z' */
            digit = numspace + (c - 0x41);
        } else if (c >= 0x61 && c <= 0x7a) {   /* 'a'..'z' */
            digit = (insens ? numspace : numspace + alphaspace) + (c - 0x61);
        } else {
            return bad_domain;
        }
        if (digit >= base) return bad_domain;
        /* val * base + digit must not wrap around */
        if (val > (limit - digit) / base) return bad_overflow;
        val = val * base + digit;
    }
    *ret = val;
    return ok;
}
/* digit lookup table for itoa: index = digit value, element = the
 * ascii character that represents it. needed for efficiency's sake,
 * but it has to be kept in sync with the digit ordering used by the
 * parser by hand, since C offers no way to generate it. */
const char baseref[] = /* numerals[10] */ "0123456789"
                       /* bigalpha[26] */ "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                       /* smallalpha[26] */ "abcdefghijklmnopqrstuvwxyz";
/* C11 requires the message argument; it only became optional in C23 */
_Static_assert (sizeof baseref - 1 == maxbase,
                "baseref must hold exactly maxbase digit characters");
/* itoasc: render val as a base-128 string, one character per digit.
 * the string is built backwards: the terminator goes at buf_end and
 * the digits are laid down below it, least significant first.
 *  val       - value to render (0 yields the empty string)
 *  buf_start - lowest address that may be written
 *  buf_end   - address that receives the NUL terminator
 *  newbuf    - if not null, receives the address of the first character
 * returns ok, or bad_overflow when the digits would run past buf_start. */
bad itoasc(word val, const char* buf_start, char* buf_end, char** newbuf) {
    enum { radix = 128 };
    char* head = buf_end; /* lowest cell written so far */
    *head = 0;
    for (; val > 0; val /= radix) {
        if (head <= buf_start) return bad_overflow;
        *--head = (char)(val % radix);
    }
    if (newbuf != null) *newbuf = head;
    return ok;
}
/* when set, itoa emits a-z instead of A-Z for bases up to imaxbase */
bool lowercase = false;

/* itoa: render val as a numeral in the given base.
 * the string is built backwards: the terminator goes at buf_end and
 * the digits are laid down below it, least significant first.
 *  base      - 0 delegates to itoasc; otherwise at most maxbase
 *  val       - value to render; 0 is rendered as "0"
 *  buf_start - lowest address that may be written
 *  buf_end   - address that receives the NUL terminator
 *  newbuf    - if not null, receives the address of the first digit
 * returns ok, bad_base, or bad_overflow when the numeral does not fit
 * between buf_start and buf_end. *newbuf is untouched on failure. */
bad itoa(word base, word val, const char* buf_start,
         char* buf_end, char** newbuf) {
    if (base > maxbase) return bad_base;
    if (base == 0) return itoasc(val, buf_start, buf_end, newbuf);
    if (buf_end < buf_start) return bad_overflow;

    char* ptr = buf_end; /* lowest cell written so far */
    *ptr = 0;
    const bool fold = lowercase && base <= imaxbase;
    /* do-while so that zero still produces one digit, and so that the
     * zero digit goes through the same bounds check as every other */
    do {
        if (ptr <= buf_start) return bad_overflow;
        char out = baseref[val % base];
        val /= base;
        if (fold && out >= 'A' && out <= 'Z')
            out += ('a' - 'A');
        *--ptr = out;
    } while (val > 0);

    if (newbuf != null) *newbuf = ptr;
    return ok;
}
/* numeral_capacity: how many characters the largest possible word
 * needs when written in the given output base. */
static size_t numeral_capacity(word base) {
    if (base == 0) base = 128; /* ascii rep: one char = 7 bits */
    /* unary: the true maximum is ((word) -1), which cannot be
     * allocated, so cap it at 1KiB and let the conversion fail */
    if (base == 1) return 1024;
    size_t digits = 0;
    for (word rest = (word)-1; rest > 0; rest /= base) ++digits;
    return digits;
}

/* run: interpret the command line and print each converted value on
 * its own line on stdout.
 *  argc, argv - as passed to main; argv must be null-terminated and
 *               argv[0] is skipped
 * words found in argtbl are treated as keywords until "--"/"raw" is
 * seen; a word starting with '%' is always taken as a value, with the
 * '%' stripped. everything else is collected as a value and converted
 * once the input and output bases are known.
 * returns ok, or the first error met while parsing or converting. */
bad run(const int argc, const char** argv) {
# ifndef _POSIX_IO
    /* fuck your buffering, it only ever makes
     * things worse */
    setvbuf(stdout, null, _IONBF, 0);
# endif
    enum { set_in, set_out, _set_sz } curset = set_in;
    word base[_set_sz] = { 10, 0 };
    const char* in_vals[argc > 0 ? argc : 1]; *in_vals = null; /* null-terminated! */
    const char** invalp = in_vals;
    const char* pfxstr = null;
    forposix(size_t pfxstrlen = 0);
    bool raw = false;
    bool prefix = false;

    for (const char** arg = argv + 1; *arg != null; ++arg) {
        uint8_t tblval;
        bool is_value = false;
        if ((*arg)[0] == '%') { ++ *arg; is_value = true; }
        else if (raw || tblget(sz(argtbl), argtbl, *arg, &tblval) != ok)
            is_value = true;

        if (is_value) {
            /* we assume it's a number - error checking will
             * happen once we know how to interpret it */
            *invalp++ = *arg; *invalp = null;
            continue;
        }

        switch ((enum argument) tblval) {
            case arg_to:
                if (curset == set_out) return bad_syntax;
                curset = set_out;
                break;
            /* treat all further arguments as numbers */
            case arg_set: raw = true; break;
            case arg_ebcdic: return bad_ebcdic;
            /* specify base with numeral */
            case arg_base: {
                if (arg[1] == null) return bad_syntax;
                word basekind;
                bad e = atoi(10, arg[1], &basekind);
                if (e != ok) return e;
                if (basekind > maxbase) return bad_base;
                base[curset] = basekind;
                ++arg;
            } break;
            /* specify an output prefix */
            case param_prefix:
                if (arg[1] == null) return bad_syntax;
                prefix = true; pfxstr = arg[1];
                forposix(pfxstrlen = strlen(pfxstr));
                ++arg;
                break;
            /* specify an automatic output prefix */
            case switch_prefix: prefix = true; pfxstr = null; break;
            case switch_lowercase: lowercase = true; break;
            default:
                /* assume base shorthand; refuse anything the
                 * bases table has no slot for */
                if (tblval >= sz(bases)) return bad_base;
                base[curset] = bases[tblval];
        }
    }

    /* if an ascii string was passed, change to hexadecimal output */
    if (base[set_in] == 0 && curset != set_out) base[set_out] = 16;

    /* all numerals share one buffer, filled from the top down. each
     * value needs at most numeral_capacity cells; the extra cell holds
     * the first terminator (later terminators overwrite the first
     * digit of the previous, already printed, numeral). the +1 also
     * keeps the array non-empty when there are no values. */
    const size_t count = (size_t)(invalp - in_vals);
    const size_t bufmax = count * numeral_capacity(base[set_out]) + 1;
    char buf [bufmax];
    char* ptr = (buf + bufmax) - 1;
    forposix(char* lastptr = ptr);

    for (const char** s = in_vals; *s != null; ++s) {
        word val;
        bad e = atoi(base[set_in], *s, &val);
        if (e != ok) return e;
        e = itoa(base[set_out], val, buf, ptr, &ptr);
        if (e != ok) return e;
        if (prefix) {
            if (pfxstr != null) { print(pfxstrlen, pfxstr); }
            else if (base[set_out] < sz(prefixes)
                     && prefixes[base[set_out]] != null) {
                /* first byte is the length of the text after it */
                const char* known = prefixes[base[set_out]];
                print((size_t)known[0], known + 1);
            }
        }
        print(lastptr-ptr, ptr);
        print(1, "\n");
        forposix(lastptr = ptr);
    }
    return ok;
}
/* usage: print the command synopsis to stderr.
 *  name - the program name to show in the synopsis (argv[0])
 * the text is assembled at compile time from string-literal macros;
 * each section is a heading followed by its lines, with every line
 * after the first indented to the heading's visible width. */
void usage(const char* name) {
# ifdef _POSIX_IO
    /* length-prefixed so each line is one write(); the length leaves
     * out the literal's NUL terminator */
    typedef struct pstr { size_t len; const char* str; } pstr;
#   define p(x) {sizeof (x "\n") - 1, (x "\n")}
    size_t namelen = strlen(name);
# else
    typedef const char* pstr;
#   define p(x) (x "\n")
# endif
# define OR "\x1b[34m|\x1b[93m"
# define plus "\x1b[94m+\x1b[m"
# define par(s) "<\x1b[4m" s "\x1b[24m>"
# define lit(l) "\x1b[3m" l "\x1b[23m"
# define box(s) "\x1b[94m[\x1b[93m" s "\x1b[94m]\x1b[m"
    const pstr forms[] = {
        p(box(box("options") " " par("in:spec")) " " par("value:int") plus " "
          box(lit("to") " " box(par("out:spec")))),
        p(box(box("options") " " par("in:spec")) " " box(lit("to") " " box(par("out:spec")))
          " " lit("--") " " par("value:int") plus),
    }, specs[] = {
        p(box(lit("bin") OR lit("tern") OR lit("oct")
              OR lit("dec") OR lit("hex") OR
              lit("base") " " box("0-9") plus OR "asc")),
    }, ints[] = {
        p("default base: \x1b[94m.+\x1b[m"),
        p("binary literal: "lit("0b") box("01") plus),
        p("ternary literal: "lit("0t") box("012") plus),
        p("hex literal: "lit("0x") box("0-9A-Fa-f") plus),
        p("ascii literal: "lit("@") "\x1b[94m.+\x1b[m"),
        p("interpret any string (e.g. a keyword) as integer: " lit("%") box("0-9A-Za-z") plus),
    }, opts[] = {
        p("-p --prefix : print known prefix codes on output strings"),
        p("-m --manual-prefix: specify a manual prefix to print before each number"),
        p("-l --lowercase : prefer lowercase for case-insensitive bases"),
    };
# undef p
# undef OR
# undef plus
# define hl_on "\x1b[1m"
    /* SGR 22 is "normal intensity"; 21 is double underline on many
     * terminals and leaves bold switched on */
# define hl_off "\x1b[22m"
    /* bytes a heading spends on escapes, plus one for the terminator,
     * so that sizeof heading - ansilen is its visible width */
    enum { ansilen = sizeof (hl_on hl_off) };
    /* unparenthesised: an array can only be initialised from a bare
     * string literal */
# define hl(x) hl_on x hl_off
    const char form_head []= hl("usage: ");
    const char spec_head []= hl("- spec: ");
    const char int_head []= hl("- int: ");
    const char opt_head []= hl("- options: ");
    /* indentation source; must be at least as long as the widest
     * heading's visible text */
    const char space []= "                "; /* sigh */
# undef hl
# undef hl_on
# undef hl_off
# ifdef _POSIX_IO
#   define _say(sz, s) write(2, (s), (sz));
#   define vsay _say
#   define display(hd) _say(sizeof (hd) - 1, (hd));
#   define pline(l) _say((l).len, (l).str);
# else
    /* everything goes to stderr, the same stream say() uses, so the
     * colour escapes cannot be reordered against the text; the
     * precision argument of %.*s has to be an int */
#   define _say(sz, s) fprintf(stderr, "%.*s", (int)(sz), (s));
#   define display(hd) fprintf(stderr, "%s", (hd));
#   define vsay(sz, s) display(s)
#   define pline(l) display(l);
# endif
# define space(x) _say(x, space);
# define glow(x) say("\x1b[95m"); { x }; say("\x1b[m");
# define section(x,prefix) display(x##_head); \
    for(size_t i = 0; i < sz(x##s); ++ i) { \
        if (i>0) space(sizeof x##_head - ansilen); \
        { prefix; }; pline(x##s[i]); }
    section(form,glow(vsay(namelen, name)); space(1));
    section(spec,);
    section(int,);
    section(opt,);
}
int main(int argc, const char** argv) {
if (argc == 0) return -1;
if (argc == 1) usage(argv[0]);
bad e = run(argc, argv);
switch (e) {
case ok: return 0;
# define e(kind, desc) case bad_##kind:\
say("\x1b[31;1merror:\x1b[m "); say(desc "\n"); break;
error_list
# undef e
}
}