Overview
Comment: | add ord.c |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA3-256: |
72068307daf49bd5c01a31514ea5b644 |
User & Date: | lexi on 2019-07-19 06:00:34 |
Other Links: | manifest | tags |
Context
2019-07-19
| ||
06:06 | fix typo check-in: a38de374f1 user: lexi tags: trunk | |
06:00 | add ord.c check-in: 72068307da user: lexi tags: trunk | |
2019-07-13
| ||
09:05 | PUTTING COMPUTER AWAY check-in: 95bd59918c user: lexi tags: trunk | |
Changes
Added ord.c version [478119c506].
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 |
/* [ʞ] ord.c - integer converter
 *  ~ lexi hale <lexi@hale.su>
 *  © AGPLv3
 *  * ord has no dependencies except for libc.
 *  ? ord converts integers to ascii characters
 *    and back. written because the only fucking
 *    way to do this in shell is FUCKING PRINTF.
 *  $ cc ord.c -o ord [-D_IO=(LIBC|POSIX)]
 *    - the flag D_IO will instruct ord.c whether
 *      to use POSIX io primitives (write)
 *      instead of libc primitives (fwrite). if
 *      you're on a UNIX system, POSIX primitives
 *      will be used by default, but you can block
 *      them with LIBC or force them with POSIX.
 *      if you are on a POSIX-compliant system,
 *      you *should* use POSIX IO, for improved
 *      performance and safety. */

/* ---- i/o backend selection --------------------------------------------
 * LIBC and POSIX must be real macros while the #if below is evaluated:
 * an undefined identifier in #if is replaced by 0, which would make every
 * comparison against them compare 0 with 0. They are dropped again right
 * after so that they cannot leak into the system headers. */
#define LIBC  1
#define POSIX 2
#ifndef _IO
#	if defined(__unix__)
#		define _IO POSIX
#	else
#		define _IO LIBC
#	endif
#endif
#if _IO == POSIX
#	define _POSIX_IO
#endif
#undef LIBC
#undef POSIX

#ifdef _POSIX_IO
#	include <unistd.h>
#else
#	include <stdio.h>
#endif
#include <limits.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* number of elements in a true array (not a pointer) */
#define sz(x) ( sizeof (x) / sizeof (x) [0] )

enum /* output channels */ {
	fd_out = 1,
	fd_err = 2
};

/* Write exactly n bytes of s to the chosen channel (fd_out or fd_err).
 * With POSIX io a short write is retried until everything is out; a
 * failed write is abandoned silently, since there is nowhere left to
 * report it. */
static void put(int fd, const char *s, size_t n)
{
#ifdef _POSIX_IO
	while (n > 0) {
		ssize_t done = write(fd, s, n);
		if (done <= 0) return;
		s += done;
		n -= (size_t)done;
	}
#else
	fwrite(s, 1, n, fd == fd_err ? stderr : stdout);
#endif
}

/* say:   string literal -> stderr (terminating NUL is not written)
 * print: len bytes of x -> stdout */
#define say(x)        put(fd_err, (x), sizeof (x) - 1)
#define print(len, x) put(fd_out, (x), (len))

enum /* constants */ {
	/* ascii address space */
	numspace = (0x39 - 0x30) + 1,        /* 10 */
	alphaspace = (0x5a - 0x41) + 1,      /* 26 */
	smallalphaspace = (0x7a - 0x61) + 1, /* 26 */

	/* base representations */
	imaxbase = numspace + alphaspace,    /* 36: largest case-insensitive base */
	maxbase = imaxbase + smallalphaspace, /* 62: largest supported base */

	/* longest numeral itoa is ever asked to produce: binary needs one
	 * character per bit, unary output is capped at this many marks */
	numeral_max = 1024
};

typedef unsigned long long word;

_Static_assert(sizeof(word) * CHAR_BIT <= numeral_max,
	"numeral buffer must be able to hold a binary word");

/* one row of a keyword lookup table */
typedef struct pair { uint8_t val; const char *str; } pair;

#define error_list \
	e(domain,   "bad argument passed for domain") \
	e(find,     "could not find key in table") \
	e(syntax,   "invalid syntax") \
	e(base,     "that base is out of range") \
	e(overflow, "a memory overflow has occurred") \
	e(ebcdic,   "nice try, mr ibm-san")

/* status code shared by every function in this file; the bad_* members
 * are generated from error_list so they stay in sync with the messages
 * printed by main */
typedef enum bad {
	ok = 0,
	fail = 1,
#	define e(name, desc) bad_##name,
	error_list
#	undef e
} bad;

/* Look needle up in a table of stacksz pairs.
 * On a match stores the associated value in *val and returns ok;
 * otherwise leaves *val untouched and returns bad_find. */
static bad tblget(size_t stacksz, const pair *haystack, const char *needle, uint8_t *val)
{
	for (size_t i = 0; i < stacksz; ++i) {
		if (strcmp(haystack[i].str, needle) == 0) {
			*val = haystack[i].val;
			return ok;
		}
	}
	return bad_find;
}

enum argument {
	arg_to, arg_set, arg_base,

	arg_asc,
	arg_bin, arg_trn, arg_oct, arg_dec,
	arg_duo, arg_hex, arg_b32, arg_b64,

	switch_prefix, param_prefix, switch_lowercase,

	arg_ebcdic,
};

/* numeric base selected by each base keyword; 0 stands for "ascii" */
static const word bases[] = {
	[arg_asc] = 0,
	[arg_bin] = 2,
	[arg_trn] = 3,
	[arg_oct] = 8,
	[arg_dec] = 10,
	[arg_duo] = 12,
	[arg_hex] = 16,
	[arg_b32] = 32,
};

/* Automatic output prefixes, indexed by base. Each entry is a length
 * byte followed by the prefix text; bases without an entry are NULL.
 * NOTE(review): "0d" is emitted for base 12 here, but atoi reads a
 * leading "0d" as base 10 - one of the two is presumably a typo. */
static const char *const prefixes[] = {
	[ 0] = "\1" "@",
	[ 2] = "\2" "0b",
	[ 3] = "\2" "0t",
	[ 8] = "\1" "0",
	[12] = "\2" "0d",
	[16] = "\2" "0x",
};

static const pair argtbl[] = {
	{arg_to, "to"},
	{arg_base, "base"},
	{arg_set, "--"}, {arg_set, "raw"},

	{arg_asc, "asc"}, {arg_asc, "ascii"},

	{arg_bin, "bin"}, {arg_bin, "binary"},
	{arg_trn, "trn"}, {arg_trn, "tern"}, {arg_trn, "ternary"}, {arg_trn, "trinary"},
	{arg_oct, "oct"}, {arg_oct, "octal"},
	{arg_dec, "dec"}, {arg_dec, "decimal"},
	{arg_duo, "duo"}, {arg_duo, "duodecimal"},
	{arg_hex, "hex"}, {arg_hex, "hexadecimal"},
	{arg_b32, "b32"}, {arg_b32, "base32"}, /* not padded! */

	{switch_prefix, "-p"}, {switch_prefix, "--prefix"},
	{switch_lowercase, "-l"}, {switch_lowercase, "--lowercase"},
	{param_prefix, "-m"}, {param_prefix, "--manual-prefix"},

	{arg_ebcdic, "ebcdic"},
};

/* Append one digit to an accumulator: *acc = *acc * base + digit.
 * base must be nonzero and digit < ULLONG_MAX. Returns bad_overflow,
 * leaving *acc unchanged, if the result does not fit in a word. */
static bad push_digit(word *acc, word base, word digit)
{
	if (*acc > (ULLONG_MAX - digit) / base) return bad_overflow;
	*acc = *acc * base + digit;
	return ok;
}

/* Read s as a base-128 numeral whose digits are its ASCII bytes.
 * Returns bad_domain on any byte outside 0..127 and bad_overflow if the
 * value does not fit; *ret is only written on success. */
static bad asctoi(const char *s, word *ret)
{
	enum { base = 128 };
	word val = 0;
	for (; *s != '\0'; ++s) {
		uint8_t v = (uint8_t)*s;
		if (v >= base) return bad_domain;
		bad e = push_digit(&val, base, v);
		if (e != ok) return e;
	}
	*ret = val;
	return ok;
}

/* Parse the NUL-terminated ASCII numeral s in the given base.
 *   - base 0, or a leading '@', reads the text as an ascii literal
 *   - a leading 0x / 0d / 0b / 0t / 0 overrides base with 16 / 10 / 2 / 3 / 8
 *   - bases up to 36 are case-insensitive; above that, lowercase letters
 *     are the digits 36..61
 * Returns bad_base if base > maxbase, bad_domain for a character that
 * is not a digit of the base, bad_overflow if the value does not fit.
 * *ret is only written on success. */
static bad atoi(word base, const char *s, word *ret)
{
	if (base > maxbase) return bad_base;

	/* override the default base if it's a based literal */
	if (s[0] == '@' || base == 0)
		return asctoi(s + (s[0] == '@'), ret);
	if (s[0] == '0') {
		switch (s[1]) {
		case 'x': base = 16; s += 2; break;
		case 'd': base = 10; s += 2; break;
		case 'b': base = 2;  s += 2; break;
		case 't': base = 3;  s += 2; break;
		default:  base = 8;  s += 1; break;
		}
	}

	const bool insens = (base <= imaxbase);
	word val = 0;
	for (; *s != '\0'; ++s) {
		const uint8_t c = (uint8_t)*s;
		word digit;
		if (c >= '0' && c <= '9')
			digit = (word)(c - '0');
		else if (c >= 'A' && c <= 'Z')
			digit = (word)numspace + (word)(c - 'A');
		else if (c >= 'a' && c <= 'z')
			digit = (word)(insens ? numspace : imaxbase) + (word)(c - 'a');
		else
			return bad_domain;

		if (digit >= base) return bad_domain;
		bad e = push_digit(&val, base, digit);
		if (e != ok) return e;
	}

	*ret = val;
	return ok;
}

/* digit characters in value order; atoi's digit mapping must be kept
 * in sync with this table by hand */
static const char baseref[] = /* numerals[10]   */ "0123456789"
                              /* bigalpha[26]   */ "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                              /* smallalpha[26] */ "abcdefghijklmnopqrstuvwxyz";
_Static_assert(sizeof baseref - 1 == maxbase,
	"baseref must hold exactly maxbase digits");

/* Render val as base-128 "digits" (raw ASCII bytes), right-aligned so
 * that the terminating NUL lands on *buf_end. *newbuf (if non-NULL)
 * receives the address of the first byte. A value of 0 renders as the
 * empty string. Returns bad_overflow if the text would start before
 * buf_start. */
static bad itoasc(word val, const char *buf_start, char *buf_end, char **newbuf)
{
	char *ptr = buf_end;
	*ptr = '\0';
	while (val > 0) {
		if (ptr == buf_start) return bad_overflow;
		*--ptr = (char)(val % 128);
		val /= 128;
	}
	if (newbuf != NULL) *newbuf = ptr;
	return ok;
}

/* when set, itoa emits a-z instead of A-Z for case-insensitive bases */
static bool lowercase = false;

/* Render val as a numeral in the given base, right-aligned so that the
 * terminating NUL lands on *buf_end. *newbuf (if non-NULL) receives the
 * address of the first character.
 *   - base 0 delegates to itoasc
 *   - base 1 emits val tally marks ('1'); zero is the empty string
 *   - otherwise at least one digit is always produced, so 0 renders "0"
 * Returns bad_base if base > maxbase and bad_overflow if the text would
 * start before buf_start. */
static bad itoa(word base, word val, const char *buf_start, char *buf_end, char **newbuf)
{
	if (base > maxbase) return bad_base;
	if (base == 0) return itoasc(val, buf_start, buf_end, newbuf);

	char *ptr = buf_end;
	*ptr = '\0';

	if (base == 1) {
		/* val % 1 is always 0 and val / 1 never shrinks, so unary
		 * cannot go through the positional loop below */
		for (; val > 0; --val) {
			if (ptr == buf_start) return bad_overflow;
			*--ptr = baseref[1];
		}
	} else {
		/* same case-insensitivity boundary as atoi */
		const bool fold = lowercase && base <= imaxbase;
		do {
			if (ptr == buf_start) return bad_overflow;
			char out = baseref[val % base];
			val /= base;
			if (fold && out >= 'A' && out <= 'Z') out = (char)(out + ('a' - 'A'));
			*--ptr = out;
		} while (val > 0);
	}

	if (newbuf != NULL) *newbuf = ptr;
	return ok;
}

/* Interpret the command line and print each converted value on its own
 * line of stdout. argv must be NULL-terminated and argc >= 1; a leading
 * '%' is stripped from an argument in place. Returns ok, or the first
 * error encountered (values preceding it have already been printed). */
static bad run(const int argc, const char **argv)
{
	if (argc < 1) return bad_syntax;

#	ifndef _POSIX_IO
		/* unbuffered, so stdout and stderr interleave as written */
		setvbuf(stdout, NULL, _IONBF, 0);
#	endif

	enum { set_in, set_out, set_count } curset = set_in;
	word base[set_count] = { 10, 0 }; /* decimal in, ascii out */

	/* at most argc-1 values plus the NULL terminator */
	const char *in_vals[argc];
	const char **invalp = in_vals;
	*invalp = NULL;

	const char *pfxstr = NULL;
	size_t pfxstrlen = 0;
	bool raw = false;
	bool prefix = false;

	for (const char **arg = argv + 1; *arg != NULL; ++arg) {
		uint8_t tblval;
		if ((*arg)[0] == '%') {
			/* forced number: drop the marker and skip keyword lookup */
			++*arg;
		} else if (!raw && tblget(sz(argtbl), argtbl, *arg, &tblval) == ok) {
			const enum argument symbol = (enum argument)tblval;
			switch (symbol) {
			case arg_to:
				if (curset == set_out) return bad_syntax;
				curset = set_out;
				break;

			/* treat all further arguments as numbers */
			case arg_set:
				raw = true;
				break;

			case arg_ebcdic:
				return bad_ebcdic;

			/* specify base with numeral */
			case arg_base: {
				if (arg[1] == NULL) return bad_syntax;
				word basekind;
				bad e = atoi(10, arg[1], &basekind);
				if (e != ok) return e;
				if (basekind > maxbase) return bad_base;
				base[curset] = basekind;
				++arg;
			} break;

			/* specify an output prefix */
			case param_prefix:
				if (arg[1] == NULL) return bad_syntax;
				prefix = true;
				pfxstr = arg[1];
				pfxstrlen = strlen(pfxstr);
				++arg;
				break;

			/* specify an automatic output prefix */
			case switch_prefix:
				prefix = true;
				pfxstr = NULL;
				break;

			case switch_lowercase:
				lowercase = true;
				break;

			default:
				/* assume base shorthand */
				if ((size_t)symbol >= sz(bases)) return bad_syntax;
				base[curset] = bases[symbol];
				break;
			}
			continue;
		}

		/* we assume it's a number - error checking will
		 * happen once we know how to interpret it */
		*invalp++ = *arg;
		*invalp = NULL;
	}

	/* if an ascii string was passed, change to hexadecimal output */
	if (base[set_in] == 0 && curset != set_out) base[set_out] = 16;

	/* every value is printed as soon as it is converted, so a single
	 * buffer sized for the longest possible numeral is enough */
	char buf[numeral_max + 1];
	char *const buf_end = buf + sizeof buf - 1;

	for (const char **s = in_vals; *s != NULL; ++s) {
		word val;
		bad e = atoi(base[set_in], *s, &val);
		if (e != ok) return e;

		char *digits;
		e = itoa(base[set_out], val, buf, buf_end, &digits);
		if (e != ok) return e;

		if (prefix) {
			if (pfxstr != NULL) {
				print(pfxstrlen, pfxstr);
			} else if (base[set_out] < sz(prefixes)
			           && prefixes[base[set_out]] != NULL) {
				const char *pfx = prefixes[base[set_out]];
				print((size_t)pfx[0], pfx + 1);
			}
		}
		print((size_t)(buf_end - digits), digits);
		print(1, "\n");
	}
	return ok;
}

/* a string with its length precomputed (NUL not counted) */
typedef struct pstr { size_t len; const char *str; } pstr;

/* write n spaces to stderr */
static void pad(size_t n)
{
	static const char blanks[] = "                ";
	while (n > 0) {
		const size_t chunk = n < sizeof blanks - 1 ? n : sizeof blanks - 1;
		put(fd_err, blanks, chunk);
		n -= chunk;
	}
}

/* Print one usage section to stderr: the heading, then each line.
 * Lines after the first are indented by `indent` columns so they line
 * up under the first. When name is non-NULL every line is preceded by
 * the highlighted program name and a space. */
static void section(const char *head, size_t headlen, size_t indent,
                    const pstr *lines, size_t count, const char *name)
{
	put(fd_err, head, headlen);
	for (size_t i = 0; i < count; ++i) {
		if (i > 0) pad(indent);
		if (name != NULL) {
			say("\x1b[95m");
			put(fd_err, name, strlen(name));
			say("\x1b[m");
			pad(1);
		}
		put(fd_err, lines[i].str, lines[i].len);
	}
}

/* Print the usage summary to stderr; name is the program name to show. */
static void usage(const char *name)
{
#	define p(x) { sizeof (x "\n") - 1, (x "\n") }
#	define OR "\x1b[34m|\x1b[93m"
#	define plus "\x1b[94m+\x1b[m"
#	define par(s) "<\x1b[4m" s "\x1b[24m>"
#	define lit(l) "\x1b[3m" l "\x1b[23m"
#	define box(s) "\x1b[94m[\x1b[93m" s "\x1b[94m]\x1b[m"
	static const pstr forms[] = {
		p(box(box("options") " " par("in:spec")) " " par("value:int") plus " " box(lit("to") " " box(par("out:spec")))),
		p(box(box("options") par("in:spec")) " " box(lit("to") " " box(par("out:spec"))) " " lit("--") " " par("value:int") plus),
	}, specs[] = {
		p(box(lit("bin") OR lit("tern") OR lit("oct") OR lit("dec") OR lit("hex") OR lit("base") " " box("0-9") plus OR "asc")),
	}, ints[] = {
		p("default base: \x1b[94m.+\x1b[m"),
		p("binary literal: " lit("0b") box("01") plus),
		p("ternary literal: " lit("0t") box("012") plus),
		p("hex literal: " lit("0x") box("0-9A-Fa-f") plus),
		p("ascii literal: " lit("@") "\x1b[94m.+\x1b[m"),
		p("interpret any string (e.g. a keyword) as integer: " lit("%") box("0-9A-Za-z") plus),
	}, opts[] = {
		p("-p --prefix : print known prefix codes on output strings"),
		p("-m --manual-prefix: specify a manual prefix to print before each number"),
		p("-l --lowercase : prefer lowercase for case-insensitive bases"),
	};

#	define hl(x) "\x1b[1m" x "\x1b[21m"
	/* bytes a heading spends on escape codes plus its NUL; subtracting
	 * this from sizeof gives the heading's visible width */
	enum { ansilen = sizeof (hl("")) };
	static const char form_head[] = hl("usage: ");
	static const char spec_head[] = hl("- spec: ");
	static const char int_head [] = hl("- int: ");
	static const char opt_head [] = hl("- options: ");

#	define show(x, who) section(x##_head, sizeof x##_head - 1, \
		sizeof x##_head - ansilen, x##s, sz(x##s), (who))
	show(form, name);
	show(spec, NULL);
	show(int, NULL);
	show(opt, NULL);

#	undef show
#	undef hl
#	undef box
#	undef lit
#	undef par
#	undef plus
#	undef OR
#	undef p
}

/* Entry point. Exit status: 0 on success, -1 if there is no argv[0],
 * 1 after printing usage (no arguments given), otherwise the nonzero
 * `bad` code of the error that was reported on stderr. */
int main(int argc, char **argv)
{
	if (argc == 0) return -1;
	if (argc == 1) {
		usage(argv[0]);
		return fail;
	}

	const bad status = run(argc, (const char **)argv);
	switch (status) {
	case ok:
		return 0;
#	define e(kind, desc) case bad_##kind: \
		say("\x1b[31;1merror:\x1b[m "); say(desc "\n"); break;
	error_list
#	undef e
	default:
		break;
	}
	return (int)status;
}