Overview
Comment: | add ord.c |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA3-256: |
72068307daf49bd5c01a31514ea5b644 |
User & Date: | lexi on 2019-07-19 06:00:34 |
Other Links: | manifest | tags |
Context
2019-07-19
| ||
06:06 | fix typo check-in: a38de374f1 user: lexi tags: trunk | |
06:00 | add ord.c check-in: 72068307da user: lexi tags: trunk | |
2019-07-13
| ||
09:05 | PUTTING COMPUTER AWAY check-in: 95bd59918c user: lexi tags: trunk | |
Changes
Added ord.c version [478119c506].
/* [ʞ] ord.c - integer converter
 *  ~ lexi hale <lexi@hale.su>
 *  © AGPLv3
 *  * ord has no dependencies except for libc.
 *  ? ord converts integers to ascii characters and back, and
 *    between numeric bases, so that shell scripts do not have
 *    to lean on printf for it.
 *  $ cc ord.c -o ord [-D_IO=(LIBC|POSIX)]
 *    - the flag _IO selects the output primitives: POSIX
 *      (write) or LIBC (stdio). when _IO is not given, POSIX
 *      is used if the compiler defines __unix__, LIBC
 *      otherwise.
 *
 *  command line, as implemented by run():
 *      ord [options] [<in-spec>] <value>... [to [<out-spec>]]
 *    input defaults to decimal and output to ascii; when the
 *    input is ascii and no "to" was given, output is hex. */

/* LIBC and POSIX must be real, distinct macros for -D_IO=LIBC and
 * -D_IO=POSIX to be distinguishable: inside #if an undefined identifier
 * evaluates to 0, so comparing _IO against bare names is always "equal". */
#define LIBC  1
#define POSIX 2
#if defined(_IO)
#	if _IO == POSIX
#		define ORD_POSIX_IO
#	elif _IO != LIBC
#		error "_IO must be LIBC or POSIX"
#	endif
#elif defined(__unix__)
#	define ORD_POSIX_IO
#endif
#undef LIBC
#undef POSIX

/* say(lit)     - write a string literal to stderr (no trailing NUL)
 * esay(n, s)   - write n bytes of s to stderr
 * print(n, s)  - write n bytes of s to stdout */
#ifdef ORD_POSIX_IO
#	include <unistd.h>
#	define say(x)      (write(2, (x), sizeof (x) - 1))
#	define esay(n,x)   (write(2, (x), (n)))
#	define print(n,x)  (write(1, (x), (n)))
#else
#	include <stdio.h>
#	define say(x)      (fputs((x), stderr))
#	define esay(n,x)   (fwrite((x), 1, (n), stderr))
#	define print(n,x)  (fwrite((x), 1, (n), stdout))
#endif
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <limits.h>

/* element count of a true array (not a pointer) */
#define sz(x) ( sizeof (x) / sizeof (x) [0] )

enum /* constants */ {
	/* ascii address space */
	numspace        = (0x39 - 0x30) + 1, /* 10 */
	alphaspace      = (0x5a - 0x41) + 1, /* 26 */
	smallalphaspace = (0x7a - 0x61) + 1, /* 26 */

	/* base representations */
	imaxbase = numspace + alphaspace,      /* 36: largest case-insensitive base */
	maxbase  = imaxbase + smallalphaspace  /* 62: largest supported base */
};

typedef unsigned long long word;

/* one row of a keyword table: the keyword and the value it maps to */
typedef struct pair { uint8_t val; const char* str; } pair;

/* X-macro listing every error: e(name, message) */
#define error_list \
	e(domain,   "bad argument passed for domain") \
	e(find,     "could not find key in table") \
	e(syntax,   "invalid syntax") \
	e(base,     "that base is out of range") \
	e(overflow, "a memory overflow has occurred") \
	e(ebcdic,   "nice try, mr ibm-san")

typedef enum bad {
	ok = 0, fail = 1,
#	define e(name, desc) bad_##name,
		error_list
#	undef e
} bad;

/* Look up `needle` among the first `stacksz` rows of `haystack`.
 * On a match stores the row's value in *val and returns ok;
 * otherwise returns bad_find and leaves *val untouched. */
bad tblget(size_t stacksz, const pair* haystack, const char* needle, uint8_t* val) {
	for (size_t i = 0; i < stacksz; ++i) {
		if (strcmp(haystack[i].str, needle) == 0) {
			*val = haystack[i].val;
			return ok;
		}
	}
	return bad_find;
}

enum argument {
	arg_to, arg_set, arg_base,

	arg_asc,

	arg_bin, arg_trn, arg_oct, arg_dec,
	arg_duo, arg_hex, arg_b32, arg_b64,

	switch_prefix, param_prefix,
	switch_lowercase,

	arg_ebcdic,
};

/* numeric base selected by each base-shorthand keyword; 0 means ascii */
const word bases[] = {
	[arg_asc] =  0,
	[arg_bin] =  2,
	[arg_trn] =  3,
	[arg_oct] =  8,
	[arg_dec] = 10,
	[arg_duo] = 12,
	[arg_hex] = 16,
	[arg_b32] = 32,
};

/* automatic output prefixes, indexed by base. each entry is a one-byte
 * length followed by the prefix text; the two parts are separate
 * literals so the text cannot be absorbed into the octal escape.
 * bases without an entry are NULL. the prefixes match the ones atoi()
 * recognises on input, so prefixed output can be fed back in. */
const char* prefixes [] = {
	[ 0] = "\1" "@",
	[ 2] = "\2" "0b",
	[ 3] = "\2" "0t",
	[ 8] = "\1" "0",
	[10] = "\2" "0d",
	[16] = "\2" "0x",
};

const pair argtbl[] = {
	{arg_to,   "to"},
	{arg_base, "base"},

	{arg_set, "--"}, {arg_set, "raw"},

	{arg_asc, "asc"}, {arg_asc, "ascii"},

	{arg_bin, "bin"}, {arg_bin, "binary"},
	{arg_trn, "trn"}, {arg_trn, "tern"}, {arg_trn, "ternary"}, {arg_trn, "trinary"},
	{arg_oct, "oct"}, {arg_oct, "octal"},
	{arg_dec, "dec"}, {arg_dec, "decimal"},
	{arg_duo, "duo"}, {arg_duo, "duodecimal"},
	{arg_hex, "hex"}, {arg_hex, "hexadecimal"},

	{arg_b32, "b32"}, {arg_b32, "base32"}, /* not padded! */

	{switch_prefix,    "-p"}, {switch_prefix,    "--prefix"},
	{switch_lowercase, "-l"}, {switch_lowercase, "--lowercase"},
	{param_prefix,     "-m"}, {param_prefix,     "--manual-prefix"},

	{arg_ebcdic, "ebcdic"},
};

/* Pack the bytes of `s` into an integer, most significant first, seven
 * bits per character (i.e. read `s` as a base-128 numeral).
 * Returns bad_domain for a byte outside 0..127 and bad_overflow when the
 * result does not fit in a word; *ret is only written on success. */
bad asctoi(const char* s, word* ret) {
	enum { base = 128 };
	word val = 0;

	for (; *s != '\0'; ++s) {
		const uint8_t v = (uint8_t) *s;
		if (v >= base) return bad_domain;
		if (val > (ULLONG_MAX - v) / base) return bad_overflow;

		val = val * base + v;
	}

	*ret = val;
	return ok;
}

/* Parse the NUL-terminated numeral `s` in `base` (2..maxbase, or 0 for
 * ascii) into *ret. A leading "@", "0x", "0d", "0b", "0t" or "0"
 * overrides `base` with ascii, 16, 10, 2, 3 or 8 respectively.
 * Letters are case-insensitive up to base 36; above that, upper case
 * are digits 10..35 and lower case are digits 36..61.
 * Returns bad_base, bad_domain or bad_overflow on failure.
 * NOTE(review): this shares its name with the libc function declared in
 * <stdlib.h>; this file must not include that header. */
bad atoi(word base, const char* s, word* ret) {
	/* base 1 has no usable digits, so it is rejected with the rest */
	if (base > maxbase || base == 1) return bad_base;

	/* override the default base if it's a based literal */
	if (s[0] == '@' || base == 0) return asctoi(s + (s[0] == '@'), ret);
	else if (s[0] == '0' && s[1] == 'x') base = 16, s += 2;
	else if (s[0] == '0' && s[1] == 'd') base = 10, s += 2;
	else if (s[0] == '0' && s[1] == 'b') base =  2, s += 2;
	else if (s[0] == '0' && s[1] == 't') base =  3, s += 2;
	else if (s[0] == '0')                base =  8, s += 1;

	const bool insens = (base <= imaxbase);
	word val = 0;

	for (; *s != '\0'; ++s) {
		const uint8_t c = (uint8_t) *s;
		word v;

		if      (c >= 0x30 && c <= 0x39) v = c - 0x30;
		else if (c >= 0x41 && c <= 0x5a) v = numspace + (c - 0x41);
		else if (c >= 0x61 && c <= 0x7a) v = (insens ? numspace : imaxbase) + (c - 0x61);
		else return bad_domain;

		if (v >= base) return bad_domain;
		if (val > (ULLONG_MAX - v) / base) return bad_overflow;

		val = val * base + v;
	}

	*ret = val;
	return ok;
}

/* digit characters in value order; itoa() indexes this directly, so it
 * has to be kept in sync with the digit mapping in atoi() by hand. */
const char baseref[] = /* numerals[10]   */ "0123456789"
                       /* bigalpha[26]   */ "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                       /* smallalpha[26] */ "abcdefghijklmnopqrstuvwxyz";
_Static_assert (sizeof baseref - 1 == maxbase,
	"baseref must list exactly maxbase digits");

/* Inverse of asctoi(): unpack `val` into 7-bit characters, written
 * backwards from `buf_end`. A NUL is stored at *buf_end and the
 * characters occupy [*newbuf, buf_end). A value of 0 yields the empty
 * string. Returns bad_overflow if the text would start before
 * `buf_start`. `newbuf` may be NULL. */
bad itoasc(word val, const char* buf_start, char* buf_end, char** newbuf) {
	char* ptr = buf_end;

	*ptr = '\0';
	while (val > 0) {
		if (ptr == buf_start) return bad_overflow;
		*--ptr = (char) (val % 128);
		val /= 128;
	}

	if (newbuf != NULL) *newbuf = ptr;
	return ok;
}

/* when set, itoa() emits a-z instead of A-Z for case-insensitive bases */
bool lowercase = false;

/* Render `val` as a numeral in `base` (2..maxbase, or 0 for ascii via
 * itoasc()), written backwards from `buf_end`. A NUL is stored at
 * *buf_end and the digits occupy [*newbuf, buf_end); zero is rendered
 * as "0". Returns bad_base for an unsupported base and bad_overflow if
 * the digits would start before `buf_start`. `newbuf` may be NULL. */
bad itoa(word base, word val, const char* buf_start,
		char* buf_end, char** newbuf) {

	/* base 1 is rejected: val /= 1 would never reach zero */
	if (base > maxbase || base == 1) return bad_base;
	if (base == 0) return itoasc(val, buf_start, buf_end, newbuf);

	const bool fold = lowercase && base <= imaxbase;
	char* ptr = buf_end;

	*ptr = '\0';
	do {
		if (ptr == buf_start) return bad_overflow;
		char out = baseref[val % base];
		if (fold && out >= 'A' && out <= 'Z')
			out += ('a' - 'A');
		*--ptr = out;
		val /= base;
	} while (val > 0);

	if (newbuf != NULL) *newbuf = ptr;
	return ok;
}

/* Interpret the command line and print one converted value per line on
 * stdout. Keywords from argtbl configure the conversion; anything else,
 * anything after "--"/"raw", and anything starting with '%' (marker
 * stripped) is collected as a value. Values are converted only after
 * the whole command line has been read. Returns ok, or the first error
 * encountered. */
bad run(const int argc, const char** argv) {
	if (argc < 1) return bad_syntax;
#	ifndef ORD_POSIX_IO
		/* unbuffered, so output interleaves predictably with stderr */
		setvbuf(stdout, NULL, _IONBF, 0);
#	endif

	enum { set_in, set_out, set_count } curset = set_in;
	word base[set_count] = { [set_in] = 10, [set_out] = 0 };

	/* at most argc - 1 arguments can be values */
	const char* in_vals[argc];
	size_t in_count = 0;

	const char* pfxstr = NULL; /* NULL = use the automatic prefix table */
	size_t pfxstrlen = 0;

	bool raw = false;
	bool prefix = false;

	for (int i = 1; i < argc && argv[i] != NULL; ++i) {
		const char* arg = argv[i];
		const char* next = (i + 1 < argc) ? argv[i + 1] : NULL;
		uint8_t tblval;

		if (arg[0] == '%') {
			/* explicit value marker: lets a keyword be used as a numeral */
			in_vals[in_count++] = arg + 1;
			continue;
		}
		if (raw || tblget(sz(argtbl), argtbl, arg, &tblval) != ok) {
			/* we assume it's a number - error checking will
			 * happen once we know how to interpret it */
			in_vals[in_count++] = arg;
			continue;
		}

		const enum argument symbol = (enum argument) tblval;
		switch (symbol) {
			case arg_to: {
				if (curset == set_out) return bad_syntax;
				curset = set_out;
			} break;

			/* treat all further arguments as numbers */
			case arg_set: { raw = true; } break;
			case arg_ebcdic: { return bad_ebcdic; }

			/* specify base with numeral */
			case arg_base: {
				if (next == NULL) return bad_syntax;
				word basekind;
				const bad err = atoi(10, next, &basekind);
				if (err != ok) return err;
				if (basekind > maxbase) return bad_base;
				base[curset] = basekind;
				++i;
			} break;

			/* specify an output prefix */
			case param_prefix: {
				if (next == NULL) return bad_syntax;
				prefix = true;
				pfxstr = next;
				pfxstrlen = strlen(next);
				++i;
			} break;

			/* specify an automatic output prefix */
			case switch_prefix: { prefix = true; pfxstr = NULL; } break;
			case switch_lowercase: { lowercase = true; } break;

			default: {
				/* assume base shorthand */
				if ((size_t) symbol >= sz(bases)) return bad_base;
				base[curset] = bases[symbol];
			}
		}
	}

	/* if an ascii string was passed, change to hexadecimal output */
	if (base[set_in] == 0 && curset != set_out) base[set_out] = 16;

	/* the longest possible numeral is a full word in base 2, plus the
	 * NUL; every value is printed before the next one is converted, so
	 * one buffer is reused for all of them. */
	char buf[sizeof (word) * CHAR_BIT + 1];
	char* const buf_end = buf + sizeof buf - 1;

	for (size_t i = 0; i < in_count; ++i) {
		word val;
		bad err = atoi(base[set_in], in_vals[i], &val);
		if (err != ok) return err;

		char* digits;
		err = itoa(base[set_out], val, buf, buf_end, &digits);
		if (err != ok) return err;

		if (prefix) {
			if (pfxstr != NULL) {
				print(pfxstrlen, pfxstr);
			} else if (base[set_out] < sz(prefixes)
					&& prefixes[base[set_out]] != NULL) {
				const char* known = prefixes[base[set_out]];
				print((size_t) known[0], known + 1);
			}
		}
		print((size_t) (buf_end - digits), digits);
		print(1, "\n");
	}
	return ok;
}

/* Print the usage summary to stderr. `name` is the program name shown
 * at the start of each usage form and must not be NULL. */
void usage(const char* name) {
	typedef struct pstr { size_t len; const char* str; } pstr;
	const size_t namelen = strlen(name);

	/* p() builds a length-counted line; the length excludes the NUL */
#	define p(x)   { sizeof (x "\n") - 1, (x "\n") }
#	define OR     "\x1b[34m|\x1b[93m"
#	define plus   "\x1b[94m+\x1b[m"
#	define par(s) "<\x1b[4m" s "\x1b[24m>"
#	define lit(l) "\x1b[3m" l "\x1b[23m"
#	define box(s) "\x1b[94m[\x1b[93m" s "\x1b[94m]\x1b[m"
	const pstr forms[] = {
		p(box(box("options") " " par("in:spec")) " " par("value:int") plus " "
			box(lit("to") " " box(par("out:spec")))),
		p(box(box("options") par("in:spec")) " " box(lit("to") " " box(par("out:spec")))
			" " lit("--") " " par("value:int") plus),
	}, specs[] = {
		p(box(lit("bin") OR lit("tern") OR lit("oct")
			OR lit("dec") OR lit("hex") OR
			lit("base") " " box("0-9") plus OR "asc")),
	}, ints[] = {
		p("default base: \x1b[94m.+\x1b[m"),
		p("binary literal: "lit("0b") box("01") plus),
		p("ternary literal: "lit("0t") box("012") plus),
		p("hex literal: "lit("0x") box("0-9A-Fa-f") plus),
		p("ascii literal: "lit("@") "\x1b[94m.+\x1b[m"),
		p("interpret any string (e.g. a keyword) as integer: " lit("%") box("0-9A-Za-z") plus),
	}, opts[] = {
		p("-p --prefix : print known prefix codes on output strings"),
		p("-m --manual-prefix: specify a manual prefix to print before each number"),
		p("-l --lowercase : prefer lowercase for case-insensitive bases"),
	};
#	undef p
#	undef OR
#	undef plus
#	undef par
#	undef lit
#	undef box

#	define hl(x) "\x1b[1m" x "\x1b[21m"
	/* bytes of escape codes hl() adds around the visible heading text */
	enum { ansilen = sizeof (hl("")) - 1 };
	const char form_head [] = hl("usage: ");
	const char spec_head [] = hl("- spec: ");
	const char int_head  [] = hl("- int: ");
	const char opt_head  [] = hl("- options: ");
#	undef hl

	const struct {
		const char* head;   /* highlighted heading */
		size_t headlen;     /* bytes in head, escape codes included */
		const pstr* lines;
		size_t count;
		bool named;         /* start each line with the program name */
	} sections[] = {
		{ form_head, sizeof form_head - 1, forms, sz(forms), true  },
		{ spec_head, sizeof spec_head - 1, specs, sz(specs), false },
		{ int_head,  sizeof int_head  - 1, ints,  sz(ints),  false },
		{ opt_head,  sizeof opt_head  - 1, opts,  sz(opts),  false },
	};

	for (size_t s = 0; s < sz(sections); ++s) {
		esay(sections[s].headlen, sections[s].head);
		for (size_t i = 0; i < sections[s].count; ++i) {
			if (i > 0) {
				/* indent continuation lines by the heading's visible width */
				for (size_t pad = sections[s].headlen - ansilen; pad > 0; --pad)
					say(" ");
			}
			if (sections[s].named) {
				say("\x1b[95m"); esay(namelen, name); say("\x1b[m");
				say(" ");
			}
			esay(sections[s].lines[i].len, sections[s].lines[i].str);
		}
	}
}

/* Entry point. Shows the usage text when called without arguments.
 * Exits 0 on success; on failure prints the error message to stderr and
 * exits with the (non-zero) error code. */
int main(int argc, const char** argv) {
	if (argc == 0) return -1;
	if (argc == 1) usage(argv[0]);

	const bad status = run(argc, argv);
	switch (status) {
		case ok: return 0;
#		define e(kind, desc) case bad_##kind:\
			say("\x1b[31;1merror:\x1b[m "); say(desc "\n"); break;
		error_list
#		undef e
		case fail:
		default:
			say("\x1b[31;1merror:\x1b[m "); say("unknown failure\n"); break;
	}
	return (int) status;
}