util  mkup.c at [e15188634a]

File mkup.c artifact 283e4450e2 part of check-in e15188634a


/* [ʞ] mkup.c
 *  ~ lexi hale <lexi@hale.su>
 *  © AGPLv3
 *  ? mkup is a document generator based loosely
 *    on markup, which produces html files formatted
 *    according to my website's template. */

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <unistd.h>
#include <string.h>
/* k_static is defined only for the duration of the include below;
 * presumably compose.c checks it to decide the linkage of what it
 * defines -- TODO confirm against clib/compose.c. */
#define k_static
#	include "clib/compose.c"
#undef k_static
/* element count of a true array; meaningless when x is a pointer */
#define _sz(x) ( sizeof (x) / sizeof (x) [0] )
/* size in bytes of the storage area in one document page (40 KiB) */
#define _doc_page_sz (40 * 1024)

/* before C23 there is no nullptr, and bool/true/false are not keywords,
 * so supply them here. the language revision is reported by
 * __STDC_VERSION__; __STDC__ is simply 1 on every conforming compiler
 * and cannot distinguish revisions. when __STDC_VERSION__ is undefined
 * (C89) it evaluates to 0 inside #if, which selects the fallback. */
#if __STDC_VERSION__ < 202000L
	enum /* constants */ {
		null = 0, false = 0, true = 1
	};
	typedef _Bool bool;
#else
#	define null nullptr
#endif
/* evaluate x (a `bad` value); if it is anything but ok, return it from
 * the enclosing function. */
#define try(x) {bad _E_ = (x); if(_E_ != ok) return _E_;}
/* clear every byte of the object x. memset takes (dest, value, count):
 * the fill value comes before the byte count. */
#define zero(x) memset(&(x), 0, sizeof(x))

/* X-macro table of failure conditions. each e(code, desc) entry is
 * expanded twice below: once into a bad_<code> enumerator and once into
 * its message string, so the two always stay in the same order. */
#define mkup_error_list \
	e(usage,"usage was displayed to user") \
	e(insane,"your system is not in a sane state") \
	e(file,"file specified could not be mapped") \
	e(empty,"nothing to do; bailing")

/* result code used throughout the file; ok is 0 and every failure is a
 * positive bad_<code> value. */
typedef enum result {
	ok = 0,
#	define e(code,desc) bad_##code,
		mkup_error_list
#	undef e
} bad;
/* message text indexed by a `bad` value; slot 0 belongs to ok. */
const char* error_messages[] = {
	"all systems nominal",
#	define e(code,desc) desc,
		mkup_error_list
#	undef e
};

/* report a result code: for any failure, print its message from
 * error_messages to stdout behind a red "halt:" label. the code is
 * handed back unchanged so callers can write `return hnd(...)`. */
bad hnd(bad error) {
	if (error <= 0) return error;

	const char* const why = error_messages[error];
	printf("\x1b[1;31mhalt:\x1b[m %s\n", why);
	return error;
}

/* map a file read-only into memory.
 *  filename  path of the file to map
 *  buf       receives the address of the mapping
 *  rsz       receives the length of the file in bytes
 * returns ok on success; bad_file if the file cannot be opened, its
 * size cannot be determined, it is empty, or the mapping fails. buf and
 * rsz are written only on success. the caller owns the mapping and
 * releases it with munmap(*buf, *rsz). */
bad mapfile(const char* filename, void** buf, size_t* rsz) {
	int fd = open(filename, O_RDONLY);
	if (fd == -1) return bad_file;

	bad status = bad_file;
	/* lseek reports failure as (off_t)-1; keep the signed type so that
	 * is caught instead of turning into an enormous size_t */
	off_t len = lseek(fd, 0, SEEK_END);
	if (len > 0) {
		void* p = mmap(0, (size_t)len, PROT_READ, MAP_SHARED, fd, 0);
		if (p != MAP_FAILED) {
			*buf = p; *rsz = (size_t)len;
			status = ok;
		}
	}
	/* a mapping stays valid after its descriptor is closed, so the
	 * descriptor is released on every path */
	close(fd);
	return status;
}

/* a span of text that is referenced, not owned: start points at the
 * first byte and end just past the last, so the length is end - start. */
typedef struct dstr { const char *start, *end; } dstr;
/* one key/value reference definition; entries form a singly linked list
 * through next (see addref). */
struct ref {
	dstr key, val;
	struct ref* next;
};

/* X-macro table of node kinds. each N(name, desc) entry supplies the
 * suffix of a node_<name> enumerator and of a node_compile_<name>
 * function, plus a human-readable description. every expansion of this
 * list must keep the same order, because the resulting arrays are
 * indexed by enum node_kind. */
#define mkup_node_list    \
	N(txt,"text"        ) \
	                      \
	N(cmd,"command"     ) \
	N(lnk,"link"        ) \
	N(ref,"reference"   ) \
	N(ft, "footnote"    ) \
	                      \
	N(hd, "heading"     ) \
	N(for,"foreign text") \
	N(raw,"raw"         ) \
	N(hl, "strong"      ) \
	N(em, "emphasis"    ) \
	N(cd, "code"        ) \
	N(b,  "div block"   ) \
	N(sp, "span"        ) \
	N(qt, "quote"       )


/* description of each node kind, indexed by enum node_kind; used by the
 * debug dump. */
const char* node_kind_name[] = {
#define N(name,desc) desc,
	mkup_node_list
#undef N
};

/* one node_<name> enumerator per mkup_node_list entry, starting at 0. */
enum node_kind {
#define N(name,desc) node_##name,
	mkup_node_list
#undef N
}; struct node {
	/* a node of the document tree. siblings are chained through next;
	 * children hang off branch. */
	enum node_kind kind;
	dstr body; /* the node's own text */
	union {
		dstr key;      /* read by the link compiler as the href */
		uint8_t depth; /* read by the heading compiler and the dump */
	};
	struct node* next; /* following sibling, or null */
	struct {
		/* first is null when there are no children; the node
		 * constructors only ever clear first, so last is meaningful
		 * only while first is non-null */
		struct node *first, *last;
	} branch;
	
};

/* kinds of command a command node can carry. nothing in this file reads
 * them yet, so their exact meaning is not established here. */
enum node_cmd_kind {
	cmd_set, cmd_push,
	cmd_by, cmd_byl,
	cmd_img
}; struct node_cmd {
	/* NOTE(review): the leading kind field mirrors the first member of
	 * struct node, presumably so a command can be recognised through a
	 * node pointer -- confirm before relying on it. */
	enum node_kind kind;
	enum node_cmd_kind cmd;
	dstr params[]; /* flexible array member: count is not stored here */
};

/* a parsed document together with the bump allocator its nodes and
 * references are carved from (see dalloc / newpage). */
struct document {
	/* bottom: next unused byte of the current page;
	 * free: bytes remaining in that page */
	void* bottom; size_t free;
	struct { struct node* first, *last; } nodes; /* top-level node list */
	struct { struct ref * first, *last; } refs;  /* reference list */
	struct page {
		void* hnd;         /* pointer returned by malloc for this page */
		struct page* last; /* page that was current before this one */
	}* page;               /* most recently allocated page, or null */
	size_t pagect;         /* number of pages newpage has allocated */
};

/* forgive me father for i have sinned */
/* gutter drawn in front of dump output: ten repetitions of a box-drawing
 * bar followed by two spaces. */
const char* spacer = "│  │  │  │  │  │  │  │  │  │  ";
/* wrap a string literal in the ANSI bold / reset escapes */
#define HL(x) "\x1b[1m" x "\x1b[m"
/* print one dump line behind dep*5 bytes of spacer. requires a variable
 * named dep in scope and at least one argument after fmt. the factor 5
 * assumes each bar-plus-two-spaces group occupies five bytes, i.e. that
 * this file is compiled as UTF-8 -- TODO confirm. */
#define indent(fmt,...) printf("%.*s" fmt "\n", dep*5, spacer, __VA_ARGS__)

/* print a node's kind, body text and (for headings) depth, indented to
 * level dep. debugging aid; writes to stdout.
 *  n    node to describe (must not be null)
 *  dep  indentation level for the gutter */
void dump_node(struct node*n, uint8_t dep) {
	/* the precision consumed by %.*s must be an int; the raw pointer
	 * difference is a ptrdiff_t, which is wider on 64-bit targets */
	const int body_len = (int)(n->body.end - n->body.start);

	indent(HL("node kind:") " %s", node_kind_name[n -> kind]);
	indent(HL("node body:") " %.*s", body_len, n->body.start);
	if (n -> kind == node_hd)
		indent(HL("header depth:") " %u", (unsigned)n->depth);
}

/* dump n and every sibling after it, descending into each node's
 * children one indentation level deeper. siblings are walked in a loop;
 * only children recurse. */
void dump_node_list(struct node* n, uint8_t dep) {
	for (struct node* cur = n; ; cur = cur -> next) {
		indent("printing node",0);
		dump_node(cur, dep+1);

		indent("printing branches",0);
		if (cur -> branch.first == null) {
			indent("node has no branches",0);
		} else {
			dump_node_list(cur -> branch.first, dep+1);
		}

		indent("printing leaves",0);
		if (cur -> next == null) {
			indent("node has no more leaves",0);
			return;
		}
	}
}

#undef indent
/* print the whole node tree of d to stdout, or a short notice when the
 * document holds no nodes. dep is the indentation level of the caller;
 * the tree itself is printed one level deeper. */
void dump_document(struct document* d, uint8_t dep) {
	struct node* const head = d -> nodes.first;
	if (head != null) {
		printf("document nodes:\n");
		dump_node_list(head, dep + 1);
		return;
	}
	printf("document has no nodes\n");
}


void newpage(struct document* d) {
	struct taggedPage {
		struct page p;
		char b [_doc_page_sz];
	}* pg = malloc(sizeof(struct taggedPage));
	d -> bottom = &(pg->b);
	d -> free = _doc_page_sz;
	pg -> p = (struct page) {
		.hnd = pg,
		.last = d -> page,
	};
	d -> page = &(pg->p);
	++ d -> pagect;
};

void* dalloc(struct document* d, size_t sz, size_t align) {
	char* p = d->bottom;
	size_t ab = 0;
	if (align != 0) {
		ab = align - (((uintptr_t)p) % align);
		p += ab;
	}

	if (sz + ab <= d->free) {
		d->free -= sz + ab;
		d->bottom = p + sz;
		if (d->free == 0) newpage(d);
		return p;
	} else {
		newpage(d);
		return dalloc(d, sz, align);
	}
}
#define _dalloc(d,t) dalloc((d), sizeof(t), _Alignof(t))

/* allocate a node and append it to the end of d's top-level node list.
 * only next and branch.first are initialised (both null); the caller
 * fills in kind, body and the rest. returns the new node. */
struct node* addnode(struct document* d) {
	struct node* fresh = _dalloc(d, struct node);
	fresh -> next = null;
	fresh -> branch.first = null;

	if (d -> nodes.first != null) d -> nodes.last -> next = fresh;
	else d -> nodes.first = fresh;
	d -> nodes.last = fresh;

	return fresh;
}

/* allocate a node and link it in directly after n as n's next sibling.
 * nodes that already followed n are kept: they now follow the new node
 * (previously they were cut out of the list). if n was the last
 * top-level node of d, the new node becomes the last. only next and
 * branch.first of the new node are initialised. returns the new node. */
struct node* leafnode(struct document* d, struct node* n) {
	struct node* c = _dalloc(d, struct node);
	c -> branch.first = null;

	/* splice: n -> c -> (old successor of n) */
	c -> next = n -> next;
	n -> next = c;

	if (d -> nodes.last == n) d -> nodes.last = c;
	return c;
}

/* allocate a node from d and append it to the child list of n. only
 * next and branch.first of the child are initialised (both null).
 * returns the new child. */
struct node* branchnode(struct document* d, struct node* n) {
	struct node* child = _dalloc(d, struct node);
	child -> branch.first = null;
	child -> next = null;

	if (n -> branch.first != null) n -> branch.last -> next = child;
	else n -> branch.first = child;
	n -> branch.last = child;

	return child;
}

/* allocate a reference entry and append it to d's reference list. key
 * and val are left for the caller to fill in. returns the new entry. */
struct ref* addref(struct document* d) {
	struct ref* entry = _dalloc(d, struct ref);
	entry -> next = null;

	if (d -> refs.first != null) d -> refs.last -> next = entry;
	else d -> refs.first = entry;
	d -> refs.last = entry;

	return entry;
}

/* find the first occurrence of seek on the current line of str.
 * scanning gives up at a NUL byte unless seek is itself NUL, and at a
 * newline unless seek is itself a newline. returns a pointer to the
 * match, or null when the scan gave up first. */
const char* skip_line_to(const char* str, char seek) {
	for (const char* cur = str; ; ++cur) {
		const char c = *cur;
		if (c == 0 && seek != 0) return null;
		if (c == '\n' && seek != '\n') return null;
		if (c == seek) return cur;
	}
}

/* predicate: does the line at str open with the delimiter seek and
 * carry a plausible closing delimiter later on the same line?
 * returns true only when all of the following hold:
 *   - str begins with seek, and the line continues past it;
 *   - the character right after that opening match is not a newline,
 *     tab or space;
 *   - the first character of seek occurs again at or after that point
 *     on the line (located with skip_line_to);
 *   - the character just before that occurrence is not a newline, tab
 *     or space.
 * NOTE(review): only the first character of the closing delimiter is
 * ever looked for, and the position found is not used for matching (see
 * the comment at `search`) -- confirm whether that is intended. */
bool p_span_line(const char* str, const char* seek) {
	const char* p, * s;
	bool round2 = false;
	/* jumping to this label re-runs the for-initialiser, so both passes
	 * compare seek against the START of str; the value scan_for_next
	 * leaves in s is overwritten here. */
	/* if the line ends at the very moment seek is exhausted, the loop
	 * condition fails before the *p == 0 test runs and the result is
	 * false. */
	search: for(p = seek, s = str; *s != 0 && *s != '\n'; ++p, ++s) {
		if (*p == 0 ) if(round2)return true;
			else goto scan_for_next; //success
		if (*p != *s) break; // failure
	} return false;
	
	/* reached once, after the first pass matched all of seek; s points
	 * at the character following the opening match. */
	scan_for_next: {
		if (*s=='\n' || *s=='\t' || *s==' ') return false;
		p = seek;
		const char* go = skip_line_to(s, *p);
		if (go == null) return false;
		/* inspect the character immediately before the candidate */
		s = go - 1;
		if (*s=='\n' || *s=='\t' || *s==' ') return false;
		++ s;
		round2=true; goto search;
	};
}

bad parse(const char* file, size_t sz, struct document* root, void* mem) {
#	define incguard(p) {if(++(p)>(char*)(mem+sz))goto end_document;}
#	define advance(p) {while(*(p) == ' ' || *(p) == '\t') \
		{incguard(p)}}

	zero(*root);

	root -> bottom = mem;

	struct node* active = null;
	
	const char* ptr = file;
	start_line: {
		while(*ptr=='\n') incguard(ptr);

		uint8_t depth = 0;
		while(*ptr==' ' || *ptr=='\t') {
			depth += (*ptr==' '?1:4);
			incguard(ptr);
		}
		if (depth >= 4) goto parse_raw;
		else if (depth > 0) goto parse_span;
		else switch(*ptr) {
			case '#': goto parse_header;
			case 0: goto end_document;
			default: goto parse_span;
		}
	}

	parse_span: {
		advance(ptr);
		active=addnode(root);
		active -> kind = node_txt;
		const char* end = skip_line_to(ptr,'\n');
		// TODO actually parse it
		active -> body = (dstr) {ptr, end};
		ptr = end;
		goto start_line;
	}

	parse_ref: {
		active=addnode(root);
		active -> kind = node_ref;
	}

	parse_raw: {
		active=addnode(root);
		active -> kind = node_raw;
	}
	
	parse_header: {
		uint8_t depth = 0;
		while(*ptr == '#') { incguard(ptr); ++depth; }
		active = addnode(root);
		goto grab_line;
	}

	grab_line: {
		advance(ptr);
		const char* end = skip_line_to(ptr,'\n');
		active -> body = (dstr) {ptr, end};
		ptr = end + 1;
		goto start_line;
	}

	end_document: {
		
	}

	return ok;
}

/* parameter list shared by every node compiler: the node to emit, the
 * output cursor, and the nesting level (0 for top-level nodes). the
 * _raw form has no parentheses so it can be spliced into a longer
 * parameter list. */
#define compile_fn_params_raw struct node* n, char* dest, size_t lvl
#define compile_fn_params (compile_fn_params_raw)
/* forward declaration; the definition follows the per-kind compilers */
char* node_compile compile_fn_params;
/* compile the children of n, if any, one level deeper, advancing dest.
 * expects n, dest and lvl to be in scope. */
#define node_recurse \
	{if (n -> branch.first != null) dest = node_compile(n->branch.first,dest,lvl+1);}
/* _p comes from compose.c; this wraps its result as a struct pstr
 * compound literal so it can be used as an expression. */
#define _P(s) ((struct pstr)_p(s))
/* the body of n as a pstr: byte count first, then the start pointer */
#define pstr_node_body \
	((struct pstr){ n->body.end - n->body.start, n->body.start})

/* emit n as <tag>body...children...</tag>.
 *  tag   element name, without angle brackets
 *  n     node whose body and children are written
 *  dest  output cursor
 *  lvl   nesting level of n
 * returns the output cursor after the closing tag. */
char* simple_node (pstr tag, compile_fn_params_raw) {
	pstr head[] = { _p("<"), tag, _p(">"), pstr_node_body };
	dest = impose(head, _sz(head), null, dest);

	node_recurse;

	pstr tail[] = { _p("</"), tag, _p(">") };
	char* const after = impose(tail, _sz(tail), null, dest);
	return after;
}

/* function type of a node compiler: takes (n, dest, lvl) and returns
 * the advanced output cursor. */
typedef char* (compile_fn compile_fn_params);
/* open the definition of the compiler for one node kind; the function
 * is named node_compile_<name> so the dispatch table can be generated
 * from mkup_node_list. */
#define def_node_comp(name)\
	char* node_compile_##name compile_fn_params


/* text node: body followed by children. <p> tags only go on the top
 * level, so nested text is written bare. */
def_node_comp(txt) {
	if (lvl != 0) {
		dest = imprint(pstr_node_body, null, dest);
		node_recurse;
		return dest;
	}

	pstr para[] = { _p("<p>"), pstr_node_body };
	dest = impose(para, _sz(para), null, dest);
	node_recurse;
	return imprint((struct pstr)_p("</p>\n"), null, dest);
}

/* command nodes produce no output yet */
def_node_comp(cmd) { return dest; }

/* link node: an anchor whose href is the node's key and whose content
 * is the node's body followed by its children. the key is written as
 * it stands, without escaping. */
def_node_comp(lnk) {
	pstr target = { n->key.end - n->key.start, n->key.start };
	pstr head[] = { _p("<a href=\""), target, _p("\">"), pstr_node_body };
	dest = impose(head, _sz(head), null, dest);

	node_recurse;

	return imprint((struct pstr){4, "</a>"}, null, dest);
}

/* reference and footnote nodes produce no output yet */
def_node_comp(ref) { return dest; }
def_node_comp(ft)  { return dest; }
/* heading node: <hN>body...children...</hN> followed by a newline. html
 * only defines h1 through h6, so the node's depth is clamped into that
 * range -- a depth of 0 would otherwise produce <h0>. */
def_node_comp(hd)  {
	size_t ct = n -> depth;
	if (ct < 1) ct = 1;
	if (ct > 6) ct = 6;
	char d = (char)('0' + ct);
	pstr open[] = { _p("<h"), {1,&d}, _p(">"), pstr_node_body, };
	pstr close[] = { _p("</h"), {1,&d}, _p(">\n") };
	dest = impose(open, _sz(open), null,dest);
	node_recurse;
	return impose(close,_sz(close), null,dest);
}
/* foreign-text and raw nodes produce no output yet */
def_node_comp(for) {return dest;}
def_node_comp(raw) {return dest;}
/* strong and emphasis map straight onto their html elements. the cursor
 * returned by simple_node must be passed on: node_compile continues
 * writing from whatever these functions return. */
def_node_comp(hl)  { return simple_node(_P("strong"), n,dest,lvl); }
def_node_comp(em)  { return simple_node(_P("em"), n,dest,lvl); }
/* code, div block, span and quote nodes produce no output yet */
def_node_comp(cd)  {return dest;}
def_node_comp(b)   {return dest;}
def_node_comp(sp)  {return dest;}
def_node_comp(qt)  {return dest;}

#undef def_node_comp

/* dispatch table: the compiler for each node kind, generated from
 * mkup_node_list so that it can be indexed by enum node_kind. */
compile_fn* node_compile_funcs[] = {
#define N(name,desc) node_compile_##name,
	mkup_node_list
#undef N
};
/* compile n and every sibling that follows it, in order, dispatching
 * each node to the compiler for its kind. n must not be null. returns
 * the output cursor after the last sibling. */
char* node_compile compile_fn_params {
	do {
		compile_fn* const emit = node_compile_funcs[n->kind];
		dest = emit(n, dest, lvl);
		n = n -> next;
	} while (n != null);
	return dest;
}
/* render the document as a complete html page.
 *  root  document to render
 *  dest  output buffer
 *  dsz   capacity of dest -- not enforced yet
 *  rsz   receives the number of bytes written; untouched on failure
 * returns ok, or bad_empty when the document has no nodes. */
bad compile(struct document* root, char* dest, size_t dsz, size_t* rsz) {
	/* TODO bounds checking! */
	(void)dsz;
	if (root -> nodes.first == null) return bad_empty;

	/* placeholder title; the element needs a proper closing tag or the
	 * rest of the head is swallowed into the title */
	const char* title_seq = "<title>test-title</title>";
	/* NOTE(review): the title entry is given a length of 0. presumably
	 * impose measures such entries itself; if it does not, the title is
	 * never written -- confirm against compose.c. */
	pstr open[] = { _p(
		"<!doctype html>\n"
		"<html>\n"
			"<head>\n"), {0, title_seq}, _p("\n</head>\n"
			"<body>\n")};
	pstr close[] = { _p("</body>\n</html>") };

	char* end = impose(open, _sz(open), null, dest);
	end = node_compile(root -> nodes.first, end, 0);
	end = impose(close, _sz(close), null, end);

	*rsz = end - dest;
	return ok;
}

/* fixed sample content from which fakeparse builds its test document */
const char
	headermsg[]="you could win up to ¤50000 GALACTIC ZORBLATS",
	bodytext []="enter to win now - just rend your meatling carapace and ululate for the glory of PHLEGETHON ETERNUM, ",
	boldtext []="glorious Soul-Poisoner of the Sunless Realm!",
	normtext []=" it's that easy! ",
	linktext []="CLICK HERE FOR NATURAL MALE ENHANCEMENT",
	linkhref []="www.3masculatr1x.xxx";

bad fakeparse(char* file, size_t sz, struct document* root, void* mem) {
	zero(*root);

	/* put our first page on the stack to avoid mallocs
	 * for small documents */
	root -> bottom = mem;

	typedef struct node* nd;
	
	nd header = addnode(root);
	header -> kind = node_hd;
	header -> depth = 1;
	header -> body.start = headermsg;
	header -> body.end = headermsg+sizeof headermsg;

	nd body = addnode(root);
	body -> kind = node_txt;
	body -> body.start = bodytext;
	body -> body.end = bodytext+sizeof bodytext;

	nd boldbranch = branchnode(root, body);
	boldbranch -> kind = node_hl;
	boldbranch -> body.start = boldtext;
	boldbranch -> body.end = boldtext+sizeof boldtext;

	nd normbranch = branchnode(root, body);
	normbranch -> kind = node_txt;
	normbranch -> body.start = normtext;
	normbranch -> body.end = normtext+sizeof normtext;

	nd linkbranch = branchnode(root, body);
	linkbranch -> kind = node_lnk;
	linkbranch -> body.start = linktext;
	linkbranch -> body.end = linktext+sizeof linktext;
	linkbranch -> key.start = linkhref;
	linkbranch -> key.end = linkhref+sizeof linkhref;
	return ok;
}

bad run(const char* filename) {
	void* file; size_t sz;
	try(mapfile(filename, &file, &sz));

	uint8_t work[_doc_page_sz]; /* safe-sized buffer for working space - might waste some space but it's faster than malloc */
	struct document doc;
	try(parse(file,sz,&doc,work));
	/* try(fakeparse(file,sz,work)); */
	dump_document(&doc,0);

	char outbuf[_doc_page_sz * (1 + doc.pagect) * 2];
		/* should be plenty of space */
	printf("outbuf: %llu\n", sizeof outbuf);
	size_t outsz=0;
	try(compile(&doc,outbuf,sizeof outbuf,&outsz));

	printf("final doc:\n");
	write(1, outbuf, outsz);

	return ok;
}

/* entry point. sorts the command line into options (arguments that
 * start with '-') and plain arguments, then runs the generator on the
 * first plain argument. the exit status is the `bad` code of whatever
 * went wrong, or 0. */
int main(int argc, char** argv) {
	switch (argc) {
		case 0: return hnd(bad_insane);
		case 1: return hnd(bad_usage);
		default: break;
	}

	char* flags[argc];
	char* paths[argc];
	size_t nflags = 0, npaths = 0;

	for (int i = 1; argv[i] != null; ++i) {
		char* const word = argv[i];
		if (word[0] == '-') flags[nflags++] = word;
		else paths[npaths++] = word;
	}

	if (nflags != 0) {
		/* parse options */
	}

	if (npaths == 0) return hnd(bad_usage);

	return hnd(run(paths[0]));
}