aboutsummaryrefslogtreecommitdiff
path: root/md.c
blob: e5aaea76dfd1a447bf51e3061181c86fc1eff47b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
// Copyright 2021 Jeremias Stotter
//
// This file is part of ´JBlog´.
//
// ´JBlog´ is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// ´JBlog´ is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with ´JBlog´.  If not, see <http://www.gnu.org/licenses/>.

#define _POSIX_C_SOURCE 200809L

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdbool.h>
#include <limits.h>


#include <time.h>

#include "jblog.h"

//#define LINE_MAX 4096

// Some not so nice things about this:
//   HTML escapes like &#42; are not handled by this although they might be handled by the client anyways
//   No setext headings because why not just use the hash symbol?

char* get_link_components(char* start, char** out_text, char** out_loc, size_t* out_len);

// Per-list metadata. new_list() allocates one of these and stores it in the
// value pointer of a t_list tree element.
struct list_data {
	// True if ol, false if ul
	bool ordered;
	// If ol this is set to the number the list starts counting from
	// (rendered as the start attribute), in case of ul it is ignored
	int start;
	// The number of spaces used at this level of the list
	int indent;
};

// Node kinds of the document tree. Stored in tree_element.type.
// t_root is 0, so a zero-filled element starts out as t_root.
enum html_type {
	t_root = 0,
	t_inner, // Raw text / html; value is the NUL terminated string that is emitted as is
	t_h, // Value points to an int holding the heading level
	t_p,
	t_br,
	t_str_ast, // Emphasis opened with '*'; value points to an unsigned int holding its level (1 = em, 2 = b, 3 = both)
	t_str_und, // Same as t_str_ast, but opened with '_'
	t_img, // Value is the image location string, the children provide the alt text
	t_a, // Value is the link target string, the children provide the link text
	t_ol, // NOTE(review): appears to be unused, ordered lists are t_list with list_data.ordered set
	t_list, // Value points to a struct list_data (list kind, start number and indentation)
	t_li,
	t_hr,
	t_code,
	t_inline_code, // Value points to a bool that is set to true once the closing backtick was seen
	t_bq
};

// The file will be structured as a tree that, in the end will be converted to valid html
// One node of the document tree.
struct tree_element {
	// Element this node is attached to, NULL for a node that was never attached (e.g. the root)
	struct tree_element* parent;
	// One of enum html_type
	int type;
	// Number of entries in children
	int children_n;
	// Heap allocated array of child pointers, NULL while there are no children
	struct tree_element** children;
	// Is this element allowed to have inner html
	bool allow_inner;
	// Can be anything additional, like src for img or text in the case of inner.
	// The pointer is owned by the node: free_tree() calls free() on it.
	void* value;
};

// Allocate one zero-filled tree element (type 0 is t_root, no children, no value).
// Returns NULL if the allocation fails. Release with free_tree().
struct tree_element* new_element() {
	struct tree_element* element = calloc(1, sizeof *element);
	return element;
}

// Recursively release a tree: the value of the element, all of its
// descendants, its child array and finally the element itself.
// Passing NULL is a no-op, just like free(NULL).
void free_tree(struct tree_element* root) {
	if(!root)
		return;
	// free(NULL) is defined to do nothing, so value needs no guard
	free(root->value);
	if(root->children) {
		for(int i = 0; i < root->children_n; i++)
			free_tree(root->children[i]);
		free(root->children);
	}
	free(root);
}

// Create a new zero-filled element and attach it to parent.
//
// index == -1 appends the child at the end of the child list.
// Otherwise the child is inserted at position index and the children that
// were at index or later move one slot towards the end.
// An index outside of -1 .. (number of children before the call) is treated
// like -1 instead of writing outside of the child array.
//
// Returns the new child, or NULL if memory could not be allocated; in that
// case parent is left exactly as it was.
struct tree_element* new_child(struct tree_element* parent, int index) {
	struct tree_element* child = new_element();
	if(!child)
		return NULL;

	// Grow through a temporary so the old array is neither lost nor miscounted on failure
	size_t new_count = (size_t)parent->children_n + 1;
	struct tree_element** grown = realloc(parent->children, sizeof(struct tree_element*) * new_count);
	if(!grown) {
		free(child);
		return NULL;
	}
	parent->children = grown;
	parent->children_n++;
	child->parent = parent;

	int last = parent->children_n - 1;
	if(index < 0 || index > last)
		index = last;
	// Move the elements in the children list after the new element
	memmove(&parent->children[index + 1], &parent->children[index],
	        sizeof(struct tree_element*) * (size_t)(last - index));
	parent->children[index] = child;
	return child;
}

// Decoder state that html_escape() keeps between calls:
// utf8_length is the number of bytes still missing from the multi-byte
// sequence that is currently being read, unicode_char collects its code point.
int utf8_length;
long int unicode_char;
// Write the html numeric character reference ("&#x..;") for input to output_buffer.
//
// output_buffer must provide room for 96 bytes; the result is always NUL terminated.
// Bytes below 0x80 are escaped right away. The bytes of a UTF-8 multi-byte
// sequence have to be passed one call at a time: every call but the last
// yields an empty string, the last one yields the reference of the decoded
// code point. Continuation bytes that do not belong to a sequence yield an
// empty string, the bytes 0xFE and 0xFF (never valid in UTF-8) yield U+FFFD.
void html_escape(char* output_buffer, char input) {
	enum { ESCAPE_BUFFER_SIZE = 96 };
	// Work on the unsigned value so the result does not depend on whether char is signed
	unsigned char byte = (unsigned char)input;

	if(!(byte & 0x80)) {
		snprintf(output_buffer, ESCAPE_BUFFER_SIZE, "&#x%X;", (unsigned int)byte);
		return;
	}

	// UTF-8
	int leading_ones = 0;
	for(unsigned int mask = 0x80; mask != 0 && (byte & mask); mask >>= 1)
		leading_ones++;

	if(leading_ones >= 7) {
		// Would otherwise end up as a negative shift count
		utf8_length = 0;
		unicode_char = 0x0;
		snprintf(output_buffer, ESCAPE_BUFFER_SIZE, "&#xFFFD;");
		return;
	}

	if(leading_ones > 1) {
		// Lead byte: the number of leading ones is the length of the sequence
		utf8_length = leading_ones;
		unicode_char = byte & (UCHAR_MAX >> leading_ones);
	} else {
		// Continuation byte, contributes its lower six bits
		if(utf8_length <= 0) {
			*output_buffer = '\0';
			return;
		}
		unicode_char = (unicode_char << 6) | (byte & 0x3F);
	}
	utf8_length--;

	if(utf8_length)
		*output_buffer = '\0';
	else
		snprintf(output_buffer, ESCAPE_BUFFER_SIZE, "&#x%lX;", (unsigned long)unicode_char);
}

// Reallocs the string dest to fit src, then appends src.
//
// dest may be NULL (treated as an empty string) and must otherwise be a heap
// allocated, NUL terminated string. src may be NULL (nothing is appended).
// Returns the possibly moved string, which is always NUL terminated and has
// to be used instead of dest from now on.
// If the reallocation fails dest is returned unchanged (NULL if dest was NULL).
char* realloc_append(char* dest, char* src) {
	size_t dest_len = dest ? strlen(dest) : 0;
	size_t src_len = src ? strlen(src) : 0;
	char* grown = realloc(dest, dest_len + src_len + 1);
	if(!grown)
		return dest;
	if(src_len)
		memcpy(grown + dest_len, src, src_len);
	// Terminate explicitly: with dest == NULL and src == NULL nothing else writes to the buffer
	grown[dest_len + src_len] = '\0';
	return grown;
}

// printf style patterns used by tree_to_html() to wrap the rendered children
// of an element. %s takes the inner html unless noted otherwise.
#define p_html "<p>%s</p>\n"
#define br_html "<br/>"
// Arguments: heading level, inner html, heading level
#define h_html "<h%d>%s</h%d>\n"
#define em_html "<em>%s</em>\n"
#define b_html "<b>%s</b>\n"
#define emb_html "<em><b>%s</b></em>\n"
// Arguments: image location, alt text
#define img_html "<img src=\"%s\" alt=\"%s\" title=\"\"/>\n"
// Arguments: link target, link text
#define a_html "<a href=\"%s\">%s</a>\n"
#define ul_html "<ul>%s</ul>\n"
// Arguments: number the list starts counting from, inner html
#define ol_html "<ol start=\"%d\">%s</ol>\n"
#define li_html "<li>%s</li>\n"
#define hr_html "<hr/>\n"
#define code_html "<pre><code>%s</code></pre>\n"
#define inline_code_html "<code>%s</code>"
#define bq_html "<blockquote>%s</blockquote>\n"
// Resolve a tree to html
int depth = 0;
char* tree_to_html(struct tree_element* root) {
	char* html=NULL;
	char* inner_html = NULL;
	for(int i = 0; i < root->children_n; i++) {
		struct tree_element* child = root->children[i];
		switch(child->type) {
			case(t_inner):
				// Append the inner html to buffer
				inner_html = realloc_append(inner_html, child->value);
			break;
			default:
				char* child_html = tree_to_html(child);
				inner_html = realloc_append(inner_html, child_html);
				free(child_html);
			break;
		}
	}
//	printf("%s\n", inner_html ? inner_html : "(EMPTY)");
	#define realloc_len(html_pattern) (inner_html ? strlen(inner_html) : 0) + strlen(html_pattern) + 1
	#define realloc_for_html(html_pattern) realloc(html, realloc_len(html_pattern));
	switch(root->type) {
		case(t_p):	
			html = realloc_for_html(p_html);
			sprintf(html, p_html, inner_html ? inner_html : "");
			break;
		case(t_h):
			html = realloc_for_html(h_html);
			sprintf(html, h_html, *(int*)(root->value), inner_html ? inner_html : "", *(int*)(root->value));
			break;
		case(t_str_ast):
		case(t_str_und):
			switch(*(unsigned int*)root->value) {
				case(1):
					html = realloc_for_html(em_html);
					sprintf(html, em_html, inner_html ? inner_html : "");
					break;
				case(2):
					html = realloc_for_html(b_html);
					sprintf(html, b_html, inner_html ? inner_html : "");
					break;
				case(3):
					html = realloc_for_html(emb_html);
					sprintf(html, emb_html, inner_html ? inner_html : "");
					break;
				default:
					html = realloc_for_html("");
					strcpy(html, inner_html ? inner_html : "");
					break;
			}
			break;
		case(t_img):
			html = realloc(html, realloc_len(img_html) + strlen(root->value));
			if(!html)
				exit(-1);
			sprintf(html, img_html, root->value ? (char*)(root->value) : "", inner_html ? inner_html : "");
			break;
		case(t_a):
			html = realloc(html, realloc_len(a_html) + strlen(root->value));
			if(!html)
				exit(-1);
			sprintf(html, a_html, root->value ? (char*)(root->value) : "", inner_html ? inner_html : "");
			break;
		case(t_br):
			html = realloc_for_html(br_html);
			strcpy(html, br_html);
			break;
		case(t_list):
			if(((struct list_data*)(root->value))->ordered) {
				// We have an ordered list
				int digits = 0;
				for(int counter = 1; counter < ((struct list_data*)(root->value))->start; counter *= 10, digits++);
				html = realloc(html, realloc_len(ol_html) + digits + 1);
				sprintf(html, ol_html, ((struct list_data*)(root->value))->start, inner_html ? inner_html : "");
			} else {
				// We have an unordered list
				html = realloc_for_html(ul_html);
				sprintf(html, ul_html, inner_html ? inner_html : "");
			}
			break;
		case(t_li):
			html = realloc_for_html(li_html);
			sprintf(html, li_html, inner_html ? inner_html : "");
			break;
		case(t_hr):
			html = realloc_for_html(hr_html);
			strcpy(html, hr_html);
			break;
		case(t_code):
			html = realloc_for_html(code_html);
			sprintf(html, code_html, inner_html ? inner_html : "");
			break;
		case(t_inline_code):
			if(*(bool*)(root->value)) {
				html = realloc_for_html(inline_code_html);
				sprintf(html, inline_code_html, inner_html ? inner_html : "");
			} else {
				html = realloc(html, inner_html ? strlen(inner_html) : 0 + 1);
				memcpy(html, "`", 2);
				strcat(html, inner_html ? inner_html : "");
			}
			break;
		case(t_bq):
			html = realloc_for_html(bq_html);
			sprintf(html, bq_html, inner_html ? inner_html : "");
			break;
		default:
			html=inner_html;
			break;
	}
	return html;
}

// Use this to create a new child of another element and quickly set its attributes.
// ae_var receives the new child (see new_child() for the meaning of index).
// This expands to three statements that start with "ae_var = new_child(...)":
// put braces around it when it is the body of an if / else / loop.
#define NEW_ACTIVE_CHILD(ae_var, parent, index, type_v, allow_inner_v) ae_var = new_child(parent, index); ae_var->type = type_v; ae_var->allow_inner = allow_inner_v;

// Use this to find the next parent that allows inner elements.
// active_element is moved up the tree until it is root, until it points to an
// element with allow_inner set, or until it runs out of parents (NULL).
// The NULL check comes first so that a NULL element is never dereferenced.
// This expands to a complete statement including the semicolon.
#define NEXT_ALLOW_INNER(active_element, root) while((active_element) && (active_element) != (root) && !(active_element)->allow_inner) active_element = (active_element)->parent;

// Append cur_char to the text of the active element.
//
// active_element may be NULL, the character then goes to a text node directly below root.
// Otherwise *active_element is first moved up to the next element that allows inner
// content; if that turns out to be root a new paragraph is opened for the character.
// The text ends up in a t_inner node (a new one is created if the target is not one already)
// and *active_element is pointed at that node.
// Only the space character, digits and ASCII letters are stored verbatim, everything else
// goes through html_escape().
void append_char_to_active(struct tree_element* root, struct tree_element** active_element, char cur_char) {
	struct tree_element* target = root;
	if(active_element) {
		target = *active_element;
		NEXT_ALLOW_INNER((*active_element), root)
		if(*active_element == root) {
			// We are not allowed to add inner to this element so we'll start a new paragraph
			NEW_ACTIVE_CHILD(target, root, -1, t_p, true);
		}
	}
	if(target->type != t_inner) {
		NEW_ACTIVE_CHILD(target, target, -1, t_inner, false);
	}

	bool is_digit = cur_char >= 48 && cur_char <= 57;
	bool is_upper = cur_char >= 65 && cur_char <= 90;
	bool is_lower = cur_char >= 97 && cur_char <= 122;

	// Large enough for the longest sequence html_escape() writes
	char text[96] = "";
	if(cur_char == 0x20 || is_digit || is_upper || is_lower) {
		text[0] = cur_char;
	} else {
		// Escape just to be safe
		html_escape(text, cur_char);
	}
	target->value = realloc_append(target->value, text);

	if(active_element)
		*active_element = target;
}

// This adds a new unordered list and returns a pointer to it 
// indent is the spaces that the list indicator ( * or - ) was intendend
struct tree_element* new_list(struct tree_element* parent, int indent, bool ordered, int start) {
	struct tree_element *new_list;
	NEW_ACTIVE_CHILD(new_list, parent, -1, t_list, false);
	struct list_data* data = malloc(sizeof(struct list_data));
	data->indent = indent;
	data->ordered = ordered;
	data->start = start;
	new_list->value = data;
	return new_list;
}

// Search start and its ancestors (root itself excluded) for an element of the given type.
// Returns the first match on the way up, or NULL once root is reached without one.
struct tree_element* find_parent_type(struct tree_element* root, struct tree_element* start, int type) {
	for(struct tree_element* node = start; node != root; node = node->parent) {
		if(node->type == type)
			return node;
	}
	return NULL;
}

// State of the emphasis ("strength") parser. Every variable exists twice:
// *_ast tracks emphasis written with '*', *_und emphasis written with '_'.
// parse_markdown() hands pointers to one of the two sets to str_chr_hit() and str_wait_hit().

// These store temporary strength values
// (set to 1 when an emphasis element is opened, counted up for further marker characters)
unsigned int temp_str_ast = 0;
unsigned int temp_str_und = 0;

// These are true if a strength element is waiting for closing
bool str_cl_wait_ast = false;
bool str_cl_wait_und = false;

// These are true if a strength element waits for a different character
// (the last character was a marker; cleared by str_wait_hit() on the next other character)
bool str_chr_wait_ast = false;
bool str_chr_wait_und = false;

// These are set by str_chr_hit() when closing markers were seen but their number is still
// below the temporary strength; str_wait_hit() then finishes the element through end_strength()
bool str_fin_wait_ast = false;
bool str_fin_wait_und = false;

// Reset the complete state of one marker kind
#define ZERO_STR_AST temp_str_ast = 0; str_cl_wait_ast = false; str_chr_wait_ast = false; str_fin_wait_ast = false;
#define ZERO_STR_UND temp_str_und = 0; str_cl_wait_und = false; str_chr_wait_und = false; str_fin_wait_und = false;


// Reconcile an emphasis element with the marker count tmp_str.
//
// Looks for the closest element of the given type (t_str_ast or t_str_und) among
// *active_element and its ancestors and does nothing if there is none.
// The level stored in that element is compared with tmp_str and one literal marker
// character is appended (through append_char_to_active()) for every count they differ by:
//   - tmp_str below the stored level: appended starting from the parent of the
//     emphasis element, afterwards the stored level is lowered to tmp_str
//   - tmp_str above the stored level: appended to the second to last child of the
//     parent of the emphasis element, or to a new text node inserted at index 0
//     if that parent has fewer than two children
// *active_element itself is not changed by this function.
void end_strength(struct tree_element* root, struct tree_element** active_element, char marker, int tmp_str, int type) {
	struct tree_element* parent_strength = find_parent_type(root, *active_element, type);
	if(!parent_strength)
		return;
	// tmp_str is an int, the stored level an unsigned int: the comparisons below are done unsigned
	if(tmp_str < *(unsigned int*)parent_strength->value) {
		struct tree_element* tmp_active = parent_strength->parent;
		for(int i = tmp_str; i < *(unsigned int*)parent_strength->value; i++)
			append_char_to_active(root, &tmp_active, marker);
		*(unsigned int*)parent_strength->value = tmp_str;
	} else if(tmp_str > *(unsigned int*)parent_strength->value) {
		struct tree_element* tmp_active;
		if((parent_strength->parent)->children_n >= 2)
			// Second to last child of the parent
			// NOTE(review): presumably the sibling right before the emphasis element, which only holds while that element is the last child - confirm
			tmp_active = &(*(parent_strength->parent)->children[(parent_strength->parent)->children_n-2]);
		else {
			NEW_ACTIVE_CHILD(tmp_active, parent_strength->parent, 0, t_inner, false);
		}
		//printf("\naa:%d\n", tmp_str);
		for(int i = *(unsigned int*)parent_strength->value; i < tmp_str; i++)
			append_char_to_active(root, &tmp_active, marker);
	}

}

// Call this function if a strength character is waiting to hit a different character
// and that character has now arrived.
// The "waiting for closing" flag is flipped. If it was set and closing markers are still
// pending (*str_fin_wait), the element is finished through end_strength() and the pending
// flag and the temporary strength are cleared. The "waiting for a character" flag is always cleared.
void str_wait_hit(struct tree_element* root, struct tree_element** active_element, bool* str_cl_wait, bool* str_fin_wait, unsigned int* temp_str, bool* str_chr_wait, char str_chr, int type) {
	bool was_closing = *str_cl_wait;

	if(was_closing && *str_fin_wait) {
		end_strength(root, active_element, str_chr, *temp_str, type);
		*str_fin_wait = false;
		*temp_str = 0;
	}

	*str_cl_wait = !was_closing;
	*str_chr_wait = false;
}

// Call this function when a strength character ('*' or '_') is read.
//
// desired_type selects the element type (t_str_ast / t_str_und); str_cl_wait, str_fin_wait,
// temp_str and str_chr_wait point to the state variables of that marker kind.
//   - Not inside an element of desired_type: a new one is opened (inside a new paragraph
//     if the active element is root) with a stored level of 0 and *temp_str is set to 1.
//   - Inside one and not yet closing: the marker belongs to the opening run, *temp_str is counted up.
//   - Inside one and closing: the stored level is counted up. Once it reaches *temp_str the
//     element is complete, the active element returns to its parent and the state of this
//     marker kind is reset. Otherwise *str_fin_wait is set.
// Except for the completed case *str_chr_wait is set on return.
void str_chr_hit(struct tree_element* root, struct tree_element** active_element, bool* str_cl_wait, bool* str_fin_wait, unsigned int* temp_str, bool* str_chr_wait, int desired_type) {
	struct tree_element* parent_strength = find_parent_type(root, *active_element, desired_type);
	if(parent_strength) {
		// We are already in a strength element
		// Check if we are waiting to close
		if(*str_cl_wait) {
			unsigned int* closed_level = parent_strength->value;
			(*closed_level)++;
			if(*closed_level >= *temp_str) {
				*active_element = parent_strength->parent;
				// Reset the state that was passed in. Resetting the asterisk globals here
				// left the underscore state dirty after every closed '_' element.
				*temp_str = 0;
				*str_cl_wait = false;
				*str_chr_wait = false;
				*str_fin_wait = false;
				return;
			} else {
				*str_fin_wait = true;
			}
		} else {
			(*temp_str)++;
		}
	} else {
		// Enter a new strength element as we are currently not in one
		if((*active_element)->type == t_inner)
			*active_element = (*active_element)->parent;
		if(*active_element == root) {
			NEW_ACTIVE_CHILD((*active_element), root, -1, t_p, true);
		}
		NEW_ACTIVE_CHILD((*active_element), (*active_element), -1, desired_type, true);
		(*active_element)->value = calloc(1, sizeof(unsigned int));
		*temp_str = 1;
	}
	*str_chr_wait = true;
}

// Convert the NUL terminated markdown text in input to html.
//
// The input is read one character at a time and turned into a tree of
// tree_element nodes below a local root. The tree is then rendered with
// tree_to_html(), freed again, and the result is copied into buffer.
//
// input:       markdown source
// buffer:      receives the html; it is zeroed first and at most buffer_size - 1
//              bytes are copied, so longer output is cut off
// buffer_size: size of buffer in bytes (NOTE(review): presumably has to be at least 1,
//              buffer_size - 1 is passed to strncpy - confirm with the callers)
// Returns 0 in every case.
//
// Side effects: every input character and the processing time are printed to stdout,
// and the global emphasis / utf8 state variables are used, so this is not reentrant.
//
// The program needs to loop through the loop again to close all the open things at the end, THIS NEEDS TO BE IMPLEMENTED for xhtml
int parse_markdown(char* input, char* buffer, size_t buffer_size) {
	// Start with a clean multi-byte state for html_escape()
	utf8_length = 0;

	clock_t before = clock();
	memset(buffer, 0, buffer_size);
	// The previous character was a backslash
	bool escaped = false;
	// A single line break was read inside of an element and the next line has not produced content yet
	bool newline = false;
	// A list marker was read, the list item is created by the space that follows it
	bool list_waiting = false;
	// The pending list marker was a number (ordered list) and the number it had
	bool ol_list = false;
	int ol_start = 0;
	// This will be set to some non-null value when there is a code element to return to
	struct tree_element* code_element = NULL;

	// Number of '#', '-' and '=' characters collected so far that were not emitted as text
	int hash_chain = 0;
	int dash_chain = 0;
	int eq_chain = 0;
	// Indentation of the current line (a tab counts as 4)
	int spaces_trimmed = 0;
	// Return to root on the next line break (set for titles and list items)
	int root_on_newline = false;
	// Spaces read inside of text that were not appended yet
	int soft_newline_count = 0;

	// An inline code element was opened and waits for its closing backtick
	bool inline_code_wait = false;
	// We are inside a fenced code block that was opened with fenced_char
	bool fenced_code = false;
	char fenced_char = '\0';

	// @todo: These two variables should be globals! Why did I even define them here at all?
	struct tree_element* root = new_element();
	root->allow_inner = false;
	struct tree_element* active_element = root;
	for(register char* cur_char = input; *cur_char != '\0'; cur_char++) {
		// NOTE(review): this echoes the whole input to stdout, looks like leftover debug output - confirm
		printf("%c", *cur_char);
		// Escaped characters and the content of indented code blocks are taken literally
		if(escaped || (code_element && *cur_char != '\n' && *cur_char != '\r' && !newline)) {
			escaped = false;
			if((*cur_char == '\n' || *cur_char == '\r')) {
				if(active_element->parent)
					active_element = active_element->parent;
				struct tree_element* br_child = NEW_ACTIVE_CHILD(br_child, active_element, -1, t_br, false);
			} else {
				append_char_to_active(root, &active_element, *cur_char);
			}
			continue;
		}

		// man is this disgusting
		// Inside a fenced code block everything is text until a line starts with three fence characters
		if(fenced_code) {
			if((*cur_char == '\n' || *cur_char == '\r') &&
			   *(cur_char + 1) == fenced_char && *(cur_char + 2) == fenced_char && *(cur_char + 3) == fenced_char) {
				cur_char += 3;
				active_element = root;
				fenced_code = false;
			} else
				append_char_to_active(root, &active_element, *cur_char);
			continue;
		}

		// A run of emphasis markers ended with this character
		if(str_chr_wait_ast && *cur_char != '*') {
			str_wait_hit(root, &active_element, &str_cl_wait_ast, &str_fin_wait_ast, &temp_str_ast, &str_chr_wait_ast, '*', t_str_ast);
		}
		
		if(str_chr_wait_und && *cur_char != '_') {
			str_wait_hit(root, &active_element, &str_cl_wait_und, &str_fin_wait_und, &temp_str_und, &str_chr_wait_und, '_', t_str_und);
		}

		// Turn the last child of root into a title of level n (replaces the value it had before)
		#define LAST_TO_TITLE(n) {\
				if(root->children_n >= 1) {\
					struct tree_element* last_element = root->children[root->children_n-1];\
					last_element->type = t_h;\
					if(last_element->value)\
						free(last_element->value);\
					last_element->value = malloc(sizeof(int));\
					*(int*)last_element->value = n;\
				}\
		}

		// Append the spaces that were held back in soft_newline_count
		#define APPEND_SPACES for(int i = 0; i < soft_newline_count; i++) append_char_to_active(root, &active_element, ' ');

		// Checks that should be done if a non special character is hit, might also be necessary to check sometimes not in default
		// In order: flush held back spaces, enter / leave an indented code block (4 or more spaces at the
		// start of a line), emit collected '#', '-' and '=' as text, and handle a pending single line break
		#define DEFAULT_CHECKS {\
			APPEND_SPACES \
			soft_newline_count = 0; \
			if(spaces_trimmed >= 4 && (newline || active_element == root)) { \
				/* if code_element is set the new active element is just returned to the code element*/ \
				if(code_element) { \
					active_element = code_element;\
					/* append_char_to_active(root, &active_element, '\n'); */\
				} else {\
					NEW_ACTIVE_CHILD(active_element, root, -1, t_code, true);\
					code_element = active_element;\
				}\
				for(int i = 0; i < spaces_trimmed - 4; i++)\
					append_char_to_active(root, &active_element, ' ');\
			} else if(code_element) {\
				active_element = root;\
				code_element = false;\
			}\
			if(hash_chain > 0) {\
				for(int i = 0; i < hash_chain; i++)\
					append_char_to_active(root, &active_element, '#');\
				hash_chain = 0;\
			}\
			if(dash_chain > 0) {\
				for(int i = 0; i < dash_chain; i++)\
					append_char_to_active(root, &active_element, '-');\
				dash_chain = 0;\
			}\
			if(eq_chain > 0) {\
				for(int i = 0; i < eq_chain; i++)\
					append_char_to_active(root, &active_element, '=');\
				eq_chain = 0;\
			}\
		\
			if(newline) {\
				if(root_on_newline) {\
					active_element = root;\
					root_on_newline = false;\
				} else if(!code_element)\
					/* Check if we have a soft linebreak (two spaces before newline) */\
					append_char_to_active(root, &active_element, ' ');\
			}\
		}
		// NOTE(review): several cases below declare a variable right after the case label
		// ('[', '!', '<'), which needs C23 or a compiler extension - confirm the build flags
		switch(*cur_char) {
			// Character escaping
			case('\\'):
				escaped = true;
				break;
			// Tabs
			case('\t'):
				if(active_element == root || newline) {
					//@todo with this we should relatively easily be able to check for code blocks!
					spaces_trimmed += 4;
					break;
				} else goto default2;
			
				break;
			// Newline
			case('\r'):
				// Ignoring \r goes against the commonmark spec, but who cares
				break;
			case('\n'):
				list_waiting = false;
				// A line that ended with collected dashes: horizontal rule, literal dashes or a title underline
				if(dash_chain >= 1) {
					active_element = root;
					// Make the last element a title unless
					if(!newline) {
						if(dash_chain >= 3 && active_element == root) {
							struct tree_element* hr = NEW_ACTIVE_CHILD(hr, root, -1, t_hr, false);
						} else {
							for(int i = 0; i < dash_chain; i++)
								append_char_to_active(root, NULL, '-');
						}
					} else {
						LAST_TO_TITLE(2);
					}
					dash_chain = 0;
				}
				// A line that ended with collected equal signs: title underline or literal text
				if(eq_chain >= 1) {
					active_element = root;
					if(newline) {
						LAST_TO_TITLE(1);
					} else {
						for(int i = 0; i < eq_chain; i++)
							append_char_to_active(root, NULL, '=');
					}
					eq_chain = 0;
				}

				if(hash_chain > 0)
					active_element = root;
				if(newline) {
					// A double new line means we return the active element to root
					active_element = root;
					newline = false;
					root_on_newline = false;
					code_element = NULL;
					active_element = root;
				} else if(active_element != root) {
					if(soft_newline_count < 2)
						newline = true;
					else {
						// Two or more spaces before the line break: insert a <br/>
						struct tree_element* old_active = active_element;
						NEXT_ALLOW_INNER(active_element, root)
						NEW_ACTIVE_CHILD(active_element, active_element, -1, t_br, false);
						active_element = old_active;
						NEXT_ALLOW_INNER(active_element, root)
					}
				}
				
				// Emphasis does not continue over a line break
				//printf("%d", temp_str_ast);
				if(temp_str_ast > 0)
					end_strength(root, &active_element, '*', temp_str_ast, t_str_ast);
				ZERO_STR_AST
				//printf("%d\n", temp_str_und);
				if(temp_str_und > 0)
					end_strength(root, &active_element, '_', temp_str_und, t_str_und);
				ZERO_STR_UND

				if(code_element)
					append_char_to_active(root, &active_element, '\n');

				inline_code_wait = false;
				hash_chain = 0;
				spaces_trimmed = 0;
				soft_newline_count = 0;
				break;
			// Numbered lists
			case('1'):
			case('2'):
			case('3'):
			case('4'):
			case('5'):
			case('6'):
			case('7'):
			case('8'):
			case('9'):
			case('0'):
				// Only a single digit followed by ". " at the start of a line is recognized as a list marker
				if((active_element == root || newline) && *(cur_char+1) == '.' && *(cur_char+2) == ' ') {
					list_waiting = true;
					ol_list = true;
					ol_start = 0;
					sscanf(cur_char, "%d.", &ol_start);
					// Skip the '.', the space that follows creates the list item
					cur_char++;
				} else
					goto default2;
				break;
			case('*'):
				if((active_element == root || newline) && *(cur_char+1) == ' ' ) {
					list_waiting = true;
				} else {
					str_chr_hit(root, &active_element, &str_cl_wait_ast, &str_fin_wait_ast, &temp_str_ast, &str_chr_wait_ast, t_str_ast);
				}
				break;
			case('_'):	
				str_chr_hit(root, &active_element, &str_cl_wait_und, &str_fin_wait_und, &temp_str_und, &str_chr_wait_und, t_str_und);
				break;
			// No 3 backticks are supported, use 4 spaces at the begining of a line to get a <pre><code> block
			// NOTE(review): the comment above looks outdated, fenced blocks are handled right below - confirm
			
			case('`'):
			case('~'):
				// Check for fenced code
				if(active_element == root || newline) {
					// NOTE(review): the second operand only tests that the character after next is not NUL,
					// so two equal fence characters already open a block; presumably a comparison with
					// *cur_char was intended - confirm
					if(*cur_char == *(cur_char + 1) && *(cur_char + 2)) {
						NEW_ACTIVE_CHILD(active_element, root, -1, t_code, true);
						fenced_code = true;
						fenced_char = *cur_char;
						// Ignore the rest of the line
						while(*cur_char != 0 && *cur_char != '\n')
							cur_char++;
						break;
					}
				}
				// A '~' that does not open a fence is ordinary text
				if(*cur_char == '~')
					goto default2;
				DEFAULT_CHECKS;
				if(inline_code_wait) {
					// Closing backtick: mark the inline code element as closed and leave it
					struct tree_element* parent_code = find_parent_type(root, active_element, t_inline_code);
					if(!parent_code)
						goto default2;
					*(bool*)(parent_code->value) = true;
					active_element = parent_code->parent;
					inline_code_wait= false;
				}
				else {
					// Opening backtick: start an inline code element that is not closed yet
					NEXT_ALLOW_INNER(active_element, root);
					if(active_element == root) {
						NEW_ACTIVE_CHILD(active_element, root, -1, t_p, true);
					}
					NEW_ACTIVE_CHILD(active_element, active_element, -1, t_inline_code, true);
					active_element->value = malloc(sizeof(bool));
					*(bool*)(active_element->value) = false;
					inline_code_wait = true;
				}
				break;
			case('['):
				char* link_text = NULL;
				char* link_loc = NULL;
				size_t link_len = 0;
				char* new_position = get_link_components(cur_char, &link_text, &link_loc, &link_len);
				if(new_position) {
					APPEND_SPACES
					if(active_element == root) {
						NEW_ACTIVE_CHILD(active_element, active_element, -1, t_p, true);
					}
					NEXT_ALLOW_INNER(active_element, root)
					// We have a link
					// Create a new a element which contains the link adress
					NEW_ACTIVE_CHILD(active_element, active_element, -1, t_a, true);
					active_element->value = link_loc;
					// Create an inner element in it which contains the text
					NEW_ACTIVE_CHILD(active_element, active_element, -1, t_inner, false);
					active_element->value = link_text;
					// Return to the old parent
					active_element = active_element->parent->parent;
					// Continue after the closing round bracket
					cur_char = new_position;
					break;
				} else
					goto default2;
			case('!'):
				char* alt_text = NULL;
				char* img_loc = NULL;
				size_t img_len = 0;
				// new_position is the variable that was declared in the '[' case above
				new_position = get_link_components(cur_char + 1, &alt_text, &img_loc, &img_len);
				if(new_position) {
					NEXT_ALLOW_INNER(active_element, root)
					// This contains the image link
					NEW_ACTIVE_CHILD(active_element, active_element, -1, t_img, true);
					active_element->value = img_loc;
					// This contains the image alt text
					NEW_ACTIVE_CHILD(active_element, active_element, -1, t_inner, false);
					active_element->value = alt_text;
					active_element = active_element->parent->parent;
					cur_char = new_position;
					break;
				} else
					goto default2;
				break;
			// Titles
			case('='):
				if(active_element == root || newline) {
					// Make the last line a title, otherwise we fall through
					eq_chain++;
					break;
				}
			// An '=' inside of text falls through to here
			case('-'):
				if(active_element == root || newline) {
					list_waiting = true;
					dash_chain++;
				}
				// NOTE(review): a '-' (or '=') inside of running text is neither counted nor appended here - confirm intended
				break;
			case('#'):
				if(hash_chain == 0 || newline) {
					if(active_element == root || newline || active_element->type == t_li) {
						if(!(active_element->type == t_li))
							active_element = root;
						hash_chain = 1;
						newline = false;
						break;
					}
				} else {
					hash_chain++;
					break;
				}
			// A first '#' inside of text falls through to here
			case('>'):
				if(newline || active_element == root) {
					// Look if we already have a block quote parent somewhere, if so just continue, otherwise create a new one.
					if(find_parent_type(root, active_element, t_bq))
						break;
					NEW_ACTIVE_CHILD(active_element, root, -1, t_bq, true);
					break;
				} else
					goto default2;
			case('<'):
				// Raw html: everything up to the closing '>' on the same line is copied without escaping
				char* closing_gt = cur_char;
				while(*closing_gt != 0 && *closing_gt != '\n') {
					if(*closing_gt == '>') break;
					closing_gt++;
				}
				if(*closing_gt != '>')
					goto default2;

				NEW_ACTIVE_CHILD(active_element, active_element->allow_inner ? active_element : root, -1, t_inner, false);
				// NOTE(review): closing_gt - cur_char + 1 bytes are allocated and copied, the terminator
				// below is then written one byte after them - looks like an off by one, confirm
				active_element->value = malloc(closing_gt - cur_char + 1);
				memcpy(active_element->value, cur_char, closing_gt - cur_char + 1);
				*(char*)((active_element->value) + (unsigned int)(closing_gt - cur_char) + 1) = 0;
				cur_char = closing_gt;
				active_element = active_element->parent;
			// NOTE(review): there is no break above, so the '<' case continues in the ' ' case - confirm intended
			case(' '):
				// A space after a run of '#' starts (or updates) a title with that level
				if(hash_chain > 0) {
					if(active_element->type != t_h) {
						if(active_element->parent ? active_element->parent->type == t_h : false) {
							active_element = active_element->parent;
						} else {
							NEW_ACTIVE_CHILD(active_element, active_element, -1, t_h, true);
							active_element->value = malloc(sizeof(int));
						}
					}
					*(int*)(active_element->value) = hash_chain;
					hash_chain = 0;
					root_on_newline = true;
					break;
				}
				// Here lists are created
				if(list_waiting && dash_chain <= 1) {
					dash_chain = 0;
					newline = false;
					/* Look if we have an ancestor somewhere that has spaces fewer or equal to the spaces we skipped.
					* On fewer spaces we enter a new list below the one we found
					* Is it equal we just add a new list item
					* If we do not find a list we create a new one at the root node */
					struct tree_element* look_element = find_parent_type(root, active_element, t_list);;
					bool found_list = false;
					while(look_element != NULL){
						// Out list has more indents and is therefore a child to the one we found
						// Enter new child list
						if(((struct list_data*)look_element->value)->indent < spaces_trimmed) {
							active_element = new_list(look_element, spaces_trimmed, ol_list, ol_start);
							found_list = true;
							break;
						} 
						// We found a list of the exact indentation level
						else if(((struct list_data*)look_element->value)->indent == spaces_trimmed) {
							// If the list type is the same simply mark the found element as active, otherwise create a new sibling list
							if(((struct list_data*)look_element->value)->ordered == ol_list) {
								active_element = look_element;
								found_list = true;
							} else {
								active_element = new_list(look_element->parent, spaces_trimmed, ol_list, ol_start);
								found_list = true;
								break;
							}
							break;
						} 
						// The current list has fewer indents than what we found, look farther for a parent
						else if(((struct list_data*)look_element->value)->indent > spaces_trimmed)
							look_element = find_parent_type(root, look_element->parent, t_list);
						else
							look_element = find_parent_type(root, look_element, t_list);
					}
					// Enter a new list
					if(!found_list) {
						active_element = new_list(root, spaces_trimmed, ol_list, ol_start);
					}
					NEW_ACTIVE_CHILD(active_element, active_element, -1, t_li, true);
					root_on_newline = true;
					list_waiting = false;
					ol_list = false;
				}
				// Trim spaces from newline
				if(active_element == root || newline) {
					//@todo with this we should relatively easily be able to check for code blocks!
					spaces_trimmed++;
					break;
				}
				// A space inside of text is held back until the next character decides what happens to it
				soft_newline_count++;
				break;
			// Default character handling
			default2:
			default:
				DEFAULT_CHECKS;
				newline = false;
				list_waiting = false;
				ol_list = false;
				append_char_to_active(root, &active_element, *cur_char);
				break;
		}
	}
	// Convert the tree to valid html
	depth++;
	char* html = tree_to_html(root);
	//printf("%s\n", html ? html : "" );
	// Tear down the tree
	free_tree(root);
	if(html) {
		// The buffer was zeroed above and its last byte is never written, so the copy stays NUL terminated
		strncpy(buffer, html, buffer_size - 1);
	}
	free(html);
	// NOTE(review): the label says ns, but clock ticks are divided by CLOCKS_PER_SEC / 1000000 here - confirm the unit
	printf("Time to process in ns: %ld\n", (clock() - before) / (CLOCKS_PER_SEC / 1000000));
	depth--;
	return 0;
}

// Parse a markdown link of the form [text](location) that begins at start.
//
// start has to point at the opening square bracket; the complete link has to
// be on one line.
// On success the location of the closing round bracket is returned, *out_text
// and *out_loc receive newly allocated, NUL terminated copies of the text and
// the location, and *out_len the sum of their lengths.
// out_text and out_loc are allocated by this function
// Don't forget to free
// If start is not a link, or memory runs out, NULL is returned and the out
// parameters are left untouched.
char* get_link_components(char* start, char** out_text, char** out_loc, size_t* out_len) {
	// Without the opening bracket a ']' at the very start would make the text length underflow
	if(*start != '[') {
		return NULL;
	}

	// look how far the next newline is away
	size_t line_length = strcspn(start, "\r\n");

	// Search for the next closing bracket
	char* closing_sqr_bracket = memchr(start, ']', line_length);
	if(closing_sqr_bracket == NULL) {
		return NULL;
	}
	// At worst this reads the character that ended the line (line break or NUL)
	char* opening_rnd_bracket = closing_sqr_bracket + 1;
	if(*opening_rnd_bracket != '(') {
		return NULL;
	}
	// Only search what is left of this line
	size_t remaining = line_length - (size_t)(opening_rnd_bracket - start);
	char* closing_rnd_bracket = memchr(opening_rnd_bracket, ')', remaining);
	if(closing_rnd_bracket == NULL) {
		return NULL;
	}
	size_t link_text_len = (size_t)(closing_sqr_bracket - start) - 1;
	size_t link_loc_len = (size_t)(closing_rnd_bracket - opening_rnd_bracket) - 1;

	char* link_text = calloc(link_text_len + 1, 1);
	if(link_text == NULL) {
		jb_log(LL_ERR, true, "calloc error");
		return NULL;
	}
	char* link_loc = calloc(link_loc_len + 1, 1);
	if(link_loc == NULL) {
		jb_log(LL_ERR, true, "calloc error");
		free(link_text);
		return NULL;
	}

	// calloc zeroed both buffers, so the copies are already terminated
	memcpy(link_text, start + 1, link_text_len);
	memcpy(link_loc, opening_rnd_bracket + 1, link_loc_len);
	*out_text = link_text;
	*out_loc = link_loc;
	*out_len = link_text_len + link_loc_len;

	return closing_rnd_bracket;
}
Jeremias Stotters git repositories generated by CGIT