aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJeremias Stotter <jeremias@stotter.eu>2022-04-10 19:28:10 +0200
committerJeremias Stotter <jeremias@stotter.eu>2022-04-10 19:28:10 +0200
commit6c6151539a2e267aab06edc4c492ea6112d2215a (patch)
tree7b4dc1efa93410d7bcf284c2bf5d5fc31c8e92bd
parent6027ef98e693ccc3b06a0f7c3bc11fe2fde20a07 (diff)
parent4fd8facc45e587b3281d8f032d6f8c283c6b095b (diff)
downloadJBlog-6c6151539a2e267aab06edc4c492ea6112d2215a.tar.gz
JBlog-6c6151539a2e267aab06edc4c492ea6112d2215a.tar.bz2
JBlog-6c6151539a2e267aab06edc4c492ea6112d2215a.zip
Merge branch 'new-md'
-rw-r--r--jblog.c3
-rw-r--r--makefile2
-rw-r--r--md.c1304
-rw-r--r--md.h1
4 files changed, 857 insertions, 453 deletions
diff --git a/jblog.c b/jblog.c
index be755af..44eb88a 100644
--- a/jblog.c
+++ b/jblog.c
@@ -74,9 +74,6 @@ void* tree_root = NULL;
int logfile = -1;
-#define LL_INFO 0
-#define LL_WARN 1
-#define LL_ERR 2
void jb_log(int loglevel, bool include_errno, char* error_string) {
// @todo implement a minimum loglevel command line switch
int curr_errno = errno;
diff --git a/makefile b/makefile
index e1cfea5..4ca92ee 100644
--- a/makefile
+++ b/makefile
@@ -1,5 +1,5 @@
CC=gcc
-CFLAGS=-O2 -std=c99 -Wall
+CFLAGS=-O0 -g -std=c99 -Wall
BINDIR=/usr/bin
INITDIR=/etc/init.d
CONFDIR=/etc/conf.d
diff --git a/md.c b/md.c
index 7187575..e5aaea7 100644
--- a/md.c
+++ b/md.c
@@ -21,512 +21,918 @@
#include <string.h>
#include <stdlib.h>
#include <stdbool.h>
+#include <limits.h>
+
+
+#include <time.h>
#include "jblog.h"
-#define LINE_MAX 4096
+//#define LINE_MAX 4096
// Some not so nice things about this:
// HTML escapes like &#42; are not handled by this although they might be handled by the client anyways
// No setext headings because why not just use the hash symbol?
-// @todo when implementing the cycling through characters replace tabs with 4 spaces
+char* get_link_components(char* start, char** out_text, char** out_loc, size_t* out_len);
-// This function outputs a pointer that points past leading spaces
-// it returns the number of spaces skippen where \t = 4 spaces
-int trim_space(char* input, char** output) {
- char* trimmed = input;
- int count = 0;
- while(*trimmed == ' ' || *trimmed == '\t') {
- if(*trimmed == '\t') {
- count += 4;
- } else {
- count ++;
+struct list_data { // Payload stored in the value pointer of a t_list element (allocated in new_list, read in tree_to_html)
+ // true for an ordered list (<ol>), false for an unordered list (<ul>)
+ bool ordered;
+ // Ordered lists only: the number the list starts counting from (emitted as the <ol start="..."> attribute); ignored for <ul>
+ int start;
+ // The number of spaces used to indent the list marker at this level of the list
+ int indent;
+};
+
+enum html_type { // Element kinds of the document tree; stored in tree_element.type
+ t_root = 0, // Zero, so an element fresh from new_element() (calloc) has this type
+ t_inner, // Text leaf: value is the text as a C string, appended to by append_char_to_active
+ t_h, // value points to an int holding the heading level (1-6), printed into <hN>
+ t_p,
+ t_br,
+ t_str_ast, // Emphasis opened with '*': value points to an unsigned int level (1 = <em>, 2 = <b>, 3 = <em><b>)
+ t_str_und, // Same as t_str_ast, but opened with '_'
+ t_img, // value is the src string; the inner html becomes the alt text
+ t_a, // value is the href string; the inner html becomes the link text
+ t_ol, // NOTE(review): not handled by tree_to_html in the code visible here; ordered lists use t_list with list_data.ordered - confirm whether this is still needed
+ t_list, // value points to a struct list_data (ordered flag, start number, indent in spaces)
+ t_li,
+ t_hr,
+ t_code, // Rendered as <pre><code>
+ t_inline_code, // value is read as a bool by tree_to_html: true renders <code>, false renders a literal backtick before the text
+ t_bq // Rendered as <blockquote>
+};
+
+// The markdown input is parsed into a tree of these elements, which tree_to_html() then converts to html
+struct tree_element {
+ struct tree_element* parent; // Set by new_child(); stays NULL for an element created directly with new_element()
+ int type; // One of enum html_type
+ int children_n; // Number of entries in children
+ struct tree_element** children; // Heap array of child pointers, grown by new_child()
+ // Is this element allowed to have inner html
+ bool allow_inner;
+ // Type-specific extra data, e.g. the src for img or the text in the case of inner; free_tree() passes it to free()
+ void* value;
+};
+
+// Allocate one zero-filled tree element (type t_root, no parent, no children, no value).
+// Returns the result of calloc unchanged, so NULL when the allocation fails.
+struct tree_element* new_element() {
+    struct tree_element* element = calloc(1, sizeof *element);
+    return element;
+}
+// Recursively free root, its value and every element below it. root must not be NULL (it is dereferenced immediately).
+void free_tree(struct tree_element* root) {
+ if(root->value)
+ free(root->value); // NOTE(review): assumes value is heap-allocated or NULL; true for every assignment visible here - confirm for the rest of the file
+ if(root->children) {
+ for(int i = 0; i < root->children_n; i++) {
+ free_tree(root->children[i]);
}
- trimmed++;
+ free(root->children); // The pointer array itself, once all children are released
}
- *output = trimmed;
- return count;
+ free(root); // The element itself goes last
}
-// Append in_src to in_dst, then return length of new string
-size_t append(char* in_dest, char* in_src) {
- strcat(in_dest, in_src);
- // for pure safety reasons I'll put a terminating \0 at the end of the destination string
- in_dest[LINE_MAX-1] = '\0';
- return strlen(in_dest);
+// Create a new zero-filled child of parent, link it into parent->children and return it.
+//
+// index == -1 appends the child after the existing children. An index between 0 and the
+// current children_n inserts the child at that position and moves the later children up by
+// one. Any other index is treated like -1 instead of writing outside the array.
+//
+// The child belongs to the tree and is released by free_tree(). If memory runs out the
+// process is terminated with exit(-1), the same policy tree_to_html() uses.
+struct tree_element* new_child(struct tree_element* parent, int index) {
+    int old_count = parent->children_n;
+    struct tree_element* child = new_element();
+    // Grow through a temporary and only commit the new count once the allocation succeeded
+    struct tree_element** grown = realloc(parent->children, sizeof *grown * ((size_t)old_count + 1));
+    if(!child || !grown)
+        exit(-1);
+    child->parent = parent;
+    if(index < 0 || index > old_count)
+        index = old_count;
+    // Open a gap at index by moving the tail up one slot (nothing to move when appending)
+    if(index < old_count)
+        memmove(grown + index + 1, grown + index, sizeof *grown * (size_t)(old_count - index));
+    grown[index] = child;
+    parent->children = grown;
+    parent->children_n = old_count + 1;
+    return child;
}
-// Prepend in_src to in_dst, then return length of new string
-size_t prepend(char* in_dest, char* in_src) {
- char prepended_string[LINE_MAX] = {'\0'};
- strncpy(prepended_string, in_src, LINE_MAX - 1);
- strncat(prepended_string, in_dest, LINE_MAX - 1);
- memcpy(in_dest, prepended_string, LINE_MAX);
- // for pure safety reasons I'll put a terminating \0 at the end of the destination string
- in_dest[LINE_MAX-1] = '\0';
- return strlen(in_dest);
+int utf8_length;
+long int unicode_char;
+// Write the HTML escape for one input byte into output_buffer, which must hold at least 96 bytes.
+//
+// A byte without the high bit becomes a numeric character reference such as "&#x3C;".
+// A byte with the high bit set is treated as part of a UTF-8 sequence: the code point is
+// accumulated across calls in the globals unicode_char / utf8_length, an empty string is
+// written while the sequence is incomplete, and a single reference for the whole code point
+// is written on its last byte. Nothing is returned; the result is the string in output_buffer.
+void html_escape(char* output_buffer, char input) {
+    // Look at the byte as unsigned: whether plain char is signed is implementation-defined,
+    // and a test that depends on sign extension never matches where char is unsigned
+    unsigned char byte = (unsigned char)input;
+    if(byte & 0x80) {
+        // UTF-8: lead bytes start with two or more one bits, continuation bytes with exactly one
+        int leading_ones = 0;
+        for(unsigned char bits = byte; bits & 0x80; bits = (unsigned char)(bits << 1))
+            leading_ones++;
+        if(leading_ones > 1) {
+            // Lead byte: start a new code point
+            utf8_length = leading_ones;
+            unicode_char = 0x0;
+        } else if(utf8_length <= 0) {
+            // Continuation byte with no sequence in progress: write nothing and keep the state
+            // untouched, so repeated stray bytes cannot overflow the accumulator
+            *output_buffer = '\0';
+            return;
+        } else {
+            // Continuation byte: make room for its 6 payload bits
+            unicode_char = unicode_char << 6;
+        }
+        unicode_char = unicode_char | ( byte & (UCHAR_MAX >> leading_ones) );
+        utf8_length--;
+
+        if(utf8_length)
+            *output_buffer = '\0';
+        else {
+            snprintf(output_buffer, 96, "&#x%lX;", (unsigned long)unicode_char);
+        }
+    } else {
+        snprintf(output_buffer, 96, "&#x%X;", (unsigned int)byte);
+    }
}
-// start is the first curly bracket we have
-// Returns pointer to last round bracket
-// free the returned pointers yourself
-char* get_link_components(char* start, char** out_text, char** out_loc, size_t* out_len) {
- // Search for the next closing bracket
- char* closing_sqr_bracket = strchr(start, ']');
- if(closing_sqr_bracket == NULL) {
- return NULL;
+// Append src to the heap string dest, growing dest as needed, and return the resulting string.
+//
+// dest may be NULL (treated as an empty string) and src may be NULL (nothing is appended).
+// The returned pointer replaces dest, which must not be used afterwards, and is always
+// NUL-terminated - also when both arguments are NULL. The caller owns it and must free() it.
+// If memory runs out the process is terminated with exit(-1), as tree_to_html() does.
+char* realloc_append(char* dest, char* src) {
+    size_t dest_len = dest ? strlen(dest) : 0;
+    size_t src_len = src ? strlen(src) : 0;
+    char* grown = realloc(dest, dest_len + src_len + 1);
+    if(!grown)
+        exit(-1);
+    if(src_len)
+        memcpy(grown + dest_len, src, src_len);
+    // Written explicitly: a fresh allocation with no src would otherwise stay uninitialised
+    grown[dest_len + src_len] = '\0';
+    return grown;
+}
+
+#define p_html "<p>%s</p>\n" // printf-style templates used by tree_to_html(); %s receives the rendered inner html unless noted
+#define br_html "<br/>"
+#define h_html "<h%d>%s</h%d>\n" // both %d receive the heading level
+#define em_html "<em>%s</em>\n"
+#define b_html "<b>%s</b>\n"
+#define emb_html "<em><b>%s</b></em>\n"
+#define img_html "<img src=\"%s\" alt=\"%s\" title=\"\"/>\n" // first %s is the element value (src), second the inner html (alt)
+#define a_html "<a href=\"%s\">%s</a>\n" // first %s is the element value (href), second the inner html
+#define ul_html "<ul>%s</ul>\n"
+#define ol_html "<ol start=\"%d\">%s</ol>\n" // %d receives list_data.start
+#define li_html "<li>%s</li>\n"
+#define hr_html "<hr/>\n"
+#define code_html "<pre><code>%s</code></pre>\n"
+#define inline_code_html "<code>%s</code>"
+#define bq_html "<blockquote>%s</blockquote>\n"
+// NOTE(review): depth is not referenced in the part of the file visible here
+int depth = 0;
+
+// realloc() for tree_to_html(): terminates the process when out of memory, the same exit(-1)
+// the image and link cases have always used.
+static void* realloc_or_exit(void* ptr, size_t size) {
+    void* grown = realloc(ptr, size);
+    if(!grown)
+        exit(-1);
+    return grown;
+}
+
+// Resolve a tree to html.
+//
+// The children of root are rendered first (t_inner children contribute their text, every
+// other child is rendered recursively) and the result is then wrapped in the markup that
+// belongs to root->type. Types without markup of their own, such as the tree root, return
+// the inner html unchanged.
+//
+// Returns a heap string the caller must free(). It is NULL when root has no markup of its
+// own and none of its children produced output.
+char* tree_to_html(struct tree_element* root) {
+    char* html=NULL;
+    char* inner_html = NULL;
+    for(int i = 0; i < root->children_n; i++) {
+        struct tree_element* child = root->children[i];
+        switch(child->type) {
+            case(t_inner):
+                // Append the inner html to buffer
+                inner_html = realloc_append(inner_html, child->value);
+                break;
+            default: {
+                // Braced because a declaration may not directly follow a label before C23
+                char* child_html = tree_to_html(child);
+                inner_html = realloc_append(inner_html, child_html);
+                free(child_html);
+                break;
+            }
+        }
}
- if(*(closing_sqr_bracket + 1) != '(') {
- return NULL;
+    // Size of a template filled with the inner html. The "%s"/"%d" placeholders are counted
+    // as literal characters, which leaves a little spare room.
+    #define realloc_len(html_pattern) ((inner_html ? strlen(inner_html) : 0) + strlen(html_pattern) + 1)
+    #define realloc_for_html(html_pattern) realloc_or_exit(html, realloc_len(html_pattern));
+    switch(root->type) {
+        case(t_p):
+            html = realloc_for_html(p_html);
+            sprintf(html, p_html, inner_html ? inner_html : "");
+            break;
+        case(t_h):
+            html = realloc_for_html(h_html);
+            sprintf(html, h_html, *(int*)(root->value), inner_html ? inner_html : "", *(int*)(root->value));
+            break;
+        case(t_str_ast):
+        case(t_str_und):
+            switch(*(unsigned int*)root->value) {
+                case(1):
+                    html = realloc_for_html(em_html);
+                    sprintf(html, em_html, inner_html ? inner_html : "");
+                    break;
+                case(2):
+                    html = realloc_for_html(b_html);
+                    sprintf(html, b_html, inner_html ? inner_html : "");
+                    break;
+                case(3):
+                    html = realloc_for_html(emb_html);
+                    sprintf(html, emb_html, inner_html ? inner_html : "");
+                    break;
+                default:
+                    html = realloc_for_html("");
+                    strcpy(html, inner_html ? inner_html : "");
+                    break;
+            }
+            break;
+        case(t_img):
+            // The value (src) may be NULL, so its length is guarded like the sprintf argument
+            html = realloc_or_exit(html, realloc_len(img_html) + (root->value ? strlen(root->value) : 0));
+            sprintf(html, img_html, root->value ? (char*)(root->value) : "", inner_html ? inner_html : "");
+            break;
+        case(t_a):
+            html = realloc_or_exit(html, realloc_len(a_html) + (root->value ? strlen(root->value) : 0));
+            sprintf(html, a_html, root->value ? (char*)(root->value) : "", inner_html ? inner_html : "");
+            break;
+        case(t_br):
+            html = realloc_for_html(br_html);
+            strcpy(html, br_html);
+            break;
+        case(t_list):
+            if(((struct list_data*)(root->value))->ordered) {
+                // We have an ordered list; reserve enough room for any int as the start number
+                html = realloc_or_exit(html, realloc_len(ol_html) + sizeof(int) * 3 + 2);
+                sprintf(html, ol_html, ((struct list_data*)(root->value))->start, inner_html ? inner_html : "");
+            } else {
+                // We have an unordered list
+                html = realloc_for_html(ul_html);
+                sprintf(html, ul_html, inner_html ? inner_html : "");
+            }
+            break;
+        case(t_li):
+            html = realloc_for_html(li_html);
+            sprintf(html, li_html, inner_html ? inner_html : "");
+            break;
+        case(t_hr):
+            html = realloc_for_html(hr_html);
+            strcpy(html, hr_html);
+            break;
+        case(t_code):
+            html = realloc_for_html(code_html);
+            sprintf(html, code_html, inner_html ? inner_html : "");
+            break;
+        case(t_inline_code):
+            if(*(bool*)(root->value)) {
+                html = realloc_for_html(inline_code_html);
+                sprintf(html, inline_code_html, inner_html ? inner_html : "");
+            } else {
+                // Inline code that was never closed: emit the backtick literally.
+                // Room is needed for the backtick, the text and the terminating NUL.
+                html = realloc_or_exit(html, (inner_html ? strlen(inner_html) : 0) + 2);
+                memcpy(html, "`", 2);
+                strcat(html, inner_html ? inner_html : "");
+            }
+            break;
+        case(t_bq):
+            html = realloc_for_html(bq_html);
+            sprintf(html, bq_html, inner_html ? inner_html : "");
+            break;
+        default:
+            html=inner_html;
+            break;
}
- char* closing_rnd_bracket = strchr(closing_sqr_bracket + 1, ')');
- if(closing_rnd_bracket == NULL) {
- return NULL;
+    // Every case with markup copied inner_html into html, so the intermediate buffer is
+    // released here; only the pass-through default hands inner_html itself to the caller
+    if(html != inner_html)
+        free(inner_html);
+    return html;
+}
+
+// Use this to create a new child of another element and quickly set its attributes. Expands to three plain statements (not one expression) and evaluates ae_var several times, so it must not be the sole body of an unbraced if/else/loop
+#define NEW_ACTIVE_CHILD(ae_var, parent, index, type_v, allow_inner_v) ae_var = new_child(parent, index); ae_var->type = type_v; ae_var->allow_inner = allow_inner_v;
+
+// Use this to find the next parent that allows inner elements: active_element is moved up
+// through its parents until it is root, NULL, or an element with allow_inner set.
+// The NULL test comes before the dereference. Expands to a complete while statement, so the
+// existing call sites that invoke it without a trailing semicolon keep working.
+#define NEXT_ALLOW_INNER(active_element, root) while((active_element) && (active_element) != (root) && !(active_element)->allow_inner) (active_element) = (active_element)->parent;
+
+// Append cur_char to the text of the active element. active_element may be NULL: the text is then added directly under root and nothing is written back. Otherwise *active_element is set to the t_inner element that received the character
+void append_char_to_active(struct tree_element* root, struct tree_element** active_element, char cur_char) {
+ // Start from the caller's active element (or root); if the walk up to the nearest allow_inner ancestor ends at root, a new paragraph is opened for the text
+ struct tree_element* new_active_element = active_element ? *active_element : root;
+ if(active_element) {
+ NEXT_ALLOW_INNER((*active_element), root) // NOTE(review): new_active_element was captured before this walk, so if the walk stops at a non-root ancestor the text is still attached under the original element - confirm this is intended
+ if(*active_element == root) {
+ NEW_ACTIVE_CHILD(new_active_element, root, -1, t_p, true);
+ }
}
- size_t link_text_len = closing_sqr_bracket - start - 1;
- size_t link_loc_len = closing_rnd_bracket - closing_sqr_bracket - 2;
-
- char* link_text = calloc(link_text_len + 1, 1);
- if(link_text == NULL) {
- jb_log(LL_ERR, true, "calloc error");
- return NULL;
+ if(new_active_element->type != t_inner) { // Text lives in t_inner leaves: create one unless we are already in one
+ NEW_ACTIVE_CHILD(new_active_element, new_active_element, -1, t_inner, false);
}
- char* link_loc = calloc(link_loc_len + 1, 1);
- if(link_text == NULL) {
- jb_log(LL_ERR, true, "calloc error");
- free(link_text);
- return NULL;
+ if((cur_char <= 47 ||
+ (cur_char >=58 && cur_char <=64) ||
+ (cur_char >=91 && cur_char <=96) ||
+ cur_char > 122) && cur_char != 0x20) { // i.e. everything except ASCII digits, ASCII letters and the space character
+ // Escape just to be safe; 96 bytes is the buffer size html_escape() writes into
+ char append[96] = "";
+ html_escape(append, cur_char);
+ new_active_element->value = realloc_append(new_active_element->value, append);
+ } else {
+ // Plain character: appended as a one-character string (one reallocation per character; could be made cheaper)
+ char append[2] = {cur_char, '\0'};
+ new_active_element->value = realloc_append(new_active_element->value, append);
+ }
+ if(active_element)
+ *active_element = new_active_element;
+}
+
+// Add a new list (ordered or unordered) as the last child of parent and return a pointer to it.
+// indent is the number of spaces the list indicator ( * or - ) was indented by, ordered selects
+// <ol> over <ul>, and start is the number an ordered list starts counting from (ignored for <ul>).
+// The list_data stored in value belongs to the element and is released by free_tree().
+// If the list data cannot be allocated the process is terminated with exit(-1).
+struct tree_element* new_list(struct tree_element* parent, int indent, bool ordered, int start) {
+    struct list_data* data = malloc(sizeof *data);
+    if(!data)
+        exit(-1);
+    data->indent = indent;
+    data->ordered = ordered;
+    data->start = start;
+
+    struct tree_element* list;
+    NEW_ACTIVE_CHILD(list, parent, -1, t_list, false);
+    list->value = data;
+    return list;
+}
+
+// Search from start towards root for an element of the requested type.
+// start itself is examined first; root is never examined. Returns the first match,
+// or NULL when root is reached without one.
+struct tree_element* find_parent_type(struct tree_element* root, struct tree_element* start, int type) {
+    for(struct tree_element* node = start; node != root; node = node->parent) {
+        if(node->type == type)
+            return node;
+    }
+    return NULL;
+}
+
+// Emphasis parser state, one set for '*' (_ast) and one for '_' (_und). temp_str counts the markers of the opening run (incremented in str_chr_hit)
+unsigned int temp_str_ast = 0;
+unsigned int temp_str_und = 0;
+
+// These are true if a strength element is waiting for closing, i.e. its opening run has ended (set in str_wait_hit)
+bool str_cl_wait_ast = false;
+bool str_cl_wait_und = false;
+
+// These are true if the previous character was a marker and the parser waits for a different character (set in str_chr_hit, cleared in str_wait_hit)
+bool str_chr_wait_ast = false;
+bool str_chr_wait_und = false;
+
+bool str_fin_wait_ast = false; // Set in str_chr_hit when closing markers were seen but fewer than the opening run had; resolved by str_wait_hit via end_strength
+bool str_fin_wait_und = false;
+
+#define ZERO_STR_AST temp_str_ast = 0; str_cl_wait_ast = false; str_chr_wait_ast = false; str_fin_wait_ast = false;
+#define ZERO_STR_UND temp_str_und = 0; str_cl_wait_und = false; str_chr_wait_und = false; str_fin_wait_und = false;
+
+// Reconcile an emphasis element whose opening marker count (tmp_str) differs from the count stored in its value, by emitting the surplus markers as literal text. Does nothing if *active_element is not inside an element of the given type
+void end_strength(struct tree_element* root, struct tree_element** active_element, char marker, int tmp_str, int type) {
+ struct tree_element* parent_strength = find_parent_type(root, *active_element, type);
+ if(!parent_strength)
+ return;
+ if(tmp_str < *(unsigned int*)parent_strength->value) { // int vs unsigned int: tmp_str is converted to unsigned for this comparison
+ struct tree_element* tmp_active = parent_strength->parent; // Surplus markers are appended via the emphasis element's parent
+ for(int i = tmp_str; i < *(unsigned int*)parent_strength->value; i++)
+ append_char_to_active(root, &tmp_active, marker);
+ *(unsigned int*)parent_strength->value = tmp_str; // The element is then rendered at the smaller level
+ } else if(tmp_str > *(unsigned int*)parent_strength->value) {
+ struct tree_element* tmp_active;
+ if((parent_strength->parent)->children_n >= 2)
+ tmp_active = &(*(parent_strength->parent)->children[(parent_strength->parent)->children_n-2]); // The second-to-last child of the parent; NOTE(review): presumably the sibling just before the emphasis element - confirm it is always the last child here
+ else {
+ NEW_ACTIVE_CHILD(tmp_active, parent_strength->parent, 0, t_inner, false);
+ }
+ // Emit the opening markers that found no closing partner as literal text
+ for(int i = *(unsigned int*)parent_strength->value; i < tmp_str; i++)
+ append_char_to_active(root, &tmp_active, marker);
}
+}
+
+// Call this function if a strength character is waiting to hit a different character, i.e. a run of markers has just ended (parse_markdown calls it when *str_chr_wait is set and the current character is not the marker)
+void str_wait_hit(struct tree_element* root, struct tree_element** active_element, bool* str_cl_wait, bool* str_fin_wait, unsigned int* temp_str, bool* str_chr_wait, char str_chr, int type) {
+ if(*str_cl_wait) { // The run that just ended came after the opening run
+ if(*str_fin_wait) { // It was shorter than the opening run: let end_strength reconcile the counts
+ end_strength(root, active_element, str_chr, *temp_str, type);
+ *str_fin_wait = false;
+ *temp_str = 0;
+ }
+ *str_cl_wait = false;
+ } else
+ *str_cl_wait = true; // The opening run ended: from now on markers count as closing
- memcpy(link_text, start + 1, link_text_len);
- memcpy(link_loc, closing_sqr_bracket + 2, link_loc_len);
- *out_text = link_text;
- *out_loc = link_loc;
- *out_len = link_text_len + link_loc_len;
+ *str_chr_wait = false; // The run has been handled
+}
- return closing_rnd_bracket;
+// Handle one emphasis marker character.
+//
+// desired_type is t_str_ast for '*' or t_str_und for '_'; the four state pointers must refer
+// to the state variables of that same marker family. Outside an emphasis element of that type
+// a new one is opened (inside a new paragraph when the active element is root) and
+// *active_element is moved into it. Inside one, the marker either extends the opening run or,
+// once the opening run has ended, counts towards closing; when as many closing markers as
+// opening markers were seen, *active_element is moved to the parent of the emphasis element
+// and the state is reset.
+void str_chr_hit(struct tree_element* root, struct tree_element** active_element, bool* str_cl_wait, bool* str_fin_wait, unsigned int* temp_str, bool* str_chr_wait, int desired_type) {
+    struct tree_element* parent_strength = find_parent_type(root, *active_element, desired_type);
+    if(parent_strength) {
+        // We are already in a strength element
+        // Check if we are waiting to close
+        if(*str_cl_wait) {
+            (*(unsigned int*)(parent_strength->value))++;
+            if((*(unsigned int*)(parent_strength->value)) >= *temp_str) {
+                *active_element = parent_strength->parent;
+                // Reset the state of the marker family this call is for, through the pointers
+                // we were given. Resetting the '*' globals here would be wrong for '_'.
+                *temp_str = 0;
+                *str_cl_wait = false;
+                *str_chr_wait = false;
+                *str_fin_wait = false;
+                return;
+            } else {
+                *str_fin_wait = true;
+            }
+        } else {
+            (*temp_str)++;
+        }
+    } else {
+        // Enter a new strength element as we are currently not in one
+        if((*active_element)->type == t_inner)
+            *active_element = (*active_element)->parent;
+        if(*active_element == root) {
+            NEW_ACTIVE_CHILD((*active_element), root, -1, t_p, true);
+        }
+        NEW_ACTIVE_CHILD((*active_element), (*active_element), -1, desired_type, true);
+        // The value counts the closing markers seen so far and starts at zero
+        (*active_element)->value = calloc(1, sizeof(unsigned int));
+        if(!(*active_element)->value)
+            exit(-1);
+        *temp_str = 1;
+    }
+    *str_chr_wait = true;
}
-// @todo NEXT
// The program needs to loop through the loop again to close all the open things at the end, THIS NEEDS TO BE IMPLEMENTED for xhtml
int parse_markdown(char* input, char* buffer, size_t buffer_size) {
- bool empty_line_carry = true;
- bool spaced_codeblock = false;
- bool in_paragraph = false;
- // This is the offset in parsed_text
- size_t offset = 0;
- // This describes the spaces the last list level had
- int list_spaces = 0;
- int list_level = 0;
- bool in_list = false;
- char* next_line = input;
- // Different indicators for all the inline things
- // How strongly are we currently emphasized? * = 1; ** = 2; *** = 3
- int strength_level = 0;
- bool in_mono = false;
- bool in_cut = false;
- bool in_quotes = false;
- // Signal we are on the last line, for safety reasons we just insert an empty paragraph
- bool on_lastline = false;
- for(char* line = input; next_line != NULL; line = next_line) {
- // Compute the next line and put a '\0' at the end of the current line
- {
- char* line_end = strchr(line, '\n');
- if(line_end != NULL) {
- *line_end = '\0';
- next_line = line_end + 1;
+ utf8_length = 0;
+
+ clock_t before = clock();
+ memset(buffer, 0, buffer_size);
+ bool escaped = false;
+ bool newline = false;
+ bool list_waiting = false;
+ bool ol_list = false;
+ int ol_start = 0;
+ // This will be set to some non-null value when there is a code element to return to
+ struct tree_element* code_element = NULL;
+
+ int hash_chain = 0;
+ int dash_chain = 0;
+ int eq_chain = 0;
+ int spaces_trimmed = 0;
+ int root_on_newline = false;
+ int soft_newline_count = 0;
+
+ bool inline_code_wait = false;
+ bool fenced_code = false;
+ char fenced_char = '\0';
+
+ // @todo: These two variables should be globals! Why did I even define them here at all?
+ struct tree_element* root = new_element();
+ root->allow_inner = false;
+ struct tree_element* active_element = root;
+ for(register char* cur_char = input; *cur_char != '\0'; cur_char++) {
+ printf("%c", *cur_char);
+ if(escaped || (code_element && *cur_char != '\n' && *cur_char != '\r' && !newline)) {
+ escaped = false;
+ if((*cur_char == '\n' || *cur_char == '\r')) {
+ if(active_element->parent)
+ active_element = active_element->parent;
+ struct tree_element* br_child = NEW_ACTIVE_CHILD(br_child, active_element, -1, t_br, false);
} else {
- empty_line_carry = true;
- on_lastline = true;
+ append_char_to_active(root, &active_element, *cur_char);
}
+ continue;
}
- size_t line_length = 0;
- char line_buffer[LINE_MAX] = {'\0'};
- bool no_special = true;
- // The pre_line_buffer contains html headers that are needed before content of the current line
- char pre_line_buffer[LINE_MAX] = "\0";
- char* trimmed_line = NULL;
- // When this is true we can format text with things like <em>, <b>, etc.
- bool format_allow = true;
- int spaces_skipped = 0;
- bool empty_line = false;
- // Check if we have an empty line
- if(empty_line_carry) {
- empty_line_carry = false;
- empty_line = true;
- }
-
- spaces_skipped = trim_space(line, &trimmed_line);
- if(!on_lastline) {
- if(*trimmed_line == '\0' || *trimmed_line == '\r') {
- empty_line_carry = true;
- continue;
- }
+ // man is this disgusting
+ if(fenced_code) {
+ if((*cur_char == '\n' || *cur_char == '\r') &&
+ *(cur_char + 1) == fenced_char && *(cur_char + 2) == fenced_char && *(cur_char + 3) == fenced_char) {
+ cur_char += 3;
+ active_element = root;
+ fenced_code = false;
+ } else
+ append_char_to_active(root, &active_element, *cur_char);
+ continue;
}
- if(on_lastline) {
- empty_line = true;
- line = "\0";
+ if(str_chr_wait_ast && *cur_char != '*') {
+ str_wait_hit(root, &active_element, &str_cl_wait_ast, &str_fin_wait_ast, &temp_str_ast, &str_chr_wait_ast, '*', t_str_ast);
+ }
+
+ if(str_chr_wait_und && *cur_char != '_') {
+ str_wait_hit(root, &active_element, &str_cl_wait_und, &str_fin_wait_und, &temp_str_und, &str_chr_wait_und, '_', t_str_und);
}
- // Blockquotes
- /*
- I love me some spaghetti bolognese ;P
- Luckily there is enough spaghetti here
- for everyone
-
- ----|
- ----|------------
- \//\----|
- ||/\||\
- |/|||||\
- ---------------
- \ /
- \-----------/
- */
- if(*trimmed_line == '>') {
- // We are in a blockquote!
- // Set the new line to after the quote marker
- line = trimmed_line + 1;
- // trim again
- spaces_skipped = trim_space(line, &trimmed_line);
- if(!in_quotes) {
- if(in_paragraph) {
- strncat(pre_line_buffer, "</p><blockquote>", LINE_MAX -1);
- in_paragraph = false;
- } else {
- line_length = prepend(line_buffer, "<blockquote><p>");
- in_paragraph = true;
- }
- in_quotes = true;
- }
- } else if(in_quotes) {
- if(in_paragraph) {
- strncat(pre_line_buffer, "</p></blockquote>", LINE_MAX-1);
- in_paragraph = false;
- } else {
- strncat(pre_line_buffer, "</blockquote>", LINE_MAX-1);
- }
- in_quotes = false;
+ #define LAST_TO_TITLE(n) {\
+ if(root->children_n >= 1) {\
+ struct tree_element* last_element = root->children[root->children_n-1];\
+ last_element->type = t_h;\
+ if(last_element->value)\
+ free(last_element->value);\
+ last_element->value = malloc(sizeof(int));\
+ *(int*)last_element->value = n;\
+ }\
}
- // / Blockquotes
-
- // Unordered Lists ------------------------------------------------
- if(spaces_skipped < 4 + list_spaces && *trimmed_line == '*' && *(trimmed_line + 1) == ' ') {
- line_length = prepend(line_buffer, "<li>");
- if(!in_list) {
- line_length = prepend(line_buffer, "<ul>");
- in_list = true;
- } else {
- strncat(pre_line_buffer, "</li>\n", LINE_MAX-1);
- }
- if(spaces_skipped < list_spaces) {
- for(int i = (list_spaces - spaces_skipped) >> 1; i > 0; i--) {
- strncat(pre_line_buffer, "</ul>\n", LINE_MAX-1);
- list_level--;
- }
- } else if(spaces_skipped > list_spaces) {
- for(int i = (spaces_skipped - list_spaces) >> 1; i > 0; i--) {
- line_length = prepend(line_buffer, "<ul>");
- list_level++;
- }
- }
- trimmed_line = trimmed_line + 2;
- list_spaces = spaces_skipped;
- } else if(in_list && empty_line) {
- strncat(pre_line_buffer, "</li></ul>\n", LINE_MAX-1);
- for(int i = 0; i < list_level; i++) {
- strncat(pre_line_buffer, "</ul>\n", LINE_MAX-1);
- }
- in_list = false;
- list_spaces = 0;
+ #define APPEND_SPACES for(int i = 0; i < soft_newline_count; i++) append_char_to_active(root, &active_element, ' ');
+
+ // Checks that should be done if a non special character is hit, might also be necessary to check sometimes not in default
+ #define DEFAULT_CHECKS {\
+ APPEND_SPACES \
+ soft_newline_count = 0; \
+ if(spaces_trimmed >= 4 && (newline || active_element == root)) { \
+ /* if code_element is set the new active element is just returned to the code element*/ \
+ if(code_element) { \
+ active_element = code_element;\
+ /* append_char_to_active(root, &active_element, '\n'); */\
+ } else {\
+ NEW_ACTIVE_CHILD(active_element, root, -1, t_code, true);\
+ code_element = active_element;\
+ }\
+ for(int i = 0; i < spaces_trimmed - 4; i++)\
+ append_char_to_active(root, &active_element, ' ');\
+ } else if(code_element) {\
+ active_element = root;\
+ code_element = false;\
+ }\
+ if(hash_chain > 0) {\
+ for(int i = 0; i < hash_chain; i++)\
+ append_char_to_active(root, &active_element, '#');\
+ hash_chain = 0;\
+ }\
+ if(dash_chain > 0) {\
+ for(int i = 0; i < dash_chain; i++)\
+ append_char_to_active(root, &active_element, '-');\
+ dash_chain = 0;\
+ }\
+ if(eq_chain > 0) {\
+ for(int i = 0; i < eq_chain; i++)\
+ append_char_to_active(root, &active_element, '=');\
+ eq_chain = 0;\
+ }\
+ \
+ if(newline) {\
+ if(root_on_newline) {\
+ active_element = root;\
+ root_on_newline = false;\
+ } else if(!code_element)\
+ /* Check if we have a soft linebreak (two spaces before newline) */\
+ append_char_to_active(root, &active_element, ' ');\
+ }\
}
- // / Unordered List -----------------------------------------------
- // @todo Ordered list
-
- // Spaced codeblocks ----------------------------------------------
- // Also check if we have a list
- if(spaces_skipped >= 4 && !in_list) {
- // If this is true we are already in a codeblock
- if(spaced_codeblock) {
- line_length = prepend(line_buffer, "\n");
- no_special = false;
- format_allow = false;
- } else {
- line_length = prepend(line_buffer, "<pre><code>");
- line_length = append(line_buffer, "<p>");
- spaced_codeblock = true;
- no_special = false;
- format_allow = false;
- } // Replace any leading tabs with 4 spaces
- char* code_replaced = line;
- for(int i = 0; i < 4;) {
- if(*(line + i) == '\t') {
- code_replaced ++;
- i += 4;
- } else if(*(line + i) == ' ') {
- code_replaced++;
- i++;
- } else {
+ switch(*cur_char) {
+ // Character escaping
+ case('\\'):
+ escaped = true;
+ break;
+ // Tabs
+ case('\t'):
+ if(active_element == root || newline) {
+ //@todo with this we should relatively easily be able to check for code blocks!
+ spaces_trimmed += 4;
break;
+ } else goto default2;
+
+ break;
+ // Newline
+ case('\r'):
+ // Ignoring \r goes against the commonmark spec, but who cares
+ break;
+ case('\n'):
+ list_waiting = false;
+ if(dash_chain >= 1) {
+ active_element = root;
+ // Make the last element a title unless
+ if(!newline) {
+ if(dash_chain >= 3 && active_element == root) {
+ struct tree_element* hr = NEW_ACTIVE_CHILD(hr, root, -1, t_hr, false);
+ } else {
+ for(int i = 0; i < dash_chain; i++)
+ append_char_to_active(root, NULL, '-');
+ }
+ } else {
+ LAST_TO_TITLE(2);
+ }
+ dash_chain = 0;
}
- }
- line_length = append(line_buffer, code_replaced);
- } else if(spaced_codeblock) {
- spaced_codeblock = false;
- strncat(pre_line_buffer, "</p></code></pre>\n", LINE_MAX-1);
- }
- // / Spaced codeblocks --------------------------------------------
-
-
- // Titles (#) -----------------------------------------------------
- // @todo allow enclosing titles bracket style
- if(no_special && *trimmed_line == '#') {
- int header_depth = 1;
- while(header_depth < 6) {
- if(*(trimmed_line + header_depth) == '#')
- header_depth++;
- else
- break;
- }
- char* trimmed_title = NULL;
- if(trim_space(trimmed_line + header_depth,
- &trimmed_title) > 0) {
- no_special = false;
-
- char title[LINE_MAX + 1] = "";
- if(snprintf(title, LINE_MAX + 1, "<h%d>%s</h%d>\n",
- header_depth,
- trimmed_title,
- header_depth
- ) > LINE_MAX)
- {
- jb_log(LL_WARN, false, "Title too long");
- return -1;
+ if(eq_chain >= 1) {
+ active_element = root;
+ if(newline) {
+ LAST_TO_TITLE(1);
+ } else {
+ for(int i = 0; i < eq_chain; i++)
+ append_char_to_active(root, NULL, '=');
+ }
+ eq_chain = 0;
}
- line_length = append(line_buffer, title);
- }
- // / Titles -------------------------------------------------------
- // Thematic brakes ------------------------------------------------
- } else if(no_special && (*trimmed_line == '-' || *trimmed_line == '*' || *trimmed_line == '_')) {
- int i = 1;
- for(; *(trimmed_line+i) == *(trimmed_line+i-1); i++) {
- }
- if(*(trimmed_line+i) == '\0') {
- no_special = false;
- line_length = append(line_buffer, "<hr/>\n");
- }
- }
- // / Thematic brakes ----------------------------------------------
-
- // Plain Text -----------------------------------------------------
- if(no_special) {
- line_length = append(line_buffer, trimmed_line);
- // Handle newline via slash/*
- if(*(line_buffer + line_length -1) == '\\') {
- *(line_buffer + line_length -1) = '\n';
- if(*(line_buffer + line_length -2) != '\\') {
- empty_line_carry = true;
+ if(hash_chain > 0)
+ active_element = root;
+ if(newline) {
+ // A double new line means we return the active element to root
+ active_element = root;
+ newline = false;
+ root_on_newline = false;
+ code_element = NULL;
+ active_element = root;
+ } else if(active_element != root) {
+ if(soft_newline_count < 2)
+ newline = true;
+ else {
+ struct tree_element* old_active = active_element;
+ NEXT_ALLOW_INNER(active_element, root)
+ NEW_ACTIVE_CHILD(active_element, active_element, -1, t_br, false);
+ active_element = old_active;
+ NEXT_ALLOW_INNER(active_element, root)
+ }
}
- }
- // Handle newlines
- if(empty_line && in_paragraph) {
- strncat(pre_line_buffer, "</p>\n", LINE_MAX-1);
- in_paragraph = false;
- }
- // If we are not in a paragraph enter one
- if(!in_paragraph && !in_list) {
- line_length = prepend(line_buffer, "<p>");
- in_paragraph = true;
- } else { // If we are in a paragraph the newline will be converted to a space
- line_length = prepend(line_buffer, " ");
- }
- } else if(in_paragraph) {
- line_length = prepend(line_buffer, "</p>\n");
- in_paragraph = false;
- }
+
+ //printf("%d", temp_str_ast);
+ if(temp_str_ast > 0)
+ end_strength(root, &active_element, '*', temp_str_ast, t_str_ast);
+ ZERO_STR_AST
+ //printf("%d\n", temp_str_und);
+ if(temp_str_und > 0)
+ end_strength(root, &active_element, '_', temp_str_und, t_str_und);
+ ZERO_STR_UND
- if(format_allow) {
- bool escaped = false;
- char format_line_buffer[LINE_MAX] = {0};
- for(char* line_position = line_buffer; *line_position != '\0'; line_position++) {
- // Handle a previously escaped character
- if(escaped) {
- char escaped_char[2] = {*line_position, '\0'};
- // Also check if we have an escaped newline
- strncat(format_line_buffer, *line_position == '\n' ? "\\" : escaped_char, LINE_MAX-1);
- escaped = false;
- continue;
- }
- switch(*line_position) {
- // Text strength, for sake of simplicity, we'll treat * and _ the same
- case '*' :
- case '_' :
- {
- // Look ahead if the next ones are also strength indicators
- int strength_indicators = 0;
- for(; strength_level > 0 ? strength_indicators<strength_level : true ; strength_indicators++) {
- if(!(*(line_position + strength_indicators) == '*' ||
- *(line_position + strength_indicators) == '_')) {
- break;
- }
- }
+ if(code_element)
+ append_char_to_active(root, &active_element, '\n');
- if(strength_level == 0) {
- if(strength_indicators == 1) {
- strncat(format_line_buffer, "<em>", LINE_MAX-1);
- } else if(strength_indicators == 2) {
- strncat(format_line_buffer, "<b>", LINE_MAX-1);
- } else {
- strncat(format_line_buffer, "<b><em>", LINE_MAX-1);
- }
- strength_level = strength_indicators;
- line_position += strength_indicators -1;
- } else {
- if(strength_level >= 3 && strength_indicators >= 3) {
- strncat(format_line_buffer, "</em></b>", LINE_MAX-1);
- } else if(strength_level == 2 && strength_indicators >= 2) {
- strncat(format_line_buffer, "</b>", LINE_MAX-1);
- } else {
- strncat(format_line_buffer, "</em>", LINE_MAX-1);
- }
- strength_level = strength_level - strength_indicators;
- line_position += strength_indicators -1;
- }
- }
+ inline_code_wait = false;
+ hash_chain = 0;
+ spaces_trimmed = 0;
+ soft_newline_count = 0;
+ break;
+ // Numbered lists
+ case('1'):
+ case('2'):
+ case('3'):
+ case('4'):
+ case('5'):
+ case('6'):
+ case('7'):
+ case('8'):
+ case('9'):
+ case('0'):
+ if((active_element == root || newline) && *(cur_char+1) == '.' && *(cur_char+2) == ' ') {
+ list_waiting = true;
+ ol_list = true;
+ ol_start = 0;
+ sscanf(cur_char, "%d.", &ol_start);
+ cur_char++;
+ } else
+ goto default2;
+ break;
+ case('*'):
+ if((active_element == root || newline) && *(cur_char+1) == ' ' ) {
+ list_waiting = true;
+ } else {
+ str_chr_hit(root, &active_element, &str_cl_wait_ast, &str_fin_wait_ast, &temp_str_ast, &str_chr_wait_ast, t_str_ast);
+ }
+ break;
+ case('_'):
+ str_chr_hit(root, &active_element, &str_cl_wait_und, &str_fin_wait_und, &temp_str_und, &str_chr_wait_und, t_str_und);
+ break;
+ // No 3 backticks are supported, use 4 spaces at the beginning of a line to get a <pre><code> block
+
+ case('`'):
+ case('~'):
+ // Check for fenced code
+ if(active_element == root || newline) {
+ if(*cur_char == *(cur_char + 1) && *(cur_char + 2)) {
+ NEW_ACTIVE_CHILD(active_element, root, -1, t_code, true);
+ fenced_code = true;
+ fenced_char = *cur_char;
+ // Ignore the rest of the line
+ while(*cur_char != 0 && *cur_char != '\n')
+ cur_char++;
break;
- // Inline code
- case '`':
- {
- if(in_mono) {
- strncat(format_line_buffer, "</code>", LINE_MAX - 1);
- in_mono = false;
- } else {
- strncat(format_line_buffer, "<code>", LINE_MAX - 1);
- in_mono = true;
- }
- }
+ }
+ }
+ if(*cur_char == '~')
+ goto default2;
+ DEFAULT_CHECKS;
+ if(inline_code_wait) {
+ struct tree_element* parent_code = find_parent_type(root, active_element, t_inline_code);
+ if(!parent_code)
+ goto default2;
+ *(bool*)(parent_code->value) = true;
+ active_element = parent_code->parent;
+ inline_code_wait= false;
+ }
+ else {
+ NEXT_ALLOW_INNER(active_element, root);
+ if(active_element == root) {
+ NEW_ACTIVE_CHILD(active_element, root, -1, t_p, true);
+ }
+ NEW_ACTIVE_CHILD(active_element, active_element, -1, t_inline_code, true);
+ active_element->value = malloc(sizeof(bool));
+ *(bool*)(active_element->value) = false;
+ inline_code_wait = true;
+ }
+ break;
+ case('['):
+ char* link_text = NULL;
+ char* link_loc = NULL;
+ size_t link_len = 0;
+ char* new_position = get_link_components(cur_char, &link_text, &link_loc, &link_len);
+ if(new_position) {
+ APPEND_SPACES
+ if(active_element == root) {
+ NEW_ACTIVE_CHILD(active_element, active_element, -1, t_p, true);
+ }
+ NEXT_ALLOW_INNER(active_element, root)
+ // We have a link
+ // Create a new a element which contains the link address
+ NEW_ACTIVE_CHILD(active_element, active_element, -1, t_a, true);
+ active_element->value = link_loc;
+ // Create an inner element in it which contains the text
+ NEW_ACTIVE_CHILD(active_element, active_element, -1, t_inner, false);
+ active_element->value = link_text;
+ // Return to the old parent
+ active_element = active_element->parent->parent;
+ cur_char = new_position;
+ break;
+ } else
+ goto default2;
+ case('!'):
+ char* alt_text = NULL;
+ char* img_loc = NULL;
+ size_t img_len = 0;
+ new_position = get_link_components(cur_char + 1, &alt_text, &img_loc, &img_len);
+ if(new_position) {
+ NEXT_ALLOW_INNER(active_element, root)
+ // This contains the image link
+ NEW_ACTIVE_CHILD(active_element, active_element, -1, t_img, true);
+ active_element->value = img_loc;
+ // This contains the image alt text
+ NEW_ACTIVE_CHILD(active_element, active_element, -1, t_inner, false);
+ active_element->value = alt_text;
+ active_element = active_element->parent->parent;
+ cur_char = new_position;
+ break;
+ } else
+ goto default2;
+ break;
+ // Titles
+ case('='):
+ if(active_element == root || newline) {
+ // Make the last line a title, otherwise we fall through
+ eq_chain++;
+ break;
+ }
+ case('-'):
+ if(active_element == root || newline) {
+ list_waiting = true;
+ dash_chain++;
+ }
+ break;
+ case('#'):
+ if(hash_chain == 0 || newline) {
+ if(active_element == root || newline || active_element->type == t_li) {
+ if(!(active_element->type == t_li))
+ active_element = root;
+ hash_chain = 1;
+ newline = false;
break;
- // Cut text
- case '~':
- {
- // Look ahead, we only want to cut if there are two tildes
- if(*(line_position + 1) != '~') {
- strncat(format_line_buffer, "~", LINE_MAX - 1);
- break;
- }
- if(in_cut) {
- // Close
- strncat(format_line_buffer, "</s>", LINE_MAX - 1);
- in_cut = false;
- } else {
- in_cut = true;
- strncat(format_line_buffer, "<s>", LINE_MAX - 1);
- }
- line_position++;
- }
+ }
+ } else {
+ hash_chain++;
+ break;
+ }
+ case('>'):
+ if(newline || active_element == root) {
+ // Look if we already have a block quote parent somewhere, if so just continue, otherwise create a new one.
+ if(find_parent_type(root, active_element, t_bq))
break;
- // brackets
- case '[':
- {
- char* link_text = NULL;
- char* link_loc = NULL;
- size_t link_loctxt_len = 0;
- char* closing_rnd_bracket = get_link_components(line_position, &link_text, &link_loc, &link_loctxt_len);
- // This is 16 characters long (add one for good measure (: ):
- //<a href=""></a>\0
- char* link_html = calloc(link_loctxt_len + 17, 1);
- if(link_html == NULL || closing_rnd_bracket == NULL) {
- strncat(format_line_buffer, "[", LINE_MAX - 1);
- if(link_html != NULL) free(link_html);
- if(link_text != NULL) free(link_text);
- if(link_loc != NULL) free(link_loc);
- break;
- }
-
- sprintf(link_html, "<a href=\"%s\">%s</a>", link_loc, link_text);
+ NEW_ACTIVE_CHILD(active_element, root, -1, t_bq, true);
+ break;
+ } else
+ goto default2;
+ case('<'):
+ char* closing_gt = cur_char;
+ while(*closing_gt != 0 && *closing_gt != '\n') {
+ if(*closing_gt == '>') break;
+ closing_gt++;
+ }
+ if(*closing_gt != '>')
+ goto default2;
- strncat(format_line_buffer, link_html, LINE_MAX - 1);
- line_position = closing_rnd_bracket;
- free(link_html);
- free(link_text);
- free(link_loc);
+ NEW_ACTIVE_CHILD(active_element, active_element->allow_inner ? active_element : root, -1, t_inner, false);
+ active_element->value = malloc(closing_gt - cur_char + 1);
+ memcpy(active_element->value, cur_char, closing_gt - cur_char + 1);
+ *(char*)((active_element->value) + (unsigned int)(closing_gt - cur_char) + 1) = 0;
+ cur_char = closing_gt;
+ active_element = active_element->parent;
+ case(' '):
+ if(hash_chain > 0) {
+ if(active_element->type != t_h) {
+ if(active_element->parent ? active_element->parent->type == t_h : false) {
+ active_element = active_element->parent;
+ } else {
+ NEW_ACTIVE_CHILD(active_element, active_element, -1, t_h, true);
+ active_element->value = malloc(sizeof(int));
}
- break;
- // Images
- case '!':
- {
- char* img_alt = NULL;
- char* img_src = NULL;
- size_t img_altsrc_len = 0;
- if(*(line_position + 1) == '\0') {
- strncat(format_line_buffer, "!", LINE_MAX - 1);
+ }
+ *(int*)(active_element->value) = hash_chain;
+ hash_chain = 0;
+ root_on_newline = true;
+ break;
+ }
+ // Here lists are created
+ if(list_waiting && dash_chain <= 1) {
+ dash_chain = 0;
+ newline = false;
+ /* Look if we have an ancestor somewhere that has spaces fewer or equal to the spaces we skipped.
+ * On fewer spaces we enter a new list below the one we found
+ * If it is equal, we just add a new list item
+ * If we do not find a list we create a new one at the root node */
+ struct tree_element* look_element = find_parent_type(root, active_element, t_list);;
+ bool found_list = false;
+ while(look_element != NULL){
+ // Our list has more indents and is therefore a child to the one we found
+ // Enter new child list
+ if(((struct list_data*)look_element->value)->indent < spaces_trimmed) {
+ active_element = new_list(look_element, spaces_trimmed, ol_list, ol_start);
+ found_list = true;
+ break;
+ }
+ // We found a list of the exact indentation level
+ else if(((struct list_data*)look_element->value)->indent == spaces_trimmed) {
+ // If the list type is the same simply mark the found element as active, otherwise create a new sibling list
+ if(((struct list_data*)look_element->value)->ordered == ol_list) {
+ active_element = look_element;
+ found_list = true;
+ } else {
+ active_element = new_list(look_element->parent, spaces_trimmed, ol_list, ol_start);
+ found_list = true;
break;
}
- char* closing_rnd_bracket = get_link_components(line_position + 1, &img_alt, &img_src, &img_altsrc_len);
- // This is 21 characters long, again we add one more to be safe
- //<img alt="" src=""/>\0
- char* img_html = calloc(img_altsrc_len + 22, 1);
- if(img_html == NULL || closing_rnd_bracket == NULL) {
- strncat(format_line_buffer, "!", LINE_MAX - 1);
- if(img_html != NULL) free(img_html);
- if(img_alt != NULL) free(img_alt);
- if(img_src != NULL) free(img_src);
- break;
- }
- sprintf(img_html, "<img alt=\"%s\" src=\"%s\"/>", img_alt, img_src);
-
- strncat(format_line_buffer, img_html, LINE_MAX - 1);
- line_position = closing_rnd_bracket;
- free(img_html);
- free(img_alt);
- free(img_src);
- }
- break;
- // escape
- case '\\':
- {
- escaped = true;
- }
- break;
- default:
- {
- // See how much text we have
- size_t text_found_n = strcspn(line_position, "*_`[!\\~");
- char text_found[LINE_MAX] = {'\0'};
- memcpy(text_found, line_position, text_found_n);
- text_found[text_found_n+1] = '\0';
- append(format_line_buffer, text_found);
- line_position += text_found_n-1;
- }
+ break;
+ }
+ // The current list has fewer indents than what we found, look farther for a parent
+ else if(((struct list_data*)look_element->value)->indent > spaces_trimmed)
+ look_element = find_parent_type(root, look_element->parent, t_list);
+ else
+ look_element = find_parent_type(root, look_element, t_list);
+ }
+ // Enter a new list
+ if(!found_list) {
+ active_element = new_list(root, spaces_trimmed, ol_list, ol_start);
+ }
+ NEW_ACTIVE_CHILD(active_element, active_element, -1, t_li, true);
+ root_on_newline = true;
+ list_waiting = false;
+ ol_list = false;
}
- }
- memcpy(line_buffer, format_line_buffer, LINE_MAX-1);
- line_length = strnlen(format_line_buffer, LINE_MAX);
+ // Trim spaces from newline
+ if(active_element == root || newline) {
+ //@todo with this we should relatively easily be able to check for code blocks!
+ spaces_trimmed++;
+ break;
+ }
+ soft_newline_count++;
+ break;
+ // Default character handling
+ default2:
+ default:
+ DEFAULT_CHECKS;
+ newline = false;
+ list_waiting = false;
+ ol_list = false;
+ append_char_to_active(root, &active_element, *cur_char);
+ break;
}
+ }
+ // Convert the tree to valid html
+ depth++;
+ char* html = tree_to_html(root);
+ //printf("%s\n", html ? html : "" );
+ // Tear down the tree
+ free_tree(root);
+ if(html) {
+ strncpy(buffer, html, buffer_size - 1);
+ }
+ free(html);
+ printf("Time to process in ns: %ld\n", (clock() - before) / (CLOCKS_PER_SEC / 1000000));
+ depth--;
+ return 0;
+}
- // Prepend the pre_line_buffer to the line_buffer
- line_length = prepend(line_buffer, pre_line_buffer);
-
- if(offset + line_length > buffer_size) {
- jb_log(LL_WARN, false, "too long");
- return -1;
- }
+// Splits a Markdown link of the form "[text](location)" that begins at start.
+// Only the current line (up to the next '\r', '\n' or the end of the string) is searched.
+// Returns the location of the closing round bracket if a link was found, otherwise NULL
+// (no link at start, or an allocation failed).
+// On success *out_text and *out_loc point to NUL-terminated copies allocated by this
+// function and *out_len holds their combined length; the caller has to free both strings.
+// On failure the out parameters are left untouched.
+char* get_link_components(char* start, char** out_text, char** out_loc, size_t* out_len) {
+    // The text has to open with a square bracket. Without this check a ']' sitting
+    // directly at start would make the text length below underflow.
+    if(*start != '[') {
+        return NULL;
+    }
+    // look how far the next newline is away
+    size_t line_length = strcspn(start, "\r\n");
- memcpy(buffer + offset, line_buffer, line_length);
- offset += line_length;
- if(on_lastline) {
- *(buffer+offset) = '\0';
- if(in_paragraph) {
- strncat(buffer, "</p>", buffer_size);
- }
- break;
- }
+    // Search for the next closing bracket
+    char* closing_sqr_bracket = memchr(start, ']', line_length);
+    if(closing_sqr_bracket == NULL) {
+        return NULL;
     }
- return 0;
+    // The location has to follow immediately: "](". This never reads past the string,
+    // at worst it looks at the line break or the terminating NUL.
+    if(*(closing_sqr_bracket + 1) != '(') {
+        return NULL;
+    }
+    // Only the rest of the line behind the ']' may be searched for the ')'
+    size_t remaining = line_length - (size_t)(closing_sqr_bracket + 1 - start);
+    char* closing_rnd_bracket = memchr(closing_sqr_bracket + 1, ')', remaining);
+    if(closing_rnd_bracket == NULL) {
+        return NULL;
+    }
+    // Both lengths exclude the surrounding brackets
+    size_t link_text_len = closing_sqr_bracket - start - 1;
+    size_t link_loc_len = closing_rnd_bracket - closing_sqr_bracket - 2;
+
+    // One extra zeroed byte each keeps the copies NUL-terminated
+    char* link_text = calloc(link_text_len + 1, 1);
+    if(link_text == NULL) {
+        jb_log(LL_ERR, true, "calloc error");
+        return NULL;
+    }
+    char* link_loc = calloc(link_loc_len + 1, 1);
+    if(link_loc == NULL) {
+        jb_log(LL_ERR, true, "calloc error");
+        free(link_text);
+        return NULL;
+    }
+
+    memcpy(link_text, start + 1, link_text_len);
+    memcpy(link_loc, closing_sqr_bracket + 2, link_loc_len);
+    *out_text = link_text;
+    *out_loc = link_loc;
+    *out_len = link_text_len + link_loc_len;
+
+    return closing_rnd_bracket;
 }
diff --git a/md.h b/md.h
index d9fffb4..2d2d589 100644
--- a/md.h
+++ b/md.h
@@ -17,5 +17,6 @@
#ifndef MARKDOWN
#define MARKDOWN
+#include <stddef.h>
int parse_markdown(char* input, char* buffer, size_t buffer_size);
#endif
Jeremias Stotters git repositories generated by CGIT