aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJeremias Stotter <jeremias@stotter.eu>2021-12-11 01:44:05 +0100
committerJeremias Stotter <jeremias@stotter.eu>2021-12-11 01:44:05 +0100
commite96bc6ff7e1797cf633c909a964bd9d65c575ad1 (patch)
treef2668bf532badbacc0af9cc66ca35f0d18a19e17
parent2df85e68e1d43de499d3ff8283c74ce9a951c2eb (diff)
downloadJBlog-e96bc6ff7e1797cf633c909a964bd9d65c575ad1.tar.gz
JBlog-e96bc6ff7e1797cf633c909a964bd9d65c575ad1.tar.bz2
JBlog-e96bc6ff7e1797cf633c909a964bd9d65c575ad1.zip
Started work on a new markdown engine
The old one is actual steaming garbage and I have no idea why I ever wrote something this shit. Anyway, this new one uses a tree to create valid HTML, as HTML is also a tree. This is only the beginning and it doesn't do any actual parsing yet, so building this will give you lots of empty pages :P
-rw-r--r--jblog.c3
-rw-r--r--makefile2
-rw-r--r--md.c570
-rw-r--r--md.h1
4 files changed, 149 insertions, 427 deletions
diff --git a/jblog.c b/jblog.c
index f62bf91..6a767c5 100644
--- a/jblog.c
+++ b/jblog.c
@@ -74,9 +74,6 @@ void* tree_root = NULL;
int logfile = -1;
-#define LL_INFO 0
-#define LL_WARN 1
-#define LL_ERR 2
void jb_log(int loglevel, bool include_errno, char* error_string) {
// @todo implement a minimum loglevel command line switch
int curr_errno = errno;
diff --git a/makefile b/makefile
index e1cfea5..4ca92ee 100644
--- a/makefile
+++ b/makefile
@@ -1,5 +1,5 @@
CC=gcc
-CFLAGS=-O2 -std=c99 -Wall
+CFLAGS=-O0 -g -std=c99 -Wall
BINDIR=/usr/bin
INITDIR=/etc/init.d
CONFDIR=/etc/conf.d
diff --git a/md.c b/md.c
index 7187575..9587371 100644
--- a/md.c
+++ b/md.c
@@ -30,7 +30,153 @@
// HTML escapes like &#42; are not handled by this although they might be handled by the client anyways
// No setext headings because why not just use the hash symbol?
-// @todo when implementing the cycling through characters replace tabs with 4 spaces
+size_t append(char* in_dest, char* in_src);
+size_t prepend(char* in_dest, char* in_src);
+char* get_link_components(char* start, char** out_text, char** out_loc, size_t* out_len);
+
+enum html_types{ // NOTE(review): not referenced in the visible code; looks superseded by enum html_type below — confirm before removing
+ inner,
+ img,
+
+};
+
+enum html_type { // Node kinds stored in tree_element.type
+ t_root = 0, // 0 is also the type a freshly calloc'ed element (new_element) starts with
+ t_inner, // text node; tree_to_html appends the element's value verbatim
+ t_p, // tree_to_html recurses into its children
+ t_em, // this and the remaining kinds are not handled by tree_to_html yet
+ t_b,
+ t_img,
+ t_a,
+ t_ol,
+ t_ul,
+ t_li
+};
+
+// The file will be structured as a tree that, in the end, will be converted to valid html
+struct tree_element {
+ struct tree_element* parent; // set by new_child; NULL for an element created directly with new_element
+ int type; // compared against enum html_type values in tree_to_html
+ int children_n; // number of entries in children
+ struct tree_element** children; // array grown by new_child and released by free_tree
+ // Can be anything additional, like src for img or text in the case of inner
+ void* value; // released with free() in free_tree
+};
+
+// Allocates one zero-initialised tree element (all pointers NULL, type 0, no children).
+// Returns NULL when calloc fails; the caller owns the result.
+struct tree_element* new_element() {
+    struct tree_element* element = calloc(1, sizeof *element);
+    return element;
+}
+
+// Recursively releases an element together with its value and all descendants.
+// root may be NULL (no-op), so partially built trees can be torn down safely.
+void free_tree(struct tree_element* root) {
+    if(!root)
+        return;
+    // children_n is only trusted while the array actually exists
+    if(root->children) {
+        for(int i = 0; i < root->children_n; i++) {
+            // The recursive call tolerates NULL slots
+            free_tree(root->children[i]);
+        }
+        free(root->children);
+    }
+    // free(NULL) is a no-op, so value needs no guard
+    free(root->value);
+    free(root);
+}
+
+// Appends a freshly zeroed child to parent and returns it.
+// Returns NULL when parent is NULL or an allocation fails; in that case
+// parent (its children array and children_n) is left unchanged.
+struct tree_element* new_child(struct tree_element* parent) {
+    if(!parent)
+        return NULL;
+    struct tree_element* child = new_element();
+    if(!child)
+        return NULL;
+    // Grow through a temporary so the old array survives a failed realloc
+    struct tree_element** grown = realloc(parent->children,
+        sizeof *grown * ((size_t)parent->children_n + 1));
+    if(!grown) {
+        free(child);
+        return NULL;
+    }
+    parent->children = grown;
+    child->parent = parent;
+    // Only count the child once it is really stored
+    parent->children[parent->children_n++] = child;
+    return child;
+}
+
+// Writes input as a hexadecimal HTML character reference ("&#x..;") into output_buffer using sprintf and evaluates to sprintf's result (the bytes written, not counting the terminating NUL). No bounds check is done, so output_buffer must be large enough.
+#define html_escape(output_buffer, input) sprintf(output_buffer, "&#x%X;", input)
+
+// Reallocs the string dest to fit src, then appends src.
+// dest may be NULL (treated as an empty string) and src may be NULL (nothing
+// is appended). The result is always NUL-terminated and replaces dest; the
+// caller must free it.
+// On allocation failure dest is freed and NULL is returned, so the usual
+// `s = realloc_append(s, x)` pattern neither leaks nor keeps a stale pointer.
+char* realloc_append(char* dest, char* src) {
+    size_t dest_len = dest ? strlen(dest) : 0;
+    size_t src_len = src ? strlen(src) : 0;
+    char* grown = realloc(dest, dest_len + src_len + 1);
+    if(!grown) {
+        free(dest);
+        return NULL;
+    }
+    if(src_len)
+        memcpy(grown + dest_len, src, src_len);
+    // Terminate explicitly: with dest == NULL and src == NULL the fresh
+    // buffer would otherwise hold indeterminate bytes
+    grown[dest_len + src_len] = '\0';
+    return grown;
+}
+
+// printf-style templates, one per element type; %s is the inner HTML.
+// img_html takes the src first, then the alt text; a_html takes the href
+// first, then the link text.
+// NOTE(review): not referenced by the visible code yet — presumably meant for tree_to_html.
+#define p_html "<p>%s</p>"
+#define em_html "<em>%s</em>"
+#define b_html "<b>%s</b>"
+#define img_html "<img src=\"%s\" alt=\"%s\" title=\"\"/>"
+#define a_html "<a href=\"%s\">%s</a>"
+#define ol_html "<ol>%s</ol>"
+#define ul_html "<ul>%s</ul>"
+#define li_html "<li>%s</li>"
+// Resolve a tree to html
+// Concatenates the HTML of root's children, in order, into one heap string
+// that the caller must free. Returns NULL when root is NULL or when it has
+// no children of a rendered type.
+// Only t_inner (value appended verbatim) and t_p (children rendered
+// recursively, without a surrounding tag so far) are handled.
+char* tree_to_html(struct tree_element* root) {
+    if(!root)
+        return NULL;
+    char* html = NULL;
+    for(int i = 0; i < root->children_n; i++) {
+        struct tree_element* child = root->children[i];
+        if(!child)
+            continue;
+        switch(child->type) {
+            case(t_inner):
+                // Append the inner html to buffer
+                html = realloc_append(html, child->value);
+                break;
+            case(t_p): {
+                // The braces give the declaration its own block: a declaration
+                // directly after a case label is not valid C99/C11
+                char* child_html = tree_to_html(child);
+                html = realloc_append(html, child_html);
+                free(child_html);
+                break;
+            }
+            default:
+                // Element types without a renderer are skipped for now
+                break;
+        }
+    }
+    return html;
+}
+
+// The program needs to loop through the loop again to close all the open things at the end, THIS NEEDS TO BE IMPLEMENTED for xhtml
+//
+// Work-in-progress tree based parser: walks input character by character,
+// builds a tree_element tree, renders it with tree_to_html and, for now,
+// prints the result to stdout. buffer is only zeroed; no HTML is written
+// into it yet.
+// Returns 0 on success, -1 on NULL arguments or when an allocation fails.
+int parse_markdown(char* input, char* buffer, size_t buffer_size) {
+    if(!input || !buffer)
+        return -1;
+    memset(buffer, 0, buffer_size);
+    bool escaped = false;
+    // True while the last line break has not been followed by content yet
+    bool newline = false;
+    struct tree_element* root = new_element();
+    if(!root)
+        return -1;
+    struct tree_element* active_element = new_child(root);
+    if(!active_element) {
+        free_tree(root);
+        return -1;
+    }
+    for(char* cur_char = input; *cur_char != '\0'; cur_char++) {
+        if(escaped) {
+            // The escaped character counts as content; it is dropped until
+            // text handling is implemented
+            escaped = false;
+            newline = false;
+            continue;
+        }
+        switch(*cur_char) {
+            case('\\'):
+                escaped = true;
+                newline = false;
+                break;
+            case('\t'):
+
+                break;
+            // NOTE(review): "\r\n" is still seen as two line breaks — confirm
+            // how CRLF input should be treated
+            case('\r'):
+            case('\n'):
+                if(newline) {
+                    // New active element
+                    struct tree_element* next = new_child(active_element->parent);
+                    if(!next) {
+                        free_tree(root);
+                        return -1;
+                    }
+                    active_element = next;
+                }
+                newline = true;
+                break;
+            case('*'):
+            case('_'):
+                newline = false;
+                break;
+            default:
+                // Content ends the "previous character was a line break" state
+                newline = false;
+                /*
+                if(!active_element->type) {
+                    active_element->type = t_inner;
+                }
+                char append[2] = {*cur_char, '\0'};
+                active_element->value = realloc_append(active_element->value, append);
+                */
+                break;
+        }
+    }
+    char* html = tree_to_html(root);
+    // tree_to_html returns NULL when nothing was rendered; passing NULL to %s is undefined
+    printf("%s\n", html ? html : "");
+    // Tear down the tree
+    free_tree(root);
+    free(html);
+    return 0;
+}
// This function outputs a pointer that points past leading spaces
// it returns the number of spaces skipped, where \t = 4 spaces
@@ -108,425 +254,3 @@ char* get_link_components(char* start, char** out_text, char** out_loc, size_t*
return closing_rnd_bracket;
}
-
-// @todo NEXT
-// The program needs to loop through the loop again to cose all the open things at the end, THIS NEEDS TO BE IMPLEMENTED for xhtml
-int parse_markdown(char* input, char* buffer, size_t buffer_size) {
- bool empty_line_carry = true;
- bool spaced_codeblock = false;
- bool in_paragraph = false;
- // This is the offset in parsed_text
- size_t offset = 0;
- // This describes the spaces the last list level had
- int list_spaces = 0;
- int list_level = 0;
- bool in_list = false;
- char* next_line = input;
- // Different indicators for all the inline things
- // How strongly are we currently emphasized? * = 1; ** = 2; *** = 3
- int strength_level = 0;
- bool in_mono = false;
- bool in_cut = false;
- bool in_quotes = false;
- // Signal we are on the last line, for safety reasons we just insert an empty paragraph
- bool on_lastline = false;
- for(char* line = input; next_line != NULL; line = next_line) {
- // Compute the next line and put a '\0' at the end of the current line
- {
- char* line_end = strchr(line, '\n');
- if(line_end != NULL) {
- *line_end = '\0';
- next_line = line_end + 1;
- } else {
- empty_line_carry = true;
- on_lastline = true;
- }
- }
- size_t line_length = 0;
- char line_buffer[LINE_MAX] = {'\0'};
- bool no_special = true;
- // The pre_line_buffer contains html headers that are needed before content of the current line
- char pre_line_buffer[LINE_MAX] = "\0";
- char* trimmed_line = NULL;
- // When this is true we can format text with things like <em>, <b>, etc.
- bool format_allow = true;
- int spaces_skipped = 0;
- bool empty_line = false;
- // Check if we have an empty line
- if(empty_line_carry) {
- empty_line_carry = false;
- empty_line = true;
- }
-
- spaces_skipped = trim_space(line, &trimmed_line);
-
- if(!on_lastline) {
- if(*trimmed_line == '\0' || *trimmed_line == '\r') {
- empty_line_carry = true;
- continue;
- }
- }
-
- if(on_lastline) {
- empty_line = true;
- line = "\0";
- }
-
- // Blockquotes
- /*
- I love me some spaghetti bolognese ;P
- Luckily there is enough spaghetti here
- for everyone
-
- ----|
- ----|------------
- \//\----|
- ||/\||\
- |/|||||\
- ---------------
- \ /
- \-----------/
- */
- if(*trimmed_line == '>') {
- // We are in a blockquote!
- // Set the new line to after the quote marker
- line = trimmed_line + 1;
- // trim again
- spaces_skipped = trim_space(line, &trimmed_line);
- if(!in_quotes) {
- if(in_paragraph) {
- strncat(pre_line_buffer, "</p><blockquote>", LINE_MAX -1);
- in_paragraph = false;
- } else {
- line_length = prepend(line_buffer, "<blockquote><p>");
- in_paragraph = true;
- }
- in_quotes = true;
- }
- } else if(in_quotes) {
- if(in_paragraph) {
- strncat(pre_line_buffer, "</p></blockquote>", LINE_MAX-1);
- in_paragraph = false;
- } else {
- strncat(pre_line_buffer, "</blockquote>", LINE_MAX-1);
- }
- in_quotes = false;
- }
- // / Blockquotes
-
- // Unordered Lists ------------------------------------------------
- if(spaces_skipped < 4 + list_spaces && *trimmed_line == '*' && *(trimmed_line + 1) == ' ') {
- line_length = prepend(line_buffer, "<li>");
- if(!in_list) {
- line_length = prepend(line_buffer, "<ul>");
- in_list = true;
- } else {
- strncat(pre_line_buffer, "</li>\n", LINE_MAX-1);
- }
- if(spaces_skipped < list_spaces) {
- for(int i = (list_spaces - spaces_skipped) >> 1; i > 0; i--) {
- strncat(pre_line_buffer, "</ul>\n", LINE_MAX-1);
- list_level--;
- }
- } else if(spaces_skipped > list_spaces) {
- for(int i = (spaces_skipped - list_spaces) >> 1; i > 0; i--) {
- line_length = prepend(line_buffer, "<ul>");
- list_level++;
- }
- }
-
- trimmed_line = trimmed_line + 2;
- list_spaces = spaces_skipped;
- } else if(in_list && empty_line) {
- strncat(pre_line_buffer, "</li></ul>\n", LINE_MAX-1);
- for(int i = 0; i < list_level; i++) {
- strncat(pre_line_buffer, "</ul>\n", LINE_MAX-1);
- }
- in_list = false;
- list_spaces = 0;
- }
- // / Unordered List -----------------------------------------------
- // @todo Ordered list
-
- // Spaced codeblocks ----------------------------------------------
- // Also check if we have a list
- if(spaces_skipped >= 4 && !in_list) {
- // If this is true we are already in a codeblock
- if(spaced_codeblock) {
- line_length = prepend(line_buffer, "\n");
- no_special = false;
- format_allow = false;
- } else {
- line_length = prepend(line_buffer, "<pre><code>");
- line_length = append(line_buffer, "<p>");
- spaced_codeblock = true;
- no_special = false;
- format_allow = false;
- } // Replace any leading tabs with 4 spaces
- char* code_replaced = line;
- for(int i = 0; i < 4;) {
- if(*(line + i) == '\t') {
- code_replaced ++;
- i += 4;
- } else if(*(line + i) == ' ') {
- code_replaced++;
- i++;
- } else {
- break;
- }
- }
- line_length = append(line_buffer, code_replaced);
- } else if(spaced_codeblock) {
- spaced_codeblock = false;
- strncat(pre_line_buffer, "</p></code></pre>\n", LINE_MAX-1);
- }
- // / Spaced codeblocks --------------------------------------------
-
-
- // Titles (#) -----------------------------------------------------
- // @todo allow enclosing titles bracket style
- if(no_special && *trimmed_line == '#') {
- int header_depth = 1;
- while(header_depth < 6) {
- if(*(trimmed_line + header_depth) == '#')
- header_depth++;
- else
- break;
- }
- char* trimmed_title = NULL;
- if(trim_space(trimmed_line + header_depth,
- &trimmed_title) > 0) {
- no_special = false;
-
- char title[LINE_MAX + 1] = "";
- if(snprintf(title, LINE_MAX + 1, "<h%d>%s</h%d>\n",
- header_depth,
- trimmed_title,
- header_depth
- ) > LINE_MAX)
- {
- jb_log(LL_WARN, false, "Title too long");
- return -1;
- }
- line_length = append(line_buffer, title);
- }
- // / Titles -------------------------------------------------------
- // Thematic brakes ------------------------------------------------
- } else if(no_special && (*trimmed_line == '-' || *trimmed_line == '*' || *trimmed_line == '_')) {
- int i = 1;
- for(; *(trimmed_line+i) == *(trimmed_line+i-1); i++) {
- }
- if(*(trimmed_line+i) == '\0') {
- no_special = false;
- line_length = append(line_buffer, "<hr/>\n");
- }
- }
- // / Thematic brakes ----------------------------------------------
-
-
- // Plain Text -----------------------------------------------------
- if(no_special) {
- line_length = append(line_buffer, trimmed_line);
- // Handle newline via slash/*
- if(*(line_buffer + line_length -1) == '\\') {
- *(line_buffer + line_length -1) = '\n';
- if(*(line_buffer + line_length -2) != '\\') {
- empty_line_carry = true;
- }
- }
- // Handle newlines
- if(empty_line && in_paragraph) {
- strncat(pre_line_buffer, "</p>\n", LINE_MAX-1);
- in_paragraph = false;
- }
- // If we are not in a paragraph enter one
- if(!in_paragraph && !in_list) {
- line_length = prepend(line_buffer, "<p>");
- in_paragraph = true;
- } else { // If we are in a paragraph the newline will be converted to a space
- line_length = prepend(line_buffer, " ");
- }
- } else if(in_paragraph) {
- line_length = prepend(line_buffer, "</p>\n");
- in_paragraph = false;
- }
-
- if(format_allow) {
- bool escaped = false;
- char format_line_buffer[LINE_MAX] = {0};
- for(char* line_position = line_buffer; *line_position != '\0'; line_position++) {
- // Handle a previously escaped character
- if(escaped) {
- char escaped_char[2] = {*line_position, '\0'};
- // Also check if we have an escaped newline
- strncat(format_line_buffer, *line_position == '\n' ? "\\" : escaped_char, LINE_MAX-1);
- escaped = false;
- continue;
- }
- switch(*line_position) {
- // Text strength, for sake of simplicity, we'll treat * and _ the same
- case '*' :
- case '_' :
- {
- // Look ahead if the next ones are also strength indicators
- int strength_indicators = 0;
- for(; strength_level > 0 ? strength_indicators<strength_level : true ; strength_indicators++) {
- if(!(*(line_position + strength_indicators) == '*' ||
- *(line_position + strength_indicators) == '_')) {
- break;
- }
- }
-
- if(strength_level == 0) {
- if(strength_indicators == 1) {
- strncat(format_line_buffer, "<em>", LINE_MAX-1);
- } else if(strength_indicators == 2) {
- strncat(format_line_buffer, "<b>", LINE_MAX-1);
- } else {
- strncat(format_line_buffer, "<b><em>", LINE_MAX-1);
- }
- strength_level = strength_indicators;
- line_position += strength_indicators -1;
- } else {
- if(strength_level >= 3 && strength_indicators >= 3) {
- strncat(format_line_buffer, "</em></b>", LINE_MAX-1);
- } else if(strength_level == 2 && strength_indicators >= 2) {
- strncat(format_line_buffer, "</b>", LINE_MAX-1);
- } else {
- strncat(format_line_buffer, "</em>", LINE_MAX-1);
- }
- strength_level = strength_level - strength_indicators;
- line_position += strength_indicators -1;
- }
- }
- break;
- // Inline code
- case '`':
- {
- if(in_mono) {
- strncat(format_line_buffer, "</code>", LINE_MAX - 1);
- in_mono = false;
- } else {
- strncat(format_line_buffer, "<code>", LINE_MAX - 1);
- in_mono = true;
- }
- }
- break;
- // Cut text
- case '~':
- {
- // Look ahead, we only want to cut if there are two tildes
- if(*(line_position + 1) != '~') {
- strncat(format_line_buffer, "~", LINE_MAX - 1);
- break;
- }
- if(in_cut) {
- // Close
- strncat(format_line_buffer, "</s>", LINE_MAX - 1);
- in_cut = false;
- } else {
- in_cut = true;
- strncat(format_line_buffer, "<s>", LINE_MAX - 1);
- }
- line_position++;
- }
- break;
- // brackets
- case '[':
- {
- char* link_text = NULL;
- char* link_loc = NULL;
- size_t link_loctxt_len = 0;
- char* closing_rnd_bracket = get_link_components(line_position, &link_text, &link_loc, &link_loctxt_len);
- // This is 16 characters long (add one for good measure (: ):
- //<a href=""></a>\0
- char* link_html = calloc(link_loctxt_len + 17, 1);
- if(link_html == NULL || closing_rnd_bracket == NULL) {
- strncat(format_line_buffer, "[", LINE_MAX - 1);
- if(link_html != NULL) free(link_html);
- if(link_text != NULL) free(link_text);
- if(link_loc != NULL) free(link_loc);
- break;
- }
-
- sprintf(link_html, "<a href=\"%s\">%s</a>", link_loc, link_text);
-
- strncat(format_line_buffer, link_html, LINE_MAX - 1);
- line_position = closing_rnd_bracket;
- free(link_html);
- free(link_text);
- free(link_loc);
- }
- break;
- // Images
- case '!':
- {
- char* img_alt = NULL;
- char* img_src = NULL;
- size_t img_altsrc_len = 0;
- if(*(line_position + 1) == '\0') {
- strncat(format_line_buffer, "!", LINE_MAX - 1);
- break;
- }
- char* closing_rnd_bracket = get_link_components(line_position + 1, &img_alt, &img_src, &img_altsrc_len);
- // This is 21 characters long, again we add one more to be safe
- //<img alt="" src=""/>\0
- char* img_html = calloc(img_altsrc_len + 22, 1);
- if(img_html == NULL || closing_rnd_bracket == NULL) {
- strncat(format_line_buffer, "!", LINE_MAX - 1);
- if(img_html != NULL) free(img_html);
- if(img_alt != NULL) free(img_alt);
- if(img_src != NULL) free(img_src);
- break;
- }
- sprintf(img_html, "<img alt=\"%s\" src=\"%s\"/>", img_alt, img_src);
-
- strncat(format_line_buffer, img_html, LINE_MAX - 1);
- line_position = closing_rnd_bracket;
- free(img_html);
- free(img_alt);
- free(img_src);
- }
- break;
- // escape
- case '\\':
- {
- escaped = true;
- }
- break;
- default:
- {
- // See how much text we have
- size_t text_found_n = strcspn(line_position, "*_`[!\\~");
- char text_found[LINE_MAX] = {'\0'};
- memcpy(text_found, line_position, text_found_n);
- text_found[text_found_n+1] = '\0';
- append(format_line_buffer, text_found);
- line_position += text_found_n-1;
- }
- }
- }
- memcpy(line_buffer, format_line_buffer, LINE_MAX-1);
- line_length = strnlen(format_line_buffer, LINE_MAX);
- }
-
- // Prepend the pre_line_buffer to the line_buffer
- line_length = prepend(line_buffer, pre_line_buffer);
-
- if(offset + line_length > buffer_size) {
- jb_log(LL_WARN, false, "too long");
- return -1;
- }
-
- memcpy(buffer + offset, line_buffer, line_length);
- offset += line_length;
- if(on_lastline) {
- *(buffer+offset) = '\0';
- if(in_paragraph) {
- strncat(buffer, "</p>", buffer_size);
- }
- break;
- }
- }
- return 0;
-}
diff --git a/md.h b/md.h
index d9fffb4..2d2d589 100644
--- a/md.h
+++ b/md.h
@@ -17,5 +17,6 @@
#ifndef MARKDOWN
#define MARKDOWN
+#include <stddef.h>
int parse_markdown(char* input, char* buffer, size_t buffer_size);
#endif
Jeremias Stotters git repositories generated by CGIT