aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Jeremias Stotter <jeremias@stotter.eu> 2022-01-23 16:15:07 +0100
committer Jeremias Stotter <jeremias@stotter.eu> 2022-01-29 01:32:31 +0100
commit 947f2dd1e4cac6ae3b0d960b4a43145bc10cb0b1 (patch)
tree 9cd6f97dce95a5d5f34d81725c7222e25148a06d
parent 2f1aa02d5578bd1bc33a16e6501b13113d66d0cb (diff)
downloadJBlog-947f2dd1e4cac6ae3b0d960b4a43145bc10cb0b1.tar.gz
JBlog-947f2dd1e4cac6ae3b0d960b4a43145bc10cb0b1.tar.bz2
JBlog-947f2dd1e4cac6ae3b0d960b4a43145bc10cb0b1.zip
Support unicode
-rw-r--r-- md.c 40
1 files changed, 36 insertions, 4 deletions
diff --git a/md.c b/md.c
index 6cc992b..11a2df2 100644
--- a/md.c
+++ b/md.c
@@ -21,12 +21,14 @@
#include <string.h>
#include <stdlib.h>
#include <stdbool.h>
+#include <limits.h>
+
#include <time.h>
#include "jblog.h"
-#define LINE_MAX 4096
+//#define LINE_MAX 4096
// Some not so nice things about this:
// HTML escapes like &#42; are not handled by this although they might be handled by the client anyways
@@ -104,8 +106,35 @@ struct tree_element* new_child(struct tree_element* parent, int index) {
return child;
}
// Decoder state carried between successive html_escape() calls.
// utf8_length: number of continuation bytes still expected for the multi-byte
// sequence currently being decoded (0 = no sequence pending). parse_markdown()
// resets it to 0 before it starts parsing.
int utf8_length;
// Code point accumulated so far for the pending multi-byte sequence.
long int unicode_char;

// Number of bytes html_escape() may write to output_buffer (including the NUL).
enum { HTML_ESCAPE_BUF_SIZE = 96 };

// Writes the HTML numeric character reference ("&#xHEX;") for `input` into
// output_buffer, which must have room for HTML_ESCAPE_BUF_SIZE bytes.
//
// ASCII bytes are escaped immediately. Bytes of a multi-byte UTF-8 sequence
// are fed in one call at a time: every call but the last leaves output_buffer
// as an empty string, and the call that supplies the final continuation byte
// writes the reference for the complete code point.
//
// Malformed input (a continuation byte with no sequence pending, or a lead
// byte announcing more than 4 bytes) produces an empty string and clears the
// pending state. Overlong encodings, surrogates and values above U+10FFFF are
// not rejected.
void html_escape(char* output_buffer, char input) {
	// Work on the raw byte value: whether plain `char` is signed is
	// implementation-defined, so sign-based tests are not portable.
	const unsigned char byte = (unsigned char)input;

	if(byte < 0x80) {
		// Plain ASCII; any unfinished multi-byte sequence is abandoned.
		utf8_length = 0;
		snprintf(output_buffer, HTML_ESCAPE_BUF_SIZE, "&#x%X;", (unsigned int)byte);
		return;
	}

	// Count the leading 1 bits: 1 = continuation byte, 2..4 = lead byte of a
	// sequence of that many bytes.
	int leading_ones = 0;
	for(unsigned int mask = 0x80; mask != 0 && (byte & mask) != 0; mask >>= 1)
		leading_ones++;

	// Unless a code point is completed below, nothing is emitted.
	*output_buffer = '\0';

	if(leading_ones == 1) {
		if(utf8_length <= 0) {
			// Stray continuation byte: drop it instead of corrupting the state.
			utf8_length = 0;
			return;
		}
		// Each continuation byte contributes its low 6 bits.
		unicode_char = (unicode_char << 6) | (long int)(byte & 0x3F);
		utf8_length--;
		if(utf8_length == 0)
			snprintf(output_buffer, HTML_ESCAPE_BUF_SIZE, "&#x%lX;", (unsigned long)unicode_char);
		return;
	}

	if(leading_ones > 4) {
		// 0xF8..0xFF never occur in UTF-8; there is nothing sensible to decode.
		utf8_length = 0;
		return;
	}

	// Lead byte: start a new sequence with the payload bits that follow the
	// length marker, and remember how many continuation bytes are still due.
	unicode_char = (long int)(byte & (0xFFu >> leading_ones));
	utf8_length = leading_ones - 1;
}
// Reallocs the string dest to fit src, then append
char* realloc_append(char* dest, char* src) {
@@ -220,7 +249,7 @@ void append_char_to_active(struct tree_element* root, struct tree_element** acti
(cur_char >=91 && cur_char <=96) ||
cur_char > 122) && cur_char != 0x20) {
// Escape just to be safe
- char append[8] = "";
+ char append[96] = "";
html_escape(append, cur_char);
new_active_element->value = realloc_append(new_active_element->value, append);
} else {
@@ -350,6 +379,8 @@ void str_chr_hit(struct tree_element* root, struct tree_element** active_element
// The program needs to loop through the loop again to close all the open things at the end, THIS NEEDS TO BE IMPLEMENTED for xhtml
int parse_markdown(char* input, char* buffer, size_t buffer_size) {
+ utf8_length = 0;
+
clock_t before = clock();
memset(buffer, 0, buffer_size);
bool escaped = false;
@@ -623,6 +654,7 @@ int trim_space(char* input, char** output) {
return count;
}
+/*
// Append in_src to in_dest, then return length of new string
size_t append(char* in_dest, char* in_src) {
strcat(in_dest, in_src);
@@ -641,7 +673,7 @@ size_t prepend(char* in_dest, char* in_src) {
in_dest[LINE_MAX-1] = '\0';
return strlen(in_dest);
}
-
+*/
// start is the first curly bracket we have
// Returns pointer to last round bracket
// free the returned pointers yourself
Jeremias Stotters git repositories generated by CGIT