Handle escapes in code fence info string. - md4c - C Markdown parser. Fast. SAX-like interface. Compliant to CommonMark specification.

commit 2928484250d8834340691868b618951f0bea1b37
parent 75cee13ea704906323ef2203e18300b15d813f7a
Author: Martin Mitas <mity@morous.org>
Date:   Wed,  7 Dec 2016 13:22:16 +0100

Handle escapes in code fence info string.

Diffstat:
M md4c/md4c.c  | 144 ++++++++++++++++++++++++++++++++++++++++++++++++-------------------------------

1 file changed, 88 insertions(+), 56 deletions(-)
diff --git a/md4c/md4c.c b/md4c/md4c.c
@@ -835,6 +835,79 @@ struct MD_UNICODE_FOLD_INFO_tag {
 #endif
 
 
+/*************************************
+ ***  Helper string manipulations  ***
+ *************************************/
+
+/* Fill buffer with copy of the string between 'beg' and 'end' but replace any
+ * line breaks with given replacement character and also optionally resolve any
+ * escape sequences.
+ *
+ * NOTE: Caller is responsible to make sure the buffer is large enough.
+ * (Given the output is always shorter then input, (end - beg) is good idea
+ * what the caller should allocate.)
+ */
+static void
+md_do_normalize_string(MD_CTX* ctx, OFF beg, OFF end, const MD_LINE* lines, int n_lines,
+                       CHAR line_break_replacement_char, int resolve_escapes,
+                       CHAR* buffer, SZ* p_size)
+{
+    CHAR* ptr = buffer;
+    int line_index = 0;
+    OFF off = beg;
+
+    while(1) {
+        const MD_LINE* line = &lines[line_index];
+        OFF line_end = line->end;
+
+        while(off < line_end) {
+            if(off >= end) {
+                *p_size = ptr - buffer;
+                return;
+            }
+
+            if(resolve_escapes  &&  CH(off) == _T('\\')  &&
+               off+1 < end  &&  (ISPUNCT(off+1) || ISNEWLINE(off+1)))
+            {
+                off++;
+            } else {
+                *ptr = CH(off);
+                ptr++;
+                off++;
+            }
+        }
+
+        *ptr = line_break_replacement_char;
+        ptr++;
+
+        line_index++;
+        off = lines[line_index].beg;
+    }
+}
+
+/* Wrapper of md_do_normalize_string() which allocates new buffer for the
+ * output string. */
+static int
+md_normalize_string(MD_CTX* ctx, OFF beg, OFF end, const MD_LINE* lines, int n_lines,
+                    CHAR line_break_replacement_char, int resolve_escapes,
+                    CHAR** p_str, SZ* p_size)
+{
+    CHAR* buffer;
+
+    buffer = (CHAR*) malloc(sizeof(CHAR) * (end - beg));
+    if(buffer == NULL) {
+        MD_LOG("malloc() failed.");
+        return -1;
+    }
+
+    md_do_normalize_string(ctx, beg, end, lines, n_lines,
+                line_break_replacement_char, resolve_escapes, buffer, p_size);
+
+    *p_str = buffer;
+    return 0;
+}
+
+
 /******************************
  ***  Recognizing raw HTML  ***
  ******************************/
@@ -1425,57 +1498,6 @@ md_is_link_title(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg,
     return FALSE;
 }
 
-/* Allocate new buffer, and fill it with copy of the string between
- * 'beg' and 'end' but replace any line breaks with given replacement character
- * and also optionally resolve any escape sequences.
- */
-static int
-md_normalize_string(MD_CTX* ctx, OFF beg, OFF end, const MD_LINE* lines, int n_lines,
-                    CHAR line_break_replacement_char, int resolve_escapes,
-                    CHAR** p_str, SZ* p_size)
-{
-    CHAR* buffer;
-    CHAR* ptr;
-    int line_index = 0;
-    OFF off = beg;
-
-    buffer = (CHAR*) malloc(sizeof(CHAR) * (end - beg));
-    if(buffer == NULL) {
-        MD_LOG("malloc() failed.");
-        return -1;
-    }
-    ptr = buffer;
-
-    while(1) {
-        const MD_LINE* line = &lines[line_index];
-        OFF line_end = line->end;
-
-        while(off < line_end) {
-            if(off >= end) {
-                *p_str = buffer;
-                *p_size = ptr - buffer;
-                return 0;
-            }
-
-            if(resolve_escapes  &&  CH(off) == _T('\\')  &&
-               off+1 < end  &&  (ISPUNCT(off+1) || ISNEWLINE(off+1)))
-            {
-                off++;
-            } else {
-                *ptr = CH(off);
-                ptr++;
-                off++;
-            }
-        }
-
-        *ptr = line_break_replacement_char;
-        ptr++;
-
-        line_index++;
-        off = lines[line_index].beg;
-    }
-}
-
 /* Returns 0 if it is not a link reference definition.
  *
  * Returns N > 0 if it is not a link reference definition (then N corresponds
@@ -3778,13 +3800,14 @@ md_process_code_block_contents(MD_CTX* ctx, int is_fenced, const MD_VERBATIMLINE
     return md_process_verbatim_block_contents(ctx, MD_TEXT_CODE, lines, n_lines);
 }
 
-static void
+static int
 md_setup_fenced_code_detail(MD_CTX* ctx, const MD_BLOCK* block, MD_BLOCK_CODE_DETAIL* det)
 {
     const MD_VERBATIMLINE* fence_line = (const MD_VERBATIMLINE*)(block + 1);
     OFF beg = fence_line->beg;
     OFF end = fence_line->end;
     CHAR fence_ch = CH(fence_line->beg);
+    int ret = 0;
 
     /* Skip the fence itself. */
     while(beg < ctx->size  &&  CH(beg) == fence_ch)
@@ -3798,13 +3821,22 @@ md_setup_fenced_code_detail(MD_CTX* ctx, const MD_BLOCK* block, MD_BLOCK_CODE_DE
         end--;
 
     if(beg < end) {
-        det->info = STR(beg);
-        det->info_size = end - beg;
+        MD_LINE line = { beg, end };
+        SZ size;
+
+        MD_TEMP_BUFFER(end - beg);
+        md_do_normalize_string(ctx, beg, end, &line, 1, _T(' '), TRUE, ctx->buffer, &size);
+
+        det->info = ctx->buffer;
+        det->info_size = size;
 
         det->lang = det->info;
         while(det->lang_size < det->info_size  &&  !ISWHITESPACE_(det->lang[det->lang_size]))
             det->lang_size++;
     }
+
+abort:
+    return ret;
 }
 
 static int
@@ -3814,8 +3846,8 @@ md_process_leaf_block(MD_CTX* ctx, const MD_BLOCK* block)
         MD_BLOCK_H_DETAIL header;
         MD_BLOCK_CODE_DETAIL code;
     } det;
-    int ret = 0;
     int is_in_tight_list;
+    int ret = 0;
 
     memset(&det, 0, sizeof(det));
 
@@ -3832,7 +3864,7 @@ md_process_leaf_block(MD_CTX* ctx, const MD_BLOCK* block)
         case MD_BLOCK_CODE:
             /* For fenced code block, we may need to set the info string. */
             if(block->data != 0)
-                md_setup_fenced_code_detail(ctx, block, &det.code);
+                MD_CHECK(md_setup_fenced_code_detail(ctx, block, &det.code));
             break;
 
         default:

	md4c C Markdown parser. Fast. SAX-like interface. Compliant to CommonMark specification.
	git clone https://noulin.net/git/md4c.git
	Log \| Files \| Refs \| README \| LICENSE