Refactorize to allow procesing more blocks at once. - md4c - C Markdown parser. Fast. SAX-like interface. Compliant to CommonMark specification.

commit 0d4b10667aac652d352a843fd82ed19e9501fbf1
parent 1ba03589c0962849a8a45926f53263867404ef6a
Author: Martin Mitas <mity@morous.org>
Date:   Mon,  7 Nov 2016 19:46:43 +0100

Refactorize to allow procesing more blocks at once.

Diffstat:
M md4c/md4c.c  | 723 ++++++++++++++++++++++++++++++++++++++++++++++++++-----------------------------

1 file changed, 456 insertions(+), 267 deletions(-)
diff --git a/md4c/md4c.c b/md4c/md4c.c
@@ -70,6 +70,7 @@ typedef MD_SIZE SZ;
 typedef MD_OFFSET OFF;
 
 typedef struct MD_MARK_tag MD_MARK;
+typedef struct MD_BLOCK_tag MD_BLOCK;
 
 
 /* During analyzes of inline marks, we need to manage some "mark chains",
@@ -112,22 +113,27 @@ struct MD_CTX_tag {
 #define ASTERISK_OPENERS        ctx->mark_chains[2]
 #define UNDERSCORE_OPENERS      ctx->mark_chains[3]
 
-    /* Minimal indentation to call the block "indented code". */
+    /* For block analysis.
+     * Notes:
+     *   -- It holds MD_BLOCK as well as MD_LINE structures. After each
+     *      MD_BLOCK, its (multiple) MD_LINE(s) follow.
+     *   -- For MD_BLOCK_HTML and MD_BLOCK_CODE, MD_VERBATIMLINE(s) are used
+     *      instead of MD_LINE(s).
+     */
+    void* block_bytes;
+    MD_BLOCK* current_block;
+    unsigned n_block_bytes;
+    unsigned alloc_block_bytes;
+
+    /* Minimal indentation to call the block "indented code block". */
     unsigned code_indent_offset;
 
     /* For MD_BLOCK_QUOTE */
     unsigned quote_level;   /* Nesting level. */
 
-    /* For MD_BLOCK_HEADER. */
-    unsigned header_level;
-
-    /* For MD_BLOCK_CODE (fenced). */
-    SZ code_fence_length;
-    OFF code_fence_info_beg;
-    OFF code_fence_info_end;
-
-    /* For MD_BLOCK_HTML. */
-    int html_block_type;
+    /* Contextual info for line analysis. */
+    SZ code_fence_length;   /* For checking closing fence length. */
+    int html_block_type;    /* For checking closing raw HTML condition. */
 };
 
 typedef enum MD_LINETYPE_tag MD_LINETYPE;
@@ -143,15 +149,29 @@ enum MD_LINETYPE_tag {
     MD_LINE_TEXT
 };
 
-typedef struct MD_LINE_tag MD_LINE;
-struct MD_LINE_tag {
-    MD_LINETYPE type;
+typedef struct MD_LINE_ANALYSIS_tag MD_LINE_ANALYSIS;
+struct MD_LINE_ANALYSIS_tag {
+    MD_LINETYPE type    : 16;
+    unsigned data       : 16;
     OFF beg;
     OFF end;
     unsigned quote_level;   /* Level of nesting in <blockquote>. */
     unsigned indent;        /* Indentation level. */
 };
 
+typedef struct MD_LINE_tag MD_LINE;
+struct MD_LINE_tag {
+    OFF beg;
+    OFF end;
+};
+
+typedef struct MD_VERBATIMLINE_tag MD_VERBATIMLINE;
+struct MD_VERBATIMLINE_tag {
+    OFF beg;
+    OFF end;
+    OFF indent;
+};
+
 
 /*******************
  ***  Debugging  ***
@@ -387,7 +407,7 @@ md_text_with_null_replacement(MD_CTX* ctx, MD_TEXTTYPE type, const CHAR* str, SZ
  * or when breaking document to blocks (checking for start of HTML block type 7).
  *
  * When breaking document to blocks, we do not yet know line boundaries, but
- * in that case th whole tag has to live on a single line. We distinguish this
+ * in that case the whole tag has to live on a single line. We distinguish this
  * by n_lines == 0.
  */
 static int
@@ -715,52 +735,9 @@ md_is_html_any(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_
 
 
 /******************************************
- ***  Recognizing Some Complex Inlines  ***
+ ***  Processing Inlines (a.k.a Spans)  ***
  ******************************************/
 
-static int
-md_is_autolink(MD_CTX* ctx, OFF beg, OFF end)
-{
-    OFF off;
-
-    MD_ASSERT(CH(beg) == _T('<'));
-    MD_ASSERT(CH(end-1) == _T('>'));
-
-    beg++;
-    end--;
-
-    /* Check for scheme. */
-    off = beg;
-    if(off >= end  ||  !ISASCII(off))
-        return -1;
-    off++;
-    while(1) {
-        if(off >= end)
-            return -1;
-        if(off - beg > 32)
-            return -1;
-        if(CH(off) == _T(':')  &&  off - beg >= 2)
-            break;
-        if(!ISALNUM(off) && CH(off) != _T('+') && CH(off) != _T('-') && CH(off) != _T('.'))
-            return -1;
-        off++;
-    }
-
-    /* Check the path after the scheme. */
-    while(off < end) {
-        if(ISWHITESPACE(off) || ISCNTRL(off) || CH(off) == _T('<') || CH(off) == _T('>'))
-            return -1;
-        off++;
-    }
-
-    return 0;
-}
-
-
-/******************************************************
- ***  Processing Sequence of Inlines (a.k.a Spans)  ***
- ******************************************************/
-
 /* We process inlines in few phases:
  *
  * (1) We go through the block text and collect all significant characters
@@ -1345,6 +1322,44 @@ md_analyze_backtick(MD_CTX* ctx, int mark_index)
         md_mark_chain_append(ctx, &BACKTICK_OPENERS, mark_index);
 }
 
+static int
+md_is_autolink(MD_CTX* ctx, OFF beg, OFF end)
+{
+    OFF off;
+
+    MD_ASSERT(CH(beg) == _T('<'));
+    MD_ASSERT(CH(end-1) == _T('>'));
+
+    beg++;
+    end--;
+
+    /* Check for scheme. */
+    off = beg;
+    if(off >= end  ||  !ISASCII(off))
+        return -1;
+    off++;
+    while(1) {
+        if(off >= end)
+            return -1;
+        if(off - beg > 32)
+            return -1;
+        if(CH(off) == _T(':')  &&  off - beg >= 2)
+            break;
+        if(!ISALNUM(off) && CH(off) != _T('+') && CH(off) != _T('-') && CH(off) != _T('.'))
+            return -1;
+        off++;
+    }
+
+    /* Check the path after the scheme. */
+    while(off < end) {
+        if(ISWHITESPACE(off) || ISCNTRL(off) || CH(off) == _T('<') || CH(off) == _T('>'))
+            return -1;
+        off++;
+    }
+
+    return 0;
+}
+
 static void
 md_analyze_lt_gt(MD_CTX* ctx, int mark_index, const MD_LINE* lines, int n_lines)
 {
@@ -1882,12 +1897,24 @@ abort:
 }
 
 
-/******************************************
- ***  Processing Single Block Contents  ***
- ******************************************/
+/*******************************
+ ***  Processing Leaf Block  ***
+ *******************************/
+
+struct MD_BLOCK_tag {
+    MD_BLOCKTYPE type   : 16;
+
+    /* MD_BLOCK_H:      header level (1 - 6)
+     * MD_BLOCK_CODE:   non-zero if fenced, zero if indented.
+     */
+    unsigned data       : 16;
+
+    unsigned n_lines;
+    unsigned quote_level;
+};
 
 static int
-md_process_normal_block(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
+md_process_normal_block_contents(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
 {
     int ret;
 
@@ -1899,14 +1926,14 @@ abort:
 }
 
 static int
-md_process_verbatim_block(MD_CTX* ctx, MD_TEXTTYPE text_type, const MD_LINE* lines, int n_lines)
+md_process_verbatim_block_contents(MD_CTX* ctx, MD_TEXTTYPE text_type, const MD_VERBATIMLINE* lines, int n_lines)
 {
     static const CHAR indent_str[16] = _T("                ");
     int i;
     int ret = 0;
 
     for(i = 0; i < n_lines; i++) {
-        const MD_LINE* line = &lines[i];
+        const MD_VERBATIMLINE* line = &lines[i];
         int indent = line->indent;
 
         /* Output code indentation. */
@@ -1929,10 +1956,15 @@ abort:
 }
 
 static int
-md_process_code_block(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
+md_process_code_block_contents(MD_CTX* ctx, int is_fenced, const MD_VERBATIMLINE* lines, int n_lines)
 {
-    /* Ignore blank lines at start/end of indented code block. */
-    if(lines[0].type == MD_LINE_INDENTEDCODE) {
+    if(is_fenced) {
+        /* Skip the first line in case of fenced code: It is the fence.
+         * (Only the starting fence is present due to logic in md_analyze_line().) */
+        lines++;
+        n_lines--;
+    } else {
+        /* Ignore blank lines at start/end of indented code block. */
         while(n_lines > 0  &&  lines[0].beg == lines[0].end) {
             lines++;
             n_lines--;
@@ -1942,23 +1974,289 @@ md_process_code_block(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
         }
     }
 
-    /* Skip the first line in case of fenced code: It is the fence.
-     * (Only the starting fence is present due to logic in md_analyze_line().) */
-    if(lines[0].type == MD_LINE_FENCEDCODE) {
-        lines++;
-        n_lines--;
-    }
-
     if(n_lines == 0)
         return 0;
 
-    return md_process_verbatim_block(ctx, MD_TEXT_CODE, lines, n_lines);
+    return md_process_verbatim_block_contents(ctx, MD_TEXT_CODE, lines, n_lines);
+}
+
+static int
+md_process_blockquote_nesting(MD_CTX* ctx, unsigned desired_level)
+{
+    int ret = 0;
+
+    /* Bring blockquote nesting to expected level. */
+    if(ctx->quote_level != desired_level) {
+        while(ctx->quote_level < desired_level) {
+            MD_ENTER_BLOCK(MD_BLOCK_QUOTE, NULL);
+            ctx->quote_level++;
+        }
+        while(ctx->quote_level > desired_level) {
+            MD_LEAVE_BLOCK(MD_BLOCK_QUOTE, NULL);
+            ctx->quote_level--;
+        }
+    }
+
+abort:
+    return ret;
+}
+
+static void
+md_setup_fenced_code_detail(MD_CTX* ctx, const MD_BLOCK* block, MD_BLOCK_CODE_DETAIL* det)
+{
+    const MD_VERBATIMLINE* fence_line = (const MD_VERBATIMLINE*)(block + 1);
+    OFF beg = fence_line->beg;
+    OFF end = fence_line->end;
+    CHAR fence_ch = CH(fence_line->beg);
+
+    /* Skip the fence itself. */
+    while(CH(beg) == fence_ch)
+        beg++;
+    /* Trim initial spaces. */
+    while(CH(beg) == _T(' '))
+        beg++;
+
+    /* Trim trailing spaces. */
+    while(end > beg  &&  CH(end-1) == _T(' '))
+        end--;
+
+    if(beg < end) {
+        det->info = STR(beg);
+        det->info_size = end - beg;
+
+        det->lang = det->info;
+        while(det->lang_size < det->info_size  &&  !ISWHITESPACE_(det->lang[det->lang_size]))
+            det->lang_size++;
+    }
+}
+
+static int
+md_process_block(MD_CTX* ctx, const MD_BLOCK* block)
+{
+    union {
+        MD_BLOCK_H_DETAIL header;
+        MD_BLOCK_CODE_DETAIL code;
+    } det;
+    int ret = 0;
+
+    /* Make sure the processed leaf block lives in the proper block quote
+     * level. */
+    MD_CHECK(md_process_blockquote_nesting(ctx, block->quote_level));
+
+    memset(&det, 0, sizeof(det));
+
+    switch(block->type) {
+        case MD_BLOCK_DOC:
+            /* Noop. We just needed to solve block quote nesting. */
+            return 0;
+
+        case MD_BLOCK_H:
+            det.header.level = block->data;
+            break;
+
+        case MD_BLOCK_CODE:
+            /* For fenced code block, we may need to set the info string. */
+            if(block->data != 0)
+                md_setup_fenced_code_detail(ctx, block, &det.code);
+            break;
+
+        default:
+            /* Noop. */
+            break;
+    }
+
+    MD_ENTER_BLOCK(block->type, (void*) &det);
+
+    /* Process the block contents accordingly to is type. */
+    switch(block->type) {
+        case MD_BLOCK_HR:
+            /* noop */
+            break;
+
+        case MD_BLOCK_CODE:
+            ret = md_process_code_block_contents(ctx, (block->data != 0),
+                            (const MD_VERBATIMLINE*)(block + 1), block->n_lines);
+            break;
+
+        case MD_BLOCK_HTML:
+            ret = md_process_verbatim_block_contents(ctx, MD_TEXT_HTML,
+                            (const MD_VERBATIMLINE*)(block + 1), block->n_lines);
+            break;
+
+        default:
+            ret = md_process_normal_block_contents(ctx,
+                            (const MD_LINE*)(block + 1), block->n_lines);
+            break;
+    }
+    if(ret != 0)
+        goto abort;
+
+    MD_LEAVE_BLOCK(block->type, (void*) &det);
+
+abort:
+    return ret;
+}
+
+static int
+md_process_all_blocks(MD_CTX* ctx)
+{
+    unsigned byte_off = 0;
+    int ret = 0;
+
+    while(byte_off < ctx->n_block_bytes) {
+        MD_BLOCK* block = (MD_BLOCK*)((char*)ctx->block_bytes + byte_off);
+        MD_CHECK(md_process_block(ctx, block));
+
+        byte_off += sizeof(MD_BLOCK);
+        if(block->type == MD_BLOCK_CODE || block->type == MD_BLOCK_HTML)
+            byte_off += block->n_lines * sizeof(MD_VERBATIMLINE);
+        else
+            byte_off += block->n_lines * sizeof(MD_LINE);
+    }
+
+    ctx->n_block_bytes = 0;
+
+abort:
+    return ret;
 }
 
 
-/***************************************
- ***  Breaking Document into Blocks  ***
- ***************************************/
+/************************************
+ ***  Grouping Lines into Blocks  ***
+ ************************************/
+
+static void*
+md_push_block_bytes(MD_CTX* ctx, unsigned n_bytes)
+{
+    void* ptr;
+
+    if(ctx->n_block_bytes + n_bytes > ctx->alloc_block_bytes) {
+        void* new_block_bytes;
+
+        ctx->alloc_block_bytes = (ctx->alloc_block_bytes > 0 ? ctx->alloc_block_bytes * 2 : 512);
+        new_block_bytes = realloc(ctx->block_bytes, ctx->alloc_block_bytes);
+        if(new_block_bytes == NULL) {
+            MD_LOG("realloc() failed.");
+            return NULL;
+        }
+
+        /* Fix the ->current_block after the reallocation. */
+        if(ctx->current_block != NULL) {
+            OFF off_current_block = (char*) ctx->current_block - (char*) ctx->block_bytes;
+            ctx->current_block = (MD_BLOCK*) ((char*) new_block_bytes + off_current_block);
+        }
+
+        ctx->block_bytes = new_block_bytes;
+    }
+
+    ptr = (char*)ctx->block_bytes + ctx->n_block_bytes;
+    ctx->n_block_bytes += n_bytes;
+    return ptr;
+}
+
+static int
+md_start_new_block(MD_CTX* ctx, const MD_LINE_ANALYSIS* line)
+{
+    MD_BLOCK* block;
+
+    MD_ASSERT(ctx->current_block == NULL);
+
+    block = (MD_BLOCK*) md_push_block_bytes(ctx, sizeof(MD_BLOCK));
+    if(block == NULL)
+        return -1;
+
+    switch(line->type) {
+        case MD_LINE_BLANK:
+            /* We misuse MD_BLOCK_DOC here to mark "no real leaf block". */
+            block->type = MD_BLOCK_DOC;
+            break;
+
+        case MD_LINE_HR:
+            block->type = MD_BLOCK_HR;
+            break;
+
+        case MD_LINE_ATXHEADER:
+        case MD_LINE_SETEXTHEADER:
+            block->type = MD_BLOCK_H;
+            break;
+
+        case MD_LINE_FENCEDCODE:
+        case MD_LINE_INDENTEDCODE:
+            block->type = MD_BLOCK_CODE;
+            break;
+
+        case MD_LINE_TEXT:
+            block->type = MD_BLOCK_P;
+            break;
+
+        case MD_LINE_HTML:
+            block->type = MD_BLOCK_HTML;
+            break;
+
+        case MD_LINE_SETEXTUNDERLINE:
+        default:
+            MD_UNREACHABLE();
+            break;
+    }
+
+    block->data = line->data;
+    block->n_lines = 0;
+    block->quote_level = line->quote_level;
+
+    ctx->current_block = block;
+    return 0;
+}
+
+static int
+md_end_current_block(MD_CTX* ctx)
+{
+    int ret = 0;
+
+    if(ctx->current_block != NULL) {
+        ctx->current_block = NULL;
+
+        // TODO : consider flush of all complete blocks
+        //MD_CHECK(md_process_all_blocks(ctx));
+    }
+
+abort:
+    return ret;
+}
+
+static int
+md_add_line_into_current_block(MD_CTX* ctx, const MD_LINE_ANALYSIS* analysis)
+{
+    MD_ASSERT(ctx->current_block != NULL);
+
+    if(ctx->current_block->type == MD_BLOCK_CODE || ctx->current_block->type == MD_BLOCK_HTML) {
+        MD_VERBATIMLINE* line;
+
+        line = (MD_VERBATIMLINE*) md_push_block_bytes(ctx, sizeof(MD_VERBATIMLINE));
+        if(line == NULL)
+            return -1;
+
+        line->indent = analysis->indent;
+        line->beg = analysis->beg;
+        line->end = analysis->end;
+    } else {
+        MD_LINE* line;
+
+        line = (MD_LINE*) md_push_block_bytes(ctx, sizeof(MD_LINE));
+        if(line == NULL)
+            return -1;
+
+        line->beg = analysis->beg;
+        line->end = analysis->end;
+    }
+    ctx->current_block->n_lines++;
+
+    return 0;
+}
+
+
+/***********************
+ ***  Line Analysis  ***
+ ***********************/
 
 static int
 md_is_hr_line(MD_CTX* ctx, OFF beg, OFF* p_end)
@@ -1984,7 +2282,7 @@ md_is_hr_line(MD_CTX* ctx, OFF beg, OFF* p_end)
 }
 
 static int
-md_is_atxheader_line(MD_CTX* ctx, OFF beg, OFF* p_beg, OFF* p_end)
+md_is_atxheader_line(MD_CTX* ctx, OFF beg, OFF* p_beg, OFF* p_end, unsigned* p_level)
 {
     int n;
     OFF off = beg + 1;
@@ -1995,7 +2293,7 @@ md_is_atxheader_line(MD_CTX* ctx, OFF beg, OFF* p_beg, OFF* p_end)
 
     if(n > 6)
         return -1;
-    ctx->header_level = n;
+    *p_level = n;
 
     if(!(ctx->r.flags & MD_FLAG_PERMISSIVEATXHEADERS)  &&  off < ctx->size  &&
        CH(off) != _T(' ')  &&  CH(off) != _T('\t')  &&  !ISNEWLINE(off))
@@ -2008,7 +2306,7 @@ md_is_atxheader_line(MD_CTX* ctx, OFF beg, OFF* p_beg, OFF* p_end)
 }
 
 static int
-md_is_setext_underline(MD_CTX* ctx, OFF beg, OFF* p_end)
+md_is_setext_underline(MD_CTX* ctx, OFF beg, OFF* p_end, unsigned* p_level)
 {
     OFF off = beg + 1;
 
@@ -2026,7 +2324,7 @@ md_is_setext_underline(MD_CTX* ctx, OFF beg, OFF* p_end)
     if(off < ctx->size  &&  !ISNEWLINE(off))
         return -1;
 
-    ctx->header_level = (CH(beg) == _T('=') ? 1 : 2);
+    *p_level = (CH(beg) == _T('=') ? 1 : 2);
     return 0;
 }
 
@@ -2048,19 +2346,13 @@ md_is_opening_code_fence(MD_CTX* ctx, OFF beg, OFF* p_end)
     while(off < ctx->size  &&  CH(off) == _T(' '))
         off++;
 
-    /* Optionally, language info can follow. It must not contain '`'. */
-    ctx->code_fence_info_beg = off;
+    /* Optionally, an info string can follow. It must not contain '`'. */
     while(off < ctx->size  &&  CH(off) != _T('`')  &&  !ISNEWLINE(off))
         off++;
     if(off < ctx->size  &&  !ISNEWLINE(off))
         return -1;
 
     *p_end = off;
-
-    /* Right trim of language info. */
-    while(off > ctx->code_fence_info_beg  &&  CH(off-1) == _T(' '))
-        off--;
-    ctx->code_fence_info_end = off;
     return 0;
 }
 
@@ -2296,7 +2588,8 @@ md_is_html_block_end_condition(MD_CTX* ctx, OFF beg, OFF* p_end)
 /* Analyze type of the line and find some its properties. This serves as a
  * main input for determining type and boundaries of a block. */
 static void
-md_analyze_line(MD_CTX* ctx, OFF beg, OFF* p_end, const MD_LINE* pivot_line, MD_LINE* line)
+md_analyze_line(MD_CTX* ctx, OFF beg, OFF* p_end,
+                const MD_LINE_ANALYSIS* pivot_line, MD_LINE_ANALYSIS* line)
 {
     OFF off = beg;
 
@@ -2344,6 +2637,7 @@ redo_indentation_after_blockquote_mark:
     {
         line->type = MD_LINE_INDENTEDCODE;
         line->indent -= ctx->code_indent_offset;
+        line->data = 0;
         goto done;
     }
 
@@ -2396,8 +2690,11 @@ redo_indentation_after_blockquote_mark:
     /* Check whether we are ATX header.
      * (We check the indentation to fix http://spec.commonmark.org/0.26/#example-40) */
     if(line->indent < ctx->code_indent_offset  &&  CH(off) == _T('#')) {
-        if(md_is_atxheader_line(ctx, off, &line->beg, &off) == 0) {
+        unsigned level;
+
+        if(md_is_atxheader_line(ctx, off, &line->beg, &off, &level) == 0) {
             line->type = MD_LINE_ATXHEADER;
+            line->data = level;
             goto done;
         }
     }
@@ -2407,8 +2704,11 @@ redo_indentation_after_blockquote_mark:
         &&  line->quote_level == pivot_line->quote_level
         && (CH(off) == _T('=') || CH(off) == _T('-')))
     {
-        if(md_is_setext_underline(ctx, off, &off) == 0) {
+        unsigned level;
+
+        if(md_is_setext_underline(ctx, off, &off, &level) == 0) {
             line->type = MD_LINE_SETEXTUNDERLINE;
+            line->data = level;
             goto done;
         }
     }
@@ -2427,6 +2727,7 @@ redo_indentation_after_blockquote_mark:
     if(CH(off) == _T('`') || CH(off) == _T('~')) {
         if(md_is_opening_code_fence(ctx, off, &off) == 0) {
             line->type = MD_LINE_FENCEDCODE;
+            line->data = 1;
             goto done;
         }
     }
@@ -2495,142 +2796,67 @@ done:
     *p_end = off;
 }
 
-static int
-md_process_blockquote_nesting(MD_CTX* ctx, unsigned desired_level)
-{
-    int ret = 0;
-
-    /* Bring blockquote nesting to expected level. */
-    if(ctx->quote_level != desired_level) {
-        while(ctx->quote_level < desired_level) {
-            MD_ENTER_BLOCK(MD_BLOCK_QUOTE, NULL);
-            ctx->quote_level++;
-        }
-        while(ctx->quote_level > desired_level) {
-            MD_LEAVE_BLOCK(MD_BLOCK_QUOTE, NULL);
-            ctx->quote_level--;
-        }
-    }
-
-abort:
-    return ret;
-}
+static const MD_LINE_ANALYSIS md_dummy_blank_line = { MD_LINE_BLANK, 0 };
 
-/* Determine type of the block (from type of its 1st line and some context),
- * call block_enter() callback, then appropriate function to parse contents
- * of the block, and finally block_leave() callback.
- */
 static int
-md_process_block(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
+md_process_line(MD_CTX* ctx, const MD_LINE_ANALYSIS** p_pivot_line, const MD_LINE_ANALYSIS* line)
 {
-    MD_BLOCKTYPE block_type;
-    union {
-        MD_BLOCK_H_DETAIL header;
-        MD_BLOCK_CODE_DETAIL code;
-    } det;
+    const MD_LINE_ANALYSIS* pivot_line = *p_pivot_line;
     int ret = 0;
 
-    if(n_lines == 0)
-        return 0;
-
-    memset(&det, 0, sizeof(det));
-
-    /* Make sure the processed leaf block lives in the proper block quote
-     * nesting level. */
-    MD_CHECK(md_process_blockquote_nesting(ctx, lines[0].quote_level));
-
-    /* Derive block type from type of the first line. */
-    switch(lines[0].type) {
-        case MD_LINE_BLANK:
-            return 0;
-
-        case MD_LINE_HR:
-            block_type = MD_BLOCK_HR;
-            break;
-
-        case MD_LINE_ATXHEADER:
-        case MD_LINE_SETEXTHEADER:
-            block_type = MD_BLOCK_H;
-            det.header.level = ctx->header_level;
-            break;
-
-        case MD_LINE_INDENTEDCODE:
-            block_type = MD_BLOCK_CODE;
-            break;
-
-        case MD_LINE_FENCEDCODE:
-            block_type = MD_BLOCK_CODE;
-            if(ctx->code_fence_info_beg < ctx->code_fence_info_end)
-                det.code.info = STR(ctx->code_fence_info_beg);
-            else
-                det.code.info = NULL;
-            det.code.info_size = ctx->code_fence_info_end - ctx->code_fence_info_beg;
-
-            det.code.lang = det.code.info;
-            det.code.lang_size = 0;
-            while(det.code.lang_size < det.code.info_size
-                        &&  !ISWHITESPACE_(det.code.lang[det.code.lang_size]))
-                det.code.lang_size++;
+    /* Some line types form block on their own. */
+    if(line->type == MD_LINE_HR || line->type == MD_LINE_ATXHEADER) {
+        MD_CHECK(md_end_current_block(ctx));
 
-            break;
+        /* Add our single-line block. */
+        MD_CHECK(md_start_new_block(ctx, line));
+        MD_CHECK(md_add_line_into_current_block(ctx, line));
+        MD_CHECK(md_end_current_block(ctx));
+        *p_pivot_line = &md_dummy_blank_line;
+        return 0;
+    }
 
-        case MD_LINE_TEXT:
-            block_type = MD_BLOCK_P;
-            break;
+    /* MD_LINE_SETEXTUNDERLINE changes meaning of the previous block and ends it. */
+    if(line->type == MD_LINE_SETEXTUNDERLINE) {
+        MD_ASSERT(ctx->current_block != NULL);
+        ctx->current_block->type = MD_BLOCK_H;
+        ctx->current_block->data = line->data;
+        MD_CHECK(md_end_current_block(ctx));
+        *p_pivot_line = &md_dummy_blank_line;
+        return 0;
+    }
 
-        case MD_LINE_HTML:
-            block_type = MD_BLOCK_HTML;
-            break;
+    /* The current block also ends if the line has different type or block quote
+     * level. */
+    if(line->type != pivot_line->type || line->quote_level != pivot_line->quote_level)
+        MD_CHECK(md_end_current_block(ctx));
 
-        case MD_LINE_SETEXTUNDERLINE:
-            /* Noop. */
+    /* Skip blank lines, if we can.
+     * (Blank lines are still important if they differ e.g. in block quote level.) */
+    if(line->type == MD_LINE_BLANK) {
+        if(pivot_line->type == MD_LINE_BLANK  &&  line->quote_level == pivot_line->quote_level)
             return 0;
-
-        default:
-            MD_UNREACHABLE();
-            break;
     }
 
-    MD_ENTER_BLOCK(block_type, (void*) &det);
-
-    /* Process the block contents accordingly to is type. */
-    switch(block_type) {
-        case MD_BLOCK_HR:
-            /* Noop. */
-            break;
-
-        case MD_BLOCK_CODE:
-            ret = md_process_code_block(ctx, lines, n_lines);
-            break;
-
-        case MD_BLOCK_HTML:
-            ret = md_process_verbatim_block(ctx, MD_TEXT_HTML, lines, n_lines);
-            break;
-
-        default:
-            ret = md_process_normal_block(ctx, lines, n_lines);
-            break;
+    /* The current line may start a new block. */
+    if(ctx->current_block == NULL) {
+        MD_CHECK(md_start_new_block(ctx, line));
+        *p_pivot_line = line;
     }
-    if(ret != 0)
-        goto abort;
 
-    MD_LEAVE_BLOCK(block_type, (void*) &det);
+    /* In all other cases the line is just a continuation of the current block. */
+    MD_CHECK(md_add_line_into_current_block(ctx, line));
 
 abort:
     return ret;
 }
 
-/* Go through the document, analyze each line, on the fly identify block
- * boundaries and call md_process_block() for sequence of MD_LINE composing
- * the block.
- */
 static int
 md_process_doc(MD_CTX *ctx)
 {
-    MD_LINE* lines = NULL;
-    int alloc_lines = 0;
-    int n_lines = 0;
-    int pivot_line_index = -1;  /* Points to a line determining type of block. */
+    const MD_LINE_ANALYSIS* pivot_line = &md_dummy_blank_line;
+    MD_LINE_ANALYSIS line_buf[2];
+    MD_LINE_ANALYSIS* line = &line_buf[0];
     OFF off = 0;
     int ret = 0;
 
@@ -2639,68 +2865,15 @@ md_process_doc(MD_CTX *ctx)
     MD_ENTER_BLOCK(MD_BLOCK_DOC, NULL);
 
     while(off < ctx->size) {
-        static const MD_LINE dummy_line = { MD_LINE_BLANK, 0 };
-        const MD_LINE* pivot_line;
-        MD_LINE* line;
-
-        if(n_lines >= alloc_lines) {
-            MD_LINE* new_lines;
-
-            alloc_lines = (alloc_lines == 0 ? 32 : alloc_lines * 2);
-            new_lines = (MD_LINE*) realloc(lines, alloc_lines * sizeof(MD_LINE));
-            if(new_lines == NULL) {
-                MD_LOG("realloc() failed.");
-                ret = -1;
-                goto abort;
-            }
+        if(line == pivot_line)
+            line = (line == &line_buf[0] ? &line_buf[1] : &line_buf[0]);
 
-            lines = new_lines;
-        }
-
-        pivot_line = (pivot_line_index >= 0 ? &lines[pivot_line_index] : &dummy_line);
-
-        md_analyze_line(ctx, off, &off, pivot_line, &lines[n_lines]);
-        line = &lines[n_lines];
-
-        /* Some line types form block on their own. */
-        if(line->type == MD_LINE_HR || line->type == MD_LINE_ATXHEADER) {
-            /* Flush accumulated lines. */
-            MD_CHECK(md_process_block(ctx, lines, n_lines));
-
-            /* Flush ourself. */
-            MD_CHECK(md_process_block(ctx, line, 1));
-
-            pivot_line_index = -1;
-            n_lines = 0;
-            continue;
-        }
-
-        /* MD_LINE_SETEXTUNDERLINE changes meaning of the previous block. */
-        if(line->type == MD_LINE_SETEXTUNDERLINE) {
-            MD_ASSERT(n_lines > 0);
-            lines[0].type = MD_LINE_SETEXTHEADER;
-            line->type = MD_LINE_BLANK;
-        }
-
-        /* New block also starts if line type changes or if block quote nesting
-         * level changes. */
-        if(line->type != pivot_line->type  ||  line->quote_level != pivot_line->quote_level) {
-            MD_CHECK(md_process_block(ctx, lines, n_lines));
-
-            /* Keep the current line as the new pivot. */
-            if(line != &lines[0])
-                memcpy(&lines[0], line, sizeof(MD_LINE));
-            pivot_line_index = 0;
-            n_lines = 1;
-            continue;
-        }
-
-        /* Otherwise we just accumulate the line into ongoing block. */
-        n_lines++;
+        md_analyze_line(ctx, off, &off, pivot_line, line);
+        MD_CHECK(md_process_line(ctx, &pivot_line, line));
     }
 
-    /* Process also the last block. */
-    MD_CHECK(md_process_block(ctx, lines, n_lines));
+    /* Process all remaining blocks. */
+    MD_CHECK(md_process_all_blocks(ctx));
 
     /* Close any dangling parent blocks. */
     MD_CHECK(md_process_blockquote_nesting(ctx, 0));
@@ -2708,7 +2881,22 @@ md_process_doc(MD_CTX *ctx)
     MD_LEAVE_BLOCK(MD_BLOCK_DOC, NULL);
 
 abort:
-    free(lines);
+
+#if 0
+    /* Output some memory consumption statistics. */
+    {
+        char buffer[256];
+        sprintf(buffer, "Alloced %u bytes for block buffer.", ctx->alloc_block_bytes);
+        MD_LOG(buffer);
+
+        sprintf(buffer, "Alloced %u bytes for marks buffer.", ctx->alloc_marks * sizeof(MD_MARK));
+        MD_LOG(buffer);
+
+        sprintf(buffer, "Alloced %u bytes for aux. buffer.", ctx->alloc_buffer * sizeof(MD_CHAR));
+        MD_LOG(buffer);
+    }
+#endif
+
     return ret;
 }
 
@@ -2737,6 +2925,7 @@ md_parse(const MD_CHAR* text, MD_SIZE size, const MD_RENDERER* renderer, void* u
     ret = md_process_doc(&ctx);
 
     /* Clean-up. */
+    free(ctx.block_bytes);
     free(ctx.marks);
     free(ctx.buffer);

	md4c C Markdown parser. Fast. SAX-like interface. Compliant to CommonMark specification.
	git clone https://noulin.net/git/md4c.git
	Log \| Files \| Refs \| README \| LICENSE