Implement support for entities outside normal text flow (issue #5). - md4c - C Markdown parser. Fast. SAX-like interface. Compliant to CommonMark specification.

commit c085ab5cfed594592ad9ad0b3d8a801cc2ef0db8
parent ba29d0075eb54cfa8ceee10a0d193fef96a0e674
Author: Martin Mitas <mity@morous.org>
Date:   Mon, 12 Dec 2016 23:23:51 +0100

Implement support for entities outside normal text flow (issue #5).

 * Change API (md4c.h) to propagate different substring type info to renderer.
 * Implement/refactor related code in the parser.
 * Adapt renderer (md2html) to the new API.

Diffstat:
M md2html/md2html.c  | 210 ++++++++++++++++++++++++++++++++++++++++++++-----------------------------------
M md4c/md4c.c  | 544 +++++++++++++++++++++++++++++++++++++++++++++++--------------------------------
M md4c/md4c.h  | 52 +++++++++++++++++++++++++++++++++++-----------------

3 files changed, 475 insertions(+), 331 deletions(-)
diff --git a/md2html/md2html.c b/md2html/md2html.c
@@ -180,89 +180,6 @@ membuf_append_url_escaped(struct membuffer* buf, const char* data, MD_SIZE size)
  ***  HTML rendering helper functions  ***
  *****************************************/
 
-static int image_nesting_level = 0;
-
-static void
-open_ol_block(struct membuffer* out, const MD_BLOCK_OL_DETAIL* det)
-{
-    char buf[64];
-
-    if(det->start == 1) {
-        MEMBUF_APPEND_LITERAL(out, "<ol>");
-        return;
-    }
-
-    snprintf(buf, sizeof(buf), "<ol start=\"%u\">", det->start);
-    MEMBUF_APPEND_LITERAL(out, buf);
-}
-
-static void
-open_code_block(struct membuffer* out, const MD_BLOCK_CODE_DETAIL* det)
-{
-    MEMBUF_APPEND_LITERAL(out, "<pre><code");
-
-    /* If known, output the HTML 5 attribute class="language-LANGNAME". */
-    if(det->lang != NULL) {
-        MEMBUF_APPEND_LITERAL(out, " class=\"language-");
-        membuf_append_escaped(out, det->lang, det->lang_size);
-        MEMBUF_APPEND_LITERAL(out, "\"");
-    }
-
-    MEMBUF_APPEND_LITERAL(out, ">");
-}
-
-static void
-open_td_block(struct membuffer* out, const char* cell_type, const MD_BLOCK_TD_DETAIL* det)
-{
-    MEMBUF_APPEND_LITERAL(out, "<");
-    MEMBUF_APPEND_LITERAL(out, cell_type);
-
-    switch(det->align) {
-        case MD_ALIGN_LEFT:     MEMBUF_APPEND_LITERAL(out, " align=\"left\">"); break;
-        case MD_ALIGN_CENTER:   MEMBUF_APPEND_LITERAL(out, " align=\"center\">"); break;
-        case MD_ALIGN_RIGHT:    MEMBUF_APPEND_LITERAL(out, " align=\"right\">"); break;
-        default:                MEMBUF_APPEND_LITERAL(out, ">"); break;
-    }
-}
-
-static void
-open_a_span(struct membuffer* out, const MD_SPAN_A_DETAIL* det)
-{
-    MEMBUF_APPEND_LITERAL(out, "<a href=\"");
-    membuf_append_url_escaped(out, det->href, det->href_size);
-
-    if(det->title != NULL) {
-        MEMBUF_APPEND_LITERAL(out, "\" title=\"");
-        membuf_append_escaped(out, det->title, det->title_size);
-    }
-
-    MEMBUF_APPEND_LITERAL(out, "\">");
-}
-
-static void
-open_img_span(struct membuffer* out, const MD_SPAN_IMG_DETAIL* det)
-{
-    MEMBUF_APPEND_LITERAL(out, "<img src=\"");
-    membuf_append_url_escaped(out, det->src, det->src_size);
-
-    MEMBUF_APPEND_LITERAL(out, "\" alt=\"");
-
-    image_nesting_level++;
-}
-
-static void
-close_img_span(struct membuffer* out, const MD_SPAN_IMG_DETAIL* det)
-{
-    if(det->title != NULL) {
-        MEMBUF_APPEND_LITERAL(out, "\" title=\"");
-        membuf_append_escaped(out, det->title, det->title_size);
-    }
-
-    MEMBUF_APPEND_LITERAL(out, "\">");
-
-    image_nesting_level--;
-}
-
 static unsigned
 hex_val(char ch)
 {
@@ -275,7 +192,8 @@ hex_val(char ch)
 }
 
 static void
-render_utf8_codepoint(struct membuffer* out, unsigned codepoint)
+render_utf8_codepoint(struct membuffer* out, unsigned codepoint,
+                      void (*fn_append)(struct membuffer*,  const char*, MD_SIZE))
 {
     static const char utf8_replacement_char[] = { 0xef, 0xbf, 0xbd };
 
@@ -303,18 +221,19 @@ render_utf8_codepoint(struct membuffer* out, unsigned codepoint)
     }
 
     if(0 < codepoint  &&  codepoint <= 0x10ffff)
-        membuf_append_escaped(out, (char*)utf8, n);
+        fn_append(out, (char*)utf8, n);
     else
-        membuf_append(out, utf8_replacement_char, 3);
+        fn_append(out, utf8_replacement_char, 3);
 }
 
 /* Translate entity to its UTF-8 equivalent, or output the verbatim one
  * if such entity is unknown (or if the translation is disabled). */
 static void
-render_entity(struct membuffer* out, const MD_CHAR* text, MD_SIZE size)
+render_entity(struct membuffer* out, const MD_CHAR* text, MD_SIZE size,
+              void (*fn_append)(struct membuffer*,  const char*, MD_SIZE))
 {
     if(want_verbatim_entities) {
-        membuf_append(out, text, size);
+        fn_append(out, text, size);
         return;
     }
 
@@ -334,20 +253,123 @@ render_entity(struct membuffer* out, const MD_CHAR* text, MD_SIZE size)
                 codepoint = 10 * codepoint + (text[i] - '0');
         }
 
-        render_utf8_codepoint(out, codepoint);
+        render_utf8_codepoint(out, codepoint, fn_append);
         return;
     } else {
-        /* Named entity (e.g. "&nbsp;". */
+        /* Named entity (e.g. "&nbsp;"). */
         const char* ent;
 
         ent = entity_lookup(text, size);
         if(ent != NULL) {
-            membuf_append_escaped(out, ent, strlen(ent));
+            fn_append(out, ent, strlen(ent));
             return;
         }
     }
 
-    membuf_append_escaped(out, text, size);
+    fn_append(out, text, size);
+}
+
+/* Render an attribute value substring by substring. Substrings typed as
+ * MD_TEXT_ENTITY are passed through render_entity(); all other substrings
+ * are handed directly to fn_append. The walk stops at the first substring
+ * offset which is not below attr->size. */
+static void
+render_attribute(struct membuffer* out, const MD_ATTRIBUTE* attr,
+                 void (*fn_append)(struct membuffer*,  const char*, MD_SIZE))
+{
+    int idx = 0;
+
+    while(attr->substr_offsets[idx] < attr->size) {
+        MD_OFFSET piece_beg = attr->substr_offsets[idx];
+        MD_SIZE piece_size = attr->substr_offsets[idx+1] - piece_beg;
+        const MD_CHAR* piece = attr->text + piece_beg;
+
+        if(attr->substr_types[idx] == MD_TEXT_ENTITY)
+            render_entity(out, piece, piece_size, fn_append);
+        else
+            fn_append(out, piece, piece_size);
+
+        idx++;
+    }
+}
+
+
+static int image_nesting_level = 0;
+
+/* Emit the opening tag of an ordered list. The start attribute is written
+ * only when the list does not start at 1. */
+static void
+open_ol_block(struct membuffer* out, const MD_BLOCK_OL_DETAIL* det)
+{
+    if(det->start != 1) {
+        char buf[64];
+
+        snprintf(buf, sizeof(buf), "<ol start=\"%u\">", det->start);
+        MEMBUF_APPEND_LITERAL(out, buf);
+    } else {
+        MEMBUF_APPEND_LITERAL(out, "<ol>");
+    }
+}
+
+/* Emit "<pre><code ...>" for a code block. When the language attribute has
+ * text, it is rendered as the HTML 5 attribute class="language-LANGNAME". */
+static void
+open_code_block(struct membuffer* out, const MD_BLOCK_CODE_DETAIL* det)
+{
+    const MD_ATTRIBUTE* lang = &det->lang;
+
+    MEMBUF_APPEND_LITERAL(out, "<pre><code");
+
+    if(lang->text != NULL) {
+        MEMBUF_APPEND_LITERAL(out, " class=\"language-");
+        render_attribute(out, lang, membuf_append_escaped);
+        MEMBUF_APPEND_LITERAL(out, "\"");
+    }
+
+    MEMBUF_APPEND_LITERAL(out, ">");
+}
+
+/* Emit the opening tag of a table cell. cell_type is the tag name to write
+ * after '<'; an align attribute is added for left, center and right
+ * alignment, any other alignment value yields the bare tag. */
+static void
+open_td_block(struct membuffer* out, const char* cell_type, const MD_BLOCK_TD_DETAIL* det)
+{
+    MEMBUF_APPEND_LITERAL(out, "<");
+    MEMBUF_APPEND_LITERAL(out, cell_type);
+
+    if(det->align == MD_ALIGN_LEFT) {
+        MEMBUF_APPEND_LITERAL(out, " align=\"left\">");
+    } else if(det->align == MD_ALIGN_CENTER) {
+        MEMBUF_APPEND_LITERAL(out, " align=\"center\">");
+    } else if(det->align == MD_ALIGN_RIGHT) {
+        MEMBUF_APPEND_LITERAL(out, " align=\"right\">");
+    } else {
+        MEMBUF_APPEND_LITERAL(out, ">");
+    }
+}
+
+/* Emit the opening <a> tag. The href is rendered URL-escaped; the title
+ * attribute is written (HTML-escaped) only when the title has text. */
+static void
+open_a_span(struct membuffer* out, const MD_SPAN_A_DETAIL* det)
+{
+    const MD_ATTRIBUTE* href = &det->href;
+    const MD_ATTRIBUTE* title = &det->title;
+
+    MEMBUF_APPEND_LITERAL(out, "<a href=\"");
+    render_attribute(out, href, membuf_append_url_escaped);
+
+    if(title->text != NULL) {
+        MEMBUF_APPEND_LITERAL(out, "\" title=\"");
+        render_attribute(out, title, membuf_append_escaped);
+    }
+
+    MEMBUF_APPEND_LITERAL(out, "\">");
+}
+
+/* Start an <img> tag: write the URL-escaped src and leave the alt attribute
+ * open. The image nesting level is raised; text_callback() consults it when
+ * rendering line breaks. */
+static void
+open_img_span(struct membuffer* out, const MD_SPAN_IMG_DETAIL* det)
+{
+    const MD_ATTRIBUTE* src = &det->src;
+
+    MEMBUF_APPEND_LITERAL(out, "<img src=\"");
+    render_attribute(out, src, membuf_append_url_escaped);
+    MEMBUF_APPEND_LITERAL(out, "\" alt=\"");
+
+    image_nesting_level += 1;
+}
+
+/* Finish an <img> tag: write the optional (HTML-escaped) title attribute,
+ * close the tag and lower the image nesting level again. */
+static void
+close_img_span(struct membuffer* out, const MD_SPAN_IMG_DETAIL* det)
+{
+    const MD_ATTRIBUTE* title = &det->title;
+
+    if(title->text != NULL) {
+        MEMBUF_APPEND_LITERAL(out, "\" title=\"");
+        render_attribute(out, title, membuf_append_escaped);
+    }
+
+    MEMBUF_APPEND_LITERAL(out, "\">");
+    image_nesting_level -= 1;
 }
 
 
@@ -463,11 +485,11 @@ text_callback(MD_TEXTTYPE type, const MD_CHAR* text, MD_SIZE size, void* userdat
     struct membuffer* out = (struct membuffer*) userdata;
 
     switch(type) {
-        case MD_TEXT_NULLCHAR:  render_utf8_codepoint(out, 0x0000); break;
+        case MD_TEXT_NULLCHAR:  render_utf8_codepoint(out, 0x0000, membuf_append); break;
         case MD_TEXT_BR:        MEMBUF_APPEND_LITERAL(out, (image_nesting_level == 0 ? "<br>\n" : " ")); break;
         case MD_TEXT_SOFTBR:    MEMBUF_APPEND_LITERAL(out, (image_nesting_level == 0 ? "\n" : " ")); break;
         case MD_TEXT_HTML:      membuf_append(out, text, size); break;
-        case MD_TEXT_ENTITY:    render_entity(out, text, size); break;
+        case MD_TEXT_ENTITY:    render_entity(out, text, size, membuf_append_escaped); break;
         default:                membuf_append_escaped(out, text, size); break;
     }
 
diff --git a/md4c/md4c.c b/md4c/md4c.c
@@ -840,17 +840,15 @@ struct MD_UNICODE_FOLD_INFO_tag {
  *************************************/
 
 /* Fill buffer with copy of the string between 'beg' and 'end' but replace any
- * line breaks with given replacement character and also optionally resolve any
- * escape sequences.
+ * line breaks with given replacement character.
  *
  * NOTE: Caller is responsible to make sure the buffer is large enough.
  * (Given the output is always shorter than the input, (end - beg) is a good
  * idea of what the caller should allocate.)
  */
 static void
-md_do_normalize_string(MD_CTX* ctx, OFF beg, OFF end, const MD_LINE* lines, int n_lines,
-                       CHAR line_break_replacement_char, int resolve_escapes,
-                       CHAR* buffer, SZ* p_size)
+md_merge_lines(MD_CTX* ctx, OFF beg, OFF end, const MD_LINE* lines, int n_lines,
+               CHAR line_break_replacement_char, CHAR* buffer, SZ* p_size)
 {
     CHAR* ptr = buffer;
     int line_index = 0;
@@ -863,13 +861,6 @@ md_do_normalize_string(MD_CTX* ctx, OFF beg, OFF end, const MD_LINE* lines, int 
             line_end = end;
 
         while(off < line_end) {
-            if(resolve_escapes  &&  CH(off) == _T('\\')  &&
-               off+1 < end  &&  (ISPUNCT(off+1) || ISNEWLINE(off+1))) {
-                if(ISNEWLINE(off+1))
-                    break;
-                off++;
-            }
-
             *ptr = CH(off);
             ptr++;
             off++;
@@ -888,12 +879,11 @@ md_do_normalize_string(MD_CTX* ctx, OFF beg, OFF end, const MD_LINE* lines, int 
     }
 }
 
-/* Wrapper of md_do_normalize_string() which allocates new buffer for the
- * output string. */
+/* Wrapper of md_merge_lines() which allocates new buffer for the output string.
+ */
 static int
-md_normalize_string(MD_CTX* ctx, OFF beg, OFF end, const MD_LINE* lines, int n_lines,
-                    CHAR line_break_replacement_char, int resolve_escapes,
-                    CHAR** p_str, SZ* p_size)
+md_merge_lines_alloc(MD_CTX* ctx, OFF beg, OFF end, const MD_LINE* lines, int n_lines,
+                    CHAR line_break_replacement_char, CHAR** p_str, SZ* p_size)
 {
     CHAR* buffer;
 
@@ -903,8 +893,8 @@ md_normalize_string(MD_CTX* ctx, OFF beg, OFF end, const MD_LINE* lines, int n_l
         return -1;
     }
 
-    md_do_normalize_string(ctx, beg, end, lines, n_lines,
-                line_break_replacement_char, resolve_escapes, buffer, p_size);
+    md_merge_lines(ctx, beg, end, lines, n_lines,
+                line_break_replacement_char, buffer, p_size);
 
     *p_str = buffer;
     return 0;
@@ -1246,6 +1236,218 @@ md_is_html_any(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_
 }
 
 
+/****************************
+ ***  Recognizing Entity  ***
+ ****************************/
+
+/* Check for the digits of a hexadecimal entity: 1 to 8 ISXDIGIT_ characters
+ * starting at text[beg], never reading at or beyond max_end. On success the
+ * offset just past the digits is stored to *p_end and TRUE is returned.
+ * ctx is not used. */
+static int
+md_is_hex_entity_contents(MD_CTX* ctx, const CHAR* text, OFF beg, OFF max_end, OFF* p_end)
+{
+    OFF pos;
+    OFF n_digits;
+
+    /* Scan one digit further than allowed so that an over-long run of
+     * digits is detected and rejected below. */
+    for(pos = beg; pos < max_end; pos++) {
+        if(!ISXDIGIT_(text[pos])  ||  pos - beg > 8)
+            break;
+    }
+
+    n_digits = pos - beg;
+    if(n_digits < 1  ||  n_digits > 8)
+        return FALSE;
+
+    *p_end = pos;
+    return TRUE;
+}
+
+/* Check for the digits of a decimal entity: 1 to 8 ISDIGIT_ characters
+ * starting at text[beg], never reading at or beyond max_end. On success the
+ * offset just past the digits is stored to *p_end and TRUE is returned.
+ * ctx is not used. */
+static int
+md_is_dec_entity_contents(MD_CTX* ctx, const CHAR* text, OFF beg, OFF max_end, OFF* p_end)
+{
+    OFF pos;
+    OFF n_digits;
+
+    /* Scan one digit further than allowed so that an over-long run of
+     * digits is detected and rejected below. */
+    for(pos = beg; pos < max_end; pos++) {
+        if(!ISDIGIT_(text[pos])  ||  pos - beg > 8)
+            break;
+    }
+
+    n_digits = pos - beg;
+    if(n_digits < 1  ||  n_digits > 8)
+        return FALSE;
+
+    *p_end = pos;
+    return TRUE;
+}
+
+/* Check for the name of a named entity (the part between '&' and ';'):
+ * an ISALPHA_ character followed by ISALNUM_ characters, 2 to 48 characters
+ * in total, starting at text[beg].
+ *
+ * max_end is exclusive: no character at or beyond it is ever read. On success
+ * the offset just past the name is stored to *p_end and TRUE is returned;
+ * otherwise FALSE is returned and *p_end is left untouched. ctx is not used.
+ */
+static int
+md_is_named_entity_contents(MD_CTX* ctx, const CHAR* text, OFF beg, OFF max_end, OFF* p_end)
+{
+    OFF off = beg;
+
+    /* Strict '<' is required here: text[max_end] may already lie past the
+     * end of the buffer (e.g. when the text ends with a lone '&'). */
+    if(off < max_end  &&  ISALPHA_(text[off]))
+        off++;
+    else
+        return FALSE;
+
+    /* Scan one character further than allowed so that an over-long name is
+     * detected and rejected below. */
+    while(off < max_end  &&  ISALNUM_(text[off])  &&  off - beg <= 48)
+        off++;
+
+    if(2 <= off - beg  &&  off - beg <= 48) {
+        *p_end = off;
+        return TRUE;
+    } else {
+        return FALSE;
+    }
+}
+
+/* Check whether a complete entity starts at text[beg], which must be '&'.
+ * Recognized forms are hexadecimal ("&#x...;" or "&#X...;"), decimal
+ * ("&#...;") and named ("&name;") entities.
+ *
+ * The text may be any buffer, not just the document held by ctx, so all
+ * characters are read through the text pointer. max_end is exclusive. On
+ * success the offset just past the terminating ';' is stored to *p_end and
+ * TRUE is returned; otherwise FALSE is returned.
+ */
+static int
+md_is_entity_str(MD_CTX* ctx, const CHAR* text, OFF beg, OFF max_end, OFF* p_end)
+{
+    int is_contents;
+    OFF off = beg;
+
+    MD_ASSERT(text[off] == _T('&'));
+    off++;
+
+    if(off+1 < max_end  &&  text[off] == _T('#')  &&  (text[off+1] == _T('x') || text[off+1] == _T('X')))
+        is_contents = md_is_hex_entity_contents(ctx, text, off+2, max_end, &off);
+    else if(off < max_end  &&  text[off] == _T('#'))    /* Must test text[], not the document buffer. */
+        is_contents = md_is_dec_entity_contents(ctx, text, off+1, max_end, &off);
+    else
+        is_contents = md_is_named_entity_contents(ctx, text, off, max_end, &off);
+
+    /* The contents have to be followed immediately by the semicolon. */
+    if(is_contents  &&  off < max_end  &&  text[off] == _T(';')) {
+        *p_end = off+1;
+        return TRUE;
+    } else {
+        return FALSE;
+    }
+}
+
+/* Convenience wrapper of md_is_entity_str() for entities located directly in
+ * the text held by the context (ctx->text). */
+static inline int
+md_is_entity(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end)
+{
+    return md_is_entity_str(ctx, ctx->text, beg, max_end, p_end);
+}
+
+
+/******************************
+ ***  Attribute Management  ***
+ ******************************/
+
+/* Working state used while the substring arrays of an attribute are being
+ * assembled (see md_build_attr_append_substr() and md_build_attribute()). */
+typedef struct MD_ATTRIBUTE_BUILD_tag MD_ATTRIBUTE_BUILD;
+struct MD_ATTRIBUTE_BUILD_tag {
+    MD_TEXTTYPE* substr_types;      /* Type of each recorded substring. */
+    OFF* substr_offsets;            /* Start offset of each recorded substring. */
+    int substr_count;               /* Count of substrings recorded so far. */
+    int substr_alloc;               /* Count of substrings the arrays were last grown for. */
+};
+
+
+#define MD_BUILD_ATTR_NO_ESCAPES    0x0001
+
+/* Record a new substring of the given type starting at offset off, growing
+ * the arrays of the build as needed.
+ *
+ * The offsets array is always kept one slot larger than the types array:
+ * md_build_attribute() stores a terminating end offset at index substr_count
+ * once all substrings have been recorded.
+ *
+ * Returns 0 on success or -1 when memory cannot be allocated. On failure the
+ * build still owns valid (possibly already enlarged) arrays, so the caller
+ * may safely free() build->substr_types and build->substr_offsets.
+ */
+static int
+md_build_attr_append_substr(MD_CTX* ctx, MD_ATTRIBUTE_BUILD* build,
+                            MD_TEXTTYPE type, OFF off)
+{
+    if(build->substr_count >= build->substr_alloc) {
+        MD_TEXTTYPE* new_substr_types;
+        OFF* new_substr_offsets;
+        int new_substr_alloc;
+
+        new_substr_alloc = (build->substr_alloc == 0 ? 8 : build->substr_alloc * 2);
+
+        new_substr_types = (MD_TEXTTYPE*) realloc(build->substr_types,
+                                    new_substr_alloc * sizeof(MD_TEXTTYPE));
+        if(new_substr_types == NULL) {
+            MD_LOG("realloc() failed.");
+            return -1;
+        }
+        /* Store right away: realloc() may have released the old block, so
+         * the old pointer must not survive a failure of the next call. */
+        build->substr_types = new_substr_types;
+
+        /* +1 for the terminating end offset. */
+        new_substr_offsets = (OFF*) realloc(build->substr_offsets,
+                                    (new_substr_alloc+1) * sizeof(OFF));
+        if(new_substr_offsets == NULL) {
+            MD_LOG("realloc() failed.");
+            return -1;
+        }
+        build->substr_offsets = new_substr_offsets;
+
+        /* Publish the new capacity only after both arrays have grown. */
+        build->substr_alloc = new_substr_alloc;
+    }
+
+    build->substr_types[build->substr_count] = type;
+    build->substr_offsets[build->substr_count] = off;
+    build->substr_count++;
+    return 0;
+}
+
+/* Build an attribute (text plus the arrays describing its substrings) from
+ * the raw text of raw_size characters.
+ *
+ * Entities are copied verbatim and recorded as MD_TEXT_ENTITY substrings;
+ * everything else is gathered into MD_TEXT_NORMAL substrings. Unless flags
+ * contain MD_BUILD_ATTR_NO_ESCAPES, a backslash followed by an ISPUNCT_ or
+ * ISNEWLINE_ character is dropped and only the character after it is kept.
+ *
+ * Returns 0 on success. For non-empty input the attribute then refers to
+ * heap memory which md_free_attribute() releases. Returns -1 on failure;
+ * all memory allocated here is released in that case.
+ */
+static int
+md_build_attribute(MD_CTX* ctx, const CHAR* raw_text, SZ raw_size,
+                   unsigned flags, MD_ATTRIBUTE* attr)
+{
+    MD_ATTRIBUTE_BUILD build = {0};
+    CHAR* text;
+    OFF raw_off = 0;
+    OFF off = 0;
+    int ret = 0;
+
+    /* Empty attribute: no allocation at all, the attribute refers to static
+     * arrays (md_free_attribute() skips attributes of zero size). */
+    if(raw_size == 0) {
+        static const MD_TEXTTYPE empty_types[] = { MD_TEXT_NORMAL };
+        static const OFF empty_offsets[] = { 0, 0 };
+
+        attr->text = NULL;
+        attr->size = 0;
+        attr->substr_types = empty_types;
+        attr->substr_offsets = empty_offsets;
+        return 0;
+    }
+
+    /* The output never grows beyond the input: every iteration below writes
+     * at most as many characters as it consumes. */
+    text = (CHAR*) malloc(raw_size * sizeof(CHAR));
+    if(text == NULL) {
+        MD_LOG("malloc() failed.");
+        goto abort;
+    }
+
+    /* NOTE(review): MD_CHECK is defined outside of this view; presumably it
+     * jumps to the abort label when the checked call fails -- confirm. */
+    while(raw_off < raw_size) {
+        if(raw_text[raw_off] == _T('&')) {
+            OFF ent_end;
+
+            /* Each entity becomes a substring of its own. */
+            if(md_is_entity_str(ctx, raw_text, raw_off, raw_size, &ent_end)) {
+                MD_CHECK(md_build_attr_append_substr(ctx, &build, MD_TEXT_ENTITY, off));
+                memcpy(text + off, raw_text + raw_off, ent_end - raw_off);
+                off += ent_end - raw_off;
+                raw_off = ent_end;
+                continue;
+            }
+        }
+
+        /* Open a new normal substring unless we are already inside one. */
+        if(build.substr_count == 0  ||  build.substr_types[build.substr_count-1] != MD_TEXT_NORMAL)
+            MD_CHECK(md_build_attr_append_substr(ctx, &build, MD_TEXT_NORMAL, off));
+
+        /* Skip the backslash of an escape; the escaped character itself is
+         * copied by the statement below. */
+        if(!(flags & MD_BUILD_ATTR_NO_ESCAPES)  &&
+           raw_text[raw_off] == _T('\\')  &&  raw_off+1 < raw_size  &&
+           (ISPUNCT_(raw_text[raw_off+1]) || ISNEWLINE_(raw_text[raw_off+1])))
+            raw_off++;
+
+        text[off++] = raw_text[raw_off++];
+    }
+    /* Terminating end offset, so the size of the last substring can be
+     * computed. NOTE(review): this writes at index substr_count, hence the
+     * offsets array has to hold substr_count+1 entries -- verify against
+     * md_build_attr_append_substr(). */
+    build.substr_offsets[build.substr_count] = off;
+
+    /* Success: hand the buffers over to the attribute. */
+    attr->text = text;
+    attr->size = off;
+    attr->substr_offsets = build.substr_offsets;
+    attr->substr_types = build.substr_types;
+    return 0;
+
+abort:
+    free(text);
+    free(build.substr_offsets);
+    free(build.substr_types);
+    return -1;
+}
+
+/* Release the memory of an attribute built by md_build_attribute().
+ * Attributes of zero size refer to static data only, so nothing is freed
+ * for them. ctx is not used. */
+static void
+md_free_attribute(MD_CTX* ctx, MD_ATTRIBUTE* attr)
+{
+    if(attr->size <= 0)
+        return;
+
+    free((void*) attr->text);
+    free((void*) attr->substr_types);
+    free((void*) attr->substr_offsets);
+}
+
+
 /***************************
  ***  Recognizing Links  ***
  ***************************/
@@ -1260,7 +1462,6 @@ struct MD_LINK_REF_DEF_tag {
     SZ label_size                   : 24;
     unsigned label_needs_free       :  1;
     unsigned title_needs_free       :  1;
-    unsigned dest_contains_escape   :  1;
     SZ title_size;
     OFF dest_beg;
     OFF dest_end;
@@ -1273,7 +1474,6 @@ struct MD_LINK_ATTR_tag {
 
     CHAR* title;
     SZ title_size;
-    int dest_contains_escape;
     int title_needs_free;
 };
 
@@ -1347,8 +1547,7 @@ md_is_link_label(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg,
 
 static int
 md_is_link_destination_A(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end,
-                         OFF* p_contents_beg, OFF* p_contents_end,
-                         int* p_contains_escape)
+                         OFF* p_contents_beg, OFF* p_contents_end)
 {
     OFF off = beg;
 
@@ -1356,11 +1555,8 @@ md_is_link_destination_A(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end,
         return FALSE;
     off++;
 
-    *p_contains_escape = FALSE;
-
     while(off < max_end) {
         if(CH(off) == _T('\\')  &&  off+1 < max_end  &&  ISPUNCT(off+1)) {
-            *p_contains_escape = TRUE;
             off += 2;
             continue;
         }
@@ -1384,17 +1580,13 @@ md_is_link_destination_A(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end,
 
 static int
 md_is_link_destination_B(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end,
-                         OFF* p_contents_beg, OFF* p_contents_end,
-                         int* p_contains_escape)
+                         OFF* p_contents_beg, OFF* p_contents_end)
 {
     OFF off = beg;
     int in_parentheses = 0;
 
-    *p_contains_escape = FALSE;
-
     while(off < max_end) {
         if(CH(off) == _T('\\')  &&  off+1 < max_end  &&  ISPUNCT(off+1)) {
-            *p_contains_escape = TRUE;
             off += 2;
             continue;
         }
@@ -1429,25 +1621,18 @@ md_is_link_destination_B(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end,
     return TRUE;
 }
 
-static int
+static inline int
 md_is_link_destination(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end,
-                       OFF* p_contents_beg, OFF* p_contents_end,
-                       int* p_contains_escape)
+                       OFF* p_contents_beg, OFF* p_contents_end)
 {
-    if(md_is_link_destination_A(ctx, beg, max_end, p_end, p_contents_beg, p_contents_end, p_contains_escape))
-        return TRUE;
-
-    if(md_is_link_destination_B(ctx, beg, max_end, p_end, p_contents_beg, p_contents_end, p_contains_escape))
-        return TRUE;
-
-    return FALSE;
+    return (md_is_link_destination_A(ctx, beg, max_end, p_end, p_contents_beg, p_contents_end)  ||
+            md_is_link_destination_B(ctx, beg, max_end, p_end, p_contents_beg, p_contents_end));
 }
 
 static int
 md_is_link_title(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg,
                  OFF* p_end, int* p_beg_line_index, int* p_end_line_index,
-                 OFF* p_contents_beg, OFF* p_contents_end,
-                 int* p_has_escape)
+                 OFF* p_contents_beg, OFF* p_contents_end)
 {
     OFF off = beg;
     CHAR closer_char;
@@ -1475,14 +1660,12 @@ md_is_link_title(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg,
     off++;
 
     *p_contents_beg = off;
-    *p_has_escape = FALSE;
 
     while(line_index < n_lines) {
         OFF line_end = lines[line_index].end;
 
         while(off < line_end) {
             if(CH(off) == _T('\\')  &&  off+1 < ctx->size  &&  (ISPUNCT(off+1) || ISNEWLINE(off+1))) {
-                *p_has_escape = TRUE;
                 off++;
             } else if(CH(off) == closer_char) {
                 /* Success. */
@@ -1518,12 +1701,10 @@ md_is_link_reference_definition(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
     int label_is_multiline;
     OFF dest_contents_beg;
     OFF dest_contents_end;
-    int dest_contains_escape;
     OFF title_contents_beg;
     OFF title_contents_end;
     int title_contents_line_index;
     int title_is_multiline;
-    int title_has_escape;
     OFF off;
     int line_index = 0;
     int tmp_line_index;
@@ -1554,14 +1735,14 @@ md_is_link_reference_definition(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
 
     /* Link destination. */
     if(!md_is_link_destination(ctx, off, lines[line_index].end,
-                &off, &dest_contents_beg, &dest_contents_end, &dest_contains_escape))
+                &off, &dest_contents_beg, &dest_contents_end))
         return FALSE;
 
     /* (Optional) title. Note we interpret it as a title only if nothing
      * more follows on its last line. */
     if(md_is_link_title(ctx, lines + line_index, n_lines - line_index, off,
                 &off, &title_contents_line_index, &tmp_line_index,
-                &title_contents_beg, &title_contents_end, &title_has_escape)
+                &title_contents_beg, &title_contents_end)
         &&  off >= lines[line_index + tmp_line_index].end)
     {
         title_is_multiline = (tmp_line_index != title_contents_line_index);
@@ -1604,27 +1785,26 @@ md_is_link_reference_definition(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
     } else {
         SZ label_size;
 
-        MD_CHECK(md_normalize_string(ctx, label_contents_beg, label_contents_end,
+        MD_CHECK(md_merge_lines_alloc(ctx, label_contents_beg, label_contents_end,
                     lines + label_contents_line_index, n_lines - label_contents_line_index,
-                    _T(' '), FALSE, &def->label, &label_size));
+                    _T(' '), &def->label, &label_size));
         def->label_size = label_size;
         def->label_needs_free = TRUE;
     }
 
     def->dest_beg = dest_contents_beg;
     def->dest_end = dest_contents_end;
-    def->dest_contains_escape = dest_contains_escape;
 
     if(title_contents_beg >= title_contents_end) {
         def->title = NULL;
         def->title_size = 0;
-    } else if(!title_is_multiline  &&  !title_has_escape) {
+    } else if(!title_is_multiline) {
         def->title = (CHAR*) STR(title_contents_beg);
         def->title_size = title_contents_end - title_contents_beg;
     } else {
-        MD_CHECK(md_normalize_string(ctx, title_contents_beg, title_contents_end,
+        MD_CHECK(md_merge_lines_alloc(ctx, title_contents_beg, title_contents_end,
                     lines + title_contents_line_index, n_lines - title_contents_line_index,
-                    _T('\n'), TRUE, &def->title, &def->title_size));
+                    _T('\n'), &def->title, &def->title_size));
         def->title_needs_free = TRUE;
     }
 
@@ -1758,8 +1938,8 @@ md_is_link_reference(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
         end_line++;
 
     if(beg_line != end_line) {
-        MD_CHECK(md_normalize_string(ctx, beg, end, beg_line,
-                 n_lines - (beg_line - lines), _T(' '), FALSE, &label, &label_size));
+        MD_CHECK(md_merge_lines_alloc(ctx, beg, end, beg_line,
+                 n_lines - (beg_line - lines), _T(' '), &label, &label_size));
     } else {
         label = (CHAR*) STR(beg);
         label_size = end - beg;
@@ -1769,7 +1949,6 @@ md_is_link_reference(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
     if(ret == TRUE) {
         attr->dest_beg = def->dest_beg;
         attr->dest_end = def->dest_end;
-        attr->dest_contains_escape = def->dest_contains_escape;
         attr->title = def->title;
         attr->title_size = def->title_size;
         attr->title_needs_free = FALSE;
@@ -1792,7 +1971,6 @@ md_is_inline_link_spec(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
     OFF title_contents_end;
     int title_contents_line_index;
     int title_is_multiline;
-    int title_has_escape;
     OFF off = beg;
     int ret = FALSE;
 
@@ -1814,7 +1992,7 @@ md_is_inline_link_spec(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
 
     /* (Optional) link destination. */
     if(!md_is_link_destination(ctx, off, lines[line_index].end,
-            &off, &attr->dest_beg, &attr->dest_end, &attr->dest_contains_escape)) {
+            &off, &attr->dest_beg, &attr->dest_end)) {
         attr->dest_beg = off;
         attr->dest_end = off;
     }
@@ -1822,7 +2000,7 @@ md_is_inline_link_spec(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
     /* (Optional) title. */
     if(md_is_link_title(ctx, lines + line_index, n_lines - line_index, off,
                 &off, &title_contents_line_index, &tmp_line_index,
-                &title_contents_beg, &title_contents_end, &title_has_escape))
+                &title_contents_beg, &title_contents_end))
     {
         title_is_multiline = (tmp_line_index != title_contents_line_index);
         title_contents_line_index += line_index;
@@ -1852,14 +2030,14 @@ md_is_inline_link_spec(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
         attr->title = NULL;
         attr->title_size = 0;
         attr->title_needs_free = FALSE;
-    } else if(!title_is_multiline  &&  !title_has_escape) {
+    } else if(!title_is_multiline) {
         attr->title = (CHAR*) STR(title_contents_beg);
         attr->title_size = title_contents_end - title_contents_beg;
         attr->title_needs_free = FALSE;
     } else {
-        MD_CHECK(md_normalize_string(ctx, title_contents_beg, title_contents_end,
+        MD_CHECK(md_merge_lines_alloc(ctx, title_contents_beg, title_contents_end,
                     lines + title_contents_line_index, n_lines - title_contents_line_index,
-                    _T('\n'), TRUE, &attr->title, &attr->title_size));
+                    _T('\n'), &attr->title, &attr->title_size));
         attr->title_needs_free = TRUE;
     }
 
@@ -1978,7 +2156,6 @@ struct MD_MARK_tag {
 /* Mark flags specific for various mark types (so they can share bits). */
 #define MD_MARK_INTRAWORD                   0x40  /* Helper for emphasis '*', '_' ("the rule of 3"). */
 #define MD_MARK_AUTOLINK                    0x40  /* Distinguisher for '<', '>'. */
-#define MD_MARK_LINKDESTCONTAINESESCAPE     0x40  /* Flag that link destination contains an escape. */
 
 
 static MD_MARK*
@@ -2872,8 +3049,6 @@ md_resolve_links(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
             MD_ASSERT(ctx->marks[opener_index+1].ch == 'D');
             ctx->marks[opener_index+1].beg = attr.dest_beg;
             ctx->marks[opener_index+1].end = attr.dest_end;
-            if(attr.dest_contains_escape)
-                ctx->marks[opener_index+1].flags |= MD_MARK_LINKDESTCONTAINESESCAPE;
 
             MD_ASSERT(ctx->marks[opener_index+2].ch == 'D');
             md_mark_store_ptr(ctx, opener_index+2, attr.title);
@@ -2905,7 +3080,7 @@ md_analyze_entity(MD_CTX* ctx, int mark_index)
 {
     MD_MARK* opener = &ctx->marks[mark_index];
     MD_MARK* closer;
-    OFF beg, end, off;
+    OFF off;
 
     /* Cannot be entity if there is no closer as the next mark.
      * (Any other mark between would mean strange character which cannot be
@@ -2920,50 +3095,12 @@ md_analyze_entity(MD_CTX* ctx, int mark_index)
     if(closer->ch != ';')
         return;
 
-    if(CH(opener->end) == _T('#')) {
-        if(CH(opener->end+1) == _T('x') || CH(opener->end+1) == _T('X')) {
-            /* It can be only a hexadecimal entity. 
-             * Check it has 1 - 8 hexadecimal digits. */
-            beg = opener->end+2;
-            end = closer->beg;
-            if(!(1 <= end - beg  &&  end - beg <= 8))
-                return;
-            for(off = beg; off < end; off++) {
-                if(!ISXDIGIT(off))
-                    return;
-            }
-        } else {
-            /* It can be only a decimal entity.
-             * Check it has 1 - 8 decimal digits. */
-            beg = opener->end+1;
-            end = closer->beg;
-            if(!(1 <= end - beg  &&  end - beg <= 8))
-                return;
-            for(off = beg; off < end; off++) {
-                if(!ISDIGIT(off))
-                    return;
-            }
-        }
-    } else {
-        /* It can be only a named entity. 
-         * Check it starts with letter and 1-47 alnum chars follow. */
-        beg = opener->end;
-        end = closer->beg;
-        if(!(2 <= end - beg  &&  end - beg <= 48))
-            return;
-        if(!ISALPHA(beg))
-            return;
-        for(off = beg + 1; off < end; off++) {
-            if(!ISALNUM(off))
-                return;
-        }
-    }
+    if(md_is_entity(ctx, opener->beg, closer->end, &off)) {
+        MD_ASSERT(off == closer->end);
 
-    /* Mark us as an entity.
-     * As entity has no span, we may just turn the range into a single mark.
-     * (This also causes we do not get called for ';'. */
-    md_resolve_range(ctx, NULL, mark_index, mark_index+1);
-    opener->end = closer->end;
+        md_resolve_range(ctx, NULL, mark_index, mark_index+1);
+        opener->end = closer->end;
+    }
 }
 
 static void
@@ -3230,75 +3367,36 @@ md_analyze_link_contents(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg
 }
 
 static int
-md_unescape_link_dest(MD_CTX* ctx, OFF beg, OFF end, SZ* p_size)
+md_enter_leave_span_a(MD_CTX* ctx, int enter, MD_SPANTYPE type,
+                      const CHAR* dest, SZ dest_size, int prohibit_escapes_in_dest,
+                      const CHAR* title, SZ title_size)
 {
-    CHAR* ptr;
-    OFF off = beg;
+    MD_SPAN_A_DETAIL det;
     int ret = 0;
 
-    MD_TEMP_BUFFER((end - beg) * sizeof(CHAR));
-    ptr = ctx->buffer;
+    /* Note that we rely here on the fact that MD_SPAN_A_DETAIL and
+     * MD_SPAN_IMG_DETAIL are binary-compatible. */
+    memset(&det, 0, sizeof(MD_SPAN_A_DETAIL));
+    MD_CHECK(md_build_attribute(ctx, dest, dest_size,
+                    (prohibit_escapes_in_dest ? MD_BUILD_ATTR_NO_ESCAPES : 0),
+                    &det.href));
+    MD_CHECK(md_build_attribute(ctx, title, title_size, 0, &det.title));
 
-    while(off < end) {
-        if(CH(off) == _T('\\')  &&  off+1 < end  &&  ISPUNCT(off+1)) {
-            off++;
-            continue;
-        }
-
-        *ptr = CH(off);
-        ptr++;
-        off++;
-    }
-
-    *p_size = ptr - ctx->buffer;
-
-abort:
-    return ret;
-}
-
-static int
-md_setup_span_a_detail(MD_CTX* ctx, const MD_MARK* mark, MD_SPAN_A_DETAIL* det)
-{
-    const MD_MARK* dest_mark = mark+1;
-    const MD_MARK* title_mark = mark+2;
-    int ret = 0;
-
-    MD_ASSERT(dest_mark->ch == 'D');
-    if(dest_mark->beg < dest_mark->end) {
-        if(dest_mark->flags & MD_MARK_LINKDESTCONTAINESESCAPE) {
-            MD_CHECK(md_unescape_link_dest(ctx, dest_mark->beg, dest_mark->end, &det->href_size));
-            det->href = ctx->buffer;
-        } else {
-            det->href = STR(dest_mark->beg);
-            det->href_size = dest_mark->end - dest_mark->beg;
-        }
-    } else {
-        det->href = NULL;
-        det->href_size = 0;
-    }
-
-    MD_ASSERT(title_mark->ch == 'D');
-    det->title = md_mark_get_ptr(ctx, title_mark - ctx->marks);
-    det->title_size = title_mark->prev;
+    if(enter)
+        MD_ENTER_SPAN(type, &det);
+    else
+        MD_LEAVE_SPAN(type, &det);
 
 abort:
+    md_free_attribute(ctx, &det.href);
+    md_free_attribute(ctx, &det.title);
     return ret;
 }
 
-static inline int
-md_setup_span_img_detail(MD_CTX* ctx, MD_MARK* mark, MD_SPAN_IMG_DETAIL* det)
-{
-    /* MD_SPAN_A_DETAIL and MD_SPAN_IMG_DETAIL are binary-compatible. */
-    return md_setup_span_a_detail(ctx, mark, (MD_SPAN_A_DETAIL*) det);
-}
-
 /* Render the output, accordingly to the analyzed ctx->marks. */
 static int
 md_process_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
 {
-    union {
-        MD_SPAN_A_DETAIL a;
-    } det;
     MD_TEXTTYPE text_type;
     const MD_LINE* line = lines;
     MD_MARK* prev_mark = NULL;
@@ -3373,15 +3471,21 @@ md_process_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
 
                 case '[':       /* Link, image. */
                 case '!':
-                    /* Note we here rely on fact that MD_SPAN_A_DETAIL and
-                     * MD_SPAN_IMG_DETAIL are binary-compatible. */
-                    MD_CHECK(md_setup_span_a_detail(ctx, mark, &det.a));
-                    MD_ENTER_SPAN((mark->ch == '!' ? MD_SPAN_IMG : MD_SPAN_A), &det.a);
-                    break;
                 case ']':
-                    MD_CHECK(md_setup_span_a_detail(ctx, &ctx->marks[mark->prev], &det.a));
-                    MD_LEAVE_SPAN((ctx->marks[mark->prev].ch == '!' ? MD_SPAN_IMG : MD_SPAN_A), &det.a);
+                {
+                    const MD_MARK* opener = (mark->ch != ']' ? mark : &ctx->marks[mark->prev]);
+                    const MD_MARK* dest_mark = opener+1;
+                    const MD_MARK* title_mark = opener+2;
+
+                    MD_ASSERT(dest_mark->ch == 'D');
+                    MD_ASSERT(title_mark->ch == 'D');
+
+                    MD_CHECK(md_enter_leave_span_a(ctx, (mark->ch != ']'),
+                                (opener->ch == '!' ? MD_SPAN_IMG : MD_SPAN_A),
+                                STR(dest_mark->beg), dest_mark->end - dest_mark->beg, FALSE,
+                                md_mark_get_ptr(ctx, title_mark - ctx->marks), title_mark->prev));
                     break;
+                }
 
                 case '<':
                 case '>':       /* Autolink or raw HTML. */
@@ -3397,30 +3501,24 @@ md_process_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
 
                 case '@':       /* Permissive e-mail autolink. */
                 case ':':       /* Permissive URL autolink. */
-                    if(mark->flags & MD_MARK_OPENER) {
-                        if(mark->ch == '@') {
-                            SZ sz = 7 + ctx->marks[mark->next].beg - mark->end;
-
-                            MD_TEMP_BUFFER(sz * sizeof(CHAR));
-                            memcpy(ctx->buffer, _T("mailto:"), 7 * sizeof(CHAR));
-                            memcpy(ctx->buffer + 7, STR(mark->end), (sz-7) * sizeof(CHAR));
-
-                            det.a.href_size = sz;
-                            det.a.href = ctx->buffer;
-                        } else {
-                            det.a.href_size = ctx->marks[mark->next].beg - mark->end;
-                            det.a.href = STR(mark->end);
-                        }
-                        det.a.title = NULL;
-                        det.a.title_size = 0;
-                        MD_ENTER_SPAN(MD_SPAN_A, (void*) &det);
-                    } else {
-                        /* The detail already has to be initialized: There cannot
-                         * be any resolved mark between the autolink opener and
-                         * closer. */
-                        MD_LEAVE_SPAN(MD_SPAN_A, (void*) &det);
+                {
+                    const MD_MARK* opener = ((mark->flags & MD_MARK_OPENER) ? mark : &ctx->marks[mark->prev]);
+                    const MD_MARK* closer = &ctx->marks[opener->next];
+                    const CHAR* dest = STR(opener->end);
+                    SZ dest_size = closer->beg - opener->end;
+
+                    if(opener->ch == '@') {
+                        dest_size += 7;
+                        MD_TEMP_BUFFER(dest_size * sizeof(CHAR));
+                        memcpy(ctx->buffer, _T("mailto:"), 7 * sizeof(CHAR));
+                        memcpy(ctx->buffer + 7, dest, (dest_size-7) * sizeof(CHAR));
+                        dest = ctx->buffer;
                     }
+
+                    MD_CHECK(md_enter_leave_span_a(ctx, (mark->flags & MD_MARK_OPENER),
+                                MD_SPAN_A, dest, dest_size, TRUE, NULL, 0));
                     break;
+                }
 
                 case '&':       /* Entity. */
                     MD_TEXT(MD_TEXT_ENTITY, STR(mark->beg), mark->end - mark->beg);
@@ -3726,6 +3824,7 @@ md_setup_fenced_code_detail(MD_CTX* ctx, const MD_BLOCK* block, MD_BLOCK_CODE_DE
     const MD_VERBATIMLINE* fence_line = (const MD_VERBATIMLINE*)(block + 1);
     OFF beg = fence_line->beg;
     OFF end = fence_line->end;
+    OFF lang_end;
     CHAR fence_ch = CH(fence_line->beg);
     int ret = 0;
 
@@ -3740,25 +3839,26 @@ md_setup_fenced_code_detail(MD_CTX* ctx, const MD_BLOCK* block, MD_BLOCK_CODE_DE
     while(end > beg  &&  CH(end-1) == _T(' '))
         end--;
 
-    if(beg < end) {
-        MD_LINE line = { beg, end };
-        SZ size;
+    /* Build info string attribute. */
+    MD_CHECK(md_build_attribute(ctx, STR(beg), end - beg, 0, &det->info));
 
-        MD_TEMP_BUFFER((end - beg) * sizeof(CHAR));
-        md_do_normalize_string(ctx, beg, end, &line, 1, _T(' '), TRUE, ctx->buffer, &size);
-
-        det->info = ctx->buffer;
-        det->info_size = size;
-
-        det->lang = det->info;
-        while(det->lang_size < det->info_size  &&  !ISWHITESPACE_(det->lang[det->lang_size]))
-            det->lang_size++;
-    }
+    /* Build lang string attribute. */
+    lang_end = beg;
+    while(lang_end < end  &&  !ISWHITESPACE(lang_end))
+        lang_end++;
+    MD_CHECK(md_build_attribute(ctx, STR(beg), lang_end - beg, 0, &det->lang));
 
 abort:
     return ret;
 }
 
+static inline void
+md_clean_fenced_code_detail(MD_CTX* ctx, MD_BLOCK_CODE_DETAIL* det)
+{
+    md_free_attribute(ctx, &det->info);
+    md_free_attribute(ctx, &det->lang);
+}
+
 static int
 md_process_leaf_block(MD_CTX* ctx, const MD_BLOCK* block)
 {
@@ -3767,6 +3867,7 @@ md_process_leaf_block(MD_CTX* ctx, const MD_BLOCK* block)
         MD_BLOCK_CODE_DETAIL code;
     } det;
     int is_in_tight_list;
+    int clean_fence_code_detail = FALSE;
     int ret = 0;
 
     memset(&det, 0, sizeof(det));
@@ -3783,8 +3884,11 @@ md_process_leaf_block(MD_CTX* ctx, const MD_BLOCK* block)
 
         case MD_BLOCK_CODE:
             /* For fenced code block, we may need to set the info string. */
-            if(block->data != 0)
+            if(block->data != 0) {
+                memset(&det.code, 0, sizeof(MD_BLOCK_CODE_DETAIL));
+                clean_fence_code_detail = TRUE;
                 MD_CHECK(md_setup_fenced_code_detail(ctx, block, &det.code));
+            }
             break;
 
         default:
@@ -3802,32 +3906,32 @@ md_process_leaf_block(MD_CTX* ctx, const MD_BLOCK* block)
             break;
 
         case MD_BLOCK_CODE:
-            ret = md_process_code_block_contents(ctx, (block->data != 0),
-                            (const MD_VERBATIMLINE*)(block + 1), block->n_lines);
+            MD_CHECK(md_process_code_block_contents(ctx, (block->data != 0),
+                            (const MD_VERBATIMLINE*)(block + 1), block->n_lines));
             break;
 
         case MD_BLOCK_HTML:
-            ret = md_process_verbatim_block_contents(ctx, MD_TEXT_HTML,
-                            (const MD_VERBATIMLINE*)(block + 1), block->n_lines);
+            MD_CHECK(md_process_verbatim_block_contents(ctx, MD_TEXT_HTML,
+                            (const MD_VERBATIMLINE*)(block + 1), block->n_lines));
             break;
 
         case MD_BLOCK_TABLE:
-            ret = md_process_table_block_contents(ctx, block->data,
-                            (const MD_LINE*)(block + 1), block->n_lines);
+            MD_CHECK(md_process_table_block_contents(ctx, block->data,
+                            (const MD_LINE*)(block + 1), block->n_lines));
             break;
 
         default:
-            ret = md_process_normal_block_contents(ctx,
-                            (const MD_LINE*)(block + 1), block->n_lines);
+            MD_CHECK(md_process_normal_block_contents(ctx,
+                            (const MD_LINE*)(block + 1), block->n_lines));
             break;
     }
-    if(ret != 0)
-        goto abort;
 
     if(!is_in_tight_list  ||  block->type != MD_BLOCK_P)
         MD_LEAVE_BLOCK(block->type, (void*) &det);
 
 abort:
+    if(clean_fence_code_detail)
+        md_clean_fenced_code_detail(ctx, &det.code);
     return ret;
 }
 
diff --git a/md4c/md4c.h b/md4c/md4c.h
@@ -176,6 +176,35 @@ enum MD_ALIGN_tag {
 };
 
 
+/* String attribute.
+ *
+ * This wraps strings which are outside of a normal text flow and which are
+ * propagated within various detailed structures, but which still may contain
+ * string portions of different types, e.g. entities.
+ *
+ * So, for example, let's consider an image whose title attribute string is
+ * set to "foo &quot; bar". (Note the string size is 14.)
+ *
+ * Then:
+ *  -- [0]: "foo "   (substr_types[0] == MD_TEXT_NORMAL; substr_offsets[0] == 0)
+ *  -- [1]: "&quot;" (substr_types[1] == MD_TEXT_ENTITY; substr_offsets[1] == 4)
+ *  -- [2]: " bar"   (substr_types[2] == MD_TEXT_NORMAL; substr_offsets[2] == 10)
+ *  -- [3]: (n/a)    (n/a                              ; substr_offsets[3] == 14)
+ *
+ * Note that these conditions are guaranteed:
+ *  -- substr_offsets[0] == 0
+ *  -- substr_offsets[LAST+1] == size
+ *  -- Only MD_TEXT_NORMAL and MD_TEXT_ENTITY substrings can appear.
+ */
+typedef struct MD_ATTRIBUTE_tag MD_ATTRIBUTE;
+struct MD_ATTRIBUTE_tag {
+    const MD_CHAR* text;
+    MD_SIZE size;
+    const MD_TEXTTYPE* substr_types;
+    const MD_OFFSET* substr_offsets;
+};
+
+
 /* Detailed info for MD_BLOCK_OL_DETAIL. */
 typedef struct MD_BLOCK_OL_DETAIL_tag MD_BLOCK_OL_DETAIL;
 struct MD_BLOCK_OL_DETAIL_tag {
@@ -191,13 +220,8 @@ struct MD_BLOCK_H_DETAIL_tag {
 /* Detailed info for MD_BLOCK_CODE. */
 typedef struct MD_BLOCK_CODE_DETAIL_tag MD_BLOCK_CODE_DETAIL;
 struct MD_BLOCK_CODE_DETAIL_tag {
-    /* Complete "info string" */
-    const MD_CHAR* info;
-    MD_SIZE info_size;
-
-    /* Language portion of the info string. */
-    const MD_CHAR* lang;
-    MD_SIZE lang_size;
+    MD_ATTRIBUTE info;
+    MD_ATTRIBUTE lang;
 };
 
 /* Detailed info for MD_BLOCK_TH and MD_BLOCK_TD. */
@@ -209,21 +233,15 @@ struct MD_BLOCK_TD_DETAIL_tag {
 /* Detailed info for MD_SPAN_A. */
 typedef struct MD_SPAN_A_DETAIL_tag MD_SPAN_A_DETAIL;
 struct MD_SPAN_A_DETAIL_tag {
-    const MD_CHAR* href;
-    MD_SIZE href_size;
-
-    const MD_CHAR* title;
-    MD_SIZE title_size;
+    MD_ATTRIBUTE href;
+    MD_ATTRIBUTE title;
 };
 
 /* Detailed info for MD_SPAN_IMG. */
 typedef struct MD_SPAN_IMG_DETAIL_tag MD_SPAN_IMG_DETAIL;
 struct MD_SPAN_IMG_DETAIL_tag {
-    const MD_CHAR* src;
-    MD_SIZE src_size;
-
-    const MD_CHAR* title;
-    MD_SIZE title_size;
+    MD_ATTRIBUTE src;
+    MD_ATTRIBUTE title;
 };

	md4c C Markdown parser. Fast. SAX-like interface. Compliant to CommonMark specification.
	git clone https://noulin.net/git/md4c.git
	Log \| Files \| Refs \| README \| LICENSE

M	md2html/md2html.c	\|	210	++++++++++++++++++++++++++++++++++++++++++++-----------------------------------
M	md4c/md4c.c	\|	544	+++++++++++++++++++++++++++++++++++++++++++++++--------------------------------
M	md4c/md4c.h	\|	52	+++++++++++++++++++++++++++++++++++-----------------