commit c085ab5cfed594592ad9ad0b3d8a801cc2ef0db8
parent ba29d0075eb54cfa8ceee10a0d193fef96a0e674
Author: Martin Mitas <mity@morous.org>
Date: Mon, 12 Dec 2016 23:23:51 +0100
Implement support for entities outside normal text flow (issue #5).
* Change API (md4c.h) to propagate different substring type info to renderer.
* Implement/refactor related code in the parser.
* Adapt renderer (md2html) to the new API.
Diffstat:
| M | md2html/md2html.c | | | 210 | ++++++++++++++++++++++++++++++++++++++++++++----------------------------------- |
| M | md4c/md4c.c | | | 544 | +++++++++++++++++++++++++++++++++++++++++++++++-------------------------------- |
| M | md4c/md4c.h | | | 52 | +++++++++++++++++++++++++++++++++++----------------- |
3 files changed, 475 insertions(+), 331 deletions(-)
diff --git a/md2html/md2html.c b/md2html/md2html.c
@@ -180,89 +180,6 @@ membuf_append_url_escaped(struct membuffer* buf, const char* data, MD_SIZE size)
*** HTML rendering helper functions ***
*****************************************/
-static int image_nesting_level = 0;
-
-static void
-open_ol_block(struct membuffer* out, const MD_BLOCK_OL_DETAIL* det)
-{
- char buf[64];
-
- if(det->start == 1) {
- MEMBUF_APPEND_LITERAL(out, "<ol>");
- return;
- }
-
- snprintf(buf, sizeof(buf), "<ol start=\"%u\">", det->start);
- MEMBUF_APPEND_LITERAL(out, buf);
-}
-
-static void
-open_code_block(struct membuffer* out, const MD_BLOCK_CODE_DETAIL* det)
-{
- MEMBUF_APPEND_LITERAL(out, "<pre><code");
-
- /* If known, output the HTML 5 attribute class="language-LANGNAME". */
- if(det->lang != NULL) {
- MEMBUF_APPEND_LITERAL(out, " class=\"language-");
- membuf_append_escaped(out, det->lang, det->lang_size);
- MEMBUF_APPEND_LITERAL(out, "\"");
- }
-
- MEMBUF_APPEND_LITERAL(out, ">");
-}
-
-static void
-open_td_block(struct membuffer* out, const char* cell_type, const MD_BLOCK_TD_DETAIL* det)
-{
- MEMBUF_APPEND_LITERAL(out, "<");
- MEMBUF_APPEND_LITERAL(out, cell_type);
-
- switch(det->align) {
- case MD_ALIGN_LEFT: MEMBUF_APPEND_LITERAL(out, " align=\"left\">"); break;
- case MD_ALIGN_CENTER: MEMBUF_APPEND_LITERAL(out, " align=\"center\">"); break;
- case MD_ALIGN_RIGHT: MEMBUF_APPEND_LITERAL(out, " align=\"right\">"); break;
- default: MEMBUF_APPEND_LITERAL(out, ">"); break;
- }
-}
-
-static void
-open_a_span(struct membuffer* out, const MD_SPAN_A_DETAIL* det)
-{
- MEMBUF_APPEND_LITERAL(out, "<a href=\"");
- membuf_append_url_escaped(out, det->href, det->href_size);
-
- if(det->title != NULL) {
- MEMBUF_APPEND_LITERAL(out, "\" title=\"");
- membuf_append_escaped(out, det->title, det->title_size);
- }
-
- MEMBUF_APPEND_LITERAL(out, "\">");
-}
-
-static void
-open_img_span(struct membuffer* out, const MD_SPAN_IMG_DETAIL* det)
-{
- MEMBUF_APPEND_LITERAL(out, "<img src=\"");
- membuf_append_url_escaped(out, det->src, det->src_size);
-
- MEMBUF_APPEND_LITERAL(out, "\" alt=\"");
-
- image_nesting_level++;
-}
-
-static void
-close_img_span(struct membuffer* out, const MD_SPAN_IMG_DETAIL* det)
-{
- if(det->title != NULL) {
- MEMBUF_APPEND_LITERAL(out, "\" title=\"");
- membuf_append_escaped(out, det->title, det->title_size);
- }
-
- MEMBUF_APPEND_LITERAL(out, "\">");
-
- image_nesting_level--;
-}
-
static unsigned
hex_val(char ch)
{
@@ -275,7 +192,8 @@ hex_val(char ch)
}
static void
-render_utf8_codepoint(struct membuffer* out, unsigned codepoint)
+render_utf8_codepoint(struct membuffer* out, unsigned codepoint,
+ void (*fn_append)(struct membuffer*, const char*, MD_SIZE))
{
static const char utf8_replacement_char[] = { 0xef, 0xbf, 0xbd };
@@ -303,18 +221,19 @@ render_utf8_codepoint(struct membuffer* out, unsigned codepoint)
}
if(0 < codepoint && codepoint <= 0x10ffff)
- membuf_append_escaped(out, (char*)utf8, n);
+ fn_append(out, (char*)utf8, n);
else
- membuf_append(out, utf8_replacement_char, 3);
+ fn_append(out, utf8_replacement_char, 3);
}
/* Translate entity to its UTF-8 equivalent, or output the verbatim one
* if such entity is unknown (or if the translation is disabled). */
static void
-render_entity(struct membuffer* out, const MD_CHAR* text, MD_SIZE size)
+render_entity(struct membuffer* out, const MD_CHAR* text, MD_SIZE size,
+ void (*fn_append)(struct membuffer*, const char*, MD_SIZE))
{
if(want_verbatim_entities) {
- membuf_append(out, text, size);
+ fn_append(out, text, size);
return;
}
@@ -334,20 +253,123 @@ render_entity(struct membuffer* out, const MD_CHAR* text, MD_SIZE size)
codepoint = 10 * codepoint + (text[i] - '0');
}
- render_utf8_codepoint(out, codepoint);
+ render_utf8_codepoint(out, codepoint, fn_append);
return;
} else {
- /* Named entity (e.g. " ". */
+ /* Named entity (e.g. " "). */
const char* ent;
ent = entity_lookup(text, size);
if(ent != NULL) {
- membuf_append_escaped(out, ent, strlen(ent));
+ fn_append(out, ent, strlen(ent));
return;
}
}
- membuf_append_escaped(out, text, size);
+ fn_append(out, text, size);
+}
+
+static void
+render_attribute(struct membuffer* out, const MD_ATTRIBUTE* attr,
+ void (*fn_append)(struct membuffer*, const char*, MD_SIZE))
+{
+ int i;
+
+ for(i = 0; attr->substr_offsets[i] < attr->size; i++) {
+ MD_TEXTTYPE type = attr->substr_types[i];
+ MD_OFFSET off = attr->substr_offsets[i];
+ MD_SIZE size = attr->substr_offsets[i+1] - off;
+ const MD_CHAR* text = attr->text + off;
+
+ switch(type) {
+ case MD_TEXT_ENTITY: render_entity(out, text, size, fn_append); break;
+ default: fn_append(out, text, size); break;
+ }
+ }
+}
+
+
+static int image_nesting_level = 0;
+
+static void
+open_ol_block(struct membuffer* out, const MD_BLOCK_OL_DETAIL* det)
+{
+ char buf[64];
+
+ if(det->start == 1) {
+ MEMBUF_APPEND_LITERAL(out, "<ol>");
+ return;
+ }
+
+ snprintf(buf, sizeof(buf), "<ol start=\"%u\">", det->start);
+ MEMBUF_APPEND_LITERAL(out, buf);
+}
+
+static void
+open_code_block(struct membuffer* out, const MD_BLOCK_CODE_DETAIL* det)
+{
+ MEMBUF_APPEND_LITERAL(out, "<pre><code");
+
+ /* If known, output the HTML 5 attribute class="language-LANGNAME". */
+ if(det->lang.text != NULL) {
+ MEMBUF_APPEND_LITERAL(out, " class=\"language-");
+ render_attribute(out, &det->lang, membuf_append_escaped);
+ MEMBUF_APPEND_LITERAL(out, "\"");
+ }
+
+ MEMBUF_APPEND_LITERAL(out, ">");
+}
+
+static void
+open_td_block(struct membuffer* out, const char* cell_type, const MD_BLOCK_TD_DETAIL* det)
+{
+ MEMBUF_APPEND_LITERAL(out, "<");
+ MEMBUF_APPEND_LITERAL(out, cell_type);
+
+ switch(det->align) {
+ case MD_ALIGN_LEFT: MEMBUF_APPEND_LITERAL(out, " align=\"left\">"); break;
+ case MD_ALIGN_CENTER: MEMBUF_APPEND_LITERAL(out, " align=\"center\">"); break;
+ case MD_ALIGN_RIGHT: MEMBUF_APPEND_LITERAL(out, " align=\"right\">"); break;
+ default: MEMBUF_APPEND_LITERAL(out, ">"); break;
+ }
+}
+
+static void
+open_a_span(struct membuffer* out, const MD_SPAN_A_DETAIL* det)
+{
+ MEMBUF_APPEND_LITERAL(out, "<a href=\"");
+ render_attribute(out, &det->href, membuf_append_url_escaped);
+
+ if(det->title.text != NULL) {
+ MEMBUF_APPEND_LITERAL(out, "\" title=\"");
+ render_attribute(out, &det->title, membuf_append_escaped);
+ }
+
+ MEMBUF_APPEND_LITERAL(out, "\">");
+}
+
+static void
+open_img_span(struct membuffer* out, const MD_SPAN_IMG_DETAIL* det)
+{
+ MEMBUF_APPEND_LITERAL(out, "<img src=\"");
+ render_attribute(out, &det->src, membuf_append_url_escaped);
+
+ MEMBUF_APPEND_LITERAL(out, "\" alt=\"");
+
+ image_nesting_level++;
+}
+
+static void
+close_img_span(struct membuffer* out, const MD_SPAN_IMG_DETAIL* det)
+{
+ if(det->title.text != NULL) {
+ MEMBUF_APPEND_LITERAL(out, "\" title=\"");
+ render_attribute(out, &det->title, membuf_append_escaped);
+ }
+
+ MEMBUF_APPEND_LITERAL(out, "\">");
+
+ image_nesting_level--;
}
@@ -463,11 +485,11 @@ text_callback(MD_TEXTTYPE type, const MD_CHAR* text, MD_SIZE size, void* userdat
struct membuffer* out = (struct membuffer*) userdata;
switch(type) {
- case MD_TEXT_NULLCHAR: render_utf8_codepoint(out, 0x0000); break;
+ case MD_TEXT_NULLCHAR: render_utf8_codepoint(out, 0x0000, membuf_append); break;
case MD_TEXT_BR: MEMBUF_APPEND_LITERAL(out, (image_nesting_level == 0 ? "<br>\n" : " ")); break;
case MD_TEXT_SOFTBR: MEMBUF_APPEND_LITERAL(out, (image_nesting_level == 0 ? "\n" : " ")); break;
case MD_TEXT_HTML: membuf_append(out, text, size); break;
- case MD_TEXT_ENTITY: render_entity(out, text, size); break;
+ case MD_TEXT_ENTITY: render_entity(out, text, size, membuf_append_escaped); break;
default: membuf_append_escaped(out, text, size); break;
}
diff --git a/md4c/md4c.c b/md4c/md4c.c
@@ -840,17 +840,15 @@ struct MD_UNICODE_FOLD_INFO_tag {
*************************************/
/* Fill buffer with copy of the string between 'beg' and 'end' but replace any
- * line breaks with given replacement character and also optionally resolve any
- * escape sequences.
+ * line breaks with given replacement character.
*
* NOTE: Caller is responsible to make sure the buffer is large enough.
* (Given the output is always shorter then input, (end - beg) is good idea
* what the caller should allocate.)
*/
static void
-md_do_normalize_string(MD_CTX* ctx, OFF beg, OFF end, const MD_LINE* lines, int n_lines,
- CHAR line_break_replacement_char, int resolve_escapes,
- CHAR* buffer, SZ* p_size)
+md_merge_lines(MD_CTX* ctx, OFF beg, OFF end, const MD_LINE* lines, int n_lines,
+ CHAR line_break_replacement_char, CHAR* buffer, SZ* p_size)
{
CHAR* ptr = buffer;
int line_index = 0;
@@ -863,13 +861,6 @@ md_do_normalize_string(MD_CTX* ctx, OFF beg, OFF end, const MD_LINE* lines, int
line_end = end;
while(off < line_end) {
- if(resolve_escapes && CH(off) == _T('\\') &&
- off+1 < end && (ISPUNCT(off+1) || ISNEWLINE(off+1))) {
- if(ISNEWLINE(off+1))
- break;
- off++;
- }
-
*ptr = CH(off);
ptr++;
off++;
@@ -888,12 +879,11 @@ md_do_normalize_string(MD_CTX* ctx, OFF beg, OFF end, const MD_LINE* lines, int
}
}
-/* Wrapper of md_do_normalize_string() which allocates new buffer for the
- * output string. */
+/* Wrapper of md_merge_lines() which allocates new buffer for the output string.
+ */
static int
-md_normalize_string(MD_CTX* ctx, OFF beg, OFF end, const MD_LINE* lines, int n_lines,
- CHAR line_break_replacement_char, int resolve_escapes,
- CHAR** p_str, SZ* p_size)
+md_merge_lines_alloc(MD_CTX* ctx, OFF beg, OFF end, const MD_LINE* lines, int n_lines,
+ CHAR line_break_replacement_char, CHAR** p_str, SZ* p_size)
{
CHAR* buffer;
@@ -903,8 +893,8 @@ md_normalize_string(MD_CTX* ctx, OFF beg, OFF end, const MD_LINE* lines, int n_l
return -1;
}
- md_do_normalize_string(ctx, beg, end, lines, n_lines,
- line_break_replacement_char, resolve_escapes, buffer, p_size);
+ md_merge_lines(ctx, beg, end, lines, n_lines,
+ line_break_replacement_char, buffer, p_size);
*p_str = buffer;
return 0;
@@ -1246,6 +1236,218 @@ md_is_html_any(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_
}
+/****************************
+ *** Recognizing Entity ***
+ ****************************/
+
+static int
+md_is_hex_entity_contents(MD_CTX* ctx, const CHAR* text, OFF beg, OFF max_end, OFF* p_end)
+{
+ OFF off = beg;
+
+ while(off < max_end && ISXDIGIT_(text[off]) && off - beg <= 8)
+ off++;
+
+ if(1 <= off - beg && off - beg <= 8) {
+ *p_end = off;
+ return TRUE;
+ } else {
+ return FALSE;
+ }
+}
+
+static int
+md_is_dec_entity_contents(MD_CTX* ctx, const CHAR* text, OFF beg, OFF max_end, OFF* p_end)
+{
+ OFF off = beg;
+
+ while(off < max_end && ISDIGIT_(text[off]) && off - beg <= 8)
+ off++;
+
+ if(1 <= off - beg && off - beg <= 8) {
+ *p_end = off;
+ return TRUE;
+ } else {
+ return FALSE;
+ }
+}
+
+static int
+md_is_named_entity_contents(MD_CTX* ctx, const CHAR* text, OFF beg, OFF max_end, OFF* p_end)
+{
+ OFF off = beg;
+
+ if(off <= max_end && ISALPHA_(text[off]))
+ off++;
+ else
+ return FALSE;
+
+ while(off < max_end && ISALNUM_(text[off]) && off - beg <= 48)
+ off++;
+
+ if(2 <= off - beg && off - beg <= 48) {
+ *p_end = off;
+ return TRUE;
+ } else {
+ return FALSE;
+ }
+}
+
+static int
+md_is_entity_str(MD_CTX* ctx, const CHAR* text, OFF beg, OFF max_end, OFF* p_end)
+{
+ int is_contents;
+ OFF off = beg;
+
+ MD_ASSERT(text[off] == _T('&'));
+ off++;
+
+ if(off+1 < max_end && text[off] == _T('#') && (text[off+1] == _T('x') || text[off+1] == _T('X')))
+ is_contents = md_is_hex_entity_contents(ctx, text, off+2, max_end, &off);
+ else if(off < max_end && CH(off) == _T('#'))
+ is_contents = md_is_dec_entity_contents(ctx, text, off+1, max_end, &off);
+ else
+ is_contents = md_is_named_entity_contents(ctx, text, off, max_end, &off);
+
+ if(is_contents && off < max_end && text[off] == _T(';')) {
+ *p_end = off+1;
+ return TRUE;
+ } else {
+ return FALSE;
+ }
+}
+
+static inline int
+md_is_entity(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end)
+{
+ return md_is_entity_str(ctx, ctx->text, beg, max_end, p_end);
+}
+
+
+/******************************
+ *** Attribute Management ***
+ ******************************/
+
+typedef struct MD_ATTRIBUTE_BUILD_tag MD_ATTRIBUTE_BUILD;
+struct MD_ATTRIBUTE_BUILD_tag {
+ MD_TEXTTYPE* substr_types;
+ OFF* substr_offsets;
+ int substr_count;
+ int substr_alloc;
+};
+
+
+#define MD_BUILD_ATTR_NO_ESCAPES 0x0001
+
+static int
+md_build_attr_append_substr(MD_CTX* ctx, MD_ATTRIBUTE_BUILD* build,
+ MD_TEXTTYPE type, OFF off)
+{
+ if(build->substr_count >= build->substr_alloc) {
+ MD_TEXTTYPE* new_substr_types;
+ OFF* new_substr_offsets;
+
+ build->substr_alloc = (build->substr_alloc == 0 ? 8 : build->substr_alloc * 2);
+
+ new_substr_types = (MD_TEXTTYPE*) realloc(build->substr_types,
+ (build->substr_alloc+1) * sizeof(MD_TEXTTYPE));
+ if(new_substr_types == NULL) {
+ MD_LOG("realloc() failed.");
+ return -1;
+ }
+ new_substr_offsets = (OFF*) realloc(build->substr_offsets,
+ build->substr_alloc * sizeof(OFF));
+ if(new_substr_offsets == NULL) {
+ MD_LOG("realloc() failed.");
+ return -1;
+ }
+
+ build->substr_types = new_substr_types;
+ build->substr_offsets = new_substr_offsets;
+ }
+
+ build->substr_types[build->substr_count] = type;
+ build->substr_offsets[build->substr_count] = off;
+ build->substr_count++;
+ return 0;
+}
+
+static int
+md_build_attribute(MD_CTX* ctx, const CHAR* raw_text, SZ raw_size,
+ unsigned flags, MD_ATTRIBUTE* attr)
+{
+ MD_ATTRIBUTE_BUILD build = {0};
+ CHAR* text;
+ OFF raw_off = 0;
+ OFF off = 0;
+ int ret = 0;
+
+ if(raw_size == 0) {
+ static const MD_TEXTTYPE empty_types[] = { MD_TEXT_NORMAL };
+ static const OFF empty_offsets[] = { 0, 0 };
+
+ attr->text = NULL;
+ attr->size = 0;
+ attr->substr_types = empty_types;
+ attr->substr_offsets = empty_offsets;
+ return 0;
+ }
+
+ text = (CHAR*) malloc(raw_size * sizeof(CHAR));
+ if(text == NULL) {
+ MD_LOG("malloc() failed.");
+ goto abort;
+ }
+
+ while(raw_off < raw_size) {
+ if(raw_text[raw_off] == _T('&')) {
+ OFF ent_end;
+
+ if(md_is_entity_str(ctx, raw_text, raw_off, raw_size, &ent_end)) {
+ MD_CHECK(md_build_attr_append_substr(ctx, &build, MD_TEXT_ENTITY, off));
+ memcpy(text + off, raw_text + raw_off, ent_end - raw_off);
+ off += ent_end - raw_off;
+ raw_off = ent_end;
+ continue;
+ }
+ }
+
+ if(build.substr_count == 0 || build.substr_types[build.substr_count-1] != MD_TEXT_NORMAL)
+ MD_CHECK(md_build_attr_append_substr(ctx, &build, MD_TEXT_NORMAL, off));
+
+ if(!(flags & MD_BUILD_ATTR_NO_ESCAPES) &&
+ raw_text[raw_off] == _T('\\') && raw_off+1 < raw_size &&
+ (ISPUNCT_(raw_text[raw_off+1]) || ISNEWLINE_(raw_text[raw_off+1])))
+ raw_off++;
+
+ text[off++] = raw_text[raw_off++];
+ }
+ build.substr_offsets[build.substr_count] = off;
+
+ attr->text = text;
+ attr->size = off;
+ attr->substr_offsets = build.substr_offsets;
+ attr->substr_types = build.substr_types;
+ return 0;
+
+abort:
+ free(text);
+ free(build.substr_offsets);
+ free(build.substr_types);
+ return -1;
+}
+
+static void
+md_free_attribute(MD_CTX* ctx, MD_ATTRIBUTE* attr)
+{
+ if(attr->size > 0) {
+ free((void*) attr->text);
+ free((void*) attr->substr_types);
+ free((void*) attr->substr_offsets);
+ }
+}
+
+
/***************************
*** Recognizing Links ***
***************************/
@@ -1260,7 +1462,6 @@ struct MD_LINK_REF_DEF_tag {
SZ label_size : 24;
unsigned label_needs_free : 1;
unsigned title_needs_free : 1;
- unsigned dest_contains_escape : 1;
SZ title_size;
OFF dest_beg;
OFF dest_end;
@@ -1273,7 +1474,6 @@ struct MD_LINK_ATTR_tag {
CHAR* title;
SZ title_size;
- int dest_contains_escape;
int title_needs_free;
};
@@ -1347,8 +1547,7 @@ md_is_link_label(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg,
static int
md_is_link_destination_A(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end,
- OFF* p_contents_beg, OFF* p_contents_end,
- int* p_contains_escape)
+ OFF* p_contents_beg, OFF* p_contents_end)
{
OFF off = beg;
@@ -1356,11 +1555,8 @@ md_is_link_destination_A(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end,
return FALSE;
off++;
- *p_contains_escape = FALSE;
-
while(off < max_end) {
if(CH(off) == _T('\\') && off+1 < max_end && ISPUNCT(off+1)) {
- *p_contains_escape = TRUE;
off += 2;
continue;
}
@@ -1384,17 +1580,13 @@ md_is_link_destination_A(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end,
static int
md_is_link_destination_B(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end,
- OFF* p_contents_beg, OFF* p_contents_end,
- int* p_contains_escape)
+ OFF* p_contents_beg, OFF* p_contents_end)
{
OFF off = beg;
int in_parentheses = 0;
- *p_contains_escape = FALSE;
-
while(off < max_end) {
if(CH(off) == _T('\\') && off+1 < max_end && ISPUNCT(off+1)) {
- *p_contains_escape = TRUE;
off += 2;
continue;
}
@@ -1429,25 +1621,18 @@ md_is_link_destination_B(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end,
return TRUE;
}
-static int
+static inline int
md_is_link_destination(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end,
- OFF* p_contents_beg, OFF* p_contents_end,
- int* p_contains_escape)
+ OFF* p_contents_beg, OFF* p_contents_end)
{
- if(md_is_link_destination_A(ctx, beg, max_end, p_end, p_contents_beg, p_contents_end, p_contains_escape))
- return TRUE;
-
- if(md_is_link_destination_B(ctx, beg, max_end, p_end, p_contents_beg, p_contents_end, p_contains_escape))
- return TRUE;
-
- return FALSE;
+ return (md_is_link_destination_A(ctx, beg, max_end, p_end, p_contents_beg, p_contents_end) ||
+ md_is_link_destination_B(ctx, beg, max_end, p_end, p_contents_beg, p_contents_end));
}
static int
md_is_link_title(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg,
OFF* p_end, int* p_beg_line_index, int* p_end_line_index,
- OFF* p_contents_beg, OFF* p_contents_end,
- int* p_has_escape)
+ OFF* p_contents_beg, OFF* p_contents_end)
{
OFF off = beg;
CHAR closer_char;
@@ -1475,14 +1660,12 @@ md_is_link_title(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg,
off++;
*p_contents_beg = off;
- *p_has_escape = FALSE;
while(line_index < n_lines) {
OFF line_end = lines[line_index].end;
while(off < line_end) {
if(CH(off) == _T('\\') && off+1 < ctx->size && (ISPUNCT(off+1) || ISNEWLINE(off+1))) {
- *p_has_escape = TRUE;
off++;
} else if(CH(off) == closer_char) {
/* Success. */
@@ -1518,12 +1701,10 @@ md_is_link_reference_definition(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
int label_is_multiline;
OFF dest_contents_beg;
OFF dest_contents_end;
- int dest_contains_escape;
OFF title_contents_beg;
OFF title_contents_end;
int title_contents_line_index;
int title_is_multiline;
- int title_has_escape;
OFF off;
int line_index = 0;
int tmp_line_index;
@@ -1554,14 +1735,14 @@ md_is_link_reference_definition(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
/* Link destination. */
if(!md_is_link_destination(ctx, off, lines[line_index].end,
- &off, &dest_contents_beg, &dest_contents_end, &dest_contains_escape))
+ &off, &dest_contents_beg, &dest_contents_end))
return FALSE;
/* (Optional) title. Note we interpret it as an title only if nothing
* more follows on its last line. */
if(md_is_link_title(ctx, lines + line_index, n_lines - line_index, off,
&off, &title_contents_line_index, &tmp_line_index,
- &title_contents_beg, &title_contents_end, &title_has_escape)
+ &title_contents_beg, &title_contents_end)
&& off >= lines[line_index + tmp_line_index].end)
{
title_is_multiline = (tmp_line_index != title_contents_line_index);
@@ -1604,27 +1785,26 @@ md_is_link_reference_definition(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
} else {
SZ label_size;
- MD_CHECK(md_normalize_string(ctx, label_contents_beg, label_contents_end,
+ MD_CHECK(md_merge_lines_alloc(ctx, label_contents_beg, label_contents_end,
lines + label_contents_line_index, n_lines - label_contents_line_index,
- _T(' '), FALSE, &def->label, &label_size));
+ _T(' '), &def->label, &label_size));
def->label_size = label_size;
def->label_needs_free = TRUE;
}
def->dest_beg = dest_contents_beg;
def->dest_end = dest_contents_end;
- def->dest_contains_escape = dest_contains_escape;
if(title_contents_beg >= title_contents_end) {
def->title = NULL;
def->title_size = 0;
- } else if(!title_is_multiline && !title_has_escape) {
+ } else if(!title_is_multiline) {
def->title = (CHAR*) STR(title_contents_beg);
def->title_size = title_contents_end - title_contents_beg;
} else {
- MD_CHECK(md_normalize_string(ctx, title_contents_beg, title_contents_end,
+ MD_CHECK(md_merge_lines_alloc(ctx, title_contents_beg, title_contents_end,
lines + title_contents_line_index, n_lines - title_contents_line_index,
- _T('\n'), TRUE, &def->title, &def->title_size));
+ _T('\n'), &def->title, &def->title_size));
def->title_needs_free = TRUE;
}
@@ -1758,8 +1938,8 @@ md_is_link_reference(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
end_line++;
if(beg_line != end_line) {
- MD_CHECK(md_normalize_string(ctx, beg, end, beg_line,
- n_lines - (beg_line - lines), _T(' '), FALSE, &label, &label_size));
+ MD_CHECK(md_merge_lines_alloc(ctx, beg, end, beg_line,
+ n_lines - (beg_line - lines), _T(' '), &label, &label_size));
} else {
label = (CHAR*) STR(beg);
label_size = end - beg;
@@ -1769,7 +1949,6 @@ md_is_link_reference(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
if(ret == TRUE) {
attr->dest_beg = def->dest_beg;
attr->dest_end = def->dest_end;
- attr->dest_contains_escape = def->dest_contains_escape;
attr->title = def->title;
attr->title_size = def->title_size;
attr->title_needs_free = FALSE;
@@ -1792,7 +1971,6 @@ md_is_inline_link_spec(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
OFF title_contents_end;
int title_contents_line_index;
int title_is_multiline;
- int title_has_escape;
OFF off = beg;
int ret = FALSE;
@@ -1814,7 +1992,7 @@ md_is_inline_link_spec(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
/* (Optional) link destination. */
if(!md_is_link_destination(ctx, off, lines[line_index].end,
- &off, &attr->dest_beg, &attr->dest_end, &attr->dest_contains_escape)) {
+ &off, &attr->dest_beg, &attr->dest_end)) {
attr->dest_beg = off;
attr->dest_end = off;
}
@@ -1822,7 +2000,7 @@ md_is_inline_link_spec(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
/* (Optional) title. */
if(md_is_link_title(ctx, lines + line_index, n_lines - line_index, off,
&off, &title_contents_line_index, &tmp_line_index,
- &title_contents_beg, &title_contents_end, &title_has_escape))
+ &title_contents_beg, &title_contents_end))
{
title_is_multiline = (tmp_line_index != title_contents_line_index);
title_contents_line_index += line_index;
@@ -1852,14 +2030,14 @@ md_is_inline_link_spec(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
attr->title = NULL;
attr->title_size = 0;
attr->title_needs_free = FALSE;
- } else if(!title_is_multiline && !title_has_escape) {
+ } else if(!title_is_multiline) {
attr->title = (CHAR*) STR(title_contents_beg);
attr->title_size = title_contents_end - title_contents_beg;
attr->title_needs_free = FALSE;
} else {
- MD_CHECK(md_normalize_string(ctx, title_contents_beg, title_contents_end,
+ MD_CHECK(md_merge_lines_alloc(ctx, title_contents_beg, title_contents_end,
lines + title_contents_line_index, n_lines - title_contents_line_index,
- _T('\n'), TRUE, &attr->title, &attr->title_size));
+ _T('\n'), &attr->title, &attr->title_size));
attr->title_needs_free = TRUE;
}
@@ -1978,7 +2156,6 @@ struct MD_MARK_tag {
/* Mark flags specific for various mark types (so they can share bits). */
#define MD_MARK_INTRAWORD 0x40 /* Helper for emphasis '*', '_' ("the rule of 3"). */
#define MD_MARK_AUTOLINK 0x40 /* Distinguisher for '<', '>'. */
-#define MD_MARK_LINKDESTCONTAINESESCAPE 0x40 /* Flag that link destination contains an escape. */
static MD_MARK*
@@ -2872,8 +3049,6 @@ md_resolve_links(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
MD_ASSERT(ctx->marks[opener_index+1].ch == 'D');
ctx->marks[opener_index+1].beg = attr.dest_beg;
ctx->marks[opener_index+1].end = attr.dest_end;
- if(attr.dest_contains_escape)
- ctx->marks[opener_index+1].flags |= MD_MARK_LINKDESTCONTAINESESCAPE;
MD_ASSERT(ctx->marks[opener_index+2].ch == 'D');
md_mark_store_ptr(ctx, opener_index+2, attr.title);
@@ -2905,7 +3080,7 @@ md_analyze_entity(MD_CTX* ctx, int mark_index)
{
MD_MARK* opener = &ctx->marks[mark_index];
MD_MARK* closer;
- OFF beg, end, off;
+ OFF off;
/* Cannot be entity if there is no closer as the next mark.
* (Any other mark between would mean strange character which cannot be
@@ -2920,50 +3095,12 @@ md_analyze_entity(MD_CTX* ctx, int mark_index)
if(closer->ch != ';')
return;
- if(CH(opener->end) == _T('#')) {
- if(CH(opener->end+1) == _T('x') || CH(opener->end+1) == _T('X')) {
- /* It can be only a hexadecimal entity.
- * Check it has 1 - 8 hexadecimal digits. */
- beg = opener->end+2;
- end = closer->beg;
- if(!(1 <= end - beg && end - beg <= 8))
- return;
- for(off = beg; off < end; off++) {
- if(!ISXDIGIT(off))
- return;
- }
- } else {
- /* It can be only a decimal entity.
- * Check it has 1 - 8 decimal digits. */
- beg = opener->end+1;
- end = closer->beg;
- if(!(1 <= end - beg && end - beg <= 8))
- return;
- for(off = beg; off < end; off++) {
- if(!ISDIGIT(off))
- return;
- }
- }
- } else {
- /* It can be only a named entity.
- * Check it starts with letter and 1-47 alnum chars follow. */
- beg = opener->end;
- end = closer->beg;
- if(!(2 <= end - beg && end - beg <= 48))
- return;
- if(!ISALPHA(beg))
- return;
- for(off = beg + 1; off < end; off++) {
- if(!ISALNUM(off))
- return;
- }
- }
+ if(md_is_entity(ctx, opener->beg, closer->end, &off)) {
+ MD_ASSERT(off == closer->end);
- /* Mark us as an entity.
- * As entity has no span, we may just turn the range into a single mark.
- * (This also causes we do not get called for ';'. */
- md_resolve_range(ctx, NULL, mark_index, mark_index+1);
- opener->end = closer->end;
+ md_resolve_range(ctx, NULL, mark_index, mark_index+1);
+ opener->end = closer->end;
+ }
}
static void
@@ -3230,75 +3367,36 @@ md_analyze_link_contents(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg
}
static int
-md_unescape_link_dest(MD_CTX* ctx, OFF beg, OFF end, SZ* p_size)
+md_enter_leave_span_a(MD_CTX* ctx, int enter, MD_SPANTYPE type,
+ const CHAR* dest, SZ dest_size, int prohibit_escapes_in_dest,
+ const CHAR* title, SZ title_size)
{
- CHAR* ptr;
- OFF off = beg;
+ MD_SPAN_A_DETAIL det;
int ret = 0;
- MD_TEMP_BUFFER((end - beg) * sizeof(CHAR));
- ptr = ctx->buffer;
+ /* Note we here rely on fact that MD_SPAN_A_DETAIL and
+ * MD_SPAN_IMG_DETAIL are binary-compatible. */
+ memset(&det, 0, sizeof(MD_SPAN_A_DETAIL));
+ MD_CHECK(md_build_attribute(ctx, dest, dest_size,
+ (prohibit_escapes_in_dest ? MD_BUILD_ATTR_NO_ESCAPES : 0),
+ &det.href));
+ MD_CHECK(md_build_attribute(ctx, title, title_size, 0, &det.title));
- while(off < end) {
- if(CH(off) == _T('\\') && off+1 < end && ISPUNCT(off+1)) {
- off++;
- continue;
- }
-
- *ptr = CH(off);
- ptr++;
- off++;
- }
-
- *p_size = ptr - ctx->buffer;
-
-abort:
- return ret;
-}
-
-static int
-md_setup_span_a_detail(MD_CTX* ctx, const MD_MARK* mark, MD_SPAN_A_DETAIL* det)
-{
- const MD_MARK* dest_mark = mark+1;
- const MD_MARK* title_mark = mark+2;
- int ret = 0;
-
- MD_ASSERT(dest_mark->ch == 'D');
- if(dest_mark->beg < dest_mark->end) {
- if(dest_mark->flags & MD_MARK_LINKDESTCONTAINESESCAPE) {
- MD_CHECK(md_unescape_link_dest(ctx, dest_mark->beg, dest_mark->end, &det->href_size));
- det->href = ctx->buffer;
- } else {
- det->href = STR(dest_mark->beg);
- det->href_size = dest_mark->end - dest_mark->beg;
- }
- } else {
- det->href = NULL;
- det->href_size = 0;
- }
-
- MD_ASSERT(title_mark->ch == 'D');
- det->title = md_mark_get_ptr(ctx, title_mark - ctx->marks);
- det->title_size = title_mark->prev;
+ if(enter)
+ MD_ENTER_SPAN(type, &det);
+ else
+ MD_LEAVE_SPAN(type, &det);
abort:
+ md_free_attribute(ctx, &det.href);
+ md_free_attribute(ctx, &det.title);
return ret;
}
-static inline int
-md_setup_span_img_detail(MD_CTX* ctx, MD_MARK* mark, MD_SPAN_IMG_DETAIL* det)
-{
- /* MD_SPAN_A_DETAIL and MD_SPAN_IMG_DETAIL are binary-compatible. */
- return md_setup_span_a_detail(ctx, mark, (MD_SPAN_A_DETAIL*) det);
-}
-
/* Render the output, accordingly to the analyzed ctx->marks. */
static int
md_process_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
{
- union {
- MD_SPAN_A_DETAIL a;
- } det;
MD_TEXTTYPE text_type;
const MD_LINE* line = lines;
MD_MARK* prev_mark = NULL;
@@ -3373,15 +3471,21 @@ md_process_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
case '[': /* Link, image. */
case '!':
- /* Note we here rely on fact that MD_SPAN_A_DETAIL and
- * MD_SPAN_IMG_DETAIL are binary-compatible. */
- MD_CHECK(md_setup_span_a_detail(ctx, mark, &det.a));
- MD_ENTER_SPAN((mark->ch == '!' ? MD_SPAN_IMG : MD_SPAN_A), &det.a);
- break;
case ']':
- MD_CHECK(md_setup_span_a_detail(ctx, &ctx->marks[mark->prev], &det.a));
- MD_LEAVE_SPAN((ctx->marks[mark->prev].ch == '!' ? MD_SPAN_IMG : MD_SPAN_A), &det.a);
+ {
+ const MD_MARK* opener = (mark->ch != ']' ? mark : &ctx->marks[mark->prev]);
+ const MD_MARK* dest_mark = opener+1;
+ const MD_MARK* title_mark = opener+2;
+
+ MD_ASSERT(dest_mark->ch == 'D');
+ MD_ASSERT(title_mark->ch == 'D');
+
+ MD_CHECK(md_enter_leave_span_a(ctx, (mark->ch != ']'),
+ (opener->ch == '!' ? MD_SPAN_IMG : MD_SPAN_A),
+ STR(dest_mark->beg), dest_mark->end - dest_mark->beg, FALSE,
+ md_mark_get_ptr(ctx, title_mark - ctx->marks), title_mark->prev));
break;
+ }
case '<':
case '>': /* Autolink or raw HTML. */
@@ -3397,30 +3501,24 @@ md_process_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
case '@': /* Permissive e-mail autolink. */
case ':': /* Permissive URL autolink. */
- if(mark->flags & MD_MARK_OPENER) {
- if(mark->ch == '@') {
- SZ sz = 7 + ctx->marks[mark->next].beg - mark->end;
-
- MD_TEMP_BUFFER(sz * sizeof(CHAR));
- memcpy(ctx->buffer, _T("mailto:"), 7 * sizeof(CHAR));
- memcpy(ctx->buffer + 7, STR(mark->end), (sz-7) * sizeof(CHAR));
-
- det.a.href_size = sz;
- det.a.href = ctx->buffer;
- } else {
- det.a.href_size = ctx->marks[mark->next].beg - mark->end;
- det.a.href = STR(mark->end);
- }
- det.a.title = NULL;
- det.a.title_size = 0;
- MD_ENTER_SPAN(MD_SPAN_A, (void*) &det);
- } else {
- /* The detail already has to be initialized: There cannot
- * be any resolved mark between the autolink opener and
- * closer. */
- MD_LEAVE_SPAN(MD_SPAN_A, (void*) &det);
+ {
+ const MD_MARK* opener = ((mark->flags & MD_MARK_OPENER) ? mark : &ctx->marks[mark->prev]);
+ const MD_MARK* closer = &ctx->marks[opener->next];
+ const CHAR* dest = STR(opener->end);
+ SZ dest_size = closer->beg - opener->end;
+
+ if(opener->ch == '@') {
+ dest_size += 7;
+ MD_TEMP_BUFFER(dest_size * sizeof(CHAR));
+ memcpy(ctx->buffer, _T("mailto:"), 7 * sizeof(CHAR));
+ memcpy(ctx->buffer + 7, dest, (dest_size-7) * sizeof(CHAR));
+ dest = ctx->buffer;
}
+
+ MD_CHECK(md_enter_leave_span_a(ctx, (mark->flags & MD_MARK_OPENER),
+ MD_SPAN_A, dest, dest_size, TRUE, NULL, 0));
break;
+ }
case '&': /* Entity. */
MD_TEXT(MD_TEXT_ENTITY, STR(mark->beg), mark->end - mark->beg);
@@ -3726,6 +3824,7 @@ md_setup_fenced_code_detail(MD_CTX* ctx, const MD_BLOCK* block, MD_BLOCK_CODE_DE
const MD_VERBATIMLINE* fence_line = (const MD_VERBATIMLINE*)(block + 1);
OFF beg = fence_line->beg;
OFF end = fence_line->end;
+ OFF lang_end;
CHAR fence_ch = CH(fence_line->beg);
int ret = 0;
@@ -3740,25 +3839,26 @@ md_setup_fenced_code_detail(MD_CTX* ctx, const MD_BLOCK* block, MD_BLOCK_CODE_DE
while(end > beg && CH(end-1) == _T(' '))
end--;
- if(beg < end) {
- MD_LINE line = { beg, end };
- SZ size;
+ /* Build info string attribute. */
+ MD_CHECK(md_build_attribute(ctx, STR(beg), end - beg, 0, &det->info));
- MD_TEMP_BUFFER((end - beg) * sizeof(CHAR));
- md_do_normalize_string(ctx, beg, end, &line, 1, _T(' '), TRUE, ctx->buffer, &size);
-
- det->info = ctx->buffer;
- det->info_size = size;
-
- det->lang = det->info;
- while(det->lang_size < det->info_size && !ISWHITESPACE_(det->lang[det->lang_size]))
- det->lang_size++;
- }
+ /* Build info string attribute. */
+ lang_end = beg;
+ while(lang_end < end && !ISWHITESPACE(lang_end))
+ lang_end++;
+ MD_CHECK(md_build_attribute(ctx, STR(beg), lang_end - beg, 0, &det->lang));
abort:
return ret;
}
+static inline void
+md_clean_fenced_code_detail(MD_CTX* ctx, MD_BLOCK_CODE_DETAIL* det)
+{
+ md_free_attribute(ctx, &det->info);
+ md_free_attribute(ctx, &det->lang);
+}
+
static int
md_process_leaf_block(MD_CTX* ctx, const MD_BLOCK* block)
{
@@ -3767,6 +3867,7 @@ md_process_leaf_block(MD_CTX* ctx, const MD_BLOCK* block)
MD_BLOCK_CODE_DETAIL code;
} det;
int is_in_tight_list;
+ int clean_fence_code_detail = FALSE;
int ret = 0;
memset(&det, 0, sizeof(det));
@@ -3783,8 +3884,11 @@ md_process_leaf_block(MD_CTX* ctx, const MD_BLOCK* block)
case MD_BLOCK_CODE:
/* For fenced code block, we may need to set the info string. */
- if(block->data != 0)
+ if(block->data != 0) {
+ memset(&det.code, 0, sizeof(MD_BLOCK_CODE_DETAIL));
+ clean_fence_code_detail = TRUE;
MD_CHECK(md_setup_fenced_code_detail(ctx, block, &det.code));
+ }
break;
default:
@@ -3802,32 +3906,32 @@ md_process_leaf_block(MD_CTX* ctx, const MD_BLOCK* block)
break;
case MD_BLOCK_CODE:
- ret = md_process_code_block_contents(ctx, (block->data != 0),
- (const MD_VERBATIMLINE*)(block + 1), block->n_lines);
+ MD_CHECK(md_process_code_block_contents(ctx, (block->data != 0),
+ (const MD_VERBATIMLINE*)(block + 1), block->n_lines));
break;
case MD_BLOCK_HTML:
- ret = md_process_verbatim_block_contents(ctx, MD_TEXT_HTML,
- (const MD_VERBATIMLINE*)(block + 1), block->n_lines);
+ MD_CHECK(md_process_verbatim_block_contents(ctx, MD_TEXT_HTML,
+ (const MD_VERBATIMLINE*)(block + 1), block->n_lines));
break;
case MD_BLOCK_TABLE:
- ret = md_process_table_block_contents(ctx, block->data,
- (const MD_LINE*)(block + 1), block->n_lines);
+ MD_CHECK(md_process_table_block_contents(ctx, block->data,
+ (const MD_LINE*)(block + 1), block->n_lines));
break;
default:
- ret = md_process_normal_block_contents(ctx,
- (const MD_LINE*)(block + 1), block->n_lines);
+ MD_CHECK(md_process_normal_block_contents(ctx,
+ (const MD_LINE*)(block + 1), block->n_lines));
break;
}
- if(ret != 0)
- goto abort;
if(!is_in_tight_list || block->type != MD_BLOCK_P)
MD_LEAVE_BLOCK(block->type, (void*) &det);
abort:
+ if(clean_fence_code_detail)
+ md_clean_fenced_code_detail(ctx, &det.code);
return ret;
}
diff --git a/md4c/md4c.h b/md4c/md4c.h
@@ -176,6 +176,35 @@ enum MD_ALIGN_tag {
};
+/* String attribute.
+ *
+ * This wraps strings which are outside of a normal text flow and which are
+ * propagated within various detailed structures, but which still may contain
+ * string portions of different types like e.g. entities.
+ *
+ * So, for example, lets consider an image has a title attribute string
+ * set to "foo " bar". (Note the string size is 14.)
+ *
+ * Then:
+ * -- [0]: "foo " (substr_types[0] == MD_TEXT_NORMAL; substr_offsets[0] == 0)
+ * -- [1]: """ (substr_types[1] == MD_TEXT_ENTITY; substr_offsets[1] == 4)
+ * -- [2]: " bar" (substr_types[2] == MD_TEXT_NORMAL; substr_offsets[2] == 10)
+ * -- [3]: (n/a) (n/a ; substr_offsets[3] == 14)
+ *
+ * Note that these conditions are guaranteed:
+ * -- substr_offsets[0] == 0
+ * -- substr_offsets[LAST+1] == size
+ * -- Only MD_TEXT_NORMAL and MD_TEXT_ENTITY substrings can appear.
+ */
+typedef struct MD_ATTRIBUTE_tag MD_ATTRIBUTE;
+struct MD_ATTRIBUTE_tag {
+ const MD_CHAR* text;
+ MD_SIZE size;
+ const MD_TEXTTYPE* substr_types;
+ const MD_OFFSET* substr_offsets;
+};
+
+
/* Detailed info for MD_BLOCK_OL_DETAIL. */
typedef struct MD_BLOCK_OL_DETAIL_tag MD_BLOCK_OL_DETAIL;
struct MD_BLOCK_OL_DETAIL_tag {
@@ -191,13 +220,8 @@ struct MD_BLOCK_H_DETAIL_tag {
/* Detailed info for MD_BLOCK_CODE. */
typedef struct MD_BLOCK_CODE_DETAIL_tag MD_BLOCK_CODE_DETAIL;
struct MD_BLOCK_CODE_DETAIL_tag {
- /* Complete "info string" */
- const MD_CHAR* info;
- MD_SIZE info_size;
-
- /* Language portion of the info string. */
- const MD_CHAR* lang;
- MD_SIZE lang_size;
+ MD_ATTRIBUTE info;
+ MD_ATTRIBUTE lang;
};
/* Detailed info for MD_BLOCK_TH and MD_BLOCK_TD. */
@@ -209,21 +233,15 @@ struct MD_BLOCK_TD_DETAIL_tag {
/* Detailed info for MD_SPAN_A. */
typedef struct MD_SPAN_A_DETAIL_tag MD_SPAN_A_DETAIL;
struct MD_SPAN_A_DETAIL_tag {
- const MD_CHAR* href;
- MD_SIZE href_size;
-
- const MD_CHAR* title;
- MD_SIZE title_size;
+ MD_ATTRIBUTE href;
+ MD_ATTRIBUTE title;
};
/* Detailed info for MD_SPAN_IMG. */
typedef struct MD_SPAN_IMG_DETAIL_tag MD_SPAN_IMG_DETAIL;
struct MD_SPAN_IMG_DETAIL_tag {
- const MD_CHAR* src;
- MD_SIZE src_size;
-
- const MD_CHAR* title;
- MD_SIZE title_size;
+ MD_ATTRIBUTE src;
+ MD_ATTRIBUTE title;
};