commit dee8142c37271ed1816b45fb67182486eb6991d3
parent 55afb5bae32e30cd5ab70db7089485bdcafaa6c0
Author: Martin Mitas <mity@morous.org>
Date: Sun, 27 Nov 2016 00:37:22 +0100
Handle escapes in link destinations.
Diffstat:
| M | md4c/md4c.c | | | 105 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------- |
1 file changed, 80 insertions(+), 25 deletions(-)
diff --git a/md4c/md4c.c b/md4c/md4c.c
@@ -1184,9 +1184,10 @@ md_is_html_any(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_
struct MD_LINK_REF_DEF_tag {
CHAR* label;
CHAR* title;
- SZ label_size : 24;
- unsigned label_needs_free : 1;
- unsigned title_needs_free : 1;
+ SZ label_size : 24;
+ unsigned label_needs_free : 1;
+ unsigned title_needs_free : 1;
+ unsigned dest_contains_escape : 1;
SZ title_size;
OFF dest_beg;
OFF dest_end;
@@ -1199,6 +1200,7 @@ struct MD_LINK_ATTR_tag {
CHAR* title;
SZ title_size;
+ int dest_contains_escape;
int title_needs_free;
};
@@ -1272,7 +1274,8 @@ md_is_link_label(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg,
static int
md_is_link_destination_A(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end,
- OFF* p_contents_beg, OFF* p_contents_end)
+ OFF* p_contents_beg, OFF* p_contents_end,
+ int* p_contains_escape)
{
OFF off = beg;
@@ -1280,8 +1283,11 @@ md_is_link_destination_A(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end,
return FALSE;
off++;
+ *p_contains_escape = FALSE;
+
while(off < max_end) {
if(CH(off) == _T('\\') && off < max_end && ISPUNCT(off+1)) {
+ *p_contains_escape = TRUE;
off += 2;
continue;
}
@@ -1305,13 +1311,17 @@ md_is_link_destination_A(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end,
static int
md_is_link_destination_B(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end,
- OFF* p_contents_beg, OFF* p_contents_end)
+ OFF* p_contents_beg, OFF* p_contents_end,
+ int* p_contains_escape)
{
OFF off = beg;
int in_parentheses = 0;
+ *p_contains_escape = FALSE;
+
while(off < max_end) {
if(CH(off) == _T('\\') && off < max_end && ISPUNCT(off+1)) {
+ *p_contains_escape = TRUE;
off += 2;
continue;
}
@@ -1348,12 +1358,13 @@ md_is_link_destination_B(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end,
static int
md_is_link_destination(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end,
- OFF* p_contents_beg, OFF* p_contents_end)
+ OFF* p_contents_beg, OFF* p_contents_end,
+ int* p_contains_escape)
{
- if(md_is_link_destination_A(ctx, beg, max_end, p_end, p_contents_beg, p_contents_end))
+ if(md_is_link_destination_A(ctx, beg, max_end, p_end, p_contents_beg, p_contents_end, p_contains_escape))
return TRUE;
- if(md_is_link_destination_B(ctx, beg, max_end, p_end, p_contents_beg, p_contents_end))
+ if(md_is_link_destination_B(ctx, beg, max_end, p_end, p_contents_beg, p_contents_end, p_contains_escape))
return TRUE;
return FALSE;
@@ -1475,6 +1486,7 @@ md_is_link_reference_definition(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
int label_is_multiline;
OFF dest_contents_beg;
OFF dest_contents_end;
+ int dest_contains_escape;
OFF title_contents_beg;
OFF title_contents_end;
int title_contents_line_index;
@@ -1509,7 +1521,7 @@ md_is_link_reference_definition(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
/* Link destination. */
if(!md_is_link_destination(ctx, off, lines[line_index].end,
- &off, &dest_contents_beg, &dest_contents_end))
+ &off, &dest_contents_beg, &dest_contents_end, &dest_contains_escape))
return FALSE;
/* (Optional) title. Note we interpret it as an title only if nothing
@@ -1568,6 +1580,7 @@ md_is_link_reference_definition(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
def->dest_beg = dest_contents_beg;
def->dest_end = dest_contents_end;
+ def->dest_contains_escape = dest_contains_escape;
if(title_contents_beg >= title_contents_end) {
def->title = NULL;
@@ -1723,6 +1736,7 @@ md_is_link_reference(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
if(ret == TRUE) {
attr->dest_beg = def->dest_beg;
attr->dest_end = def->dest_end;
+ attr->dest_contains_escape = def->dest_contains_escape;
attr->title = def->title;
attr->title_size = def->title_size;
attr->title_needs_free = FALSE;
@@ -1766,7 +1780,7 @@ md_is_inline_link_spec(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
/* (Optional) link destination. */
if(!md_is_link_destination(ctx, off, lines[line_index].end,
- &off, &attr->dest_beg, &attr->dest_end)) {
+ &off, &attr->dest_beg, &attr->dest_end, &attr->dest_contains_escape)) {
attr->dest_beg = off;
attr->dest_end = off;
}
@@ -1920,16 +1934,17 @@ struct MD_MARK_tag {
};
/* Mark flags (these apply to ALL mark types). */
-#define MD_MARK_POTENTIAL_OPENER 0x01 /* Maybe opener. */
-#define MD_MARK_POTENTIAL_CLOSER 0x02 /* Maybe closer. */
-#define MD_MARK_OPENER 0x04 /* Definitely opener. */
-#define MD_MARK_CLOSER 0x08 /* Definitely closer. */
-#define MD_MARK_RESOLVED 0x10 /* Resolved in any definite way. */
-#define MD_MARK_LEAF 0x20 /* Pair does not contain any nested spans. */
+#define MD_MARK_POTENTIAL_OPENER 0x01 /* Maybe opener. */
+#define MD_MARK_POTENTIAL_CLOSER 0x02 /* Maybe closer. */
+#define MD_MARK_OPENER 0x04 /* Definitely opener. */
+#define MD_MARK_CLOSER 0x08 /* Definitely closer. */
+#define MD_MARK_RESOLVED 0x10 /* Resolved in any definite way. */
+#define MD_MARK_LEAF 0x20 /* Pair does not contain any nested spans. */
/* Mark flags specific for various mark types (so they can share bits). */
-#define MD_MARK_INTRAWORD 0x40 /* Helper for emphasis '*', '_' ("the rule of 3"). */
-#define MD_MARK_AUTOLINK 0x40 /* Distinguisher for '<', '>'. */
+#define MD_MARK_INTRAWORD 0x40 /* Helper for emphasis '*', '_' ("the rule of 3"). */
+#define MD_MARK_AUTOLINK 0x40 /* Distinguisher for '<', '>'. */
+#define MD_MARK_LINKDESTCONTAINESESCAPE 0x40 /* Flag that link destination contains an escape. */
static MD_MARK*
@@ -2821,6 +2836,8 @@ md_resolve_links(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
MD_ASSERT(ctx->marks[opener_index+1].ch == 'D');
ctx->marks[opener_index+1].beg = attr.dest_beg;
ctx->marks[opener_index+1].end = attr.dest_end;
+ if(attr.dest_contains_escape)
+ ctx->marks[opener_index+1].flags |= MD_MARK_LINKDESTCONTAINESESCAPE;
MD_ASSERT(ctx->marks[opener_index+2].ch == 'D');
md_mark_store_ptr(ctx, opener_index+2, attr.title);
@@ -3179,22 +3196,60 @@ md_analyze_link_contents(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg
UNDERSCORE_OPENERS.tail = -1;
}
-static void
+static int
+md_unescape_link_dest(MD_CTX* ctx, OFF beg, OFF end, SZ* p_size)
+{
+ CHAR* ptr;
+ OFF off = beg;
+ int ret = 0;
+
+ MD_TEMP_BUFFER((end - beg) * sizeof(CHAR));
+ ptr = ctx->buffer;
+
+ while(off < end) {
+ if(CH(off) == _T('\\') && off+1 < end && ISPUNCT(off+1)) {
+ off++;
+ continue;
+ }
+
+ *ptr = CH(off);
+ ptr++;
+ off++;
+ }
+
+ *p_size = ptr - ctx->buffer;
+
+abort:
+ return ret;
+}
+
+static int
md_setup_span_a_detail(MD_CTX* ctx, const MD_MARK* mark, MD_SPAN_A_DETAIL* det)
{
const MD_MARK* dest_mark = mark+1;
const MD_MARK* title_mark = mark+2;
+ int ret = 0;
MD_ASSERT(dest_mark->ch == 'D');
- if(dest_mark->beg < dest_mark->end)
- det->href = STR(dest_mark->beg);
- else
+ if(dest_mark->beg < dest_mark->end) {
+ if(dest_mark->flags & MD_MARK_LINKDESTCONTAINESESCAPE) {
+ MD_CHECK(md_unescape_link_dest(ctx, dest_mark->beg, dest_mark->end, &det->href_size));
+ det->href = ctx->buffer;
+ } else {
+ det->href = STR(dest_mark->beg);
+ det->href_size = dest_mark->end - dest_mark->beg;
+ }
+ } else {
det->href = NULL;
- det->href_size = dest_mark->end - dest_mark->beg;
+ det->href_size = 0;
+ }
MD_ASSERT(title_mark->ch == 'D');
det->title = md_mark_get_ptr(ctx, title_mark - ctx->marks);
det->title_size = title_mark->prev;
+
+abort:
+ return ret;
}
static void
@@ -3358,11 +3413,11 @@ md_process_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
break;
case '[': /* Link . */
- md_setup_span_a_detail(ctx, mark, &det.a);
+ MD_CHECK(md_setup_span_a_detail(ctx, mark, &det.a));
MD_ENTER_SPAN(MD_SPAN_A, &det.a);
break;
case ']':
- md_setup_span_a_detail(ctx, &ctx->marks[mark->prev], &det.a);
+ MD_CHECK(md_setup_span_a_detail(ctx, &ctx->marks[mark->prev], &det.a));
MD_LEAVE_SPAN(MD_SPAN_A, &det.a);
break;