commit a7899c057b4332ad7fd661ced4ce85f49006bd18
parent fec7b9e6fcd2e4aee6cc226a15644e8a4988bad7
Author: Martin Mitas <mity@morous.org>
Date: Tue, 11 Oct 2016 02:34:01 +0200
Implement autolinks.
Diffstat:
4 files changed, 117 insertions(+), 23 deletions(-)
diff --git a/README.md b/README.md
@@ -103,7 +103,7 @@ more or less forms our to do list.
- [ ] 6.4 Emphasis and strong emphasis
- [ ] 6.5 Links
- [ ] 6.6 Images
- - [ ] 6.7 Autolinks
+ - [x] 6.7 Autolinks
- [x] 6.8 Raw HTML
- [x] 6.9 Hard line breaks
- [x] 6.10 Soft line breaks
diff --git a/md2html/md2html.c b/md2html/md2html.c
@@ -149,6 +149,14 @@ open_code_block(struct membuffer* out, const MD_BLOCK_CODE_DETAIL* det)
MEMBUF_APPEND_LITERAL(out, ">");
}
+static void /* Write the opening <a href="..."> tag for an MD_SPAN_A span into out. */
+open_a_span(struct membuffer* out, MD_SPAN_A_DETAIL* det)
+{
+ MEMBUF_APPEND_LITERAL(out, "<a href=\"");
+ membuf_append_escaped(out, det->href, det->href_size); /* href is not zero-terminated, so its size is passed explicitly. */
+ MEMBUF_APPEND_LITERAL(out, "\">");
+}
+
static unsigned
hex_val(char ch)
{
@@ -285,6 +293,7 @@ enter_span_callback(MD_SPANTYPE type, void* detail, void* userdata)
struct membuffer* out = (struct membuffer*) userdata;
switch(type) {
+ case MD_SPAN_A: open_a_span(out, (MD_SPAN_A_DETAIL*) detail); break;
case MD_SPAN_CODE: MEMBUF_APPEND_LITERAL(out, "<code>"); break;
}
@@ -297,6 +306,7 @@ leave_span_callback(MD_SPANTYPE type, void* detail, void* userdata)
struct membuffer* out = (struct membuffer*) userdata;
switch(type) {
+ case MD_SPAN_A: MEMBUF_APPEND_LITERAL(out, "</a>"); break;
case MD_SPAN_CODE: MEMBUF_APPEND_LITERAL(out, "</code>"); break;
}
diff --git a/md4c/md4c.c b/md4c/md4c.c
@@ -100,8 +100,8 @@ struct MD_CTX_tag {
MD_MARKCHAIN mark_chains[2];
/* For md_analyze_backtick(). */
#define BACKTICK_OPENERS ctx->mark_chains[0]
- /* For md_analyze_raw_html(). */
- #define RAW_HTML_OPENERS ctx->mark_chains[1]
+ /* For md_analyze_lt_gt(). */
+ #define LT_GT_OPENERS ctx->mark_chains[1]
/* For MD_BLOCK_QUOTE */
unsigned quote_level; /* Nesting level. */
@@ -684,6 +684,49 @@ md_is_html_any(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_
}
+/******************************************
+ *** Recognizing Some Complex Inlines ***
+ ******************************************/
+
+static int /* Returns 0 if [beg, end) has the form "<scheme:rest>" accepted below, -1 otherwise. */
+md_is_autolink(MD_CTX* ctx, OFF beg, OFF end)
+{
+ OFF off;
+
+ MD_ASSERT(CH(beg) == _T('<'));
+ MD_ASSERT(CH(end-1) == _T('>')); /* end is exclusive: the '>' sits at end-1. */
+
+ beg++; /* Step past the opening '<'. */
+ end--; /* Exclude the closing '>' from the checked range. */
+
+ /* Check for scheme: 2 to 32 characters, terminated by ':'. */
+ off = beg;
+ if(off >= end || !ISASCII(off)) /* NOTE(review): CommonMark wants an ASCII letter here; ISASCII looks broader -- confirm. */
+ return -1;
+ off++;
+ while(1) {
+ if(off >= end) /* Ran out of input before any ':' was found. */
+ return -1;
+ if(off - beg > 32) /* Scheme would be longer than 32 characters. */
+ return -1;
+ if(CH(off) == _T(':') && off - beg >= 2) /* ':' ends the scheme once it has at least 2 characters. */
+ break;
+ if(!ISALNUM(off) && CH(off) != _T('+') && CH(off) != _T('-') && CH(off) != _T('.'))
+ return -1;
+ off++;
+ }
+
+ /* Check the rest after the scheme; off still points at the ':' here. */
+ while(off < end) {
+ if(ISWHITESPACE(off) || ISCNTRL(off) || CH(off) == _T('<') || CH(off) == _T('>')) /* None of these may appear inside the link. */
+ return -1;
+ off++;
+ }
+
+ return 0;
+}
+
+
/******************************************************
*** Processing Sequence of Inlines (a.k.a Spans) ***
******************************************************/
@@ -877,7 +920,7 @@ md_rollback(MD_CTX* ctx, int opener_index, int closer_index)
switch(opener->ch) {
case '`': chain = &BACKTICK_OPENERS; break;
- case '<': chain = &RAW_HTML_OPENERS; break;
+ case '<': chain = &LT_GT_OPENERS; break;
default: MD_UNREACHABLE(); break;
}
@@ -1046,47 +1089,61 @@ md_analyze_backtick(MD_CTX* ctx, int mark_index)
}
static void
-md_analyze_raw_html(MD_CTX* ctx, int mark_index, const MD_LINE* lines, int n_lines)
+md_analyze_lt_gt(MD_CTX* ctx, int mark_index, const MD_LINE* lines, int n_lines)
{
MD_MARK* mark = &ctx->marks[mark_index];
int opener_index;
/* If it is an opener ('<'), remember it. */
if(mark->flags & MD_MARK_POTENTIAL_OPENER) {
- md_mark_chain_append(ctx, &RAW_HTML_OPENERS, mark_index);
+ md_mark_chain_append(ctx, &LT_GT_OPENERS, mark_index);
return;
}
/* Otherwise we are potential closer and we try to resolve with since all
* the chained unresolved openers. */
- opener_index = RAW_HTML_OPENERS.head;
+ opener_index = LT_GT_OPENERS.head;
while(opener_index >= 0) {
MD_MARK* opener = &ctx->marks[opener_index];
- int line_index = 0;
OFF detected_end;
+ int is_autolink = 0;
+ int is_raw_html = 0;
- /* Identify the line where the opening mark lives. */
- while(1) {
- if(opener->beg < lines[line_index].end)
- break;
- line_index++;
+ is_autolink = (md_is_autolink(ctx, opener->beg, mark->end) == 0);
+
+ if(!is_autolink) {
+ /* Identify the line where the opening mark lives. */
+ int line_index = 0;
+ while(1) {
+ if(opener->beg < lines[line_index].end)
+ break;
+ line_index++;
+ }
+
+ is_raw_html = (md_is_html_any(ctx, lines + line_index,
+ n_lines - line_index, opener->beg, mark->end, &detected_end) == 0);
}
/* Check whether the range forms a valid raw HTML. */
- if(md_is_html_any(ctx, lines + line_index, n_lines - line_index,
- opener->beg, mark->end, &detected_end) == 0)
- {
- /* If this fail, it means we have missed some earlier opportunity
+ if(is_autolink || is_raw_html) {
+ /* If this fails, it means we have missed some earlier opportunity
* to resolve the opener. */
MD_ASSERT(detected_end == mark->end);
md_rollback(ctx, opener_index, mark_index);
- md_resolve_range(ctx, &RAW_HTML_OPENERS, opener_index, mark_index);
+ md_resolve_range(ctx, &LT_GT_OPENERS, opener_index, mark_index);
- /* Make these marks zero width so the '<' and '>' are part of its
- * contents. */
- opener->end = opener->beg;
- mark->beg = mark->end;
+ if(is_raw_html) {
+ /* Make these marks zero width so the '<' and '>' are part of its
+ * contents. */
+ opener->end = opener->beg;
+ mark->beg = mark->end;
+ } else {
+ /* Hack: This is to distinguish the autolink from raw HTML in
+ * md_process_inlines(). */
+ opener->ch = 'A';
+ mark->ch = 'B';
+ }
/* And we are done. */
return;
@@ -1202,7 +1259,7 @@ md_analyze_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines, int precedence_
case '<':
case '>':
- md_analyze_raw_html(ctx, i, lines, n_lines);
+ md_analyze_lt_gt(ctx, i, lines, n_lines);
break;
case '&':
@@ -1251,6 +1308,9 @@ md_analyze_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
static int
md_process_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
{
+ union {
+ MD_SPAN_A_DETAIL a;
+ } det;
MD_TEXTTYPE text_type;
const MD_LINE* line = lines;
const MD_MARK* prev_mark = NULL;
@@ -1302,6 +1362,18 @@ md_process_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
}
break;
+ case 'A': /* Autolink. */
+ det.a.href = STR(mark->end);
+ det.a.href_size = ctx->marks[mark->next].beg - mark->end;
+ MD_ENTER_SPAN(MD_SPAN_A, (void*) &det);
+ break;
+ case 'B':
+ /* The detail already has to be initialized: There cannot
+ * be any resolved mark between the autolink opener and
+ * closer. */
+ MD_LEAVE_SPAN(MD_SPAN_A, (void*) &det);
+ break;
+
case '<': /* Raw HTML. */
text_type = MD_TEXT_HTML;
break;
diff --git a/md4c/md4c.h b/md4c/md4c.h
@@ -89,6 +89,11 @@ enum MD_BLOCKTYPE_tag {
* like paragraph or list item. */
typedef enum MD_SPANTYPE_tag MD_SPANTYPE;
enum MD_SPANTYPE_tag {
+ /* <a href="xxx">...</a>
+ * Detail: See structure MD_SPAN_A_DETAIL. */
+ MD_SPAN_A,
+
+ /* <code>...</code> */
MD_SPAN_CODE
};
@@ -131,6 +136,13 @@ enum MD_TEXTTYPE_tag {
};
+/* Detailed info for MD_SPAN_A, passed as the detail argument of the span
+ * callbacks when the span type is MD_SPAN_A. */
+typedef struct MD_SPAN_A_DETAIL_tag MD_SPAN_A_DETAIL;
+struct MD_SPAN_A_DETAIL_tag {
+ const MD_CHAR* href; /* Link target. Not zero-terminated, use href_size. */
+ MD_SIZE href_size; /* Length of href; presumably counted in MD_CHAR units -- TODO confirm. */
+};
+
/* Detailed info for MD_BLOCK_H. */
typedef struct MD_BLOCK_H_DETAIL_tag MD_BLOCK_H_DETAIL;
struct MD_BLOCK_H_DETAIL_tag {