md4c

C Markdown parser. Fast. SAX-like interface. Compliant to CommonMark specification.
git clone https://noulin.net/git/md4c.git
Log | Files | Refs | README | LICENSE

commit a3c721b23f7aed3f1788d28246d81a1c7c6cf07f
parent 978e04f9005ef0eb217571c6f58e88759356ca2a
Author: Martin Mitas <mity@morous.org>
Date:   Mon, 10 Oct 2016 20:01:34 +0200

Refactorize/improve analysis of inlines.

Diffstat:
Mmd4c/md4c.c | 471+++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------
1 file changed, 331 insertions(+), 140 deletions(-)

diff --git a/md4c/md4c.c b/md4c/md4c.c @@ -69,6 +69,17 @@ typedef MD_OFFSET OFF; typedef struct MD_MARK_tag MD_MARK; + +/* During analyzes of inline marks, we need to manage some "mark chains", + * of (yet unresolved) openers. This structure holds start/end of the chain. + * The chain internals are then realized through MD_MARK::prev and ::next. + */ +typedef struct MD_MARKCHAIN_tag MD_MARKCHAIN; +struct MD_MARKCHAIN_tag { + int head; /* Index of first mark in the chain, or -1 if empty. */ + int tail; /* Index of last mark in the chain, or -1 if empty. */ +}; + /* Context propagated through all the parsing. */ typedef struct MD_CTX_tag MD_CTX; struct MD_CTX_tag { @@ -86,6 +97,12 @@ struct MD_CTX_tag { unsigned n_marks; unsigned alloc_marks; + MD_MARKCHAIN mark_chains[2]; + /* For md_analyze_backtick(). */ + #define BACKTICK_OPENERS ctx->mark_chains[0] + /* For md_analyze_raw_html(). */ + #define RAW_HTML_OPENERS ctx->mark_chains[1] + /* For MD_BLOCK_QUOTE */ unsigned quote_level; /* Nesting level. */ @@ -156,7 +173,7 @@ md_log(MD_CTX* ctx, const char* fmt, ...) md_log(ctx, "%s:%d: Assertion '" #cond "' failed.", \ __FILE__, (int)__LINE__); \ ret = -2; \ - goto abort; \ + exit(1); \ } \ } while(0) #else @@ -309,7 +326,7 @@ md_str_case_eq(const CHAR* s1, const CHAR* s2, SZ n) * by n_lines == 0. */ static int -md_is_html_tag(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF* p_end) +md_is_html_tag(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end) { int attr_state; OFF off = beg; @@ -367,12 +384,11 @@ md_is_html_tag(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF* p_e off++; } else if(attr_state <= 2 && CH(off) == _T('>')) { /* End. */ - *p_end = off+1; - return 0; + goto done; } else if(attr_state <= 2 && CH(off) == _T('/') && off+1 < line_end && CH(off+1) == _T('>')) { /* End with digraph '/>' */ - *p_end = off+2; - return 0; + off++; + goto done; } else if((attr_state == 1 || attr_state == 2) && (ISALPHA(off) || CH(off) == _T('_') || CH(off) == _T(':'))) { off++; /* Attribute name */ @@ -403,24 +419,32 @@ md_is_html_tag(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF* p_e /* We have to be on a single line. See definition of start condition * of HTML block, type 7. */ if(n_lines == 0) - break; + return -1; i++; if(i >= n_lines) - break; + return -1; off = lines[i].beg; line_end = lines[i].end; if(attr_state == 0) attr_state = 1; + + if(off >= max_end) + return -1; } - return -1; +done: + if(off >= max_end) + return -1; + + *p_end = off+1; + return 0; } static int -md_is_html_comment(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF* p_end) +md_is_html_comment(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end) { OFF off = beg; int i = 0; @@ -442,8 +466,8 @@ md_is_html_comment(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF* if(CH(off) == _T('-') && CH(off+1) == _T('-')) { if(CH(off+2) == _T('>')) { /* Success. */ - *p_end = off + 3; - return 0; + off += 2; + goto done; } else { /* "--" is prohibited inside the comment. */ return -1; @@ -455,16 +479,24 @@ md_is_html_comment(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF* i++; if(i >= n_lines) - break; + return -1; off = lines[i].beg; + + if(off >= max_end) + return -1; } - return -1; +done: + if(off >= max_end) + return -1; + + *p_end = off+1; + return 0; } static int -md_is_html_processing_instruction(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF* p_end) +md_is_html_processing_instruction(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end) { OFF off = beg; int i = 0; @@ -479,8 +511,8 @@ md_is_html_processing_instruction(MD_CTX* ctx, const MD_LINE* lines, int n_lines while(off + 1 < lines[i].end) { if(CH(off) == _T('?') && CH(off+1) == _T('>')) { /* Success. */ - *p_end = off + 2; - return 0; + off++; + goto done; } off++; @@ -488,16 +520,23 @@ md_is_html_processing_instruction(MD_CTX* ctx, const MD_LINE* lines, int n_lines i++; if(i >= n_lines) - break; + return -1; off = lines[i].beg; + if(off >= max_end) + return -1; } - return -1; +done: + if(off >= max_end) + return -1; + + *p_end = off+1; + return 0; } static int -md_is_html_declaration(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF* p_end) +md_is_html_declaration(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end) { OFF off = beg; int i = 0; @@ -521,8 +560,8 @@ md_is_html_declaration(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, while(off < lines[i].end) { if(CH(off+1) == _T('>')) { /* Success. */ - *p_end = off + 2; - return 0; + off++; + goto done; } off++; @@ -530,16 +569,23 @@ md_is_html_declaration(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, i++; if(i >= n_lines) - break; + return -1; off = lines[i].beg; + if(off >= max_end) + return -1; } - return -1; +done: + if(off >= max_end) + return -1; + + *p_end = off+1; + return 0; } static int -md_is_html_cdata(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF* p_end) +md_is_html_cdata(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end) { static const CHAR open_str[9] = _T("<![CDATA["); @@ -556,8 +602,8 @@ md_is_html_cdata(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF* p while(off + 2 < lines[i].end) { if(CH(off) == _T(']') && CH(off+1) == _T(']') && CH(off+2) == _T('>')) { /* Success. */ - *p_end = off + 3; - return 0; + off += 2; + goto done; } off++; @@ -565,26 +611,33 @@ md_is_html_cdata(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF* p i++; if(i >= n_lines) - break; + return -1; off = lines[i].beg; + if(off >= max_end) + return -1; } - return -1; +done: + if(off >= max_end) + return -1; + + *p_end = off+1; + return 0; } static int -md_is_html_any(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF* p_end) +md_is_html_any(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end) { - if(md_is_html_tag(ctx, lines, n_lines, beg, p_end) == 0) + if(md_is_html_tag(ctx, lines, n_lines, beg, max_end, p_end) == 0) return 0; - if(md_is_html_comment(ctx, lines, n_lines, beg, p_end) == 0) + if(md_is_html_comment(ctx, lines, n_lines, beg, max_end, p_end) == 0) return 0; - if(md_is_html_processing_instruction(ctx, lines, n_lines, beg, p_end) == 0) + if(md_is_html_processing_instruction(ctx, lines, n_lines, beg, max_end, p_end) == 0) return 0; - if(md_is_html_declaration(ctx, lines, n_lines, beg, p_end) == 0) + if(md_is_html_declaration(ctx, lines, n_lines, beg, max_end, p_end) == 0) return 0; - if(md_is_html_cdata(ctx, lines, n_lines, beg, p_end) == 0) + if(md_is_html_cdata(ctx, lines, n_lines, beg, max_end, p_end) == 0) return 0; return -1; @@ -595,8 +648,40 @@ md_is_html_any(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF* p_e *** Processing Sequence of Inlines (a.k.a Spans) *** ******************************************************/ -/* Structure marking an offset which needs special attention. The type - * of the attention is determined by the member ch: +/* We process inlines in few phases: + * + * (1) We go through the block text and collect all significant characters + * which may start/end a span or some other significant position into + * ctx->marks[]. Core of this is what md_collect_marks() does. + * + * We also do some very brief preliminary context-less analysis, whether + * it might be opener or closer (e.g. of an emphasis span). + * + * This speeds the other steps as we do not need to re-iterate over all + * characters anymore. + * + * (2) We analyze each potential mark types, in order by their precedence. + * + * In each md_analyze_XXX() function, we re-iterate list of the marks, + * skipping already resolved regions (in preceding precedences) and try to + * resolve them. + * + * (2.1) For trivial marks, which are single (e.g. HTML entity), we just mark + * them as resolved. + * + * (2.2) For range-type marks, we analyze whether the mark could be closer + * and, if yes, whether there is some preceding opener it could satisfy. + * + * If not we check whether it could be really an opener and if yes, we + * remember it so subsequent closers may resolve it. + * + * (3) Finally, when all marks were analyzed, we render the block contents + * by calling MD_RENDERER::text() callback, interrupting by ::enter_span() + * or ::close_span() whenever we reach a resolved mark. + */ + + +/* The mark structure. * * '\\': Maybe escape sequence. * '`': Maybe code span start/end. @@ -610,26 +695,32 @@ md_is_html_any(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF* p_e * the special meaning. */ struct MD_MARK_tag { + /* For unresolved openers, these two form the chain of open openers of + * given type. + * + * During resolving, we disconnect from the chain and point to the + * corresponding counterpart so opener points to its closer and vice versa. + */ + int prev; + int next; + OFF beg; OFF end; - /* Index of another mark. Before resolving the member may be used for - * arbitrary purpose during the analyzes phase. - * For resolved openers, it has to point to the corresponding closer. */ - int next; - MD_CHAR ch; - unsigned short flags; + unsigned char flags; }; /* Mark flags. */ -#define MD_MARK_RESOLVED 0x0001 /* Yes, the special meaning is indeed recognized. */ -#define MD_MARK_OPENER 0x0002 /* This opens (or potentially may open) a span. */ -#define MD_MARK_CLOSER 0x0004 /* This closes (or potentially may close) a span. */ +#define MD_MARK_POTENTIAL_OPENER 0x01 /* Maybe opener. */ +#define MD_MARK_POTENTIAL_CLOSER 0x02 /* Maybe closer. */ +#define MD_MARK_RESOLVED 0x10 /* Yes, the special meaning is indeed recognized. */ +#define MD_MARK_OPENER 0x20 /* This opens (or potentially may open) a span. */ +#define MD_MARK_CLOSER 0x40 /* This closes (or potentially may close) a span. */ static MD_MARK* -md_push(MD_CTX* ctx) +md_push_mark(MD_CTX* ctx) { MD_MARK* mark; @@ -651,18 +742,20 @@ md_push(MD_CTX* ctx) return mark; } -#define PUSH_() \ +#define PUSH_MARK_() \ do { \ - mark = md_push(ctx); \ + mark = md_push_mark(ctx); \ if(mark == NULL) { \ ret = -1; \ goto abort; \ } \ } while(0) -#define PUSH(ch_, beg_, end_, flags_) \ +#define PUSH_MARK(ch_, beg_, end_, flags_) \ do { \ - PUSH_(); \ + PUSH_MARK_(); \ + mark->prev = -1; \ + mark->next = -1; \ mark->ch = (ch_); \ mark->beg = (beg_); \ mark->end = (end_); \ @@ -670,6 +763,91 @@ md_push(MD_CTX* ctx) } while(0) +static void +md_mark_chain_append(MD_CTX* ctx, MD_MARKCHAIN* chain, int mark_index) +{ + if(chain->tail >= 0) + ctx->marks[chain->tail].next = mark_index; + else + chain->head = mark_index; + + ctx->marks[mark_index].prev = chain->tail; + chain->tail = mark_index; +} + +static void +md_resolve_range(MD_CTX* ctx, MD_MARKCHAIN* chain, int opener_index, int closer_index) +{ + MD_MARK* opener = &ctx->marks[opener_index]; + MD_MARK* closer = &ctx->marks[closer_index]; + + /* Remove opener from the list of openers. */ + if(chain != NULL) { + if(opener->prev >= 0) + ctx->marks[opener->prev].next = opener->next; + else + chain->head = opener->next; + + if(opener->next >= 0) + ctx->marks[opener->next].prev = opener->prev; + else + chain->tail = opener->prev; + } + + /* Interconnect opener and closer and mark both as resolved. */ + opener->next = closer_index; + opener->flags |= MD_MARK_OPENER | MD_MARK_RESOLVED; + closer->prev = opener_index; + closer->flags |= MD_MARK_CLOSER | MD_MARK_RESOLVED; +} + +static void +md_rollback(MD_CTX* ctx, int opener_index, int closer_index) +{ + int i; + + /* Cut all unresolved openers at the mark index. */ + for(i = 0; i < SIZEOF_ARRAY(ctx->mark_chains); i++) { + MD_MARKCHAIN* chain = &ctx->mark_chains[i]; + + while(chain->tail >= opener_index) + chain->tail = ctx->marks[chain->tail].prev; + + if(chain->tail >= 0) + ctx->marks[chain->tail].next = -1; + else + chain->head = -1; + } + + /* Go backwards so that un-resolved openers are re-added into their + * respective chains in the right order. */ + for(i = closer_index - 1; i > opener_index; i--) { + /* If the close has an opening counter before the range, we have to + * un-resolve the opener and add it back to the list of unresolved + * openers. */ + if(ctx->marks[i].flags & MD_MARK_CLOSER) { + int index = ctx->marks[i].prev; + + if(index < opener_index) { + MD_MARK* opener = &ctx->marks[index]; + MD_MARKCHAIN* chain; + + switch(opener->ch) { + case _T('`'): chain = &BACKTICK_OPENERS; break; + case _T('<'): chain = &RAW_HTML_OPENERS; break; + default: MD_UNREACHABLE(); break; + } + + md_mark_chain_append(ctx, chain, index); + opener->flags &= ~(MD_MARK_OPENER | MD_MARK_CLOSER | MD_MARK_RESOLVED); + } + } + + /* Reset any "resolved" flags in the range. */ + ctx->marks[i].flags &= ~(MD_MARK_OPENER | MD_MARK_CLOSER | MD_MARK_RESOLVED); + } +} + static int md_collect_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines) { @@ -690,7 +868,7 @@ md_collect_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines) if(ch == _T('\\') && off+1 < ctx->size && (ISPUNCT(off+1) || ISNEWLINE(off+1))) { /* Hard-break cannot be on the last line of the block. */ if(!ISNEWLINE(off+1) || i+1 < n_lines) - PUSH(ch, off, off+2, MD_MARK_RESOLVED); + PUSH_MARK(ch, off, off+2, MD_MARK_RESOLVED); /* If '`' follows, we need both marks as the backslash may be * inside a code span. */ @@ -709,7 +887,7 @@ md_collect_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines) tmp++; if(tmp - end > 1 || ch != _T(' ')) { - PUSH(ch, off, tmp, MD_MARK_RESOLVED); + PUSH_MARK(ch, off, tmp, MD_MARK_RESOLVED); off = tmp; continue; } @@ -722,13 +900,13 @@ md_collect_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines) /* It may be opener only if it is not escaped. */ if(ctx->n_marks > 0 && ctx->marks[ctx->n_marks-1].beg == off-1 && CH(off-1) == _T('\\')) - flags = MD_MARK_CLOSER; + flags = MD_MARK_POTENTIAL_CLOSER; else - flags = MD_MARK_OPENER | MD_MARK_CLOSER; + flags = MD_MARK_POTENTIAL_OPENER | MD_MARK_POTENTIAL_CLOSER; while(tmp < end && CH(tmp) == _T('`')) tmp++; - PUSH(ch, off, tmp, flags); + PUSH_MARK(ch, off, tmp, flags); off = tmp; continue; @@ -736,7 +914,7 @@ md_collect_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines) /* A potential entity start. */ if(ch == _T('&')) { - PUSH(ch, off, off+1, MD_MARK_OPENER); + PUSH_MARK(ch, off, off+1, MD_MARK_POTENTIAL_OPENER); off++; continue; } @@ -745,7 +923,7 @@ md_collect_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines) if(ch == _T(';')) { /* We surely cannot be entity unless previous mark is '&'. */ if(ctx->n_marks > 0 && ctx->marks[ctx->n_marks-1].ch == _T('&')) { - PUSH(ch, off, off+1, MD_MARK_CLOSER); + PUSH_MARK(ch, off, off+1, MD_MARK_POTENTIAL_CLOSER); off++; continue; } @@ -754,7 +932,7 @@ md_collect_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines) /* A potential raw HTML start/end. */ if(ch == _T('<') || ch == _T('>')) { if(!(ctx->r.flags & MD_FLAG_NOHTMLSPANS)) { - PUSH(ch, off, off+1, (ch == _T('<') ? MD_MARK_OPENER : MD_MARK_CLOSER)); + PUSH_MARK(ch, off, off+1, (ch == _T('<') ? MD_MARK_POTENTIAL_OPENER : MD_MARK_POTENTIAL_CLOSER)); off++; continue; } @@ -766,7 +944,7 @@ md_collect_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines) /* Add a dummy mark at the end of processed block to simplify * md_process_inlines(). */ - PUSH_(); + PUSH_MARK_(); mark->beg = lines[n_lines-1].end + 1; mark->flags = MD_MARK_RESOLVED; @@ -775,38 +953,32 @@ abort: } -/* Analyze whether the backtick is really start/end mark of a code span. +/* Analyze whether the back-tick is really start/end mark of a code span. * If yes, reset all marks inside of it and setup flags of both marks. */ static void -md_analyze_backtick(MD_CTX* ctx, int mark_index, int* p_unresolved_openers) +md_analyze_backtick(MD_CTX* ctx, int mark_index) { MD_MARK* mark = &ctx->marks[mark_index]; - int opener = *p_unresolved_openers; + int opener_index = BACKTICK_OPENERS.tail; /* Try to find unresolved opener of the same length. If we find it, * we form a code span. */ - while(opener >= 0) { - MD_MARK* op = &ctx->marks[opener]; + while(opener_index >= 0) { + MD_MARK* opener = &ctx->marks[opener_index]; - if(op->end - op->beg == mark->end - mark->beg) { - /* Resolve the span. */ - op->flags = MD_MARK_OPENER | MD_MARK_RESOLVED; - mark->flags = MD_MARK_CLOSER | MD_MARK_RESOLVED; + if(opener->end - opener->beg == mark->end - mark->beg) { + /* Rollback anything found inside it. + * (e.g. the code span contains some back-ticks or other special + * chars we misinterpreted.) */ + md_rollback(ctx, opener_index, mark_index); - /* Shorten the list of unresolved openers. */ - *p_unresolved_openers = op->next; - - /* Make the opener point to us as its closer. */ - op->next = mark_index; - - /* Cancel any already resolved marks in the code span. */ - if(mark_index - opener > 1) - memset(ctx->marks + opener + 1, 0, sizeof(MD_MARK) * (mark_index - opener - 1)); + /* Resolve the span. */ + md_resolve_range(ctx, &BACKTICK_OPENERS, opener_index, mark_index); - /* Append any space or new line inside the span into the mark itself - * to swallow it. */ - while(CH(op->end) == _T(' ') || ISNEWLINE(op->end)) - op->end++; + /* Append any space or new line inside the span into the mark + * itself to swallow it. */ + while(CH(opener->end) == _T(' ') || ISNEWLINE(opener->end)) + opener->end++; while(CH(mark->beg-1) == _T(' ') || ISNEWLINE(mark->beg-1)) mark->beg--; @@ -814,58 +986,64 @@ md_analyze_backtick(MD_CTX* ctx, int mark_index, int* p_unresolved_openers) return; } - opener = ctx->marks[opener].next; + opener_index = ctx->marks[opener_index].prev; } - /* We didn't find any matching opener, remember it as a potential opener - * for subsequent '`' marks. */ - if(mark->flags & MD_MARK_OPENER) { - mark->next = *p_unresolved_openers; - *p_unresolved_openers = mark_index; - } + /* We didn't find any matching opener, so we ourselves may be the opener + * of some upcoming closer. */ + if(mark->flags & MD_MARK_POTENTIAL_OPENER) + md_mark_chain_append(ctx, &BACKTICK_OPENERS, mark_index); } static void md_analyze_raw_html(MD_CTX* ctx, int mark_index, const MD_LINE* lines, int n_lines) { - MD_MARK* opener = &ctx->marks[mark_index]; - MD_MARK* closer; - OFF end; - int i = 0; + MD_MARK* mark = &ctx->marks[mark_index]; + int opener_index; - /* Identify the line where the mark lives. */ - while(1) { - if(opener->beg < lines[i].end) - break; - i++; + /* If it is an opener ('<'), remember it. */ + if(mark->flags & MD_MARK_POTENTIAL_OPENER) { + md_mark_chain_append(ctx, &RAW_HTML_OPENERS, mark_index); + return; } - /* Return if we are not really raw HTML. */ - if(md_is_html_any(ctx, lines + i, n_lines - i, opener->beg, &end) < 0) - return; + /* Otherwise we are potential closer and we try to resolve with since all + * the chained unresolved openers. */ + opener_index = RAW_HTML_OPENERS.head; + while(opener_index >= 0) { + MD_MARK* opener = &ctx->marks[opener_index]; + int line_index = 0; + OFF detected_end; + + /* Identify the line where the opening mark lives. */ + while(1) { + if(opener->beg < lines[line_index].end) + break; + line_index++; + } + + /* Check whether the range forms a valid raw HTML. */ + if(md_is_html_any(ctx, lines + line_index, n_lines - line_index, + opener->beg, mark->end, &detected_end) == 0) + { + /* If this fail, it means we have missed some earlier opportunity + * to resolve the opener. */ + MD_ASSERT(detected_end == mark->end); + + md_rollback(ctx, opener_index, mark_index); + md_resolve_range(ctx, &RAW_HTML_OPENERS, opener_index, mark_index); + + /* Make these marks zero width so the '<' and '>' are part of its + * contents. */ + opener->end = opener->beg; + mark->beg = mark->end; - /* Cancel any already resolved marks in the range up to the closer. - * We have to find there the close '>' or something is severly broken. */ - mark_index++; - while(mark_index < ctx->n_marks && ctx->marks[mark_index].end < end) { - ctx->marks[mark_index].ch = _T('\0'); - ctx->marks[mark_index].flags = 0; - mark_index++; + /* And we are done. */ + return; + } + + opener_index = opener->next; } - closer = &ctx->marks[mark_index]; -/* - MD_ASSERT(closer->end == end); - MD_ASSERT(closer->ch == _T('>')); -*/ - - opener->flags |= MD_MARK_RESOLVED; - opener->next = mark_index; - closer->flags |= MD_MARK_RESOLVED; - - /* Make these marker zero width so the '<' and '>' are part of its - * contents. */ - opener->end = opener->beg; - closer->beg = closer->end; } /* Analyze whether the mark '&' starts a HTML entity. @@ -879,10 +1057,13 @@ md_analyze_entity(MD_CTX* ctx, int mark_index) /* Cannot be entity if there is no closer as the next mark. * (Any other mark between would mean strange character which cannot be - * part of the entity. */ + * part of the entity. + * + * So we can do all the work on '&' and do not call this later for the + * closing mark ';'. + */ if(mark_index + 1 >= ctx->n_marks) return; - closer = &ctx->marks[mark_index+1]; if(closer->ch != _T(';')) return; @@ -926,15 +1107,16 @@ md_analyze_entity(MD_CTX* ctx, int mark_index) } } - /* Mark it as an entity. We actually make a single mark from it, - * i.e. we expand the opener and leave the closer unresolved. */ + /* Mark us as an entity. + * As entity has no span, we may just turn the range into a single mark. + * (This also causes we do not get called for ';'. */ opener->end = closer->end; - opener->flags |= MD_MARK_RESOLVED; + opener->flags |= MD_MARK_OPENER | MD_MARK_RESOLVED; } /* Table of precedence of various span types. */ static const CHAR* md_precedence_table[] = { - _T("`<"), /* Code spans and raw HTML have the same precedence. */ + _T("`<>"), /* Code spans; raw HTML. */ _T("&") /* Entities. */ }; @@ -942,8 +1124,6 @@ static void md_analyze_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines, int precedence_level) { const CHAR* mark_chars = md_precedence_table[precedence_level]; - /* Chain of potential/unresolved code span openers. */ - int code_span_unresolved_openers = -1; int i = 0; while(i < ctx->n_marks) { @@ -967,10 +1147,11 @@ md_analyze_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines, int precedence_ /* Analyze the mark. */ switch(mark->ch) { case _T('`'): - md_analyze_backtick(ctx, i, &code_span_unresolved_openers); + md_analyze_backtick(ctx, i); break; case _T('<'): + case _T('>'): md_analyze_raw_html(ctx, i, lines, n_lines); break; @@ -984,13 +1165,29 @@ md_analyze_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines, int precedence_ } /* Analyze marks (build ctx->marks). */ -static void +static int md_analyze_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines) { int i; + /* Reset the previously collected stack of marks. */ + ctx->n_marks = 0; + + /* Reset all unresolved opener mark chains. */ + for(i = 0; i < SIZEOF_ARRAY(ctx->mark_chains); i++) { + ctx->mark_chains[i].head = -1; + ctx->mark_chains[i].tail = -1; + } + + /* Collect all marks. */ + if(md_collect_marks(ctx, lines, n_lines) != 0) + return -1; + + /* Analyze all marks. */ for(i = 0; i < SIZEOF_ARRAY(md_precedence_table); i++) md_analyze_marks(ctx, lines, n_lines, i); + + return 0; } /* Render the output, accordingly to the analyzed ctx->marks. */ @@ -1120,13 +1317,7 @@ md_process_normal_block(MD_CTX* ctx, const MD_LINE* lines, int n_lines) { int ret; - /* Reset the previously collected stack of marks. */ - ctx->n_marks = 0; - - MD_CHECK(md_collect_marks(ctx, lines, n_lines)); - - md_analyze_inlines(ctx, lines, n_lines); - + MD_CHECK(md_analyze_inlines(ctx, lines, n_lines)); MD_CHECK(md_process_inlines(ctx, lines, n_lines)); abort: @@ -1440,7 +1631,7 @@ md_is_html_block_start_condition(MD_CTX* ctx, OFF beg) if(off + 1 < ctx->size) { OFF end; - if(md_is_html_tag(ctx, NULL, 0, beg, &end) == 0) { + if(md_is_html_tag(ctx, NULL, 0, beg, ctx->size, &end) == 0) { /* Only optional whitespace and new line may follow. */ while(end < ctx->size && ISWHITESPACE(end)) end++;