md4c

C Markdown parser. Fast. SAX-like interface. Compliant with the CommonMark specification.
git clone https://noulin.net/git/md4c.git
Log | Files | Refs | README | LICENSE

commit 25a156ee1b21c7cb8d6cc82029c181fd15bccee8
parent d84dcec8f1e2f0b8d8b2086874637ace90dfb1e4
Author: Martin Mitas <mity@morous.org>
Date:   Wed, 12 Jul 2017 23:30:14 +0200

Implement strikethrough extension.

Diffstat:
M README.md | 3 +++
M md2html/md2html.c | 3 +++
M md2html/render_html.c | 2 ++
M md4c/md4c.c | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++++---------
M md4c/md4c.h | 10 ++++++++--
M scripts/run-tests.sh | 1 +
A test/strikethrough.txt | 35 +++++++++++++++++++++++++++++++++++
7 files changed, 104 insertions(+), 11 deletions(-)

diff --git a/README.md b/README.md @@ -99,6 +99,9 @@ some extensions or allowing some deviations from the specification. * With the flag `MD_FLAG_NOINDENTEDCODEBLOCKS`, indented code blocks are disabled. + * With the flag `MD_FLAG_STRIKETHROUGH`, strikethrough spans are enabled + (text enclosed in tilde marks, e.g. '~foo bar~'). + ## Input/Output Encoding diff --git a/md2html/md2html.c b/md2html/md2html.c @@ -206,6 +206,7 @@ static const option cmdline_options[] = { { "fno-html", 0, 'H', OPTION_ARG_NONE }, { "fcollapse-whitespace", 0, 'W', OPTION_ARG_NONE }, { "ftables", 0, 'T', OPTION_ARG_NONE }, + { "fstrikethrough", 0, 'S', OPTION_ARG_NONE }, { 0 } }; @@ -249,6 +250,7 @@ usage(void) " Disable raw HTML spans\n" " --fno-html Same as --fno-html-blocks --fno-html-spans\n" " --ftables Enable tables\n" + " --fstrikethrough Enable strikethrough spans\n" ); } @@ -294,6 +296,7 @@ cmdline_callback(int opt, char const* value, void* data) case '@': parser_flags |= MD_FLAG_PERMISSIVEEMAILAUTOLINKS; break; case 'V': parser_flags |= MD_FLAG_PERMISSIVEAUTOLINKS; break; case 'T': parser_flags |= MD_FLAG_TABLES; break; + case 'S': parser_flags |= MD_FLAG_STRIKETHROUGH; break; default: fprintf(stderr, "Illegal option: %s\n", value); diff --git a/md2html/render_html.c b/md2html/render_html.c @@ -410,6 +410,7 @@ enter_span_callback(MD_SPANTYPE type, void* detail, void* userdata) case MD_SPAN_A: render_open_a_span(r, (MD_SPAN_A_DETAIL*) detail); break; case MD_SPAN_IMG: render_open_img_span(r, (MD_SPAN_IMG_DETAIL*) detail); break; case MD_SPAN_CODE: RENDER_LITERAL(r, "<code>"); break; + case MD_SPAN_DEL: RENDER_LITERAL(r, "<del>"); break; } return 0; @@ -434,6 +435,7 @@ leave_span_callback(MD_SPANTYPE type, void* detail, void* userdata) case MD_SPAN_A: RENDER_LITERAL(r, "</a>"); break; case MD_SPAN_IMG: /*noop, handled above*/ break; case MD_SPAN_CODE: RENDER_LITERAL(r, "</code>"); break; + case MD_SPAN_DEL: RENDER_LITERAL(r, "</del>"); break; } return 0; diff --git a/md4c/md4c.c 
b/md4c/md4c.c @@ -121,14 +121,15 @@ struct MD_CTX_tag { #endif /* For resolving of inline spans. */ - MD_MARKCHAIN mark_chains[7]; + MD_MARKCHAIN mark_chains[8]; #define PTR_CHAIN ctx->mark_chains[0] #define BACKTICK_OPENERS ctx->mark_chains[1] #define LOWERTHEN_OPENERS ctx->mark_chains[2] #define ASTERISK_OPENERS ctx->mark_chains[3] #define UNDERSCORE_OPENERS ctx->mark_chains[4] -#define BRACKET_OPENERS ctx->mark_chains[5] -#define TABLECELLBOUNDARIES ctx->mark_chains[6] +#define TILDE_OPENERS ctx->mark_chains[5] +#define BRACKET_OPENERS ctx->mark_chains[6] +#define TABLECELLBOUNDARIES ctx->mark_chains[7] int n_table_cell_boundaries; @@ -2131,6 +2132,8 @@ md_free_link_ref_defs(MD_CTX* ctx) * '\\': Maybe escape sequence. * '\0': NULL char. * '*': Maybe (strong) emphasis start/end. + * '_': Maybe (strong) emphasis start/end. + * '~': Maybe strikethrough start/end (needs MD_FLAG_STRIKETHROUGH). * '`': Maybe code span start/end. * '&': Maybe start of entity. * ';': Maybe end of entity. @@ -2348,6 +2351,7 @@ md_rollback(MD_CTX* ctx, int opener_index, int closer_index, int how) case '_': chain = &UNDERSCORE_OPENERS; break; case '`': chain = &BACKTICK_OPENERS; break; case '<': chain = &LOWERTHEN_OPENERS; break; + case '~': chain = &TILDE_OPENERS; break; default: MD_UNREACHABLE(); break; } md_mark_chain_append(ctx, chain, mark_opener_index); @@ -2395,6 +2399,9 @@ md_build_mark_char_map(MD_CTX* ctx) ctx->mark_char_map[']'] = 1; ctx->mark_char_map['\0'] = 1; + if(ctx->r.flags & MD_FLAG_STRIKETHROUGH) + ctx->mark_char_map['~'] = 1; + if(ctx->r.flags & MD_FLAG_PERMISSIVEURLAUTOLINKS) ctx->mark_char_map[':'] = 1; @@ -2653,6 +2660,17 @@ md_collect_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines, int table_mode) continue; } + /* A potential strikethrough start/end. 
*/ + if(ch == _T('~')) { + OFF tmp = off+1; + + while(tmp < line_end && CH(tmp) == _T('~')) + tmp++; + + PUSH_MARK(ch, off, tmp, MD_MARK_POTENTIAL_OPENER | MD_MARK_POTENTIAL_CLOSER); + off = tmp; + } + /* Turn non-trivial whitespace into single space. */ if(ISWHITESPACE_(ch)) { OFF tmp = off+1; @@ -3233,6 +3251,25 @@ md_analyze_underscore(MD_CTX* ctx, int mark_index) } static void +md_analyze_tilde(MD_CTX* ctx, int mark_index) +{ + /* We attempt to be Github Flavored Markdown compatible here. GFM says + * that length of the tilde sequence is not important at all. Note that + * implies the TILDE_OPENERS chain can have at most one item. */ + + if(TILDE_OPENERS.head >= 0) { + /* The chain already contains an opener, so we may resolve the span. */ + int opener_index = TILDE_OPENERS.head; + + md_rollback(ctx, opener_index, mark_index, MD_ROLLBACK_CROSSING); + md_resolve_range(ctx, &TILDE_OPENERS, opener_index, mark_index); + } else { + /* We can only be opener. */ + md_mark_chain_append(ctx, &TILDE_OPENERS, mark_index); + } +} + +static void md_analyze_permissive_url_autolink(MD_CTX* ctx, int mark_index) { MD_MARK* opener = &ctx->marks[mark_index]; @@ -3386,6 +3423,7 @@ md_analyze_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF en case '|': md_analyze_table_cell_boundary(ctx, i); break; case '*': md_analyze_asterisk(ctx, i); break; case '_': md_analyze_underscore(ctx, i); break; + case '~': md_analyze_tilde(ctx, i); break; case ':': md_analyze_permissive_url_autolink(ctx, i); break; case '@': md_analyze_permissive_email_autolink(ctx, i); break; } @@ -3434,11 +3472,7 @@ md_analyze_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines, int table_mod md_analyze_marks(ctx, lines, n_lines, beg, end, _T("|")); } else { /* (3b) Emphasis and strong emphasis; permissive autolinks. 
*/ - md_analyze_marks(ctx, lines, n_lines, beg, end, _T("*_@:")); - ASTERISK_OPENERS.head = -1; - ASTERISK_OPENERS.tail = -1; - UNDERSCORE_OPENERS.head = -1; - UNDERSCORE_OPENERS.tail = -1; + md_analyze_link_contents(ctx, lines, n_lines, beg, end); } abort: @@ -3448,11 +3482,13 @@ abort: static void md_analyze_link_contents(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF end) { - md_analyze_marks(ctx, lines, n_lines, beg, end, _T("*_@:")); + md_analyze_marks(ctx, lines, n_lines, beg, end, _T("*_~@:")); ASTERISK_OPENERS.head = -1; ASTERISK_OPENERS.tail = -1; UNDERSCORE_OPENERS.head = -1; UNDERSCORE_OPENERS.tail = -1; + TILDE_OPENERS.head = -1; + TILDE_OPENERS.tail = -1; } static int @@ -3558,6 +3594,13 @@ md_process_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines) } break; + case '~': + if(mark->flags & MD_MARK_OPENER) + MD_ENTER_SPAN(MD_SPAN_DEL, NULL); + else + MD_LEAVE_SPAN(MD_SPAN_DEL, NULL); + break; + case '[': /* Link, image. */ case '!': case ']': diff --git a/md4c/md4c.h b/md4c/md4c.h @@ -126,7 +126,12 @@ typedef enum MD_SPANTYPE { MD_SPAN_IMG, /* <code>...</code> */ - MD_SPAN_CODE + MD_SPAN_CODE, + + /* <del>...</del> + * Note: Recognized only when MD_FLAG_STRIKETHROUGH is enabled. + */ + MD_SPAN_DEL } MD_SPANTYPE; /* Text is the actual textual contents of span. */ @@ -263,6 +268,7 @@ typedef struct MD_SPAN_IMG_DETAIL { #define MD_FLAG_NOHTMLSPANS 0x0040 /* Disable raw HTML (inline). */ #define MD_FLAG_NOHTML (MD_FLAG_NOHTMLBLOCKS | MD_FLAG_NOHTMLSPANS) #define MD_FLAG_TABLES 0x0100 /* Enable tables extension. */ +#define MD_FLAG_STRIKETHROUGH 0x0200 /* Enable strikethrough extension. */ /* Convenient sets of flags corresponding to well-known Markdown dialects. * Note we may only support subset of features of the referred dialect. @@ -270,7 +276,7 @@ typedef struct MD_SPAN_IMG_DETAIL { * possible given what features we implement. 
*/ #define MD_DIALECT_COMMONMARK 0 -#define MD_DIALECT_GITHUB (MD_FLAG_PERMISSIVEAUTOLINKS | MD_FLAG_TABLES) +#define MD_DIALECT_GITHUB (MD_FLAG_PERMISSIVEAUTOLINKS | MD_FLAG_TABLES | MD_FLAG_STRIKETHROUGH) /* Renderer structure. */ diff --git a/scripts/run-tests.sh b/scripts/run-tests.sh @@ -35,3 +35,4 @@ $PYTHON "$TEST_DIR/spec_tests.py" -s "$TEST_DIR/coverage.txt" -p "$PROGRAM" $PYTHON "$TEST_DIR/spec_tests.py" -s "$TEST_DIR/permissive-email-autolinks.txt" -p "$PROGRAM --fpermissive-email-autolinks" $PYTHON "$TEST_DIR/spec_tests.py" -s "$TEST_DIR/permissive-url-autolinks.txt" -p "$PROGRAM --fpermissive-url-autolinks" $PYTHON "$TEST_DIR/spec_tests.py" -s "$TEST_DIR/tables.txt" -p "$PROGRAM --ftables" +$PYTHON "$TEST_DIR/spec_tests.py" -s "$TEST_DIR/strikethrough.txt" -p "$PROGRAM --fstrikethrough" diff --git a/test/strikethrough.txt b/test/strikethrough.txt @@ -0,0 +1,34 @@ + +# Strikethrough + +With the flag `MD_FLAG_STRIKETHROUGH`, MD4C enables extension for recognition +of strike-through spans. + +Strikethrough text is any text wrapped in tildes (~). + +```````````````````````````````` example +~Hi~ Hello, world! +. +<p><del>Hi</del> Hello, world!</p> +```````````````````````````````` + +Any number of tildes may be used on either side of the text; they do not need +to match, and they cannot be nested. + +```````````````````````````````` example +This ~text~~~~ is ~~~~curious~. +. +<p>This <del>text</del> is <del>curious</del>.</p> +```````````````````````````````` + +As with regular emphasis delimiters, a new paragraph will cause the cessation +of parsing a strikethrough: + +```````````````````````````````` example +This ~~has a + +new paragraph~~. +. +<p>This ~~has a</p> +<p>new paragraph~~.</p> +````````````````````````````````+ \ No newline at end of file