md4c

C Markdown parser. Fast. SAX-like interface. Compliant with the CommonMark specification.
git clone https://noulin.net/git/md4c.git
Log | Files | Refs | README | LICENSE

commit 0225f337b416aee0c295e02a0431bd86c5dbcc63
parent d5535bd57a703e26c397f3efc4747a079954c87d
Author: Martin Mitas <mity@morous.org>
Date:   Tue,  4 Oct 2016 00:55:32 +0200

Implement ATX headers.

Diffstat:
MREADME.md | 2+-
Mmd2html/md2html.c | 13++++++++++++-
Mmd4c/md4c.c | 61+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
Mmd4c/md4c.h | 20++++++++++++++++++--
4 files changed, 90 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md @@ -82,7 +82,7 @@ more or less forms our to do list. - **Leaf Blocks:** - [x] 4.1 Thematic breaks - - [ ] 4.2 ATX headings + - [x] 4.2 ATX headings - [ ] 4.3 Setext headings - [ ] 4.4 Indented code blocks - [ ] 4.5 Fenced code blocks diff --git a/md2html/md2html.c b/md2html/md2html.c @@ -128,11 +128,13 @@ membuf_append_escaped(struct membuffer* buf, const char* data, MD_SIZE size) static int enter_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata) { + static const char* head[6] = { "<h1>", "<h2>", "<h3>", "<h4>", "<h5>", "<h6>" }; struct membuffer* out = (struct membuffer*) userdata; switch(type) { case MD_BLOCK_DOC: /* noop */ break; case MD_BLOCK_HR: MEMBUF_APPEND_LITERAL(out, "<hr>\n"); break; + case MD_BLOCK_H: MEMBUF_APPEND_LITERAL(out, head[((MD_BLOCK_H_DETAIL*)detail)->level - 1]); break; case MD_BLOCK_P: MEMBUF_APPEND_LITERAL(out, "<p>"); break; } @@ -142,11 +144,13 @@ enter_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata) static int leave_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata) { + static const char* head[6] = { "</h1>\n", "</h2>\n", "</h3>\n", "</h4>\n", "</h5>\n", "</h6>\n" }; struct membuffer* out = (struct membuffer*) userdata; switch(type) { case MD_BLOCK_DOC: /*noop*/ break; case MD_BLOCK_HR: /*noop*/ break; + case MD_BLOCK_H: MEMBUF_APPEND_LITERAL(out, head[((MD_BLOCK_H_DETAIL*)detail)->level - 1]); break; case MD_BLOCK_P: MEMBUF_APPEND_LITERAL(out, "</p>\n"); break; } @@ -281,6 +285,7 @@ static const option cmdline_options[] = { { "full-html", 'f', 'f', OPTION_ARG_NONE }, { "stat", 's', 's', OPTION_ARG_NONE }, { "help", 'h', 'h', OPTION_ARG_NONE }, + { "fpermissive-atx-headers", 0, 'A', OPTION_ARG_NONE }, { 0 } }; @@ -296,11 +301,15 @@ usage(void) " -f, --full-html generate full HTML document, including header\n" " -s, --stat measure time of input parsing\n" " -h, --help display this help and exit\n" + "\n" + "Markdown dialect options:\n" + " 
--fpermissive-atx-headers allow ATX headers without delimiting space\n" ); } static const char* input_path = NULL; static const char* output_path = NULL; +static unsigned renderer_flags = 0; static int want_fullhtml = 0; static int want_stat = 0; @@ -322,6 +331,8 @@ cmdline_callback(int opt, char const* value, void* data) case 's': want_stat = 1; break; case 'h': usage(); exit(0); break; + case 'A': renderer_flags |= MD_FLAG_PERMISSIVEATXHEADERS; break; + default: fprintf(stderr, "Illegal option: %s\n", value); fprintf(stderr, "Use --help for more info.\n"); @@ -359,7 +370,7 @@ main(int argc, char** argv) } } - ret = process_file(in, out, 0, want_fullhtml, want_stat); + ret = process_file(in, out, renderer_flags, want_fullhtml, want_stat); if(in != stdin) fclose(in); if(out != stdout) diff --git a/md4c/md4c.c b/md4c/md4c.c @@ -75,12 +75,16 @@ struct MD_CTX_tag { SZ size; MD_RENDERER r; void* userdata; + + /* For MD_BLOCK_HEADER. */ + unsigned header_level; }; typedef enum MD_LINETYPE_tag MD_LINETYPE; enum MD_LINETYPE_tag { MD_LINE_BLANK, MD_LINE_HR, + MD_LINE_ATXHEADER, MD_LINE_TEXT }; @@ -277,6 +281,29 @@ md_is_hr_line(MD_CTX* ctx, OFF beg, OFF* p_end) return 0; } +static int +md_is_atxheader_line(MD_CTX* ctx, OFF beg, OFF* p_beg, OFF* p_end) +{ + int n; + OFF off = beg + 1; + + while(off < ctx->size && CH(off) == _T('#') && off - beg < 7) + off++; + n = off - beg; + + if(n > 6) + return -1; + ctx->header_level = n; + + if(!(ctx->r.flags & MD_FLAG_PERMISSIVEATXHEADERS) && off < ctx->size && CH(off) != _T(' ')) + return -1; + + while(off < ctx->size && CH(off) == _T(' ')) + off++; + *p_beg = off; + return 0; +} + /* Analyze type of the line and find some its properties. This serves as a * main input for determining type and boundaries of a block. */ static void @@ -300,6 +327,14 @@ md_analyze_line(MD_CTX* ctx, OFF beg, OFF* p_end, const MD_LINE* pivot_line, MD_ goto done; } + /* Check whether we are ATX header. 
*/ + if(CH(off) == _T('#')) { + if(md_is_atxheader_line(ctx, off, &line->beg, &off) == 0) { + line->type = MD_LINE_ATXHEADER; + goto done; + } + } + /* Check whether we are thematic break line. */ if(ISANYOF(off, _T("-_*"))) { if(md_is_hr_line(ctx, off, &off) == 0) { @@ -319,6 +354,19 @@ done: /* Set end of the line. */ line->end = off; + /* But for ATX header, we should not include the optional tailing mark. */ + if(line->type == MD_LINE_ATXHEADER) { + OFF tmp = line->end; + while(tmp > line->beg && CH(tmp-1) == _T(' ')) + tmp--; + while(tmp > line->beg && CH(tmp-1) == _T('#')) + tmp--; + while(tmp > line->beg && CH(tmp-1) == _T(' ')) + tmp--; + if(CH(tmp) == _T(' ') || (ctx->r.flags & MD_FLAG_PERMISSIVEATXHEADERS)) + line->end = tmp; + } + /* Eat also the new line. */ if(off < ctx->size && CH(off) == _T('\r')) off++; @@ -336,6 +384,9 @@ static int md_process_block(MD_CTX* ctx, const MD_LINE* lines, int n_lines) { MD_BLOCKTYPE block_type; + union { + MD_BLOCK_H_DETAIL header; + } det; int ret = 0; if(n_lines == 0) @@ -345,18 +396,24 @@ md_process_block(MD_CTX* ctx, const MD_LINE* lines, int n_lines) switch(lines[0].type) { case MD_LINE_BLANK: return 0; case MD_LINE_HR: block_type = MD_BLOCK_HR; break; + + case MD_LINE_ATXHEADER: + block_type = MD_BLOCK_H; + det.header.level = ctx->header_level; + break; + case MD_LINE_TEXT: block_type = MD_BLOCK_P; break; } /* Process the block accordingly to is type. */ - MD_ENTER_BLOCK(block_type, NULL); + MD_ENTER_BLOCK(block_type, (void*) &det); switch(block_type) { case MD_BLOCK_HR: /* Noop. */ break; default: ret = md_process_normal_block(ctx, lines, n_lines); break; } if(ret != 0) goto abort; - MD_LEAVE_BLOCK(block_type, NULL); + MD_LEAVE_BLOCK(block_type, (void*) &det); abort: return ret; diff --git a/md4c/md4c.h b/md4c/md4c.h @@ -64,11 +64,14 @@ enum MD_BLOCKTYPE_tag { /* <hr> */ MD_BLOCK_HR, + /* <h1>...</h1> (for levels up to 6) + * Detail: See structure MD_BLOCK_H_DETAIL. 
*/ + MD_BLOCK_H, + /* <p>...</p> */ MD_BLOCK_P }; - /* Span represents an in-line piece of a document which should be rendered with * the same font, color and other attributes. A sequence of spans forms a block * like paragraph or list item. */ @@ -77,7 +80,6 @@ enum MD_SPANTYPE_tag { MD_SPAN_DUMMY = 0 /* not yet used... */ }; - /* Text is the actual textual contents of span. */ typedef enum MD_TEXTTYPE_tag MD_TEXTTYPE; enum MD_TEXTTYPE_tag { @@ -86,6 +88,20 @@ enum MD_TEXTTYPE_tag { }; +/* Detailed info for MD_BLOCK_H. */ +typedef struct MD_BLOCK_H_DETAIL_tag MD_BLOCK_H_DETAIL; +struct MD_BLOCK_H_DETAIL_tag { + unsigned level; /* Header level (1 - 6) */ +}; + + +/* Flags specifying Markdown dialect. + * + * By default (when MD_RENDERER::flags == 0), we follow CommMark specification. + * The following flags may allow some extensions or deviations from it. + */ +#define MD_FLAG_PERMISSIVEATXHEADERS 0x0001 /* Do not require space in ATX headers ( ###header ) */ + /* Caller-provided callbacks. * * For some block/span types, more detailed information is provided in a