md4c

C Markdown parser. Fast. SAX-like interface. Compliant with the CommonMark specification.
git clone https://noulin.net/git/md4c.git
Log | Files | Refs | README | LICENSE

commit 63a92c08628c89346be521b5de8cc0685cfd4b2f
parent 93edb71a6c3578fc1161eb7d85dd80dc73f674cb
Author: Martin Mitas <mity@morous.org>
Date:   Tue,  4 Oct 2016 00:18:08 +0200

Implement thematic breaks (<hr>).

Diffstat:
M README.md | 2 +-
M md2html/md2html.c | 2 ++
M md4c/md4c.c | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-------
M md4c/md4c.h | 3 +++
4 files changed, 60 insertions(+), 8 deletions(-)

diff --git a/README.md b/README.md @@ -81,7 +81,7 @@ more or less forms our to do list. - [ ] 3.2 Container blocks and leaf blocks - **Leaf Blocks:** - - [ ] 4.1 Thematic breaks + - [x] 4.1 Thematic breaks - [ ] 4.2 ATX headings - [ ] 4.3 Setext headings - [ ] 4.4 Indented code blocks diff --git a/md2html/md2html.c b/md2html/md2html.c @@ -132,6 +132,7 @@ enter_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata) switch(type) { case MD_BLOCK_DOC: /* noop */ break; + case MD_BLOCK_HR: MEMBUF_APPEND_LITERAL(out, "<hr>\n"); break; case MD_BLOCK_P: MEMBUF_APPEND_LITERAL(out, "<p>"); break; } @@ -145,6 +146,7 @@ leave_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata) switch(type) { case MD_BLOCK_DOC: /*noop*/ break; + case MD_BLOCK_HR: /*noop*/ break; case MD_BLOCK_P: MEMBUF_APPEND_LITERAL(out, "</p>\n"); break; } diff --git a/md4c/md4c.c b/md4c/md4c.c @@ -72,6 +72,7 @@ struct MD_CTX_tag { typedef enum MD_LINETYPE_tag MD_LINETYPE; enum MD_LINETYPE_tag { MD_LINE_BLANK, + MD_LINE_HR, MD_LINE_TEXT }; @@ -148,6 +149,7 @@ md_log(MD_CTX* ctx, const char* fmt, ...) #define ISDIGIT_(ch) (_T('0') <= (ch) && (ch) <= _T('9')) #define ISXDIGIT_(ch) (ISDIGIT_(ch) || (_T('a') < (ch) && (ch) <= _T('f') || (_T('A') < (ch) && (ch) <= _T('F')) #define ISALNUM_(ch) (ISALPHA_(ch) || ISDIGIT_(ch)) +#define ISANYOF_(ch, palette) (md_strchr((palette), (ch)) != NULL) #define ISASCII(off) ISASCII_(CH(off)) #define ISBLANK(off) ISBLANK_(CH(off)) @@ -161,6 +163,19 @@ md_log(MD_CTX* ctx, const char* fmt, ...) 
#define ISDIGIT(off) ISDIGIT_(CH(off)) #define ISXDIGIT(off) ISXDIGIT_(CH(off)) #define ISALNUM(off) ISALNUM_(CH(off)) +#define ISANYOF(off, palette) ISANYOF_(CH(off), (palette)) + + +static inline const CHAR* +md_strchr(const CHAR* str, CHAR ch) +{ + OFF i; + for(i = 0; str[i] != _T('\0'); i++) { + if(ch == str[i]) + return (str + i); + } + return NULL; +} #define MD_ENTER_BLOCK(type, arg) \ @@ -235,6 +250,25 @@ abort: *** Breaking Document into Blocks *** ***************************************/ +static int +md_is_hr_line(MD_CTX* ctx, OFF beg, OFF* p_end) +{ + OFF off = beg + 1; + int n = 1; + + while(off < ctx->size && (CH(off) == CH(beg) || CH(off) == _T(' '))) { + if(CH(off) == CH(beg)) + n++; + off++; + } + + if(n < 3) + return -1; + + *p_end = off; + return 0; +} + /* Analyze type of the line and find some its properties. This serves as a * main input for determining type and boundaries of a block. */ static void @@ -258,6 +292,14 @@ md_analyze_line(MD_CTX* ctx, OFF beg, OFF* p_end, const MD_LINE* pivot_line, MD_ goto done; } + /* Check whether we are thematic break line. */ + if(ISANYOF(off, _T("-_*"))) { + if(md_is_hr_line(ctx, off, &off) == 0) { + line->type = MD_LINE_HR; + goto done; + } + } + /* By default, we are normal text line. */ line->type = MD_LINE_TEXT; @@ -293,17 +335,17 @@ md_process_block(MD_CTX* ctx, const MD_LINE* lines, int n_lines) /* Derive block type from type of the first line. */ switch(lines[0].type) { - case MD_LINE_BLANK: - return 0; - - case MD_LINE_TEXT: - block_type = MD_BLOCK_P; - break; + case MD_LINE_BLANK: return 0; + case MD_LINE_HR: block_type = MD_BLOCK_HR; break; + case MD_LINE_TEXT: block_type = MD_BLOCK_P; break; } /* Process the block accordingly to is type. */ MD_ENTER_BLOCK(block_type, NULL); - ret = md_process_normal_block(ctx, lines, n_lines); + switch(block_type) { + case MD_BLOCK_HR: /* Noop. 
*/ break; + default: ret = md_process_normal_block(ctx, lines, n_lines); break; + } if(ret != 0) goto abort; MD_LEAVE_BLOCK(block_type, NULL); @@ -350,6 +392,10 @@ md_process_doc(MD_CTX *ctx) /* The same block continues as long lines are of the same type. */ if(line->type == pivot_line->type) { + /* But not so thematic break. */ + if(line->type == MD_LINE_HR) + goto force_block_end; + /* Do not grow the 'lines' because of blank lines. Semantically * one blank line is equivalent to many. */ if(line->type != MD_LINE_BLANK) @@ -358,6 +404,7 @@ md_process_doc(MD_CTX *ctx) continue; } +force_block_end: /* Otherwise the old block is complete and we have to process it. */ ret = md_process_block(ctx, lines, n_lines); if(ret != 0) diff --git a/md4c/md4c.h b/md4c/md4c.h @@ -61,6 +61,9 @@ enum MD_BLOCKTYPE_tag { /* <body>...</body> */ MD_BLOCK_DOC = 0, + /* <hr> */ + MD_BLOCK_HR, + /* <p>...</p> */ MD_BLOCK_P };