md4c

C Markdown parser. Fast. SAX-like interface. Compliant with the CommonMark specification.
git clone https://noulin.net/git/md4c.git
Log | Files | Refs | README | LICENSE

commit 43bd28445b639afbd91e0155ffb5b5072c88e181
parent 2589694f156acfd2d1c00a44db056af3e07442e8
Author: Martin Mitas <mity@morous.org>
Date:   Tue,  4 Oct 2016 03:26:56 +0200

Implemented indented code blocks.

Diffstat:
M README.md | 4 ++--
M md2html/md2html.c | 5 +++++
M md4c/md4c.c | 100 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------
M md4c/md4c.h | 12 +++++++++++-
4 files changed, 108 insertions(+), 13 deletions(-)

diff --git a/README.md b/README.md @@ -84,7 +84,7 @@ more or less forms our to do list. - [x] 4.1 Thematic breaks - [x] 4.2 ATX headings - [x] 4.3 Setext headings - - [ ] 4.4 Indented code blocks + - [x] 4.4 Indented code blocks - [ ] 4.5 Fenced code blocks - [ ] 4.6 HTML blocks - [ ] 4.7 Link reference definitions @@ -141,7 +141,7 @@ consideration. - **Miscellaneous:** - [x] Permissive ATX headers: `###Header` (without space) - [ ] Permissive autolinks: `http://google.com` (without `<`...`>`) - - [ ] Disabling indented code blocks + - [x] Disabling indented code blocks - [ ] Disabling raw HTML blocks/spans diff --git a/md2html/md2html.c b/md2html/md2html.c @@ -135,6 +135,7 @@ enter_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata) case MD_BLOCK_DOC: /* noop */ break; case MD_BLOCK_HR: MEMBUF_APPEND_LITERAL(out, "<hr>\n"); break; case MD_BLOCK_H: MEMBUF_APPEND_LITERAL(out, head[((MD_BLOCK_H_DETAIL*)detail)->level - 1]); break; + case MD_BLOCK_CODE: MEMBUF_APPEND_LITERAL(out, "<pre><code>"); break; case MD_BLOCK_P: MEMBUF_APPEND_LITERAL(out, "<p>"); break; } @@ -151,6 +152,7 @@ leave_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata) case MD_BLOCK_DOC: /*noop*/ break; case MD_BLOCK_HR: /*noop*/ break; case MD_BLOCK_H: MEMBUF_APPEND_LITERAL(out, head[((MD_BLOCK_H_DETAIL*)detail)->level - 1]); break; + case MD_BLOCK_CODE: MEMBUF_APPEND_LITERAL(out, "</code></pre>\n"); break; case MD_BLOCK_P: MEMBUF_APPEND_LITERAL(out, "</p>\n"); break; } @@ -286,6 +288,7 @@ static const option cmdline_options[] = { { "stat", 's', 's', OPTION_ARG_NONE }, { "help", 'h', 'h', OPTION_ARG_NONE }, { "fpermissive-atx-headers", 0, 'A', OPTION_ARG_NONE }, + { "fno-indented-code", 0, 'I', OPTION_ARG_NONE }, { 0 } }; @@ -304,6 +307,7 @@ usage(void) "\n" "Markdown dialect options:\n" " --fpermissive-atx-headers allow ATX headers without delimiting space\n" + " --fno-indented-code disabled indented code blocks\n" ); } @@ -332,6 +336,7 @@ cmdline_callback(int opt, char 
const* value, void* data) case 'h': usage(); exit(0); break; case 'A': renderer_flags |= MD_FLAG_PERMISSIVEATXHEADERS; break; + case 'I': renderer_flags |= MD_FLAG_NOINDENTEDCODE; break; default: fprintf(stderr, "Illegal option: %s\n", value); diff --git a/md4c/md4c.c b/md4c/md4c.c @@ -76,6 +76,9 @@ struct MD_CTX_tag { MD_RENDERER r; void* userdata; + /* Minimal indentation to call the block "indented code". */ + unsigned code_indent_offset; + /* For MD_BLOCK_HEADER. */ unsigned header_level; }; @@ -87,6 +90,7 @@ enum MD_LINETYPE_tag { MD_LINE_ATXHEADER, MD_LINE_SETEXTHEADER, MD_LINE_SETEXTUNDERLINE, + MD_LINE_INDENTEDCODE, MD_LINE_TEXT }; @@ -95,6 +99,7 @@ struct MD_LINE_tag { MD_LINETYPE type; OFF beg; OFF end; + unsigned indent; /* Indentation level. */ }; @@ -259,6 +264,36 @@ abort: return ret; } +static int +md_process_verbatim_block(MD_CTX* ctx, const MD_LINE* lines, int n_lines) +{ + static const CHAR indent_str[16] = _T(" "); + int i; + int ret = 0; + + for(i = 0; i < n_lines; i++) { + const MD_LINE* line = &lines[i]; + int indent = line->indent; + + /* Output code indentation. */ + while(indent > SIZEOF_ARRAY(indent_str)) { + MD_TEXT(MD_TEXT_CODEBLOCK, indent_str, SIZEOF_ARRAY(indent_str)); + indent -= SIZEOF_ARRAY(indent_str); + } + if(indent > 0) + MD_TEXT(MD_TEXT_CODEBLOCK, indent_str, indent); + + /* Output the code line itself. */ + MD_TEXT(MD_TEXT_CODEBLOCK, STR(line->beg), line->end - line->beg); + + /* Enforce end-of-line. */ + MD_TEXT(MD_TEXT_CODEBLOCK, _T("\n"), 1); + } + +abort: + return ret; +} + /*************************************** *** Breaking Document into Blocks *** @@ -337,18 +372,51 @@ md_analyze_line(MD_CTX* ctx, OFF beg, OFF* p_end, const MD_LINE* pivot_line, MD_ OFF off = beg; line->type = MD_LINE_BLANK; + line->indent = 0; /* Eat indentation. 
*/ while(off < ctx->size && ISBLANK(off)) { + if(CH(off) == _T('\t')) + line->indent = (line->indent + 4) & ~3; + else + line->indent++; off++; } line->beg = off; - /* Check whether we are blank line. Note we fall here even if we are beyond - * the document end. */ + /* Check whether we are blank line. + * Note blank lines after indented code are treated as part of that block. + * If they are at the end of the block, it is discarded by caller. + */ if(off >= ctx->size || ISNEWLINE(off)) { - line->type = MD_LINE_BLANK; + line->indent = 0; + if(pivot_line->type == MD_LINE_INDENTEDCODE) + line->type = MD_LINE_INDENTEDCODE; + else + line->type = MD_LINE_BLANK; + goto done; + } + + /* Check whether we are indented code line. + * Note indented code block cannot interrupt paragraph. + * Keep this is as the first check after the blank line: The checks below + * then do not need to verify that indentation < 4. */ + if((pivot_line->type == MD_LINE_BLANK || pivot_line->type == MD_LINE_INDENTEDCODE) + && line->indent >= ctx->code_indent_offset) { + line->type = MD_LINE_INDENTEDCODE; + line->indent -= ctx->code_indent_offset; + goto done; + } + + /* Check whether we are indented code line. + * Note indented code block cannot interrupt paragraph. + * Keep this is as the first check after the blank line: The checks below + * then do not need to verify that indentation < 4. */ + if((pivot_line->type == MD_LINE_BLANK || pivot_line->type == MD_LINE_INDENTEDCODE) + && line->indent >= ctx->code_indent_offset) { + line->type = MD_LINE_INDENTEDCODE; + line->indent -= ctx->code_indent_offset; goto done; } @@ -394,17 +462,18 @@ done: tmp--; while(tmp > line->beg && CH(tmp-1) == _T('#')) tmp--; - if(tmp == line->beg || CH(tmp-1) == _T(' ') || (ctx->r.flags & MD_FLAG_PERMISSIVEATXHEADERS)) { - while(tmp > line->beg && CH(tmp-1) == _T(' ')) - tmp--; + if(tmp == line->beg || CH(tmp-1) == _T(' ') || (ctx->r.flags & MD_FLAG_PERMISSIVEATXHEADERS)) line->end = tmp; - } } + /* Trim tailing spaces. 
*/ + while(line->end > line->beg && CH(line->end-1) == _T(' ')) + line->end--; + /* Eat also the new line. */ - if(off < ctx->size && CH(off) == _T('\r')) + if(off < ctx->size && CH(off) == _T('\r')) off++; - if(off < ctx->size && CH(off) == _T('\n')) + if(off < ctx->size && CH(off) == _T('\n')) off++; *p_end = off; @@ -441,6 +510,10 @@ md_process_block(MD_CTX* ctx, const MD_LINE* lines, int n_lines) det.header.level = ctx->header_level; break; + case MD_LINE_INDENTEDCODE: + block_type = MD_BLOCK_CODE; + break; + case MD_LINE_TEXT: block_type = MD_BLOCK_P; break; @@ -459,6 +532,10 @@ md_process_block(MD_CTX* ctx, const MD_LINE* lines, int n_lines) /* Noop. */ break; + case MD_BLOCK_CODE: + ret = md_process_verbatim_block(ctx, lines, n_lines); + break; + default: ret = md_process_normal_block(ctx, lines, n_lines); break; @@ -585,6 +662,9 @@ md_parse(const MD_CHAR* text, MD_SIZE size, const MD_RENDERER* renderer, void* u memcpy(&ctx.r, renderer, sizeof(MD_RENDERER)); ctx.userdata = userdata; - /* Doo all the hard work. */ + /* Offset for indented code block. */ + ctx.code_indent_offset = (ctx.r.flags & MD_FLAG_NOINDENTEDCODE) ? (OFF)(-1) : 4; + + /* Do all the hard work. */ return md_process_doc(&ctx); } diff --git a/md4c/md4c.h b/md4c/md4c.h @@ -68,6 +68,10 @@ enum MD_BLOCKTYPE_tag { * Detail: See structure MD_BLOCK_H_DETAIL. */ MD_BLOCK_H, + /* <pre><code>...</code></pre> + * Note the text lines (spans) within blocks are terminated with '\n'. */ + MD_BLOCK_CODE, + /* <p>...</p> */ MD_BLOCK_P }; @@ -84,7 +88,12 @@ enum MD_SPANTYPE_tag { typedef enum MD_TEXTTYPE_tag MD_TEXTTYPE; enum MD_TEXTTYPE_tag { /* Normal text. */ - MD_TEXT_NORMAL = 0 + MD_TEXT_NORMAL = 0, + + /* Text in a code block (inside MD_BLOCK_CODE). + * Includes spaces for indentation and '\n' for new lines. + * MD_TEXT_BR and MD_TEXT_SOFTBR are not sent for this kind of text. 
*/ + MD_TEXT_CODEBLOCK }; @@ -101,6 +110,7 @@ struct MD_BLOCK_H_DETAIL_tag { * The following flags may allow some extensions or deviations from it. */ #define MD_FLAG_PERMISSIVEATXHEADERS 0x0001 /* Do not require space in ATX headers ( ###header ) */ +#define MD_FLAG_NOINDENTEDCODE 0x0002 /* Recognize only fenced code blocks. */ /* Caller-provided callbacks. *