commit 0225f337b416aee0c295e02a0431bd86c5dbcc63
parent d5535bd57a703e26c397f3efc4747a079954c87d
Author: Martin Mitas <mity@morous.org>
Date: Tue, 4 Oct 2016 00:55:32 +0200
Implement ATX headers.
Diffstat:
4 files changed, 90 insertions(+), 6 deletions(-)
diff --git a/README.md b/README.md
@@ -82,7 +82,7 @@ more or less forms our to do list.
- **Leaf Blocks:**
- [x] 4.1 Thematic breaks
- - [ ] 4.2 ATX headings
+ - [x] 4.2 ATX headings
- [ ] 4.3 Setext headings
- [ ] 4.4 Indented code blocks
- [ ] 4.5 Fenced code blocks
diff --git a/md2html/md2html.c b/md2html/md2html.c
@@ -128,11 +128,13 @@ membuf_append_escaped(struct membuffer* buf, const char* data, MD_SIZE size)
static int
enter_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata)
{
+ static const char* head[6] = { "<h1>", "<h2>", "<h3>", "<h4>", "<h5>", "<h6>" };
struct membuffer* out = (struct membuffer*) userdata;
switch(type) {
case MD_BLOCK_DOC: /* noop */ break;
case MD_BLOCK_HR: MEMBUF_APPEND_LITERAL(out, "<hr>\n"); break;
+ case MD_BLOCK_H: MEMBUF_APPEND_LITERAL(out, head[((MD_BLOCK_H_DETAIL*)detail)->level - 1]); break;
case MD_BLOCK_P: MEMBUF_APPEND_LITERAL(out, "<p>"); break;
}
@@ -142,11 +144,13 @@ enter_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata)
static int
leave_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata)
{
+ static const char* head[6] = { "</h1>\n", "</h2>\n", "</h3>\n", "</h4>\n", "</h5>\n", "</h6>\n" };
struct membuffer* out = (struct membuffer*) userdata;
switch(type) {
case MD_BLOCK_DOC: /*noop*/ break;
case MD_BLOCK_HR: /*noop*/ break;
+ case MD_BLOCK_H: MEMBUF_APPEND_LITERAL(out, head[((MD_BLOCK_H_DETAIL*)detail)->level - 1]); break;
case MD_BLOCK_P: MEMBUF_APPEND_LITERAL(out, "</p>\n"); break;
}
@@ -281,6 +285,7 @@ static const option cmdline_options[] = {
{ "full-html", 'f', 'f', OPTION_ARG_NONE },
{ "stat", 's', 's', OPTION_ARG_NONE },
{ "help", 'h', 'h', OPTION_ARG_NONE },
+ { "fpermissive-atx-headers", 0, 'A', OPTION_ARG_NONE },
{ 0 }
};
@@ -296,11 +301,15 @@ usage(void)
" -f, --full-html generate full HTML document, including header\n"
" -s, --stat measure time of input parsing\n"
" -h, --help display this help and exit\n"
+ "\n"
+ "Markdown dialect options:\n"
+ " --fpermissive-atx-headers allow ATX headers without delimiting space\n"
);
}
static const char* input_path = NULL;
static const char* output_path = NULL;
+static unsigned renderer_flags = 0;
static int want_fullhtml = 0;
static int want_stat = 0;
@@ -322,6 +331,8 @@ cmdline_callback(int opt, char const* value, void* data)
case 's': want_stat = 1; break;
case 'h': usage(); exit(0); break;
+ case 'A': renderer_flags |= MD_FLAG_PERMISSIVEATXHEADERS; break;
+
default:
fprintf(stderr, "Illegal option: %s\n", value);
fprintf(stderr, "Use --help for more info.\n");
@@ -359,7 +370,7 @@ main(int argc, char** argv)
}
}
- ret = process_file(in, out, 0, want_fullhtml, want_stat);
+ ret = process_file(in, out, renderer_flags, want_fullhtml, want_stat);
if(in != stdin)
fclose(in);
if(out != stdout)
diff --git a/md4c/md4c.c b/md4c/md4c.c
@@ -75,12 +75,16 @@ struct MD_CTX_tag {
SZ size;
MD_RENDERER r;
void* userdata;
+
+ /* For MD_BLOCK_H. */
+ unsigned header_level;
};
typedef enum MD_LINETYPE_tag MD_LINETYPE;
enum MD_LINETYPE_tag {
MD_LINE_BLANK,
MD_LINE_HR,
+ MD_LINE_ATXHEADER,
MD_LINE_TEXT
};
@@ -277,6 +281,29 @@ md_is_hr_line(MD_CTX* ctx, OFF beg, OFF* p_end)
return 0;
}
+/* Check whether an ATX header starts at offset beg, which the caller has
+ * already verified to hold a '#' character.
+ *
+ * The opening sequence may consist of 1 to 6 '#' characters. Unless
+ * MD_FLAG_PERMISSIVEATXHEADERS is set, the sequence has to be followed by
+ * a space or by the end of the line (an empty header such as a lone "#").
+ *
+ * On success, 0 is returned, ctx->header_level is set to the number of '#'
+ * characters and *p_beg to the offset just past the opening sequence and any
+ * spaces following it. On failure, -1 is returned and neither
+ * ctx->header_level nor *p_beg is modified.
+ *
+ * p_end is not written by this function.
+ */
+static int
+md_is_atxheader_line(MD_CTX* ctx, OFF beg, OFF* p_beg, OFF* p_end)
+{
+    int n;
+    OFF off = beg + 1;
+
+    /* Count the opening '#' characters; stop as soon as there are too many. */
+    while(off < ctx->size && CH(off) == _T('#') && off - beg < 7)
+        off++;
+    n = off - beg;
+
+    if(n > 6)
+        return -1;
+
+    /* In the strict mode, the opening sequence must be delimited by a space
+     * or by the end of the line (or of the document). */
+    if(!(ctx->r.flags & MD_FLAG_PERMISSIVEATXHEADERS) && off < ctx->size &&
+       CH(off) != _T(' ') && CH(off) != _T('\n') && CH(off) != _T('\r'))
+        return -1;
+
+    /* Store the level only once the line is accepted, so that a rejected
+     * candidate cannot clobber the level of a previously recognized header. */
+    ctx->header_level = n;
+
+    /* Skip the spaces separating the opening sequence from the contents. */
+    while(off < ctx->size && CH(off) == _T(' '))
+        off++;
+    *p_beg = off;
+    return 0;
+}
+
/* Analyze type of the line and find some its properties. This serves as a
* main input for determining type and boundaries of a block. */
static void
@@ -300,6 +327,14 @@ md_analyze_line(MD_CTX* ctx, OFF beg, OFF* p_end, const MD_LINE* pivot_line, MD_
goto done;
}
+ /* Check whether we are ATX header. */
+ if(CH(off) == _T('#')) {
+ if(md_is_atxheader_line(ctx, off, &line->beg, &off) == 0) {
+ line->type = MD_LINE_ATXHEADER;
+ goto done;
+ }
+ }
+
/* Check whether we are thematic break line. */
if(ISANYOF(off, _T("-_*"))) {
if(md_is_hr_line(ctx, off, &off) == 0) {
@@ -319,6 +354,19 @@ done:
/* Set end of the line. */
line->end = off;
+ /* But for ATX header, we should not include the optional trailing mark. */
+ if(line->type == MD_LINE_ATXHEADER) {
+ OFF tmp = line->end;
+ while(tmp > line->beg && CH(tmp-1) == _T(' '))
+ tmp--;
+ while(tmp > line->beg && CH(tmp-1) == _T('#'))
+ tmp--;
+ while(tmp > line->beg && CH(tmp-1) == _T(' '))
+ tmp--;
+ if(CH(tmp) == _T(' ') || (ctx->r.flags & MD_FLAG_PERMISSIVEATXHEADERS))
+ line->end = tmp;
+ }
+
/* Eat also the new line. */
if(off < ctx->size && CH(off) == _T('\r'))
off++;
@@ -336,6 +384,9 @@ static int
md_process_block(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
{
MD_BLOCKTYPE block_type;
+ union {
+ MD_BLOCK_H_DETAIL header;
+ } det;
int ret = 0;
if(n_lines == 0)
@@ -345,18 +396,24 @@ md_process_block(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
switch(lines[0].type) {
case MD_LINE_BLANK: return 0;
case MD_LINE_HR: block_type = MD_BLOCK_HR; break;
+
+ case MD_LINE_ATXHEADER:
+ block_type = MD_BLOCK_H;
+ det.header.level = ctx->header_level;
+ break;
+
case MD_LINE_TEXT: block_type = MD_BLOCK_P; break;
}
/* Process the block accordingly to is type. */
- MD_ENTER_BLOCK(block_type, NULL);
+ MD_ENTER_BLOCK(block_type, (void*) &det);
switch(block_type) {
case MD_BLOCK_HR: /* Noop. */ break;
default: ret = md_process_normal_block(ctx, lines, n_lines); break;
}
if(ret != 0)
goto abort;
- MD_LEAVE_BLOCK(block_type, NULL);
+ MD_LEAVE_BLOCK(block_type, (void*) &det);
abort:
return ret;
diff --git a/md4c/md4c.h b/md4c/md4c.h
@@ -64,11 +64,14 @@ enum MD_BLOCKTYPE_tag {
/* <hr> */
MD_BLOCK_HR,
+ /* <h1>...</h1> (for levels up to 6)
+ * Detail: See structure MD_BLOCK_H_DETAIL. */
+ MD_BLOCK_H,
+
/* <p>...</p> */
MD_BLOCK_P
};
-
/* Span represents an in-line piece of a document which should be rendered with
* the same font, color and other attributes. A sequence of spans forms a block
* like paragraph or list item. */
@@ -77,7 +80,6 @@ enum MD_SPANTYPE_tag {
MD_SPAN_DUMMY = 0 /* not yet used... */
};
-
/* Text is the actual textual contents of span. */
typedef enum MD_TEXTTYPE_tag MD_TEXTTYPE;
enum MD_TEXTTYPE_tag {
@@ -86,6 +88,20 @@ enum MD_TEXTTYPE_tag {
};
+/* Detailed info for MD_BLOCK_H.
+ *
+ * The parser fills this structure in for a header block and hands a pointer
+ * to it to the block callbacks through their detail parameter; a renderer
+ * casts that pointer back to MD_BLOCK_H_DETAIL* to learn the header level. */
+typedef struct MD_BLOCK_H_DETAIL_tag MD_BLOCK_H_DETAIL;
+struct MD_BLOCK_H_DETAIL_tag {
+ unsigned level; /* Header level (1 - 6); 1 corresponds to <h1>, 6 to <h6> */
+};
+
+
+/* Flags specifying Markdown dialect.
+ *
+ * By default (when MD_RENDERER::flags == 0), we follow CommonMark specification.
+ * The following flags may allow some extensions or deviations from it.
+ */
+#define MD_FLAG_PERMISSIVEATXHEADERS 0x0001 /* Do not require space in ATX headers ( ###header ) */
+
/* Caller-provided callbacks.
*
* For some block/span types, more detailed information is provided in a