md4c

C Markdown parser. Fast. SAX-like interface. Compliant with the CommonMark specification.
git clone https://noulin.net/git/md4c.git
Log | Files | Refs | README | LICENSE

commit 5f47a5cbfa4bc83390185d7b9fa8af7f4cfb809f
parent bad11edea3271a65b8d476c0a90784f39cec781b
Author: Martin Mitas <mity@morous.org>
Date:   Mon, 11 Sep 2017 10:55:21 +0200

md_build_attribute: Handle U+0000 character.

Diffstat:
M md2html/render_html.c | 1 +
M md4c/md4c.c | 72 +++++++++++++++++++++++++++++++++++++++++-------------------------------
M md4c/md4c.h | 2 +-
3 files changed, 43 insertions(+), 32 deletions(-)

diff --git a/md2html/render_html.c b/md2html/render_html.c @@ -245,6 +245,7 @@ render_attribute(MD_RENDER_HTML* r, const MD_ATTRIBUTE* attr, const MD_CHAR* text = attr->text + off; switch(type) { + case MD_TEXT_NULLCHAR: render_utf8_codepoint(r, 0x0000, render_text); break; case MD_TEXT_ENTITY: render_entity(r, text, size, fn_append); break; default: fn_append(r, text, size); break; } diff --git a/md4c/md4c.c b/md4c/md4c.c @@ -250,36 +250,38 @@ struct MD_VERBATIMLINE_tag { /* Character classification. * Note we assume ASCII compatibility of code points < 128 here. */ -#define ISIN_(ch, ch_min, ch_max) ((ch_min) <= (unsigned)(ch) && (unsigned)(ch) <= (ch_max)) -#define ISANYOF_(ch, palette) (md_strchr((palette), (ch)) != NULL) -#define ISANYOF2_(ch, ch1, ch2) ((ch) == (ch1) || (ch) == (ch2)) -#define ISASCII_(ch) ((unsigned)(ch) <= 127) -#define ISBLANK_(ch) (ISANYOF2_((ch), _T(' '), _T('\t'))) -#define ISNEWLINE_(ch) (ISANYOF2_((ch), _T('\r'), _T('\n'))) -#define ISWHITESPACE_(ch) (ISBLANK_(ch) || ISANYOF2_((ch), _T('\v'), _T('\f'))) -#define ISCNTRL_(ch) ((unsigned)(ch) <= 31 || (unsigned)(ch) == 127) -#define ISPUNCT_(ch) (ISIN_(ch, 33, 47) || ISIN_(ch, 58, 64) || ISIN_(ch, 91, 96) || ISIN_(ch, 123, 126)) -#define ISUPPER_(ch) (ISIN_(ch, _T('A'), _T('Z'))) -#define ISLOWER_(ch) (ISIN_(ch, _T('a'), _T('z'))) -#define ISALPHA_(ch) (ISUPPER_(ch) || ISLOWER_(ch)) -#define ISDIGIT_(ch) (ISIN_(ch, _T('0'), _T('9'))) -#define ISXDIGIT_(ch) (ISDIGIT_(ch) || ISIN_(ch, _T('A'), _T('F')) || ISIN_(ch, _T('a'), _T('f'))) -#define ISALNUM_(ch) (ISALPHA_(ch) || ISDIGIT_(ch)) - -#define ISANYOF(off, palette) ISANYOF_(CH(off), (palette)) -#define ISANYOF2(off, ch1, ch2) ISANYOF2_(CH(off), (ch1), (ch2)) -#define ISASCII(off) ISASCII_(CH(off)) -#define ISBLANK(off) ISBLANK_(CH(off)) -#define ISNEWLINE(off) ISNEWLINE_(CH(off)) -#define ISWHITESPACE(off) ISWHITESPACE_(CH(off)) -#define ISCNTRL(off) ISCNTRL_(CH(off)) -#define ISPUNCT(off) ISPUNCT_(CH(off)) -#define ISUPPER(off) 
ISUPPER_(CH(off)) -#define ISLOWER(off) ISLOWER_(CH(off)) -#define ISALPHA(off) ISALPHA_(CH(off)) -#define ISDIGIT(off) ISDIGIT_(CH(off)) -#define ISXDIGIT(off) ISXDIGIT_(CH(off)) -#define ISALNUM(off) ISALNUM_(CH(off)) +#define ISIN_(ch, ch_min, ch_max) ((ch_min) <= (unsigned)(ch) && (unsigned)(ch) <= (ch_max)) +#define ISANYOF_(ch, palette) (md_strchr((palette), (ch)) != NULL) +#define ISANYOF2_(ch, ch1, ch2) ((ch) == (ch1) || (ch) == (ch2)) +#define ISANYOF3_(ch, ch1, ch2, ch3) ((ch) == (ch1) || (ch) == (ch2) || (ch) == (ch3)) +#define ISASCII_(ch) ((unsigned)(ch) <= 127) +#define ISBLANK_(ch) (ISANYOF2_((ch), _T(' '), _T('\t'))) +#define ISNEWLINE_(ch) (ISANYOF2_((ch), _T('\r'), _T('\n'))) +#define ISWHITESPACE_(ch) (ISBLANK_(ch) || ISANYOF2_((ch), _T('\v'), _T('\f'))) +#define ISCNTRL_(ch) ((unsigned)(ch) <= 31 || (unsigned)(ch) == 127) +#define ISPUNCT_(ch) (ISIN_(ch, 33, 47) || ISIN_(ch, 58, 64) || ISIN_(ch, 91, 96) || ISIN_(ch, 123, 126)) +#define ISUPPER_(ch) (ISIN_(ch, _T('A'), _T('Z'))) +#define ISLOWER_(ch) (ISIN_(ch, _T('a'), _T('z'))) +#define ISALPHA_(ch) (ISUPPER_(ch) || ISLOWER_(ch)) +#define ISDIGIT_(ch) (ISIN_(ch, _T('0'), _T('9'))) +#define ISXDIGIT_(ch) (ISDIGIT_(ch) || ISIN_(ch, _T('A'), _T('F')) || ISIN_(ch, _T('a'), _T('f'))) +#define ISALNUM_(ch) (ISALPHA_(ch) || ISDIGIT_(ch)) + +#define ISANYOF(off, palette) ISANYOF_(CH(off), (palette)) +#define ISANYOF2(off, ch1, ch2) ISANYOF2_(CH(off), (ch1), (ch2)) +#define ISANYOF3(off, ch1, ch2, ch3) ISANYOF3_(CH(off), (ch1), (ch2), (ch3)) +#define ISASCII(off) ISASCII_(CH(off)) +#define ISBLANK(off) ISBLANK_(CH(off)) +#define ISNEWLINE(off) ISNEWLINE_(CH(off)) +#define ISWHITESPACE(off) ISWHITESPACE_(CH(off)) +#define ISCNTRL(off) ISCNTRL_(CH(off)) +#define ISPUNCT(off) ISPUNCT_(CH(off)) +#define ISUPPER(off) ISUPPER_(CH(off)) +#define ISLOWER(off) ISLOWER_(CH(off)) +#define ISALPHA(off) ISALPHA_(CH(off)) +#define ISDIGIT(off) ISDIGIT_(CH(off)) +#define ISXDIGIT(off) ISXDIGIT_(CH(off)) +#define 
ISALNUM(off) ISALNUM_(CH(off)) static inline const CHAR* md_strchr(const CHAR* str, CHAR ch) { @@ -1428,7 +1430,7 @@ md_build_attribute(MD_CTX* ctx, const CHAR* raw_text, SZ raw_size, * without any malloc(). */ is_trivial = TRUE; for(raw_off = 0; raw_off < raw_size; raw_off++) { - if(ISANYOF2_(raw_text[raw_off], _T('\\'), _T('&'))) { + if(ISANYOF3_(raw_text[raw_off], _T('\\'), _T('&'), _T('\0'))) { is_trivial = FALSE; break; } @@ -1455,6 +1457,14 @@ md_build_attribute(MD_CTX* ctx, const CHAR* raw_text, SZ raw_size, off = 0; while(raw_off < raw_size) { + if(raw_text[raw_off] == _T('\0')) { + MD_CHECK(md_build_attr_append_substr(ctx, build, MD_TEXT_NULLCHAR, off)); + memcpy(build->text + off, raw_text + raw_off, 1); + off++; + raw_off++; + continue; + } + if(raw_text[raw_off] == _T('&')) { OFF ent_end; diff --git a/md4c/md4c.h b/md4c/md4c.h @@ -201,7 +201,7 @@ typedef enum MD_ALIGN { * Note that these conditions are guaranteed: * -- substr_offsets[0] == 0 * -- substr_offsets[LAST+1] == size - * -- Only MD_TEXT_NORMAL and MD_TEXT_ENTITY substrings can appear. + * -- Only MD_TEXT_NORMAL, MD_TEXT_ENTITY, MD_TEXT_NULLCHAR substrings can appear. */ typedef struct MD_ATTRIBUTE { const MD_CHAR* text;