commit 6d9cac663cfc143d2e2634266081adb06dbe8d26
parent f814d89369829e5aabcbac0a059f972949c9ccd6
Author: Remy Noulin <loader2x@gmail.com>
Date: Mon, 26 Dec 2022 20:20:17 +0100
add effects in markdown syntax
add '-' for faint span.
add '%' for inverse span.
add '!' for conceal/hidden span.
add '^' for blink span.
add anchor with syntax: [|id] and link syntax [to anchor id](|id)
add autolink for gemini, gopher and spartan protocols
add MD_SPAN_COLOR for color support in program using this library
md4c/md4c.c | 316 ++++++++++++++++++++++++++++++++++++++++++++++++++++---
md4c/md4c.h | 16 ++-
md4c/package.yml | 2 +-
3 files changed, 314 insertions(+), 20 deletions(-)
Diffstat:
3 files changed, 314 insertions(+), 20 deletions(-)
diff --git a/md4c/md4c.c b/md4c/md4c.c
@@ -1,4 +1,4 @@
-/* commit e9ff661ff818ee94a4a231958d9b6768dc6882c9 - added _ for underline and __ for bold instead of underline
+/* commit e9ff661ff818ee94a4a231958d9b6768dc6882c9 - mity/md4c repo
* MD4C: Markdown parser for C
* (http://github.com/mity/md4c)
*
@@ -178,7 +178,7 @@ struct MD_CTX_tag {
#endif
/* For resolving of inline spans. */
- MD_MARKCHAIN mark_chains[13];
+ MD_MARKCHAIN mark_chains[17];
#define PTR_CHAIN (ctx->mark_chains[0])
#define TABLECELLBOUNDARIES (ctx->mark_chains[1])
#define ASTERISK_OPENERS_extraword_mod3_0 (ctx->mark_chains[2])
@@ -192,8 +192,12 @@ struct MD_CTX_tag {
#define TILDE_OPENERS_2 (ctx->mark_chains[10])
#define BRACKET_OPENERS (ctx->mark_chains[11])
#define DOLLAR_OPENERS (ctx->mark_chains[12])
+#define FAINT_OPENERS (ctx->mark_chains[13])
+#define INVERSE_OPENERS (ctx->mark_chains[14])
+#define CONCEAL_OPENERS (ctx->mark_chains[15])
+#define BLINK_OPENERS (ctx->mark_chains[16])
#define OPENERS_CHAIN_FIRST 1
-#define OPENERS_CHAIN_LAST 12
+#define OPENERS_CHAIN_LAST 16
int n_table_cell_boundaries;
@@ -2513,9 +2517,13 @@ md_mark_chain(MD_CTX* ctx, int mark_index)
case _T('*'): return md_asterisk_chain(ctx, mark->flags);
case _T('_'): return &UNDERSCORE_OPENERS;
case _T('~'): return (mark->end - mark->beg == 1) ? &TILDE_OPENERS_1 : &TILDE_OPENERS_2;
- case _T('!'): MD_FALLTHROUGH();
+ /* case _T('!'): MD_FALLTHROUGH(); */
case _T('['): return &BRACKET_OPENERS;
case _T('|'): return &TABLECELLBOUNDARIES;
+ case _T('-'): return &FAINT_OPENERS;
+ case _T('%'): return &INVERSE_OPENERS;
+ case _T('!'): return &CONCEAL_OPENERS;
+ case _T('^'): return &BLINK_OPENERS;
default: return NULL;
}
}
@@ -2723,6 +2731,9 @@ md_build_mark_char_map(MD_CTX* ctx)
memset(ctx->mark_char_map, 0, sizeof(ctx->mark_char_map));
ctx->mark_char_map['\\'] = 1;
+ ctx->mark_char_map['^'] = 1;
+ ctx->mark_char_map['%'] = 1;
+ ctx->mark_char_map['-'] = 1;
ctx->mark_char_map['*'] = 1;
ctx->mark_char_map['_'] = 1;
ctx->mark_char_map['`'] = 1;
@@ -2887,6 +2898,141 @@ md_is_code_span(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg,
return TRUE;
}
+/* Detect an anchor span with the syntax: [|anchorId]
+ *
+ * The function does not verify the leading "[|" itself; it skips two
+ * characters from off and only searches for the closing ']'. The caller is
+ * expected to have checked the opener already.
+ *
+ * An anchor must fit on a single line (only lines[0] is inspected) and its
+ * id must not be empty.
+ *
+ * ctx          - parser context (presumably consumed by the CH() macro).
+ * lines        - the line to scan; only lines[0].end is read.
+ * off          - offset of the opening '['.
+ * p_closer_beg - on success receives the offset of the closing ']'.
+ *
+ * Returns TRUE when an anchor is found, FALSE otherwise (*p_closer_beg is
+ * then left untouched).
+ */
+static int
+md_is_anchor_span(MD_CTX* ctx, const MD_LINE* lines, OFF off, OFF* p_closer_beg)
+{
+    OFF line_end = lines[0].end;
+    OFF id_beg;
+
+    /* The smallest anchor is "[|x]", i.e. 4 characters occupying
+     * off .. off+3. It is still valid when it ends exactly at the end of
+     * the line (off+4 == line_end), hence '>' and not '>='. */
+    if(off + 4 > line_end)
+        return FALSE;
+
+    /* Skip the "[|" opener; the id starts right after it. */
+    id_beg = off + 2;
+
+    /* Find the closer mark. */
+    for(off = id_beg; off < line_end; off++) {
+        if(CH(off) == _T(']')) {
+            /* "[|]" has no id, so it is not an anchor. */
+            if(off == id_beg)
+                return FALSE;
+            *p_closer_beg = off;
+            return TRUE;
+        }
+    }
+    return FALSE;
+}
+
+#ifdef MD4C_USE_UTF16
+ /* IS_MARK_CHAR(off): non-zero when the character at offset off is flagged
+  * in ctx->mark_char_map[]; a variable named ctx must be in scope.
+  * For UTF-16, mark_char_map[] covers only ASCII, so the character is
+  * range-checked against the array size before it is used as an index. */
+ #define IS_MARK_CHAR(off) ((CH(off) < SIZEOF_ARRAY(ctx->mark_char_map)) && \
+ (ctx->mark_char_map[(unsigned char) CH(off)]))
+#else
+ /* Same macro for 8-bit encodings: mark_char_map[] covers all 256 elements,
+  * so no range check is done before indexing. */
+ #define IS_MARK_CHAR(off) (ctx->mark_char_map[(unsigned char) CH(off)])
+#endif
+
+/* Shared detector for the single-character effect spans (faint, inverse,
+ * conceal, blink), e.g. -text text-
+ *
+ * Starting from an opener at offset beg, search lines[0] for the first
+ * matching closer. A span is accepted only when:
+ *  - there is room for opener, at least one character and closer;
+ *  - the character right after the opener is not Unicode whitespace;
+ *  - the closer equals mark_ch, is not preceded by Unicode whitespace, and
+ *    is followed by the end of the line, Unicode whitespace or a mark
+ *    character.
+ *
+ * ctx          - parser context (used by IS_MARK_CHAR()).
+ * lines        - the line to scan; only lines[0].end is read, so a span
+ *                never crosses a line boundary.
+ * beg          - offset of the opener character.
+ * mark_ch      - the delimiter character to look for as the closer.
+ * p_closer_beg - on success receives the offset of the closer.
+ *
+ * Returns TRUE when a closer is found, FALSE otherwise (*p_closer_beg is
+ * then left untouched).
+ */
+static int
+md_is_effect_span(MD_CTX* ctx, const MD_LINE* lines, OFF beg, CHAR mark_ch, OFF* p_closer_beg)
+{
+    OFF line_end = lines[0].end;
+    OFF tmp;
+
+    /* Shortest possible span is opener + one character + closer. */
+    if(beg + 2 >= line_end)
+        return FALSE;
+    /* The opener must be directly attached to the text it decorates. */
+    if(ISUNICODEWHITESPACE(beg+1))
+        return FALSE;
+
+    for(tmp = beg + 2; tmp < line_end; tmp++) {
+        if(CH(tmp) != mark_ch)
+            continue;
+        /* What follows the closer must terminate the span. */
+        if(tmp+1 != line_end && !ISUNICODEWHITESPACE(tmp+1) && !IS_MARK_CHAR(tmp+1))
+            continue;
+        /* The closer must be directly attached to the preceding text. */
+        if(ISUNICODEWHITESPACE(tmp-1))
+            continue;
+
+        *p_closer_beg = tmp;
+        return TRUE;
+    }
+
+    return FALSE;
+}
+
+/* Detect faint effect: -text text-
+ * Returns TRUE and stores the closer offset in *p_closer_beg on success. */
+static int
+md_is_faint_span(MD_CTX* ctx, const MD_LINE* lines, OFF beg, OFF* p_closer_beg)
+{
+    return md_is_effect_span(ctx, lines, beg, _T('-'), p_closer_beg);
+}
+
+/* Detect inverse effect: %text text%
+ * Returns TRUE and stores the closer offset in *p_closer_beg on success. */
+static int
+md_is_inverse_span(MD_CTX* ctx, const MD_LINE* lines, OFF beg, OFF* p_closer_beg)
+{
+    return md_is_effect_span(ctx, lines, beg, _T('%'), p_closer_beg);
+}
+
+/* Detect conceal effect: !text text!
+ * Returns TRUE and stores the closer offset in *p_closer_beg on success. */
+static int
+md_is_conceal_span(MD_CTX* ctx, const MD_LINE* lines, OFF beg, OFF* p_closer_beg)
+{
+    return md_is_effect_span(ctx, lines, beg, _T('!'), p_closer_beg);
+}
+
+/* Detect blink effect: ^text text^
+ * Returns TRUE and stores the closer offset in *p_closer_beg on success. */
+static int
+md_is_blink_span(MD_CTX* ctx, const MD_LINE* lines, OFF beg, OFF* p_closer_beg)
+{
+    return md_is_effect_span(ctx, lines, beg, _T('^'), p_closer_beg);
+}
+
static int
md_is_autolink_uri(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end)
{
@@ -3009,15 +3155,6 @@ md_collect_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines, int table_mode)
while(TRUE) {
CHAR ch;
-#ifdef MD4C_USE_UTF16
- /* For UTF-16, mark_char_map[] covers only ASCII. */
- #define IS_MARK_CHAR(off) ((CH(off) < SIZEOF_ARRAY(ctx->mark_char_map)) && \
- (ctx->mark_char_map[(unsigned char) CH(off)]))
-#else
- /* For 8-bit encodings, mark_char_map[] covers all 256 elements. */
- #define IS_MARK_CHAR(off) (ctx->mark_char_map[(unsigned char) CH(off)])
-#endif
-
/* Optimization: Use some loop unrolling. */
while(off + 3 < line_end && !IS_MARK_CHAR(off+0) && !IS_MARK_CHAR(off+1)
&& !IS_MARK_CHAR(off+2) && !IS_MARK_CHAR(off+3))
@@ -3138,6 +3275,89 @@ md_collect_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines, int table_mode)
continue;
}
+ /* A potential faint span start/end. */
+ if(ch == _T('-')) {
+ OFF closer_beg;
+ int is_faint_span;
+
+ if (off == line->beg || ISUNICODEWHITESPACEBEFORE(off) || ISUNICODEPUNCTBEFORE(off)
+ || IS_MARK_CHAR(off-1)) {
+
+ is_faint_span = md_is_faint_span(ctx, line, off, &closer_beg);
+ if(is_faint_span) {
+ PUSH_MARK(_T('-'), off, off+1, MD_MARK_OPENER | MD_MARK_RESOLVED);
+ PUSH_MARK(_T('-'), closer_beg, closer_beg+1, MD_MARK_CLOSER | MD_MARK_RESOLVED);
+ ctx->marks[ctx->n_marks-2].next = ctx->n_marks-1;
+ ctx->marks[ctx->n_marks-1].prev = ctx->n_marks-2;
+ }
+ }
+ off++;
+ continue;
+ }
+
+ /* A potential inverse span start/end. */
+ if(ch == _T('%')) {
+ OFF closer_beg;
+ int is_inverse_span;
+
+ if (off == line->beg || ISUNICODEWHITESPACEBEFORE(off) || ISUNICODEPUNCTBEFORE(off)
+ || IS_MARK_CHAR(off-1)) {
+
+ is_inverse_span = md_is_inverse_span(ctx, line, off, &closer_beg);
+ if(is_inverse_span) {
+ PUSH_MARK(_T('%'), off, off+1, MD_MARK_OPENER | MD_MARK_RESOLVED);
+ PUSH_MARK(_T('%'), closer_beg, closer_beg+1, MD_MARK_CLOSER | MD_MARK_RESOLVED);
+ ctx->marks[ctx->n_marks-2].next = ctx->n_marks-1;
+ ctx->marks[ctx->n_marks-1].prev = ctx->n_marks-2;
+
+ }
+ }
+ off++;
+ continue;
+ }
+
+ /* A potential conceal span start/end. */
+ if(ch == _T('!')) {
+ OFF closer_beg;
+ int is_conceal_span;
+
+ if (off == line->beg || ISUNICODEWHITESPACEBEFORE(off) || ISUNICODEPUNCTBEFORE(off)
+ || IS_MARK_CHAR(off-1)) {
+
+ is_conceal_span = md_is_conceal_span(ctx, line, off, &closer_beg);
+ if(is_conceal_span) {
+ PUSH_MARK(_T('!'), off, off+1, MD_MARK_OPENER | MD_MARK_RESOLVED);
+ PUSH_MARK(_T('!'), closer_beg, closer_beg+1, MD_MARK_CLOSER | MD_MARK_RESOLVED);
+ ctx->marks[ctx->n_marks-2].next = ctx->n_marks-1;
+ ctx->marks[ctx->n_marks-1].prev = ctx->n_marks-2;
+
+ }
+ }
+ off++;
+ continue;
+ }
+
+ /* A potential blink span start/end. */
+ if(ch == _T('^')) {
+ OFF closer_beg;
+ int is_blink_span;
+
+ if (off == line->beg || ISUNICODEWHITESPACEBEFORE(off) || ISUNICODEPUNCTBEFORE(off)
+ || IS_MARK_CHAR(off-1)) {
+
+ is_blink_span = md_is_blink_span(ctx, line, off, &closer_beg);
+ if(is_blink_span) {
+ PUSH_MARK(_T('^'), off, off+1, MD_MARK_OPENER | MD_MARK_RESOLVED);
+ PUSH_MARK(_T('^'), closer_beg, closer_beg+1, MD_MARK_CLOSER | MD_MARK_RESOLVED);
+ ctx->marks[ctx->n_marks-2].next = ctx->n_marks-1;
+ ctx->marks[ctx->n_marks-1].prev = ctx->n_marks-2;
+
+ }
+ }
+ off++;
+ continue;
+ }
+
/* A potential entity start. */
if(ch == _T('&')) {
PUSH_MARK(ch, off, off+1, MD_MARK_POTENTIAL_OPENER);
@@ -3203,6 +3423,21 @@ md_collect_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines, int table_mode)
continue;
}
+ /* A potential anchor */
+ if(ch == _T('[') && off+1 < line_end && CH(off+1) == _T('|')) {
+ OFF closer_beg;
+ int is_anchor_span = md_is_anchor_span(ctx, line, off, &closer_beg);
+ if (is_anchor_span) {
+ PUSH_MARK(_T('['), off, off+2, MD_MARK_OPENER | MD_MARK_RESOLVED);
+ PUSH_MARK(_T(']'), closer_beg, closer_beg+1, MD_MARK_CLOSER | MD_MARK_RESOLVED);
+ ctx->marks[ctx->n_marks-2].next = ctx->n_marks-1;
+ ctx->marks[ctx->n_marks-1].prev = ctx->n_marks-2;
+ off = closer_beg+1;
+ continue;
+ }
+ // continue analyzing [ mark
+ }
+
/* A potential link or its part. */
if(ch == _T('[') || (ch == _T('!') && off+1 < line_end && CH(off+1) == _T('['))) {
OFF tmp = (ch == _T('[') ? off+1 : off+2);
@@ -3243,8 +3478,11 @@ md_collect_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines, int table_mode)
SZ suffix_size;
} scheme_map[] = {
/* In the order from the most frequently used, arguably. */
- { _T("http"), 4, _T("//"), 2 },
{ _T("https"), 5, _T("//"), 2 },
+ { _T("gemini"), 6, _T("//"), 2 },
+ { _T("http"), 4, _T("//"), 2 },
+ { _T("gopher"), 6, _T("//"), 2 },
+ { _T("spartan"), 7, _T("//"), 2 },
{ _T("ftp"), 3, _T("//"), 2 }
};
int scheme_index;
@@ -4204,6 +4442,30 @@ md_process_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
}
break;
+ case '-': /* faint */
+ if(mark->flags & MD_MARK_OPENER) {
+ MD_ENTER_SPAN(MD_SPAN_FNT, NULL);
+ } else {
+ MD_LEAVE_SPAN(MD_SPAN_FNT, NULL);
+ }
+ break;
+
+ case '%': /* inverse */
+ if(mark->flags & MD_MARK_OPENER) {
+ MD_ENTER_SPAN(MD_SPAN_INV, NULL);
+ } else {
+ MD_LEAVE_SPAN(MD_SPAN_INV, NULL);
+ }
+ break;
+
+ case '^': /* blink */
+ if(mark->flags & MD_MARK_OPENER) {
+ MD_ENTER_SPAN(MD_SPAN_BLI, NULL);
+ } else {
+ MD_LEAVE_SPAN(MD_SPAN_BLI, NULL);
+ }
+ break;
+
case '_': /* Underline (or emphasis if we fall through). */
if(ctx->parser.flags & MD_FLAG_UNDERLINE) {
if(mark->flags & MD_MARK_OPENER) {
@@ -4259,7 +4521,7 @@ md_process_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
}
break;
- case '~':
+ case '~': /* crossed */
if(mark->flags & MD_MARK_OPENER)
MD_ENTER_SPAN(MD_SPAN_DEL, NULL);
else
@@ -4276,8 +4538,16 @@ md_process_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
}
break;
- case '[': /* Link, wiki link, image. */
- case '!':
+ case '!': /* conceal/hidden */
+ if (mark->prev == -1) {
+ if(mark->flags & MD_MARK_OPENER) {
+ MD_ENTER_SPAN(MD_SPAN_COC, NULL);
+ } else {
+ MD_LEAVE_SPAN(MD_SPAN_COC, NULL);
+ }
+ break;
+ }
+ case '[': /* Link, wiki link, image, anchor. */
case ']':
{
const MD_MARK* opener = (mark->ch != ']' ? mark : &ctx->marks[mark->prev]);
@@ -4304,6 +4574,18 @@ md_process_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
break;
}
+ if ((opener->ch == '[' && closer->ch == ']') &&
+ opener->end - opener->beg == 2 &&
+ closer->end - closer->beg == 1 &&
+ CH(opener->beg+1) == _T('|'))
+ {
+ if(mark->flags & MD_MARK_OPENER) {
+ MD_ENTER_SPAN(MD_SPAN_ANCHOR, NULL);
+ } else {
+ MD_LEAVE_SPAN(MD_SPAN_ANCHOR, NULL);
+ }
+ }
+
dest_mark = opener+1;
MD_ASSERT(dest_mark->ch == 'D');
title_mark = opener+2;
diff --git a/md4c/md4c.h b/md4c/md4c.h
@@ -145,7 +145,19 @@ typedef enum MD_SPANTYPE {
/* <u>...</u>
* Note: Recognized only when MD_FLAG_UNDERLINE is enabled. */
- MD_SPAN_U
+ MD_SPAN_U,
+ MD_SPAN_FNT,
+ MD_SPAN_INV,
+ MD_SPAN_COC,
+ MD_SPAN_BLI,
+ MD_SPAN_ANCHOR,
+ /* This span type is not issued by md4c itself.
+ * MD_SPAN_COLOR lets programs using this library support RGB colors:
+ * [text with colors](#1#13)
+ * md4c treats colors as MD_SPAN_A and the parsing of the color
+ * is done by the user.
+ */
+ MD_SPAN_COLOR,
} MD_SPANTYPE;
/* Text is the actual textual contents of span. */
@@ -164,7 +176,7 @@ typedef enum MD_TEXTTYPE {
MD_TEXT_SOFTBR, /* '\n' in source text where it is not semantically meaningful (soft break) */
/* Entity.
- * (a) Named entity, e.g.
+ * (a) Named entity, e.g.
* (Note MD4C does not have a list of known entities.
* Anything matching the regexp /&[A-Za-z][A-Za-z0-9]{1,47};/ is
* treated as a named entity.)
diff --git a/md4c/package.yml b/md4c/package.yml
@@ -1,6 +1,6 @@
---
name: md4c
- version: 0.0.1
+ version: 0.0.2
description: "md4c is a markdown parser library (forked from mity/md4c)"
bin: ./md4c.c
scripts: