commit 6c47ec78b884983a1369514b0e4c4012dec0ad2e
parent 6a9508866f6240fb6a07dc96aa40c05340283133
Author: Martin Mitas <mity@morous.org>
Date: Tue, 4 Oct 2016 22:13:44 +0200
Implement rudimentary infrastructure support for parsing inlines.
Diffstat:
| M | README.md | | | 2 | +- |
| M | md4c/md4c.c | | | 210 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----- |
2 files changed, 199 insertions(+), 13 deletions(-)
diff --git a/README.md b/README.md
@@ -97,7 +97,7 @@ more or less forms our to do list.
- [ ] 5.3 Lists
- **Inlines:**
- - [ ] 6.1 Backslash escapes
+ - [x] 6.1 Backslash escapes
- [ ] 6.2 Entity and numeric character references
- [ ] 6.3 Code spans
- [ ] 6.4 Emphasis and strong emphasis
diff --git a/md4c/md4c.c b/md4c/md4c.c
@@ -67,15 +67,25 @@ typedef MD_CHAR CHAR;
typedef MD_SIZE SZ;
typedef MD_OFFSET OFF;
+typedef struct MD_MARK_tag MD_MARK;
+
/* Context propagated through all the parsing. */
typedef struct MD_CTX_tag MD_CTX;
struct MD_CTX_tag {
- /* Immutables (parameters of md_parse()). */
+ /* Immutable stuff (parameters of md_parse()). */
const CHAR* text;
SZ size;
MD_RENDERER r;
void* userdata;
+ /* Stack of inline/span markers.
+ * This is only used while parsing the contents of a single block, but by
+ * storing it here we may reuse the stack for subsequent blocks; i.e. we
+ * need fewer (re)allocations. */
+ MD_MARK* marks;
+ unsigned n_marks;
+ unsigned alloc_marks;
+
/* For MD_BLOCK_QUOTE */
unsigned quote_level; /* Nesting level. */
@@ -281,29 +291,178 @@ md_str_case_eq(const CHAR* s1, const CHAR* s2, SZ n)
} while(0)
-/******************************************
- *** Processing Single Block Contents ***
- ******************************************/
+/******************************************************
+ *** Processing Sequence of Inlines (a.k.a Spans) ***
+ ******************************************************/
+
+/* Structure marking an offset which needs special attention. The type
+ * of the attention is determined by the member ch:
+ *
+ * '\\': Escape sequence.
+ * (beg points to '\\'; beg+1 to the escaped char.)
+ *
+ * Note that not every occurrence of these chars in the text leads to creation
+ * of the structure; only those which have (or may have, after we see more
+ * context) the special meaning do.
+ */
+struct MD_MARK_tag {
+ OFF beg; /* Offset where the mark starts. */
+ OFF end; /* Offset where processing resumes once the mark is handled. */
+ MD_CHAR ch; /* Kind of the mark; see the list of recognized chars above. */
+ unsigned short flags; /* Combination of MD_MARK_xxxx flags. */
+};
+
+/* Mark flags. */
+#define MD_MARK_ACTIVE 0x0001
+#define MD_MARK_OPENER 0x0002
+#define MD_MARK_CLOSER 0x0004
+
+
+/* Reserve one more slot on the stack of marks and return a pointer to it.
+ *
+ * The stack starts with room for 64 marks and doubles its capacity whenever
+ * it is full. The returned slot is NOT initialized; the caller has to fill
+ * in every member it is going to read later.
+ *
+ * Returns NULL (after reporting the problem through md_log()) when the stack
+ * cannot grow. In that case ctx->marks, ctx->n_marks and ctx->alloc_marks are
+ * all left untouched, so the context stays consistent and the old buffer can
+ * still be used and freed.
+ */
+static MD_MARK*
+md_push(MD_CTX* ctx)
+{
+    MD_MARK* mark;
+
+    if(ctx->n_marks >= ctx->alloc_marks) {
+        MD_MARK* new_marks;
+        unsigned new_alloc;
+
+        new_alloc = (ctx->alloc_marks > 0 ? ctx->alloc_marks * 2 : 64);
+        new_marks = realloc(ctx->marks, new_alloc * sizeof(MD_MARK));
+        if(new_marks == NULL) {
+            md_log(ctx, "realloc() failed.");
+            return NULL;
+        }
+
+        /* Commit the new capacity only now, when the buffer really has it.
+         * Updating it before realloc() would leave the context claiming
+         * more room than it owns if the allocation failed. */
+        ctx->alloc_marks = new_alloc;
+        ctx->marks = new_marks;
+    }
+
+    mark = &ctx->marks[ctx->n_marks];
+    ctx->n_marks++;
+    return mark;
+}
+
+/* Push a new, uninitialized mark and make the variable 'mark' point to it.
+ *
+ * The macro is not self-contained. The function using it has to provide:
+ *  -- 'ctx'   (passed to md_push()),
+ *  -- 'mark'  (assigned the new slot),
+ *  -- 'ret'   (set to -1 when md_push() returns NULL),
+ *  -- a label 'abort' (jumped to when md_push() returns NULL).
+ */
+#define PUSH_() \
+ do { \
+ mark = md_push(ctx); \
+ if(mark == NULL) { \
+ ret = -1; \
+ goto abort; \
+ } \
+ } while(0)
+
+/* Same as PUSH_() but it also fills in all the members of the new mark.
+ * It has the same requirements on the calling function as PUSH_(). */
+#define PUSH(ch_, beg_, end_, flags_) \
+ do { \
+ PUSH_(); \
+ mark->ch = (ch_); \
+ mark->beg = (beg_); \
+ mark->end = (end_); \
+ mark->flags = (flags_); \
+ } while(0)
+/* Scan the lines of a block and record on the stack ctx->marks every offset
+ * which needs special treatment when the block is output by
+ * md_process_inlines().
+ *
+ * So far only backslash escapes are recognized. The stack is reset first and
+ * on success it is always terminated with an active dummy mark placed behind
+ * the end of the last line, so it is never empty.
+ *
+ * Returns 0 on success, -1 if the stack of marks could not be grown.
+ */
 static int
-md_process_normal_block(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
+md_analyze_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
 {
     int i;
     int ret = 0;
+    MD_MARK* mark;
+
+    ctx->n_marks = 0;
     for(i = 0; i < n_lines; i++) {
-        MD_TEXT(MD_TEXT_NORMAL, STR(lines[i].beg), lines[i].end - lines[i].beg);
+        const MD_LINE* line = &lines[i];
+        OFF off = line->beg;
+        OFF end = line->end;
+
+        while(off < end) {
+            CHAR ch = CH(off);
+
+            /* Analyze backslash escapes.
+             * Note the mark can go beyond line->end as it may involve
+             * escaped new line to form a hard break. A hard break cannot
+             * be on the last line of the block though: there the backslash
+             * has to stay an ordinary character, otherwise it would get
+             * lost from the output. */
+            if(ch == _T('\\') && off+1 < ctx->size &&
+               (ISPUNCT(off+1) || (ISNEWLINE(off+1) && i+1 < n_lines)))
+            {
+                PUSH(ch, off, off+2, MD_MARK_ACTIVE);
+                off += 2;
+                continue;
+            }
+
+            off++;
+        }
+    }
+
+    /* Add a dummy mark at the end of processed block to simplify
+     * md_process_inlines(). All the members are set explicitly because
+     * PUSH_() provides an uninitialized (possibly recycled) slot. */
+    PUSH_();
+    mark->ch = _T('\0');
+    mark->beg = lines[n_lines-1].end + 1;
+    mark->end = mark->beg;
+    mark->flags = MD_MARK_ACTIVE;
+
+abort:
+    return ret;
+}
+
+/* Output the contents of a block, line by line, using the marks collected
+ * in ctx->marks by md_analyze_inlines().
+ *
+ * Plain text between the marks is passed to MD_TEXT() as MD_TEXT_NORMAL.
+ * For a backslash escape only the escaped char is output; an escaped new
+ * line turns the following line break into a hard one. Between two lines
+ * either MD_TEXT_BR or MD_TEXT_SOFTBR is output.
+ *
+ * The function relies on the list of marks being terminated by an active
+ * mark; it never checks ctx->n_marks.
+ *
+ * Returns the value of 'ret', which is initialized to 0.
+ * NOTE(review): MD_TEXT() is defined outside of this view; presumably it
+ * sets 'ret' and jumps to the label 'abort' on failure -- confirm.
+ */
+static int
+md_process_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
+{
+ const MD_LINE* line = lines;
+ const MD_MARK* mark;
+ OFF off = lines[0].beg;
+ OFF end = lines[n_lines-1].end;
+ int enforce_hardbreak = 0;
+ int ret = 0;
+
+ /* Find the first active mark. Note there is always at least one active
+ * mark: the dummy last one, placed after the end of the last line, which
+ * we never actually reach. This saves us a lot of special checks and
+ * cases in this function. */
+ mark = ctx->marks;
+ while(!(mark->flags & MD_MARK_ACTIVE))
+ mark++;
+
+ while(1) {
+ /* Process the text up to the next mark or end-of-line, whichever
+ * comes first. */
+ OFF tmp = (line->end < mark->beg ? line->end : mark->beg);
+ if(tmp > off) {
+ MD_TEXT(MD_TEXT_NORMAL, STR(off), tmp - off);
+ off = tmp;
+ }
+
+ /* If reached the mark, process it and move to next one. */
+ if(off >= mark->beg) {
+ switch(mark->ch) {
+ case _T('\\'): /* Backslash escape. */
+ /* An escaped new line outputs nothing here; it only upgrades
+ * the coming line break. Otherwise output the escaped char
+ * alone, i.e. without the backslash. */
+ if(ISNEWLINE(mark->beg+1))
+ enforce_hardbreak = 1;
+ else
+ MD_TEXT(MD_TEXT_NORMAL, STR(mark->beg+1), 1);
+ break;
+ }
- /* Output soft or hard line break. */
- if(i + 1 < n_lines) {
+ /* Skip the whole mark. For an escaped new line this may move us
+ * beyond line->end; the end-of-line check below handles that. */
+ off = mark->end;
+
+ /* Move to next active mark. */
+ mark++;
+ while(!(mark->flags & MD_MARK_ACTIVE))
+ mark++;
+ }
+
+ /* If reached end of line, move to next one. */
+ if(off >= line->end) {
MD_TEXTTYPE break_type;
- if(CH(lines[i].end) == _T(' ') && CH(lines[i].end+1) == _T(' '))
+ /* If it is the last line, we are done. */
+ if(off >= end)
+ break;
+
+ /* Output soft or hard line break. The break is hard if it was
+ * escaped by a backslash or if there are two spaces at line->end. */
+ if(enforce_hardbreak || (CH(line->end) == _T(' ') && CH(line->end+1) == _T(' ')))
break_type = MD_TEXT_BR;
else
break_type = MD_TEXT_SOFTBR;
-
MD_TEXT(break_type, _T("\n"), 1);
+
+ /* Switch to the following line. */
+ line++;
+ off = line->beg;
+
+ /* The escape applies only to the line break just output. */
+ enforce_hardbreak = 0;
}
}
@@ -311,6 +470,27 @@ abort:
return ret;
}
+
+/******************************************
+ *** Processing Single Block Contents ***
+ ******************************************/
+
+/* Process the contents of a normal block in two passes: first collect the
+ * marks with md_analyze_inlines(), then output the block with
+ * md_process_inlines(). The second pass is skipped if the first one fails.
+ *
+ * Returns 0 on success, or the non-zero value of whichever pass failed.
+ */
+static int
+md_process_normal_block(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
+{
+    int status = md_analyze_inlines(ctx, lines, n_lines);
+
+    if(status == 0)
+        status = md_process_inlines(ctx, lines, n_lines);
+
+    return status;
+}
+
static int
md_process_verbatim_block(MD_CTX* ctx, MD_TEXTTYPE text_type, const MD_LINE* lines, int n_lines)
{
@@ -1078,6 +1258,7 @@ int
md_parse(const MD_CHAR* text, MD_SIZE size, const MD_RENDERER* renderer, void* userdata)
{
MD_CTX ctx;
+ int ret;
/* Setup context structure. */
memset(&ctx, 0, sizeof(MD_CTX));
@@ -1089,6 +1270,11 @@ md_parse(const MD_CHAR* text, MD_SIZE size, const MD_RENDERER* renderer, void* u
/* Offset for indented code block. */
ctx.code_indent_offset = (ctx.r.flags & MD_FLAG_NOINDENTEDCODEBLOCKS) ? (OFF)(-1) : 4;
- /* Do all the hard work. */
- return md_process_doc(&ctx);
+ /* All the work. */
+ ret = md_process_doc(&ctx);
+
+ /* Clean-up. */
+ free(ctx.marks);
+
+ return ret;
}