commit 5271238426f74e4250b28b3a690ada99da5cca4e
parent 72173b3f8aae2602af7f4233e0a2126f3106f6bc
Author: Martin Mitas <mity@morous.org>
Date: Tue, 27 Dec 2016 22:47:03 +0100
When parsing tables, a pipe inside a link, image or code span cannot form a cell boundary (issue #7).
Diffstat:
| M | md4c/md4c.c | | | 218 | ++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------------- |
| M | test/tables.txt | | | 23 | +++++++++++++++++++++++ |
2 files changed, 168 insertions(+), 73 deletions(-)
diff --git a/md4c/md4c.c b/md4c/md4c.c
@@ -117,13 +117,16 @@ struct MD_CTX_tag {
char mark_char_map[128];
/* For resolving of inline spans. */
- MD_MARKCHAIN mark_chains[6];
+ MD_MARKCHAIN mark_chains[7];
#define PTR_CHAIN ctx->mark_chains[0]
#define BACKTICK_OPENERS ctx->mark_chains[1]
#define LOWERTHEN_OPENERS ctx->mark_chains[2]
#define ASTERISK_OPENERS ctx->mark_chains[3]
#define UNDERSCORE_OPENERS ctx->mark_chains[4]
#define BRACKET_OPENERS ctx->mark_chains[5]
+#define TABLECELLBOUNDARIES ctx->mark_chains[6]
+
+ int n_table_cell_boundaries;
/* For resolving links. */
int unresolved_link_head;
@@ -2409,6 +2412,9 @@ md_build_mark_char_map(MD_CTX* ctx)
if(ctx->r.flags & MD_FLAG_PERMISSIVEEMAILAUTOLINKS)
ctx->mark_char_map['@'] = 1;
+ if(ctx->r.flags & MD_FLAG_TABLES)
+ ctx->mark_char_map['|'] = 1;
+
if(ctx->r.flags & MD_FLAG_COLLAPSEWHITESPACE) {
int i;
@@ -2420,7 +2426,7 @@ md_build_mark_char_map(MD_CTX* ctx)
}
static int
-md_collect_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
+md_collect_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines, int table_mode)
{
int i;
int ret = 0;
@@ -2626,6 +2632,13 @@ md_collect_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
continue;
}
+ /* A potential table cell boundary. */
+ if(table_mode && ch == _T('|')) {
+ PUSH_MARK(ch, off, off+1, 0);
+ off++;
+ continue;
+ }
+
/* Turn non-trivial whitespace into single space. */
if(ISWHITESPACE_(ch)) {
OFF tmp = off+1;
@@ -3108,6 +3121,16 @@ md_analyze_entity(MD_CTX* ctx, int mark_index)
}
+/* Accept the '|' mark stored at ctx->marks[mark_index] as a table cell
+ * boundary: set its MD_MARK_RESOLVED flag, pass it to md_mark_chain_append()
+ * for the TABLECELLBOUNDARIES chain and increment
+ * ctx->n_table_cell_boundaries so that the counter tracks the number of
+ * marks added to that chain. */
static void
+md_analyze_table_cell_boundary(MD_CTX* ctx, int mark_index)
+{
+    ctx->marks[mark_index].flags |= MD_MARK_RESOLVED;
+
+    md_mark_chain_append(ctx, &TABLECELLBOUNDARIES, mark_index);
+    ctx->n_table_cell_boundaries += 1;
+}
+
+static void
md_analyze_simple_pairing_mark(MD_CTX* ctx, MD_MARKCHAIN* chain, int mark_index,
int apply_rule_of_three)
{
@@ -3310,6 +3333,7 @@ md_analyze_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF en
case '!': /* Pass through. */
case ']': md_analyze_bracket(ctx, i); break;
case '&': md_analyze_entity(ctx, i); break;
+ case '|': md_analyze_table_cell_boundary(ctx, i); break;
case '*': md_analyze_asterisk(ctx, i); break;
case '_': md_analyze_underscore(ctx, i); break;
case ':': md_analyze_permissive_url_autolink(ctx, i); break;
@@ -3322,7 +3346,7 @@ md_analyze_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF en
/* Analyze marks (build ctx->marks). */
static int
-md_analyze_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
+md_analyze_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines, int table_mode)
{
int ret;
OFF beg = lines[0].beg;
@@ -3332,7 +3356,7 @@ md_analyze_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
ctx->n_marks = 0;
/* Collect all marks. */
- if(md_collect_marks(ctx, lines, n_lines) != 0)
+ if(md_collect_marks(ctx, lines, n_lines, table_mode) != 0)
return -1;
/* We analyze marks in few groups to handle their precedence. */
@@ -3349,12 +3373,23 @@ md_analyze_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
BRACKET_OPENERS.tail = -1;
ctx->unresolved_link_head = -1;
ctx->unresolved_link_tail = -1;
- /* (3) Emphasis and strong emphasis; permissive autolinks. */
- md_analyze_marks(ctx, lines, n_lines, beg, end, _T("*_@:"));
- ASTERISK_OPENERS.head = -1;
- ASTERISK_OPENERS.tail = -1;
- UNDERSCORE_OPENERS.head = -1;
- UNDERSCORE_OPENERS.tail = -1;
+ if(table_mode) {
+ /* (3a) Analyze table cell boundaries.
+ * Note we reset TABLECELLBOUNDARIES chain prior to the call md_analyze_marks(),
+ * not after, because caller may need it. */
+ MD_ASSERT(n_lines == 1);
+ TABLECELLBOUNDARIES.head = -1;
+ TABLECELLBOUNDARIES.tail = -1;
+ ctx->n_table_cell_boundaries = 0;
+ md_analyze_marks(ctx, lines, n_lines, beg, end, _T("|"));
+ } else {
+ /* (3b) Emphasis and strong emphasis; permissive autolinks. */
+ md_analyze_marks(ctx, lines, n_lines, beg, end, _T("*_@:"));
+ ASTERISK_OPENERS.head = -1;
+ ASTERISK_OPENERS.tail = -1;
+ UNDERSCORE_OPENERS.head = -1;
+ UNDERSCORE_OPENERS.tail = -1;
+ }
abort:
return ret;
@@ -3627,52 +3662,85 @@ md_analyze_table_alignment(MD_CTX* ctx, OFF beg, OFF end, MD_ALIGN* align, int n
static int md_process_normal_block_contents(MD_CTX* ctx, const MD_LINE* lines, int n_lines);
+/* Process a single table cell whose raw text spans the offsets [beg, end).
+ *
+ * The range is first trimmed of surrounding whitespace; the remainder is
+ * then processed as a one-line normal block wrapped in a block of type
+ * 'cell_type', with 'align' passed along in the block detail.
+ *
+ * Returns 0 on success. NOTE(review): on failure MD_CHECK presumably stores
+ * the callee's error code in 'ret' and jumps to 'abort' -- confirm against
+ * the macro definition. */
static int
-md_process_table_row(MD_CTX* ctx, MD_BLOCKTYPE cell_type, OFF beg, OFF end,
- const MD_ALIGN* align, int n_align)
+md_process_table_cell(MD_CTX* ctx, MD_BLOCKTYPE cell_type, MD_ALIGN align, OFF beg, OFF end)
{
- OFF off = beg;
- OFF cell_beg, cell_end;
- int cell_index = 0;
+ MD_LINE line;
+ MD_BLOCK_TD_DETAIL det;
int ret = 0;
- MD_ENTER_BLOCK(MD_BLOCK_TR, NULL);
-
- if(CH(off) == _T('|'))
- off++;
+ /* Strip whitespace from both ends of the cell; an all-whitespace cell
+  * collapses to an empty range (beg == end). */
+ while(beg < end && ISWHITESPACE(beg))
+ beg++;
+ while(end > beg && ISWHITESPACE(end-1))
+ end--;
- while(off < end) {
- cell_beg = off;
- while(off < end && CH(off) != _T('|')) {
- if(CH(off) == _T('\\') && off+1 < end && ISPUNCT(off+1))
- off += 2;
- else
- off++;
- }
- cell_end = off;
+ /* Describe the trimmed cell as a single line of input. */
+ det.align = align;
+ line.beg = beg;
+ line.end = end;
- while(cell_beg < end && ISWHITESPACE(cell_beg))
- cell_beg++;
- while(cell_end > cell_beg && ISWHITESPACE(cell_end-1))
- cell_end--;
+ /* Process the cell contents between the enter/leave pair of the cell block. */
+ MD_ENTER_BLOCK(cell_type, &det);
+ MD_CHECK(md_process_normal_block_contents(ctx, &line, 1));
+ MD_LEAVE_BLOCK(cell_type, &det);
- if(cell_end > cell_beg || off < end) {
- MD_LINE cell_line = { cell_beg, cell_end };
- MD_BLOCK_TD_DETAIL det;
+abort:
+ return ret;
+}
- det.align = (cell_index < n_align ? align[cell_index] : MD_ALIGN_DEFAULT);
+static int
+md_process_table_row(MD_CTX* ctx, MD_BLOCKTYPE cell_type, OFF beg, OFF end,
+ const MD_ALIGN* align, int n_align)
+{
+ MD_LINE line = { beg, end };
+ OFF* pipe_offs;
+ int i, j, n;
+ int ret = 0;
- MD_ENTER_BLOCK(cell_type, &det);
- MD_CHECK(md_process_normal_block_contents(ctx, &cell_line, 1));
- MD_LEAVE_BLOCK(cell_type, &det);
- cell_index++;
- }
+ /* Break the line into table cells by identifying pipe characters who
+ * form the cell boundary. */
+ MD_CHECK(md_analyze_inlines(ctx, &line, 1, TRUE));
- off++;
+ /* We have to remember the cell boundaries in local buffer because
+ * ctx->marks[] shall be reused during cell contents processing. */
+ n = ctx->n_table_cell_boundaries;
+ pipe_offs = (OFF*) malloc(n * sizeof(OFF));
+ if(pipe_offs == NULL) {
+ MD_LOG("malloc() failed.");
+ ret = -1;
+ goto abort;
+ }
+ for(i = TABLECELLBOUNDARIES.head, j = 0; i >= 0; i = ctx->marks[i].next) {
+ MD_MARK* mark = &ctx->marks[i];
+ pipe_offs[j++] = mark->beg;
}
+ /* Process cells. */
+ MD_ENTER_BLOCK(MD_BLOCK_TR, NULL);
+ j = 0;
+ if(beg < pipe_offs[0]) {
+ MD_CHECK(md_process_table_cell(ctx, cell_type,
+ (j < n_align ? align[j++] : MD_ALIGN_DEFAULT),
+ beg, pipe_offs[0]));
+ }
+ for(i = 0; i < n-1; i++) {
+ MD_CHECK(md_process_table_cell(ctx, cell_type,
+ (j < n_align ? align[j++] : MD_ALIGN_DEFAULT),
+ pipe_offs[i]+1, pipe_offs[i+1]));
+ }
+ if(pipe_offs[n-1] < end-1) {
+ MD_CHECK(md_process_table_cell(ctx, cell_type,
+ (j < n_align ? align[j++] : MD_ALIGN_DEFAULT),
+ pipe_offs[n-1]+1, end));
+ }
MD_LEAVE_BLOCK(MD_BLOCK_TR, NULL);
abort:
+ free(pipe_offs);
+
+ /* Free any temporary memory blocks stored within some dummy marks. */
+ for(i = PTR_CHAIN.head; i >= 0; i = ctx->marks[i].next)
+ free(md_mark_get_ptr(ctx, i));
+ PTR_CHAIN.head = -1;
+ PTR_CHAIN.tail = -1;
+
return ret;
}
@@ -3713,6 +3781,35 @@ abort:
return ret;
}
+/* Check whether the line starting at offset 'beg' can serve as a table row,
+ * i.e. whether table-mode inline analysis leaves at least one pipe in the
+ * TABLECELLBOUNDARIES chain.
+ *
+ * Returns TRUE if so; in that case the offset of the end of the line is
+ * stored to *p_end (when 'p_end' is not NULL). Otherwise *p_end is left
+ * untouched.
+ *
+ * NOTE(review): if MD_CHECK stores a negative error code in 'ret' when the
+ * analysis fails, that value is returned as is and callers testing the
+ * result as a boolean would see it as true -- confirm against the macro. */
+static int
+md_is_table_row(MD_CTX* ctx, OFF beg, OFF* p_end)
+{
+    MD_LINE row;
+    int mark;
+    int ret = FALSE;
+
+    /* The candidate row runs from 'beg' up to the nearest newline, or up to
+     * the end of the document if there is none. */
+    row.beg = beg;
+    for(row.end = beg; row.end < ctx->size; row.end++) {
+        if(ISNEWLINE(row.end))
+            break;
+    }
+
+    MD_CHECK(md_analyze_inlines(ctx, &row, 1, TRUE));
+
+    /* Not a single cell boundary found: not a table row. */
+    if(TABLECELLBOUNDARIES.head < 0)
+        goto abort;
+
+    if(p_end != NULL)
+        *p_end = row.end;
+    ret = TRUE;
+
+abort:
+    /* Free any temporary memory blocks stored within some dummy marks. */
+    mark = PTR_CHAIN.head;
+    while(mark >= 0) {
+        free(md_mark_get_ptr(ctx, mark));
+        mark = ctx->marks[mark].next;
+    }
+    PTR_CHAIN.head = -1;
+    PTR_CHAIN.tail = -1;
+
+    return ret;
+}
+
/**************************
*** Processing Block ***
@@ -3755,7 +3852,7 @@ md_process_normal_block_contents(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
int i;
int ret;
- MD_CHECK(md_analyze_inlines(ctx, lines, n_lines));
+ MD_CHECK(md_analyze_inlines(ctx, lines, n_lines, FALSE));
MD_CHECK(md_process_inlines(ctx, lines, n_lines));
abort:
@@ -4620,29 +4717,6 @@ md_is_html_block_end_condition(MD_CTX* ctx, OFF beg, OFF* p_end)
}
}
-/* Check whether there is a given unescaped char 'ch' between 'beg' and end of line. */
-static int
-md_line_contains_char(MD_CTX* ctx, OFF beg, CHAR ch, OFF* p_pos)
-{
- OFF off = beg;
-
- while(off < ctx->size) {
- if(ISNEWLINE(off)) {
- return FALSE;
- } else if(CH(off) == _T('\\') && off+1 < ctx->size && ISPUNCT(off+1)) {
- off += 2;
- } else if(CH(off) == ch) {
- if(p_pos != NULL)
- *p_pos = off;
- return TRUE;
- } else {
- off++;
- }
- }
-
- return FALSE;
-}
-
static int
md_is_container_compatible(const MD_CONTAINER* pivot, const MD_CONTAINER* container)
@@ -5029,11 +5103,9 @@ redo:
}
/* Check whether we are table continuation. */
- if(pivot_line->type == MD_LINE_TABLE) {
- if(md_line_contains_char(ctx, off, _T('|'), &off)) {
- line->type = MD_LINE_TABLE;
- goto done;
- }
+ if(pivot_line->type == MD_LINE_TABLE && md_is_table_row(ctx, off, &off)) {
+ line->type = MD_LINE_TABLE;
+ goto done;
}
/* Check for "brother" container. I.e. whether we are another list item
@@ -5175,8 +5247,8 @@ redo:
unsigned col_count;
if(ctx->current_block != NULL && ctx->current_block->n_lines == 1 &&
- md_line_contains_char(ctx, pivot_line->beg, _T('|'), NULL) &&
- md_is_table_underline(ctx, off, &off, &col_count))
+ md_is_table_underline(ctx, off, &off, &col_count) &&
+ md_is_table_row(ctx, pivot_line->beg, NULL))
{
line->data = col_count;
line->type = MD_LINE_TABLEUNDERLINE;
diff --git a/test/tables.txt b/test/tables.txt
@@ -247,3 +247,26 @@ quux | [quuz](/url2)
</tbody>
</table>
````````````````````````````````
+
+However, pipes which are inside a link, an image or a code span are not
+recognized as cell boundaries.
+
+```````````````````````````````` example
+Column 1 | [|](/url)
+---------|---------
+`foo | bar`
+baz | qux
+quux | quuz
+.
+<table>
+<thead>
+<tr><th>Column 1</th><th><a href="/url">|</a></th></tr>
+</thead>
+<tbody>
+</tbody>
+</table>
+<p><code>foo | bar</code>
+baz | qux
+quux | quuz</p>
+
+````````````````````````````````