md

cat markdown files with syntax highlighting
git clone https://noulin.net/git/md.git
Log | Files | Refs | README | LICENSE

md4c.c (234850B)


      1 /* commit e9ff661ff818ee94a4a231958d9b6768dc6882c9 - mity/md4c repo
      2  * MD4C: Markdown parser for C
      3  * (http://github.com/mity/md4c)
      4  *
      5  * Copyright (c) 2016-2020 Martin Mitas
      6  *
      7  * Permission is hereby granted, free of charge, to any person obtaining a
      8  * copy of this software and associated documentation files (the "Software"),
      9  * to deal in the Software without restriction, including without limitation
     10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
     11  * and/or sell copies of the Software, and to permit persons to whom the
     12  * Software is furnished to do so, subject to the following conditions:
     13  *
     14  * The above copyright notice and this permission notice shall be included in
     15  * all copies or substantial portions of the Software.
     16  *
     17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
     18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
     20  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     22  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     23  * IN THE SOFTWARE.
     24  */
     25 
     26 #include "md4c.h"
     27 
     28 #include <limits.h>
     29 #include <stdio.h>
     30 #include <stdlib.h>
     31 #include <string.h>
     32 
     33 
     34 /*****************************
     35  ***  Miscellaneous Stuff  ***
     36  *****************************/
     37 
     38 #if !defined(__STDC_VERSION__) || __STDC_VERSION__ < 199409L
     39     /* C89/90 or old compilers in general may not understand "inline". */
     40     #if defined __GNUC__
     41         #define inline __inline__
     42     #elif defined _MSC_VER
     43         #define inline __inline
     44     #else
     45         #define inline
     46     #endif
     47 #endif
     48 
     49 /* Make the UTF-8 support the default. */
     50 #if !defined MD4C_USE_ASCII && !defined MD4C_USE_UTF8 && !defined MD4C_USE_UTF16
     51     #define MD4C_USE_UTF8
     52 #endif
     53 
     54 /* Magic for making wide literals with MD4C_USE_UTF16. */
     55 #ifdef _T
     56     #undef _T
     57 #endif
     58 #if defined MD4C_USE_UTF16
     59     #define _T(x)           L##x
     60 #else
     61     #define _T(x)           x
     62 #endif
     63 
     64 /* Misc. macros. */
     65 #define SIZEOF_ARRAY(a)     (sizeof(a) / sizeof(a[0]))
     66 
     67 #define STRINGIZE_(x)       #x
     68 #define STRINGIZE(x)        STRINGIZE_(x)
     69 
     70 #ifndef TRUE
     71     #define TRUE            1
     72     #define FALSE           0
     73 #endif
     74 
     75 #define MD_LOG(msg)                                                     \
     76     do {                                                                \
     77         if(ctx->parser.debug_log != NULL)                               \
     78             ctx->parser.debug_log((msg), ctx->userdata);                \
     79     } while(0)
     80 
     81 #ifdef DEBUG
     82     #define MD_ASSERT(cond)                                             \
     83             do {                                                        \
     84                 if(!(cond)) {                                           \
     85                     MD_LOG(__FILE__ ":" STRINGIZE(__LINE__) ": "        \
     86                            "Assertion '" STRINGIZE(cond) "' failed.");  \
     87                     exit(1);                                            \
     88                 }                                                       \
     89             } while(0)
     90 
     91     #define MD_UNREACHABLE()        MD_ASSERT(1 == 0)
     92 #else
     93     #ifdef __GNUC__
     94         #define MD_ASSERT(cond)     do { if(!(cond)) __builtin_unreachable(); } while(0)
     95         #define MD_UNREACHABLE()    do { __builtin_unreachable(); } while(0)
     96     #elif defined _MSC_VER  &&  _MSC_VER > 120
     97         #define MD_ASSERT(cond)     do { __assume(cond); } while(0)
     98         #define MD_UNREACHABLE()    do { __assume(0); } while(0)
     99     #else
    100         #define MD_ASSERT(cond)     do {} while(0)
    101         #define MD_UNREACHABLE()    do {} while(0)
    102     #endif
    103 #endif
    104 
    105 /* For falling through case labels in switch statements. */
    106 #if defined __clang__ && __clang_major__ >= 12
    107     #define MD_FALLTHROUGH()        __attribute__((fallthrough))
    108 #elif defined __GNUC__ && __GNUC__ >= 7
    109     #define MD_FALLTHROUGH()        __attribute__((fallthrough))
    110 #else
    111     #define MD_FALLTHROUGH()        ((void)0)
    112 #endif
    113 
    114 /* Suppress "unused parameter" warnings. */
    115 #define MD_UNUSED(x)                ((void)x)
    116 
    117 
    118 /************************
    119  ***  Internal Types  ***
    120  ************************/
    121 
    122 /* These are omnipresent so lets save some typing. */
    123 #define CHAR    MD_CHAR
    124 #define SZ      MD_SIZE
    125 #define OFF     MD_OFFSET
    126 
    127 typedef struct MD_MARK_tag MD_MARK;
    128 typedef struct MD_BLOCK_tag MD_BLOCK;
    129 typedef struct MD_CONTAINER_tag MD_CONTAINER;
    130 typedef struct MD_REF_DEF_tag MD_REF_DEF;
    131 
    132 
    133 /* During analyzes of inline marks, we need to manage some "mark chains",
    134  * of (yet unresolved) openers. This structure holds start/end of the chain.
    135  * The chain internals are then realized through MD_MARK::prev and ::next.
    136  */
    137 typedef struct MD_MARKCHAIN_tag MD_MARKCHAIN;
    138 struct MD_MARKCHAIN_tag {
    139     int head;   /* Index of first mark in the chain, or -1 if empty. */
    140     int tail;   /* Index of last mark in the chain, or -1 if empty. */
    141 };
    142 
    143 /* Context propagated through all the parsing. */
    144 typedef struct MD_CTX_tag MD_CTX;
    145 struct MD_CTX_tag {
    146     /* Immutable stuff (parameters of md_parse()). */
    147     const CHAR* text;
    148     SZ size;
    149     MD_PARSER parser;
    150     void* userdata;
    151 
    152     /* When this is true, it allows some optimizations. */
    153     int doc_ends_with_newline;
    154 
    155     /* Helper temporary growing buffer. */
    156     CHAR* buffer;
    157     unsigned alloc_buffer;
    158 
    159     /* Reference definitions. */
    160     MD_REF_DEF* ref_defs;
    161     int n_ref_defs;
    162     int alloc_ref_defs;
    163     void** ref_def_hashtable;
    164     int ref_def_hashtable_size;
    165 
    166     /* Stack of inline/span markers.
    167      * This is only used for parsing a single block contents but by storing it
    168      * here we may reuse the stack for subsequent blocks; i.e. we have fewer
    169      * (re)allocations. */
    170     MD_MARK* marks;
    171     int n_marks;
    172     int alloc_marks;
    173 
    174 #if defined MD4C_USE_UTF16
    175     char mark_char_map[128];
    176 #else
    177     char mark_char_map[256];
    178 #endif
    179 
    180     /* For resolving of inline spans. */
    181     MD_MARKCHAIN mark_chains[17];
    182 #define PTR_CHAIN                               (ctx->mark_chains[0])
    183 #define TABLECELLBOUNDARIES                     (ctx->mark_chains[1])
    184 #define ASTERISK_OPENERS_extraword_mod3_0       (ctx->mark_chains[2])
    185 #define ASTERISK_OPENERS_extraword_mod3_1       (ctx->mark_chains[3])
    186 #define ASTERISK_OPENERS_extraword_mod3_2       (ctx->mark_chains[4])
    187 #define ASTERISK_OPENERS_intraword_mod3_0       (ctx->mark_chains[5])
    188 #define ASTERISK_OPENERS_intraword_mod3_1       (ctx->mark_chains[6])
    189 #define ASTERISK_OPENERS_intraword_mod3_2       (ctx->mark_chains[7])
    190 #define UNDERSCORE_OPENERS                      (ctx->mark_chains[8])
    191 #define TILDE_OPENERS_1                         (ctx->mark_chains[9])
    192 #define TILDE_OPENERS_2                         (ctx->mark_chains[10])
    193 #define BRACKET_OPENERS                         (ctx->mark_chains[11])
    194 #define DOLLAR_OPENERS                          (ctx->mark_chains[12])
    195 #define FAINT_OPENERS                           (ctx->mark_chains[13])
    196 #define INVERSE_OPENERS                         (ctx->mark_chains[14])
    197 #define CONCEAL_OPENERS                         (ctx->mark_chains[15])
    198 #define BLINK_OPENERS                           (ctx->mark_chains[16])
    199 #define OPENERS_CHAIN_FIRST                     1
    200 #define OPENERS_CHAIN_LAST                      16
    201 
    202     int n_table_cell_boundaries;
    203 
    204     /* For resolving links. */
    205     int unresolved_link_head;
    206     int unresolved_link_tail;
    207 
    208     /* For resolving raw HTML. */
    209     OFF html_comment_horizon;
    210     OFF html_proc_instr_horizon;
    211     OFF html_decl_horizon;
    212     OFF html_cdata_horizon;
    213 
    214     /* For block analysis.
    215      * Notes:
    216      *   -- It holds MD_BLOCK as well as MD_LINE structures. After each
    217      *      MD_BLOCK, its (multiple) MD_LINE(s) follow.
    218      *   -- For MD_BLOCK_HTML and MD_BLOCK_CODE, MD_VERBATIMLINE(s) are used
    219      *      instead of MD_LINE(s).
    220      */
    221     void* block_bytes;
    222     MD_BLOCK* current_block;
    223     int n_block_bytes;
    224     int alloc_block_bytes;
    225 
    226     /* For container block analysis. */
    227     MD_CONTAINER* containers;
    228     int n_containers;
    229     int alloc_containers;
    230 
    231     /* Minimal indentation to call the block "indented code block". */
    232     unsigned code_indent_offset;
    233 
    234     /* Contextual info for line analysis. */
    235     SZ code_fence_length;   /* For checking closing fence length. */
    236     int html_block_type;    /* For checking closing raw HTML condition. */
    237     int last_line_has_list_loosening_effect;
    238     int last_list_item_starts_with_two_blank_lines;
    239 };
    240 
    241 enum MD_LINETYPE_tag {
    242     MD_LINE_BLANK,
    243     MD_LINE_HR,
    244     MD_LINE_ATXHEADER,
    245     MD_LINE_SETEXTHEADER,
    246     MD_LINE_SETEXTUNDERLINE,
    247     MD_LINE_INDENTEDCODE,
    248     MD_LINE_FENCEDCODE,
    249     MD_LINE_HTML,
    250     MD_LINE_TEXT,
    251     MD_LINE_TABLE,
    252     MD_LINE_TABLEUNDERLINE
    253 };
    254 typedef enum MD_LINETYPE_tag MD_LINETYPE;
    255 
    256 typedef struct MD_LINE_ANALYSIS_tag MD_LINE_ANALYSIS;
    257 struct MD_LINE_ANALYSIS_tag {
    258     MD_LINETYPE type    : 16;
    259     unsigned data       : 16;
    260     OFF beg;
    261     OFF end;
    262     unsigned indent;        /* Indentation level. */
    263 };
    264 
    265 typedef struct MD_LINE_tag MD_LINE;
    266 struct MD_LINE_tag {
    267     OFF beg;
    268     OFF end;
    269 };
    270 
    271 typedef struct MD_VERBATIMLINE_tag MD_VERBATIMLINE;
    272 struct MD_VERBATIMLINE_tag {
    273     OFF beg;
    274     OFF end;
    275     OFF indent;
    276 };
    277 
    278 
    279 /*****************
    280  ***  Helpers  ***
    281  *****************/
    282 
    283 /* Character accessors. */
    284 #define CH(off)                 (ctx->text[(off)])
    285 #define STR(off)                (ctx->text + (off))
    286 
    287 /* Character classification.
    288  * Note we assume ASCII compatibility of code points < 128 here. */
    289 #define ISIN_(ch, ch_min, ch_max)       ((ch_min) <= (unsigned)(ch) && (unsigned)(ch) <= (ch_max))
    290 #define ISANYOF_(ch, palette)           ((ch) != _T('\0')  &&  md_strchr((palette), (ch)) != NULL)
    291 #define ISANYOF2_(ch, ch1, ch2)         ((ch) == (ch1) || (ch) == (ch2))
    292 #define ISANYOF3_(ch, ch1, ch2, ch3)    ((ch) == (ch1) || (ch) == (ch2) || (ch) == (ch3))
    293 #define ISASCII_(ch)                    ((unsigned)(ch) <= 127)
    294 #define ISBLANK_(ch)                    (ISANYOF2_((ch), _T(' '), _T('\t')))
    295 #define ISNEWLINE_(ch)                  (ISANYOF2_((ch), _T('\r'), _T('\n')))
    296 #define ISWHITESPACE_(ch)               (ISBLANK_(ch) || ISANYOF2_((ch), _T('\v'), _T('\f')))
    297 #define ISCNTRL_(ch)                    ((unsigned)(ch) <= 31 || (unsigned)(ch) == 127)
    298 #define ISPUNCT_(ch)                    (ISIN_(ch, 33, 47) || ISIN_(ch, 58, 64) || ISIN_(ch, 91, 96) || ISIN_(ch, 123, 126))
    299 #define ISUPPER_(ch)                    (ISIN_(ch, _T('A'), _T('Z')))
    300 #define ISLOWER_(ch)                    (ISIN_(ch, _T('a'), _T('z')))
    301 #define ISALPHA_(ch)                    (ISUPPER_(ch) || ISLOWER_(ch))
    302 #define ISDIGIT_(ch)                    (ISIN_(ch, _T('0'), _T('9')))
    303 #define ISXDIGIT_(ch)                   (ISDIGIT_(ch) || ISIN_(ch, _T('A'), _T('F')) || ISIN_(ch, _T('a'), _T('f')))
    304 #define ISALNUM_(ch)                    (ISALPHA_(ch) || ISDIGIT_(ch))
    305 
    306 #define ISANYOF(off, palette)           ISANYOF_(CH(off), (palette))
    307 #define ISANYOF2(off, ch1, ch2)         ISANYOF2_(CH(off), (ch1), (ch2))
    308 #define ISANYOF3(off, ch1, ch2, ch3)    ISANYOF3_(CH(off), (ch1), (ch2), (ch3))
    309 #define ISASCII(off)                    ISASCII_(CH(off))
    310 #define ISBLANK(off)                    ISBLANK_(CH(off))
    311 #define ISNEWLINE(off)                  ISNEWLINE_(CH(off))
    312 #define ISWHITESPACE(off)               ISWHITESPACE_(CH(off))
    313 #define ISCNTRL(off)                    ISCNTRL_(CH(off))
    314 #define ISPUNCT(off)                    ISPUNCT_(CH(off))
    315 #define ISUPPER(off)                    ISUPPER_(CH(off))
    316 #define ISLOWER(off)                    ISLOWER_(CH(off))
    317 #define ISALPHA(off)                    ISALPHA_(CH(off))
    318 #define ISDIGIT(off)                    ISDIGIT_(CH(off))
    319 #define ISXDIGIT(off)                   ISXDIGIT_(CH(off))
    320 #define ISALNUM(off)                    ISALNUM_(CH(off))
    321 
    322 
    323 #if defined MD4C_USE_UTF16
    324     #define md_strchr wcschr
    325 #else
    326     #define md_strchr strchr
    327 #endif
    328 
    329 
    330 /* Case insensitive check of string equality. */
    331 static inline int
    332 md_ascii_case_eq(const CHAR* s1, const CHAR* s2, SZ n)
    333 {
    334     OFF i;
    335     for(i = 0; i < n; i++) {
    336         CHAR ch1 = s1[i];
    337         CHAR ch2 = s2[i];
    338 
    339         if(ISLOWER_(ch1))
    340             ch1 += ('A'-'a');
    341         if(ISLOWER_(ch2))
    342             ch2 += ('A'-'a');
    343         if(ch1 != ch2)
    344             return FALSE;
    345     }
    346     return TRUE;
    347 }
    348 
    349 static inline int
    350 md_ascii_eq(const CHAR* s1, const CHAR* s2, SZ n)
    351 {
    352     return memcmp(s1, s2, n * sizeof(CHAR)) == 0;
    353 }
    354 
    355 static int
    356 md_text_with_null_replacement(MD_CTX* ctx, MD_TEXTTYPE type, const CHAR* str, SZ size)
    357 {
    358     OFF off = 0;
    359     int ret = 0;
    360 
    361     while(1) {
    362         while(off < size  &&  str[off] != _T('\0'))
    363             off++;
    364 
    365         if(off > 0) {
    366             ret = ctx->parser.text(type, str, off, ctx->userdata);
    367             if(ret != 0)
    368                 return ret;
    369 
    370             str += off;
    371             size -= off;
    372             off = 0;
    373         }
    374 
    375         if(off >= size)
    376             return 0;
    377 
    378         ret = ctx->parser.text(MD_TEXT_NULLCHAR, _T(""), 1, ctx->userdata);
    379         if(ret != 0)
    380             return ret;
    381         off++;
    382     }
    383 }
    384 
    385 
    386 #define MD_CHECK(func)                                                      \
    387     do {                                                                    \
    388         ret = (func);                                                       \
    389         if(ret < 0)                                                         \
    390             goto abort;                                                     \
    391     } while(0)
    392 
    393 
    394 #define MD_TEMP_BUFFER(sz)                                                  \
    395     do {                                                                    \
    396         if(sz > ctx->alloc_buffer) {                                        \
    397             CHAR* new_buffer;                                               \
    398             SZ new_size = ((sz) + (sz) / 2 + 128) & ~127;                   \
    399                                                                             \
    400             new_buffer = realloc(ctx->buffer, new_size);                    \
    401             if(new_buffer == NULL) {                                        \
    402                 MD_LOG("realloc() failed.");                                \
    403                 ret = -1;                                                   \
    404                 goto abort;                                                 \
    405             }                                                               \
    406                                                                             \
    407             ctx->buffer = new_buffer;                                       \
    408             ctx->alloc_buffer = new_size;                                   \
    409         }                                                                   \
    410     } while(0)
    411 
    412 
    413 #define MD_ENTER_BLOCK(type, arg)                                           \
    414     do {                                                                    \
    415         ret = ctx->parser.enter_block((type), (arg), ctx->userdata);        \
    416         if(ret != 0) {                                                      \
    417             MD_LOG("Aborted from enter_block() callback.");                 \
    418             goto abort;                                                     \
    419         }                                                                   \
    420     } while(0)
    421 
    422 #define MD_LEAVE_BLOCK(type, arg)                                           \
    423     do {                                                                    \
    424         ret = ctx->parser.leave_block((type), (arg), ctx->userdata);        \
    425         if(ret != 0) {                                                      \
    426             MD_LOG("Aborted from leave_block() callback.");                 \
    427             goto abort;                                                     \
    428         }                                                                   \
    429     } while(0)
    430 
    431 #define MD_ENTER_SPAN(type, arg)                                            \
    432     do {                                                                    \
    433         ret = ctx->parser.enter_span((type), (arg), ctx->userdata);         \
    434         if(ret != 0) {                                                      \
    435             MD_LOG("Aborted from enter_span() callback.");                  \
    436             goto abort;                                                     \
    437         }                                                                   \
    438     } while(0)
    439 
    440 #define MD_LEAVE_SPAN(type, arg)                                            \
    441     do {                                                                    \
    442         ret = ctx->parser.leave_span((type), (arg), ctx->userdata);         \
    443         if(ret != 0) {                                                      \
    444             MD_LOG("Aborted from leave_span() callback.");                  \
    445             goto abort;                                                     \
    446         }                                                                   \
    447     } while(0)
    448 
    449 #define MD_TEXT(type, str, size)                                            \
    450     do {                                                                    \
    451         if(size > 0) {                                                      \
    452             ret = ctx->parser.text((type), (str), (size), ctx->userdata);   \
    453             if(ret != 0) {                                                  \
    454                 MD_LOG("Aborted from text() callback.");                    \
    455                 goto abort;                                                 \
    456             }                                                               \
    457         }                                                                   \
    458     } while(0)
    459 
    460 #define MD_TEXT_INSECURE(type, str, size)                                   \
    461     do {                                                                    \
    462         if(size > 0) {                                                      \
    463             ret = md_text_with_null_replacement(ctx, type, str, size);      \
    464             if(ret != 0) {                                                  \
    465                 MD_LOG("Aborted from text() callback.");                    \
    466                 goto abort;                                                 \
    467             }                                                               \
    468         }                                                                   \
    469     } while(0)
    470 
    471 
    472 /* If the offset falls into a gap between line, we return the following
    473  * line. */
    474 static const MD_LINE*
    475 md_lookup_line(OFF off, const MD_LINE* lines, int n_lines)
    476 {
    477     int lo, hi;
    478     int pivot;
    479     const MD_LINE* line;
    480 
    481     lo = 0;
    482     hi = n_lines - 1;
    483     while(lo <= hi) {
    484         pivot = (lo + hi) / 2;
    485         line = &lines[pivot];
    486 
    487         if(off < line->beg) {
    488             hi = pivot - 1;
    489             if(hi < 0  ||  lines[hi].end <= off)
    490                 return line;
    491         } else if(off > line->end) {
    492             lo = pivot + 1;
    493         } else {
    494             return line;
    495         }
    496     }
    497 
    498     return NULL;
    499 }
    500 
    501 
    502 /*************************
    503  ***  Unicode Support  ***
    504  *************************/
    505 
    506 typedef struct MD_UNICODE_FOLD_INFO_tag MD_UNICODE_FOLD_INFO;
    507 struct MD_UNICODE_FOLD_INFO_tag {
    508     unsigned codepoints[3];
    509     unsigned n_codepoints;
    510 };
    511 
    512 
    513 #if defined MD4C_USE_UTF16 || defined MD4C_USE_UTF8
    /* Binary search in a sorted "map" of codepoints.
     *
     * A map record is either a single codepoint, or a pair of records which
     * encodes a whole range of consecutive codepoints: the first one is
     * (MIN_CODEPOINT | 0x40000000), the second one (MAX_CODEPOINT | 0x80000000).
     *
     * Returns the index of the matching record in the map (for a range, it
     * is the index of its minimal value), or -1 if the codepoint is not
     * there. */
    static int
    md_unicode_bsearch__(unsigned codepoint, const unsigned* map, size_t map_size)
    {
        int lo = 0;
        int hi = (int) map_size-1;

        while(lo <= hi) {
            const int mid = (lo + hi) / 2;
            const unsigned record = map[mid];
            int first = mid;
            int last = mid;

            /* The record in the middle may be any half of a range; if so,
             * extend the pivot to cover the whole range. */
            if(record & 0x40000000)
                last = mid + 1;
            if(record & 0x80000000)
                first = mid - 1;

            if(codepoint < (map[first] & 0x00ffffff)) {
                hi = first - 1;
                continue;
            }
            if(codepoint > (map[last] & 0x00ffffff)) {
                lo = last + 1;
                continue;
            }

            return first;
        }

        return -1;
    }
    546 
    /* Check whether the codepoint is a whitespace.
     *
     * Codepoints up to 0x7f are decided by ISWHITESPACE_(), all the other
     * ones by a lookup in the map of the Unicode "Zs" category.
     *
     * Returns non-zero for a whitespace, zero otherwise. */
    static int
    md_is_unicode_whitespace__(unsigned codepoint)
    {
#define R(cp_min, cp_max)   ((cp_min) | 0x40000000), ((cp_max) | 0x80000000)
#define S(cp)               (cp)
        /* Unicode "Zs" category.
         * (generated by scripts/build_whitespace_map.py) */
        static const unsigned WHITESPACE_MAP[] = {
            S(0x0020), S(0x00a0), S(0x1680), R(0x2000,0x200a), S(0x202f), S(0x205f), S(0x3000)
        };
#undef R
#undef S

        if(codepoint > 0x7f)
            return (md_unicode_bsearch__(codepoint, WHITESPACE_MAP, SIZEOF_ARRAY(WHITESPACE_MAP)) >= 0);

        /* The ASCII range is by far the most frequent one, so it is handled
         * without any map lookup. The CommonMark specification also asks
         * for a few more whitespace characters in this range. */
        return ISWHITESPACE_(codepoint);
    }
    567 
    /* Check whether the codepoint is a punctuation character.
     *
     * Codepoints up to 0x7f are decided by ISPUNCT_(), all the other ones
     * by a lookup in the map of the Unicode punctuation categories.
     *
     * Returns non-zero for a punctuation character, zero otherwise. */
    static int
    md_is_unicode_punct__(unsigned codepoint)
    {
#define R(cp_min, cp_max)   ((cp_min) | 0x40000000), ((cp_max) | 0x80000000)
#define S(cp)               (cp)
        /* Unicode "Pc", "Pd", "Pe", "Pf", "Pi", "Po", "Ps" categories.
         * (generated by scripts/build_punct_map.py) */
        static const unsigned PUNCT_MAP[] = {
            R(0x0021,0x0023), R(0x0025,0x002a), R(0x002c,0x002f), R(0x003a,0x003b), R(0x003f,0x0040),
            R(0x005b,0x005d), S(0x005f), S(0x007b), S(0x007d), S(0x00a1), S(0x00a7), S(0x00ab), R(0x00b6,0x00b7),
            S(0x00bb), S(0x00bf), S(0x037e), S(0x0387), R(0x055a,0x055f), R(0x0589,0x058a), S(0x05be), S(0x05c0),
            S(0x05c3), S(0x05c6), R(0x05f3,0x05f4), R(0x0609,0x060a), R(0x060c,0x060d), S(0x061b), R(0x061e,0x061f),
            R(0x066a,0x066d), S(0x06d4), R(0x0700,0x070d), R(0x07f7,0x07f9), R(0x0830,0x083e), S(0x085e),
            R(0x0964,0x0965), S(0x0970), S(0x09fd), S(0x0a76), S(0x0af0), S(0x0c77), S(0x0c84), S(0x0df4), S(0x0e4f),
            R(0x0e5a,0x0e5b), R(0x0f04,0x0f12), S(0x0f14), R(0x0f3a,0x0f3d), S(0x0f85), R(0x0fd0,0x0fd4),
            R(0x0fd9,0x0fda), R(0x104a,0x104f), S(0x10fb), R(0x1360,0x1368), S(0x1400), S(0x166e), R(0x169b,0x169c),
            R(0x16eb,0x16ed), R(0x1735,0x1736), R(0x17d4,0x17d6), R(0x17d8,0x17da), R(0x1800,0x180a),
            R(0x1944,0x1945), R(0x1a1e,0x1a1f), R(0x1aa0,0x1aa6), R(0x1aa8,0x1aad), R(0x1b5a,0x1b60),
            R(0x1bfc,0x1bff), R(0x1c3b,0x1c3f), R(0x1c7e,0x1c7f), R(0x1cc0,0x1cc7), S(0x1cd3), R(0x2010,0x2027),
            R(0x2030,0x2043), R(0x2045,0x2051), R(0x2053,0x205e), R(0x207d,0x207e), R(0x208d,0x208e),
            R(0x2308,0x230b), R(0x2329,0x232a), R(0x2768,0x2775), R(0x27c5,0x27c6), R(0x27e6,0x27ef),
            R(0x2983,0x2998), R(0x29d8,0x29db), R(0x29fc,0x29fd), R(0x2cf9,0x2cfc), R(0x2cfe,0x2cff), S(0x2d70),
            R(0x2e00,0x2e2e), R(0x2e30,0x2e4f), S(0x2e52), R(0x3001,0x3003), R(0x3008,0x3011), R(0x3014,0x301f),
            S(0x3030), S(0x303d), S(0x30a0), S(0x30fb), R(0xa4fe,0xa4ff), R(0xa60d,0xa60f), S(0xa673), S(0xa67e),
            R(0xa6f2,0xa6f7), R(0xa874,0xa877), R(0xa8ce,0xa8cf), R(0xa8f8,0xa8fa), S(0xa8fc), R(0xa92e,0xa92f),
            S(0xa95f), R(0xa9c1,0xa9cd), R(0xa9de,0xa9df), R(0xaa5c,0xaa5f), R(0xaade,0xaadf), R(0xaaf0,0xaaf1),
            S(0xabeb), R(0xfd3e,0xfd3f), R(0xfe10,0xfe19), R(0xfe30,0xfe52), R(0xfe54,0xfe61), S(0xfe63), S(0xfe68),
            R(0xfe6a,0xfe6b), R(0xff01,0xff03), R(0xff05,0xff0a), R(0xff0c,0xff0f), R(0xff1a,0xff1b),
            R(0xff1f,0xff20), R(0xff3b,0xff3d), S(0xff3f), S(0xff5b), S(0xff5d), R(0xff5f,0xff65), R(0x10100,0x10102),
            S(0x1039f), S(0x103d0), S(0x1056f), S(0x10857), S(0x1091f), S(0x1093f), R(0x10a50,0x10a58), S(0x10a7f),
            R(0x10af0,0x10af6), R(0x10b39,0x10b3f), R(0x10b99,0x10b9c), S(0x10ead), R(0x10f55,0x10f59),
            R(0x11047,0x1104d), R(0x110bb,0x110bc), R(0x110be,0x110c1), R(0x11140,0x11143), R(0x11174,0x11175),
            R(0x111c5,0x111c8), S(0x111cd), S(0x111db), R(0x111dd,0x111df), R(0x11238,0x1123d), S(0x112a9),
            R(0x1144b,0x1144f), R(0x1145a,0x1145b), S(0x1145d), S(0x114c6), R(0x115c1,0x115d7), R(0x11641,0x11643),
            R(0x11660,0x1166c), R(0x1173c,0x1173e), S(0x1183b), R(0x11944,0x11946), S(0x119e2), R(0x11a3f,0x11a46),
            R(0x11a9a,0x11a9c), R(0x11a9e,0x11aa2), R(0x11c41,0x11c45), R(0x11c70,0x11c71), R(0x11ef7,0x11ef8),
            S(0x11fff), R(0x12470,0x12474), R(0x16a6e,0x16a6f), S(0x16af5), R(0x16b37,0x16b3b), S(0x16b44),
            R(0x16e97,0x16e9a), S(0x16fe2), S(0x1bc9f), R(0x1da87,0x1da8b), R(0x1e95e,0x1e95f)
        };
#undef R
#undef S

        if(codepoint > 0x7f)
            return (md_unicode_bsearch__(codepoint, PUNCT_MAP, SIZEOF_ARRAY(PUNCT_MAP)) >= 0);

        /* The ASCII range is by far the most frequent one, so it is handled
         * without any map lookup. The CommonMark specification also asks
         * for a few more punctuation characters in this range. */
        return ISPUNCT_(codepoint);
    }
    617 
    618     static void
    619     md_get_unicode_fold_info(unsigned codepoint, MD_UNICODE_FOLD_INFO* info)
    620     {
    621 #define R(cp_min, cp_max)   ((cp_min) | 0x40000000), ((cp_max) | 0x80000000)
    622 #define S(cp)               (cp)
    623         /* Unicode case folding data.
    624          * (generated by scripts/build_folding_map.py) */
    625         static const unsigned FOLD_MAP_1[] = {
    626             R(0x0041,0x005a), S(0x00b5), R(0x00c0,0x00d6), R(0x00d8,0x00de), R(0x0100,0x012e), R(0x0132,0x0136),
    627             R(0x0139,0x0147), R(0x014a,0x0176), S(0x0178), R(0x0179,0x017d), S(0x017f), S(0x0181), S(0x0182),
    628             S(0x0184), S(0x0186), S(0x0187), S(0x0189), S(0x018a), S(0x018b), S(0x018e), S(0x018f), S(0x0190),
    629             S(0x0191), S(0x0193), S(0x0194), S(0x0196), S(0x0197), S(0x0198), S(0x019c), S(0x019d), S(0x019f),
    630             R(0x01a0,0x01a4), S(0x01a6), S(0x01a7), S(0x01a9), S(0x01ac), S(0x01ae), S(0x01af), S(0x01b1), S(0x01b2),
    631             S(0x01b3), S(0x01b5), S(0x01b7), S(0x01b8), S(0x01bc), S(0x01c4), S(0x01c5), S(0x01c7), S(0x01c8),
    632             S(0x01ca), R(0x01cb,0x01db), R(0x01de,0x01ee), S(0x01f1), S(0x01f2), S(0x01f4), S(0x01f6), S(0x01f7),
    633             R(0x01f8,0x021e), S(0x0220), R(0x0222,0x0232), S(0x023a), S(0x023b), S(0x023d), S(0x023e), S(0x0241),
    634             S(0x0243), S(0x0244), S(0x0245), R(0x0246,0x024e), S(0x0345), S(0x0370), S(0x0372), S(0x0376), S(0x037f),
    635             S(0x0386), R(0x0388,0x038a), S(0x038c), S(0x038e), S(0x038f), R(0x0391,0x03a1), R(0x03a3,0x03ab),
    636             S(0x03c2), S(0x03cf), S(0x03d0), S(0x03d1), S(0x03d5), S(0x03d6), R(0x03d8,0x03ee), S(0x03f0), S(0x03f1),
    637             S(0x03f4), S(0x03f5), S(0x03f7), S(0x03f9), S(0x03fa), R(0x03fd,0x03ff), R(0x0400,0x040f),
    638             R(0x0410,0x042f), R(0x0460,0x0480), R(0x048a,0x04be), S(0x04c0), R(0x04c1,0x04cd), R(0x04d0,0x052e),
    639             R(0x0531,0x0556), R(0x10a0,0x10c5), S(0x10c7), S(0x10cd), R(0x13f8,0x13fd), S(0x1c80), S(0x1c81),
    640             S(0x1c82), S(0x1c83), S(0x1c84), S(0x1c85), S(0x1c86), S(0x1c87), S(0x1c88), R(0x1c90,0x1cba),
    641             R(0x1cbd,0x1cbf), R(0x1e00,0x1e94), S(0x1e9b), R(0x1ea0,0x1efe), R(0x1f08,0x1f0f), R(0x1f18,0x1f1d),
    642             R(0x1f28,0x1f2f), R(0x1f38,0x1f3f), R(0x1f48,0x1f4d), S(0x1f59), S(0x1f5b), S(0x1f5d), S(0x1f5f),
    643             R(0x1f68,0x1f6f), S(0x1fb8), S(0x1fb9), S(0x1fba), S(0x1fbb), S(0x1fbe), R(0x1fc8,0x1fcb), S(0x1fd8),
    644             S(0x1fd9), S(0x1fda), S(0x1fdb), S(0x1fe8), S(0x1fe9), S(0x1fea), S(0x1feb), S(0x1fec), S(0x1ff8),
    645             S(0x1ff9), S(0x1ffa), S(0x1ffb), S(0x2126), S(0x212a), S(0x212b), S(0x2132), R(0x2160,0x216f), S(0x2183),
    646             R(0x24b6,0x24cf), R(0x2c00,0x2c2e), S(0x2c60), S(0x2c62), S(0x2c63), S(0x2c64), R(0x2c67,0x2c6b),
    647             S(0x2c6d), S(0x2c6e), S(0x2c6f), S(0x2c70), S(0x2c72), S(0x2c75), S(0x2c7e), S(0x2c7f), R(0x2c80,0x2ce2),
    648             S(0x2ceb), S(0x2ced), S(0x2cf2), R(0xa640,0xa66c), R(0xa680,0xa69a), R(0xa722,0xa72e), R(0xa732,0xa76e),
    649             S(0xa779), S(0xa77b), S(0xa77d), R(0xa77e,0xa786), S(0xa78b), S(0xa78d), S(0xa790), S(0xa792),
    650             R(0xa796,0xa7a8), S(0xa7aa), S(0xa7ab), S(0xa7ac), S(0xa7ad), S(0xa7ae), S(0xa7b0), S(0xa7b1), S(0xa7b2),
    651             S(0xa7b3), R(0xa7b4,0xa7be), S(0xa7c2), S(0xa7c4), S(0xa7c5), S(0xa7c6), S(0xa7c7), S(0xa7c9), S(0xa7f5),
    652             R(0xab70,0xabbf), R(0xff21,0xff3a), R(0x10400,0x10427), R(0x104b0,0x104d3), R(0x10c80,0x10cb2),
    653             R(0x118a0,0x118bf), R(0x16e40,0x16e5f), R(0x1e900,0x1e921)
    654         };
    655         static const unsigned FOLD_MAP_1_DATA[] = {
    656             0x0061, 0x007a, 0x03bc, 0x00e0, 0x00f6, 0x00f8, 0x00fe, 0x0101, 0x012f, 0x0133, 0x0137, 0x013a, 0x0148,
    657             0x014b, 0x0177, 0x00ff, 0x017a, 0x017e, 0x0073, 0x0253, 0x0183, 0x0185, 0x0254, 0x0188, 0x0256, 0x0257,
    658             0x018c, 0x01dd, 0x0259, 0x025b, 0x0192, 0x0260, 0x0263, 0x0269, 0x0268, 0x0199, 0x026f, 0x0272, 0x0275,
    659             0x01a1, 0x01a5, 0x0280, 0x01a8, 0x0283, 0x01ad, 0x0288, 0x01b0, 0x028a, 0x028b, 0x01b4, 0x01b6, 0x0292,
    660             0x01b9, 0x01bd, 0x01c6, 0x01c6, 0x01c9, 0x01c9, 0x01cc, 0x01cc, 0x01dc, 0x01df, 0x01ef, 0x01f3, 0x01f3,
    661             0x01f5, 0x0195, 0x01bf, 0x01f9, 0x021f, 0x019e, 0x0223, 0x0233, 0x2c65, 0x023c, 0x019a, 0x2c66, 0x0242,
    662             0x0180, 0x0289, 0x028c, 0x0247, 0x024f, 0x03b9, 0x0371, 0x0373, 0x0377, 0x03f3, 0x03ac, 0x03ad, 0x03af,
    663             0x03cc, 0x03cd, 0x03ce, 0x03b1, 0x03c1, 0x03c3, 0x03cb, 0x03c3, 0x03d7, 0x03b2, 0x03b8, 0x03c6, 0x03c0,
    664             0x03d9, 0x03ef, 0x03ba, 0x03c1, 0x03b8, 0x03b5, 0x03f8, 0x03f2, 0x03fb, 0x037b, 0x037d, 0x0450, 0x045f,
    665             0x0430, 0x044f, 0x0461, 0x0481, 0x048b, 0x04bf, 0x04cf, 0x04c2, 0x04ce, 0x04d1, 0x052f, 0x0561, 0x0586,
    666             0x2d00, 0x2d25, 0x2d27, 0x2d2d, 0x13f0, 0x13f5, 0x0432, 0x0434, 0x043e, 0x0441, 0x0442, 0x0442, 0x044a,
    667             0x0463, 0xa64b, 0x10d0, 0x10fa, 0x10fd, 0x10ff, 0x1e01, 0x1e95, 0x1e61, 0x1ea1, 0x1eff, 0x1f00, 0x1f07,
    668             0x1f10, 0x1f15, 0x1f20, 0x1f27, 0x1f30, 0x1f37, 0x1f40, 0x1f45, 0x1f51, 0x1f53, 0x1f55, 0x1f57, 0x1f60,
    669             0x1f67, 0x1fb0, 0x1fb1, 0x1f70, 0x1f71, 0x03b9, 0x1f72, 0x1f75, 0x1fd0, 0x1fd1, 0x1f76, 0x1f77, 0x1fe0,
    670             0x1fe1, 0x1f7a, 0x1f7b, 0x1fe5, 0x1f78, 0x1f79, 0x1f7c, 0x1f7d, 0x03c9, 0x006b, 0x00e5, 0x214e, 0x2170,
    671             0x217f, 0x2184, 0x24d0, 0x24e9, 0x2c30, 0x2c5e, 0x2c61, 0x026b, 0x1d7d, 0x027d, 0x2c68, 0x2c6c, 0x0251,
    672             0x0271, 0x0250, 0x0252, 0x2c73, 0x2c76, 0x023f, 0x0240, 0x2c81, 0x2ce3, 0x2cec, 0x2cee, 0x2cf3, 0xa641,
    673             0xa66d, 0xa681, 0xa69b, 0xa723, 0xa72f, 0xa733, 0xa76f, 0xa77a, 0xa77c, 0x1d79, 0xa77f, 0xa787, 0xa78c,
    674             0x0265, 0xa791, 0xa793, 0xa797, 0xa7a9, 0x0266, 0x025c, 0x0261, 0x026c, 0x026a, 0x029e, 0x0287, 0x029d,
    675             0xab53, 0xa7b5, 0xa7bf, 0xa7c3, 0xa794, 0x0282, 0x1d8e, 0xa7c8, 0xa7ca, 0xa7f6, 0x13a0, 0x13ef, 0xff41,
    676             0xff5a, 0x10428, 0x1044f, 0x104d8, 0x104fb, 0x10cc0, 0x10cf2, 0x118c0, 0x118df, 0x16e60, 0x16e7f, 0x1e922,
    677             0x1e943
    678         };
    679         static const unsigned FOLD_MAP_2[] = {
    680             S(0x00df), S(0x0130), S(0x0149), S(0x01f0), S(0x0587), S(0x1e96), S(0x1e97), S(0x1e98), S(0x1e99),
    681             S(0x1e9a), S(0x1e9e), S(0x1f50), R(0x1f80,0x1f87), R(0x1f88,0x1f8f), R(0x1f90,0x1f97), R(0x1f98,0x1f9f),
    682             R(0x1fa0,0x1fa7), R(0x1fa8,0x1faf), S(0x1fb2), S(0x1fb3), S(0x1fb4), S(0x1fb6), S(0x1fbc), S(0x1fc2),
    683             S(0x1fc3), S(0x1fc4), S(0x1fc6), S(0x1fcc), S(0x1fd6), S(0x1fe4), S(0x1fe6), S(0x1ff2), S(0x1ff3),
    684             S(0x1ff4), S(0x1ff6), S(0x1ffc), S(0xfb00), S(0xfb01), S(0xfb02), S(0xfb05), S(0xfb06), S(0xfb13),
    685             S(0xfb14), S(0xfb15), S(0xfb16), S(0xfb17)
    686         };
    687         static const unsigned FOLD_MAP_2_DATA[] = {
    688             0x0073,0x0073, 0x0069,0x0307, 0x02bc,0x006e, 0x006a,0x030c, 0x0565,0x0582, 0x0068,0x0331, 0x0074,0x0308,
    689             0x0077,0x030a, 0x0079,0x030a, 0x0061,0x02be, 0x0073,0x0073, 0x03c5,0x0313, 0x1f00,0x03b9, 0x1f07,0x03b9,
    690             0x1f00,0x03b9, 0x1f07,0x03b9, 0x1f20,0x03b9, 0x1f27,0x03b9, 0x1f20,0x03b9, 0x1f27,0x03b9, 0x1f60,0x03b9,
    691             0x1f67,0x03b9, 0x1f60,0x03b9, 0x1f67,0x03b9, 0x1f70,0x03b9, 0x03b1,0x03b9, 0x03ac,0x03b9, 0x03b1,0x0342,
    692             0x03b1,0x03b9, 0x1f74,0x03b9, 0x03b7,0x03b9, 0x03ae,0x03b9, 0x03b7,0x0342, 0x03b7,0x03b9, 0x03b9,0x0342,
    693             0x03c1,0x0313, 0x03c5,0x0342, 0x1f7c,0x03b9, 0x03c9,0x03b9, 0x03ce,0x03b9, 0x03c9,0x0342, 0x03c9,0x03b9,
    694             0x0066,0x0066, 0x0066,0x0069, 0x0066,0x006c, 0x0073,0x0074, 0x0073,0x0074, 0x0574,0x0576, 0x0574,0x0565,
    695             0x0574,0x056b, 0x057e,0x0576, 0x0574,0x056d
    696         };
    697         static const unsigned FOLD_MAP_3[] = {
    698             S(0x0390), S(0x03b0), S(0x1f52), S(0x1f54), S(0x1f56), S(0x1fb7), S(0x1fc7), S(0x1fd2), S(0x1fd3),
    699             S(0x1fd7), S(0x1fe2), S(0x1fe3), S(0x1fe7), S(0x1ff7), S(0xfb03), S(0xfb04)
    700         };
    701         static const unsigned FOLD_MAP_3_DATA[] = {
    702             0x03b9,0x0308,0x0301, 0x03c5,0x0308,0x0301, 0x03c5,0x0313,0x0300, 0x03c5,0x0313,0x0301,
    703             0x03c5,0x0313,0x0342, 0x03b1,0x0342,0x03b9, 0x03b7,0x0342,0x03b9, 0x03b9,0x0308,0x0300,
    704             0x03b9,0x0308,0x0301, 0x03b9,0x0308,0x0342, 0x03c5,0x0308,0x0300, 0x03c5,0x0308,0x0301,
    705             0x03c5,0x0308,0x0342, 0x03c9,0x0342,0x03b9, 0x0066,0x0066,0x0069, 0x0066,0x0066,0x006c
    706         };
    707 #undef R
    708 #undef S
    709         static const struct {
    710             const unsigned* map;
    711             const unsigned* data;
    712             size_t map_size;
    713             unsigned n_codepoints;
    714         } FOLD_MAP_LIST[] = {
    715             { FOLD_MAP_1, FOLD_MAP_1_DATA, SIZEOF_ARRAY(FOLD_MAP_1), 1 },
    716             { FOLD_MAP_2, FOLD_MAP_2_DATA, SIZEOF_ARRAY(FOLD_MAP_2), 2 },
    717             { FOLD_MAP_3, FOLD_MAP_3_DATA, SIZEOF_ARRAY(FOLD_MAP_3), 3 }
    718         };
    719 
    720         int i;
    721 
    722         /* Fast path for ASCII characters. */
    723         if(codepoint <= 0x7f) {
    724             info->codepoints[0] = codepoint;
    725             if(ISUPPER_(codepoint))
    726                 info->codepoints[0] += 'a' - 'A';
    727             info->n_codepoints = 1;
    728             return;
    729         }
    730 
    731         /* Try to locate the codepoint in any of the maps. */
    732         for(i = 0; i < (int) SIZEOF_ARRAY(FOLD_MAP_LIST); i++) {
    733             int index;
    734 
    735             index = md_unicode_bsearch__(codepoint, FOLD_MAP_LIST[i].map, FOLD_MAP_LIST[i].map_size);
    736             if(index >= 0) {
    737                 /* Found the mapping. */
    738                 unsigned n_codepoints = FOLD_MAP_LIST[i].n_codepoints;
    739                 const unsigned* map = FOLD_MAP_LIST[i].map;
    740                 const unsigned* codepoints = FOLD_MAP_LIST[i].data + (index * n_codepoints);
    741 
    742                 memcpy(info->codepoints, codepoints, sizeof(unsigned) * n_codepoints);
    743                 info->n_codepoints = n_codepoints;
    744 
    745                 if(FOLD_MAP_LIST[i].map[index] != codepoint) {
    746                     /* The found mapping maps whole range of codepoints,
    747                      * i.e. we have to offset info->codepoints[0] accordingly. */
    748                     if((map[index] & 0x00ffffff)+1 == codepoints[0]) {
    749                         /* Alternating type of the range. */
    750                         info->codepoints[0] = codepoint + ((codepoint & 0x1) == (map[index] & 0x1) ? 1 : 0);
    751                     } else {
    752                         /* Range to range kind of mapping. */
    753                         info->codepoints[0] += (codepoint - (map[index] & 0x00ffffff));
    754                     }
    755                 }
    756 
    757                 return;
    758             }
    759         }
    760 
    761         /* No mapping found. Map the codepoint to itself. */
    762         info->codepoints[0] = codepoint;
    763         info->n_codepoints = 1;
    764     }
    765 #endif
    766 
    767 
    768 #if defined MD4C_USE_UTF16
    769     #define IS_UTF16_SURROGATE_HI(word)     (((WORD)(word) & 0xfc00) == 0xd800)
    770     #define IS_UTF16_SURROGATE_LO(word)     (((WORD)(word) & 0xfc00) == 0xdc00)
    771     #define UTF16_DECODE_SURROGATE(hi, lo)  (0x10000 + ((((unsigned)(hi) & 0x3ff) << 10) | (((unsigned)(lo) & 0x3ff) << 0)))
    772 
    773     static unsigned
    774     md_decode_utf16le__(const CHAR* str, SZ str_size, SZ* p_size)
    775     {
    776         if(IS_UTF16_SURROGATE_HI(str[0])) {
    777             if(1 < str_size && IS_UTF16_SURROGATE_LO(str[1])) {
    778                 if(p_size != NULL)
    779                     *p_size = 2;
    780                 return UTF16_DECODE_SURROGATE(str[0], str[1]);
    781             }
    782         }
    783 
    784         if(p_size != NULL)
    785             *p_size = 1;
    786         return str[0];
    787     }
    788 
    789     static unsigned
    790     md_decode_utf16le_before__(MD_CTX* ctx, OFF off)
    791     {
    792         if(off > 2 && IS_UTF16_SURROGATE_HI(CH(off-2)) && IS_UTF16_SURROGATE_LO(CH(off-1)))
    793             return UTF16_DECODE_SURROGATE(CH(off-2), CH(off-1));
    794 
    795         return CH(off);
    796     }
    797 
    798     /* No whitespace uses surrogates, so no decoding needed here. */
    799     #define ISUNICODEWHITESPACE_(codepoint) md_is_unicode_whitespace__(codepoint)
    800     #define ISUNICODEWHITESPACE(off)        md_is_unicode_whitespace__(CH(off))
    801     #define ISUNICODEWHITESPACEBEFORE(off)  md_is_unicode_whitespace__(CH((off)-1))
    802 
    803     #define ISUNICODEPUNCT(off)             md_is_unicode_punct__(md_decode_utf16le__(STR(off), ctx->size - (off), NULL))
    804     #define ISUNICODEPUNCTBEFORE(off)       md_is_unicode_punct__(md_decode_utf16le_before__(ctx, off))
    805 
    806     static inline int
    807     md_decode_unicode(const CHAR* str, OFF off, SZ str_size, SZ* p_char_size)
    808     {
    809         return md_decode_utf16le__(str+off, str_size-off, p_char_size);
    810     }
    811 #elif defined MD4C_USE_UTF8
    812     #define IS_UTF8_LEAD1(byte)     ((unsigned char)(byte) <= 0x7f)
    813     #define IS_UTF8_LEAD2(byte)     (((unsigned char)(byte) & 0xe0) == 0xc0)
    814     #define IS_UTF8_LEAD3(byte)     (((unsigned char)(byte) & 0xf0) == 0xe0)
    815     #define IS_UTF8_LEAD4(byte)     (((unsigned char)(byte) & 0xf8) == 0xf0)
    816     #define IS_UTF8_TAIL(byte)      (((unsigned char)(byte) & 0xc0) == 0x80)
    817 
    818     static unsigned
    819     md_decode_utf8__(const CHAR* str, SZ str_size, SZ* p_size)
    820     {
    821         if(!IS_UTF8_LEAD1(str[0])) {
    822             if(IS_UTF8_LEAD2(str[0])) {
    823                 if(1 < str_size && IS_UTF8_TAIL(str[1])) {
    824                     if(p_size != NULL)
    825                         *p_size = 2;
    826 
    827                     return (((unsigned int)str[0] & 0x1f) << 6) |
    828                            (((unsigned int)str[1] & 0x3f) << 0);
    829                 }
    830             } else if(IS_UTF8_LEAD3(str[0])) {
    831                 if(2 < str_size && IS_UTF8_TAIL(str[1]) && IS_UTF8_TAIL(str[2])) {
    832                     if(p_size != NULL)
    833                         *p_size = 3;
    834 
    835                     return (((unsigned int)str[0] & 0x0f) << 12) |
    836                            (((unsigned int)str[1] & 0x3f) << 6) |
    837                            (((unsigned int)str[2] & 0x3f) << 0);
    838                 }
    839             } else if(IS_UTF8_LEAD4(str[0])) {
    840                 if(3 < str_size && IS_UTF8_TAIL(str[1]) && IS_UTF8_TAIL(str[2]) && IS_UTF8_TAIL(str[3])) {
    841                     if(p_size != NULL)
    842                         *p_size = 4;
    843 
    844                     return (((unsigned int)str[0] & 0x07) << 18) |
    845                            (((unsigned int)str[1] & 0x3f) << 12) |
    846                            (((unsigned int)str[2] & 0x3f) << 6) |
    847                            (((unsigned int)str[3] & 0x3f) << 0);
    848                 }
    849             }
    850         }
    851 
    852         if(p_size != NULL)
    853             *p_size = 1;
    854         return (unsigned) str[0];
    855     }
    856 
    857     static unsigned
    858     md_decode_utf8_before__(MD_CTX* ctx, OFF off)
    859     {
    860         if(!IS_UTF8_LEAD1(CH(off-1))) {
    861             if(off > 1 && IS_UTF8_LEAD2(CH(off-2)) && IS_UTF8_TAIL(CH(off-1)))
    862                 return (((unsigned int)CH(off-2) & 0x1f) << 6) |
    863                        (((unsigned int)CH(off-1) & 0x3f) << 0);
    864 
    865             if(off > 2 && IS_UTF8_LEAD3(CH(off-3)) && IS_UTF8_TAIL(CH(off-2)) && IS_UTF8_TAIL(CH(off-1)))
    866                 return (((unsigned int)CH(off-3) & 0x0f) << 12) |
    867                        (((unsigned int)CH(off-2) & 0x3f) << 6) |
    868                        (((unsigned int)CH(off-1) & 0x3f) << 0);
    869 
    870             if(off > 3 && IS_UTF8_LEAD4(CH(off-4)) && IS_UTF8_TAIL(CH(off-3)) && IS_UTF8_TAIL(CH(off-2)) && IS_UTF8_TAIL(CH(off-1)))
    871                 return (((unsigned int)CH(off-4) & 0x07) << 18) |
    872                        (((unsigned int)CH(off-3) & 0x3f) << 12) |
    873                        (((unsigned int)CH(off-2) & 0x3f) << 6) |
    874                        (((unsigned int)CH(off-1) & 0x3f) << 0);
    875         }
    876 
    877         return (unsigned) CH(off-1);
    878     }
    879 
    880     #define ISUNICODEWHITESPACE_(codepoint) md_is_unicode_whitespace__(codepoint)
    881     #define ISUNICODEWHITESPACE(off)        md_is_unicode_whitespace__(md_decode_utf8__(STR(off), ctx->size - (off), NULL))
    882     #define ISUNICODEWHITESPACEBEFORE(off)  md_is_unicode_whitespace__(md_decode_utf8_before__(ctx, off))
    883 
    884     #define ISUNICODEPUNCT(off)             md_is_unicode_punct__(md_decode_utf8__(STR(off), ctx->size - (off), NULL))
    885     #define ISUNICODEPUNCTBEFORE(off)       md_is_unicode_punct__(md_decode_utf8_before__(ctx, off))
    886 
    887     static inline unsigned
    888     md_decode_unicode(const CHAR* str, OFF off, SZ str_size, SZ* p_char_size)
    889     {
    890         return md_decode_utf8__(str+off, str_size-off, p_char_size);
    891     }
    892 #else
    893     #define ISUNICODEWHITESPACE_(codepoint) ISWHITESPACE_(codepoint)
    894     #define ISUNICODEWHITESPACE(off)        ISWHITESPACE(off)
    895     #define ISUNICODEWHITESPACEBEFORE(off)  ISWHITESPACE((off)-1)
    896 
    897     #define ISUNICODEPUNCT(off)             ISPUNCT(off)
    898     #define ISUNICODEPUNCTBEFORE(off)       ISPUNCT((off)-1)
    899 
    900     static inline void
    901     md_get_unicode_fold_info(unsigned codepoint, MD_UNICODE_FOLD_INFO* info)
    902     {
    903         info->codepoints[0] = codepoint;
    904         if(ISUPPER_(codepoint))
    905             info->codepoints[0] += 'a' - 'A';
    906         info->n_codepoints = 1;
    907     }
    908 
    909     static inline unsigned
    910     md_decode_unicode(const CHAR* str, OFF off, SZ str_size, SZ* p_size)
    911     {
    912         *p_size = 1;
    913         return (unsigned) str[off];
    914     }
    915 #endif
    916 
    917 
    918 /*************************************
    919  ***  Helper string manipulations  ***
    920  *************************************/
    921 
    922 /* Fill buffer with copy of the string between 'beg' and 'end' but replace any
    923  * line breaks with given replacement character.
    924  *
    925  * NOTE: Caller is responsible to make sure the buffer is large enough.
    926  * (Given the output is always shorter then input, (end - beg) is good idea
    927  * what the caller should allocate.)
    928  */
    929 static void
    930 md_merge_lines(MD_CTX* ctx, OFF beg, OFF end, const MD_LINE* lines, int n_lines,
    931                CHAR line_break_replacement_char, CHAR* buffer, SZ* p_size)
    932 {
    933     CHAR* ptr = buffer;
    934     int line_index = 0;
    935     OFF off = beg;
    936 
    937     MD_UNUSED(n_lines);
    938 
    939     while(1) {
    940         const MD_LINE* line = &lines[line_index];
    941         OFF line_end = line->end;
    942         if(end < line_end)
    943             line_end = end;
    944 
    945         while(off < line_end) {
    946             *ptr = CH(off);
    947             ptr++;
    948             off++;
    949         }
    950 
    951         if(off >= end) {
    952             *p_size = (MD_SIZE)(ptr - buffer);
    953             return;
    954         }
    955 
    956         *ptr = line_break_replacement_char;
    957         ptr++;
    958 
    959         line_index++;
    960         off = lines[line_index].beg;
    961     }
    962 }
    963 
    964 /* Wrapper of md_merge_lines() which allocates new buffer for the output string.
    965  */
    966 static int
    967 md_merge_lines_alloc(MD_CTX* ctx, OFF beg, OFF end, const MD_LINE* lines, int n_lines,
    968                     CHAR line_break_replacement_char, CHAR** p_str, SZ* p_size)
    969 {
    970     CHAR* buffer;
    971 
    972     buffer = (CHAR*) malloc(sizeof(CHAR) * (end - beg));
    973     if(buffer == NULL) {
    974         MD_LOG("malloc() failed.");
    975         return -1;
    976     }
    977 
    978     md_merge_lines(ctx, beg, end, lines, n_lines,
    979                 line_break_replacement_char, buffer, p_size);
    980 
    981     *p_str = buffer;
    982     return 0;
    983 }
    984 
    985 static OFF
    986 md_skip_unicode_whitespace(const CHAR* label, OFF off, SZ size)
    987 {
    988     SZ char_size;
    989     unsigned codepoint;
    990 
    991     while(off < size) {
    992         codepoint = md_decode_unicode(label, off, size, &char_size);
    993         if(!ISUNICODEWHITESPACE_(codepoint)  &&  !ISNEWLINE_(label[off]))
    994             break;
    995         off += char_size;
    996     }
    997 
    998     return off;
    999 }
   1000 
   1001 
   1002 /******************************
   1003  ***  Recognizing raw HTML  ***
   1004  ******************************/
   1005 
   1006 /* md_is_html_tag() may be called when processing inlines (inline raw HTML)
   1007  * or when breaking document to blocks (checking for start of HTML block type 7).
   1008  *
   1009  * When breaking document to blocks, we do not yet know line boundaries, but
   1010  * in that case the whole tag has to live on a single line. We distinguish this
   1011  * by n_lines == 0.
   1012  */
   1013 static int
   1014 md_is_html_tag(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end)
   1015 {
   1016     int attr_state;
   1017     OFF off = beg;
   1018     OFF line_end = (n_lines > 0) ? lines[0].end : ctx->size;
   1019     int i = 0;
   1020 
   1021     MD_ASSERT(CH(beg) == _T('<'));
   1022 
   1023     if(off + 1 >= line_end)
   1024         return FALSE;
   1025     off++;
   1026 
   1027     /* For parsing attributes, we need a little state automaton below.
   1028      * State -1: no attributes are allowed.
   1029      * State 0: attribute could follow after some whitespace.
   1030      * State 1: after a whitespace (attribute name may follow).
   1031      * State 2: after attribute name ('=' MAY follow).
   1032      * State 3: after '=' (value specification MUST follow).
   1033      * State 41: in middle of unquoted attribute value.
   1034      * State 42: in middle of single-quoted attribute value.
   1035      * State 43: in middle of double-quoted attribute value.
   1036      */
   1037     attr_state = 0;
   1038 
   1039     if(CH(off) == _T('/')) {
   1040         /* Closer tag "</ ... >". No attributes may be present. */
   1041         attr_state = -1;
   1042         off++;
   1043     }
   1044 
   1045     /* Tag name */
   1046     if(off >= line_end  ||  !ISALPHA(off))
   1047         return FALSE;
   1048     off++;
   1049     while(off < line_end  &&  (ISALNUM(off)  ||  CH(off) == _T('-')))
   1050         off++;
   1051 
   1052     /* (Optional) attributes (if not closer), (optional) '/' (if not closer)
   1053      * and final '>'. */
   1054     while(1) {
   1055         while(off < line_end  &&  !ISNEWLINE(off)) {
   1056             if(attr_state > 40) {
   1057                 if(attr_state == 41 && (ISBLANK(off) || ISANYOF(off, _T("\"'=<>`")))) {
   1058                     attr_state = 0;
   1059                     off--;  /* Put the char back for re-inspection in the new state. */
   1060                 } else if(attr_state == 42 && CH(off) == _T('\'')) {
   1061                     attr_state = 0;
   1062                 } else if(attr_state == 43 && CH(off) == _T('"')) {
   1063                     attr_state = 0;
   1064                 }
   1065                 off++;
   1066             } else if(ISWHITESPACE(off)) {
   1067                 if(attr_state == 0)
   1068                     attr_state = 1;
   1069                 off++;
   1070             } else if(attr_state <= 2 && CH(off) == _T('>')) {
   1071                 /* End. */
   1072                 goto done;
   1073             } else if(attr_state <= 2 && CH(off) == _T('/') && off+1 < line_end && CH(off+1) == _T('>')) {
   1074                 /* End with digraph '/>' */
   1075                 off++;
   1076                 goto done;
   1077             } else if((attr_state == 1 || attr_state == 2) && (ISALPHA(off) || CH(off) == _T('_') || CH(off) == _T(':'))) {
   1078                 off++;
   1079                 /* Attribute name */
   1080                 while(off < line_end && (ISALNUM(off) || ISANYOF(off, _T("_.:-"))))
   1081                     off++;
   1082                 attr_state = 2;
   1083             } else if(attr_state == 2 && CH(off) == _T('=')) {
   1084                 /* Attribute assignment sign */
   1085                 off++;
   1086                 attr_state = 3;
   1087             } else if(attr_state == 3) {
   1088                 /* Expecting start of attribute value. */
   1089                 if(CH(off) == _T('"'))
   1090                     attr_state = 43;
   1091                 else if(CH(off) == _T('\''))
   1092                     attr_state = 42;
   1093                 else if(!ISANYOF(off, _T("\"'=<>`"))  &&  !ISNEWLINE(off))
   1094                     attr_state = 41;
   1095                 else
   1096                     return FALSE;
   1097                 off++;
   1098             } else {
   1099                 /* Anything unexpected. */
   1100                 return FALSE;
   1101             }
   1102         }
   1103 
   1104         /* We have to be on a single line. See definition of start condition
   1105          * of HTML block, type 7. */
   1106         if(n_lines == 0)
   1107             return FALSE;
   1108 
   1109         i++;
   1110         if(i >= n_lines)
   1111             return FALSE;
   1112 
   1113         off = lines[i].beg;
   1114         line_end = lines[i].end;
   1115 
   1116         if(attr_state == 0  ||  attr_state == 41)
   1117             attr_state = 1;
   1118 
   1119         if(off >= max_end)
   1120             return FALSE;
   1121     }
   1122 
   1123 done:
   1124     if(off >= max_end)
   1125         return FALSE;
   1126 
   1127     *p_end = off+1;
   1128     return TRUE;
   1129 }
   1130 
   1131 static int
   1132 md_scan_for_html_closer(MD_CTX* ctx, const MD_CHAR* str, MD_SIZE len,
   1133                         const MD_LINE* lines, int n_lines,
   1134                         OFF beg, OFF max_end, OFF* p_end,
   1135                         OFF* p_scan_horizon)
   1136 {
   1137     OFF off = beg;
   1138     int i = 0;
   1139 
   1140     if(off < *p_scan_horizon  &&  *p_scan_horizon >= max_end - len) {
   1141         /* We have already scanned the range up to the max_end so we know
   1142          * there is nothing to see. */
   1143         return FALSE;
   1144     }
   1145 
   1146     while(TRUE) {
   1147         while(off + len <= lines[i].end  &&  off + len <= max_end) {
   1148             if(md_ascii_eq(STR(off), str, len)) {
   1149                 /* Success. */
   1150                 *p_end = off + len;
   1151                 return TRUE;
   1152             }
   1153             off++;
   1154         }
   1155 
   1156         i++;
   1157         if(off >= max_end  ||  i >= n_lines) {
   1158             /* Failure. */
   1159             *p_scan_horizon = off;
   1160             return FALSE;
   1161         }
   1162 
   1163         off = lines[i].beg;
   1164     }
   1165 }
   1166 
   1167 static int
   1168 md_is_html_comment(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end)
   1169 {
   1170     OFF off = beg;
   1171 
   1172     MD_ASSERT(CH(beg) == _T('<'));
   1173 
   1174     if(off + 4 >= lines[0].end)
   1175         return FALSE;
   1176     if(CH(off+1) != _T('!')  ||  CH(off+2) != _T('-')  ||  CH(off+3) != _T('-'))
   1177         return FALSE;
   1178     off += 4;
   1179 
   1180     /* ">" and "->" must not follow the opening. */
   1181     if(off < lines[0].end  &&  CH(off) == _T('>'))
   1182         return FALSE;
   1183     if(off+1 < lines[0].end  &&  CH(off) == _T('-')  &&  CH(off+1) == _T('>'))
   1184         return FALSE;
   1185 
   1186     /* HTML comment must not contain "--", so we scan just for "--" instead
   1187      * of "-->" and verify manually that '>' follows. */
   1188     if(md_scan_for_html_closer(ctx, _T("--"), 2,
   1189                 lines, n_lines, off, max_end, p_end, &ctx->html_comment_horizon))
   1190     {
   1191         if(*p_end < max_end  &&  CH(*p_end) == _T('>')) {
   1192             *p_end = *p_end + 1;
   1193             return TRUE;
   1194         }
   1195     }
   1196 
   1197     return FALSE;
   1198 }
   1199 
   1200 static int
   1201 md_is_html_processing_instruction(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end)
   1202 {
   1203     OFF off = beg;
   1204 
   1205     if(off + 2 >= lines[0].end)
   1206         return FALSE;
   1207     if(CH(off+1) != _T('?'))
   1208         return FALSE;
   1209     off += 2;
   1210 
   1211     return md_scan_for_html_closer(ctx, _T("?>"), 2,
   1212                 lines, n_lines, off, max_end, p_end, &ctx->html_proc_instr_horizon);
   1213 }
   1214 
   1215 static int
   1216 md_is_html_declaration(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end)
   1217 {
   1218     OFF off = beg;
   1219 
   1220     if(off + 2 >= lines[0].end)
   1221         return FALSE;
   1222     if(CH(off+1) != _T('!'))
   1223         return FALSE;
   1224     off += 2;
   1225 
   1226     /* Declaration name. */
   1227     if(off >= lines[0].end  ||  !ISALPHA(off))
   1228         return FALSE;
   1229     off++;
   1230     while(off < lines[0].end  &&  ISALPHA(off))
   1231         off++;
   1232     if(off < lines[0].end  &&  !ISWHITESPACE(off))
   1233         return FALSE;
   1234 
   1235     return md_scan_for_html_closer(ctx, _T(">"), 1,
   1236                 lines, n_lines, off, max_end, p_end, &ctx->html_decl_horizon);
   1237 }
   1238 
   1239 static int
   1240 md_is_html_cdata(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end)
   1241 {
   1242     static const CHAR open_str[] = _T("<![CDATA[");
   1243     static const SZ open_size = SIZEOF_ARRAY(open_str) - 1;
   1244 
   1245     OFF off = beg;
   1246 
   1247     if(off + open_size >= lines[0].end)
   1248         return FALSE;
   1249     if(memcmp(STR(off), open_str, open_size) != 0)
   1250         return FALSE;
   1251     off += open_size;
   1252 
   1253     if(lines[n_lines-1].end < max_end)
   1254         max_end = lines[n_lines-1].end - 2;
   1255 
   1256     return md_scan_for_html_closer(ctx, _T("]]>"), 3,
   1257                 lines, n_lines, off, max_end, p_end, &ctx->html_cdata_horizon);
   1258 }
   1259 
   1260 static int
   1261 md_is_html_any(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end)
   1262 {
   1263     MD_ASSERT(CH(beg) == _T('<'));
   1264     return (md_is_html_tag(ctx, lines, n_lines, beg, max_end, p_end)  ||
   1265             md_is_html_comment(ctx, lines, n_lines, beg, max_end, p_end)  ||
   1266             md_is_html_processing_instruction(ctx, lines, n_lines, beg, max_end, p_end)  ||
   1267             md_is_html_declaration(ctx, lines, n_lines, beg, max_end, p_end)  ||
   1268             md_is_html_cdata(ctx, lines, n_lines, beg, max_end, p_end));
   1269 }
   1270 
   1271 
   1272 /****************************
   1273  ***  Recognizing Entity  ***
   1274  ****************************/
   1275 
   1276 static int
   1277 md_is_hex_entity_contents(MD_CTX* ctx, const CHAR* text, OFF beg, OFF max_end, OFF* p_end)
   1278 {
   1279     OFF off = beg;
   1280     MD_UNUSED(ctx);
   1281 
   1282     while(off < max_end  &&  ISXDIGIT_(text[off])  &&  off - beg <= 8)
   1283         off++;
   1284 
   1285     if(1 <= off - beg  &&  off - beg <= 6) {
   1286         *p_end = off;
   1287         return TRUE;
   1288     } else {
   1289         return FALSE;
   1290     }
   1291 }
   1292 
   1293 static int
   1294 md_is_dec_entity_contents(MD_CTX* ctx, const CHAR* text, OFF beg, OFF max_end, OFF* p_end)
   1295 {
   1296     OFF off = beg;
   1297     MD_UNUSED(ctx);
   1298 
   1299     while(off < max_end  &&  ISDIGIT_(text[off])  &&  off - beg <= 8)
   1300         off++;
   1301 
   1302     if(1 <= off - beg  &&  off - beg <= 7) {
   1303         *p_end = off;
   1304         return TRUE;
   1305     } else {
   1306         return FALSE;
   1307     }
   1308 }
   1309 
   1310 static int
   1311 md_is_named_entity_contents(MD_CTX* ctx, const CHAR* text, OFF beg, OFF max_end, OFF* p_end)
   1312 {
   1313     OFF off = beg;
   1314     MD_UNUSED(ctx);
   1315 
   1316     if(off < max_end  &&  ISALPHA_(text[off]))
   1317         off++;
   1318     else
   1319         return FALSE;
   1320 
   1321     while(off < max_end  &&  ISALNUM_(text[off])  &&  off - beg <= 48)
   1322         off++;
   1323 
   1324     if(2 <= off - beg  &&  off - beg <= 48) {
   1325         *p_end = off;
   1326         return TRUE;
   1327     } else {
   1328         return FALSE;
   1329     }
   1330 }
   1331 
   1332 static int
   1333 md_is_entity_str(MD_CTX* ctx, const CHAR* text, OFF beg, OFF max_end, OFF* p_end)
   1334 {
   1335     int is_contents;
   1336     OFF off = beg;
   1337 
   1338     MD_ASSERT(text[off] == _T('&'));
   1339     off++;
   1340 
   1341     if(off+2 < max_end  &&  text[off] == _T('#')  &&  (text[off+1] == _T('x') || text[off+1] == _T('X')))
   1342         is_contents = md_is_hex_entity_contents(ctx, text, off+2, max_end, &off);
   1343     else if(off+1 < max_end  &&  text[off] == _T('#'))
   1344         is_contents = md_is_dec_entity_contents(ctx, text, off+1, max_end, &off);
   1345     else
   1346         is_contents = md_is_named_entity_contents(ctx, text, off, max_end, &off);
   1347 
   1348     if(is_contents  &&  off < max_end  &&  text[off] == _T(';')) {
   1349         *p_end = off+1;
   1350         return TRUE;
   1351     } else {
   1352         return FALSE;
   1353     }
   1354 }
   1355 
   1356 static inline int
   1357 md_is_entity(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end)
   1358 {
   1359     return md_is_entity_str(ctx, ctx->text, beg, max_end, p_end);
   1360 }
   1361 
   1362 
   1363 /******************************
   1364  ***  Attribute Management  ***
   1365  ******************************/
   1366 
   1367 typedef struct MD_ATTRIBUTE_BUILD_tag MD_ATTRIBUTE_BUILD;
   1368 struct MD_ATTRIBUTE_BUILD_tag {
   1369     CHAR* text;
   1370     MD_TEXTTYPE* substr_types;
   1371     OFF* substr_offsets;
   1372     int substr_count;
   1373     int substr_alloc;
   1374     MD_TEXTTYPE trivial_types[1];
   1375     OFF trivial_offsets[2];
   1376 };
   1377 
   1378 
   1379 #define MD_BUILD_ATTR_NO_ESCAPES    0x0001
   1380 
   1381 static int
   1382 md_build_attr_append_substr(MD_CTX* ctx, MD_ATTRIBUTE_BUILD* build,
   1383                             MD_TEXTTYPE type, OFF off)
   1384 {
   1385     if(build->substr_count >= build->substr_alloc) {
   1386         MD_TEXTTYPE* new_substr_types;
   1387         OFF* new_substr_offsets;
   1388 
   1389         build->substr_alloc = (build->substr_alloc > 0
   1390                 ? build->substr_alloc + build->substr_alloc / 2
   1391                 : 8);
   1392         new_substr_types = (MD_TEXTTYPE*) realloc(build->substr_types,
   1393                                     build->substr_alloc * sizeof(MD_TEXTTYPE));
   1394         if(new_substr_types == NULL) {
   1395             MD_LOG("realloc() failed.");
   1396             return -1;
   1397         }
   1398         /* Note +1 to reserve space for final offset (== raw_size). */
   1399         new_substr_offsets = (OFF*) realloc(build->substr_offsets,
   1400                                     (build->substr_alloc+1) * sizeof(OFF));
   1401         if(new_substr_offsets == NULL) {
   1402             MD_LOG("realloc() failed.");
   1403             free(new_substr_types);
   1404             return -1;
   1405         }
   1406 
   1407         build->substr_types = new_substr_types;
   1408         build->substr_offsets = new_substr_offsets;
   1409     }
   1410 
   1411     build->substr_types[build->substr_count] = type;
   1412     build->substr_offsets[build->substr_count] = off;
   1413     build->substr_count++;
   1414     return 0;
   1415 }
   1416 
   1417 static void
   1418 md_free_attribute(MD_CTX* ctx, MD_ATTRIBUTE_BUILD* build)
   1419 {
   1420     MD_UNUSED(ctx);
   1421 
   1422     if(build->substr_alloc > 0) {
   1423         free(build->text);
   1424         free(build->substr_types);
   1425         free(build->substr_offsets);
   1426     }
   1427 }
   1428 
   1429 static int
   1430 md_build_attribute(MD_CTX* ctx, const CHAR* raw_text, SZ raw_size,
   1431                    unsigned flags, MD_ATTRIBUTE* attr, MD_ATTRIBUTE_BUILD* build)
   1432 {
   1433     OFF raw_off, off;
   1434     int is_trivial;
   1435     int ret = 0;
   1436 
   1437     memset(build, 0, sizeof(MD_ATTRIBUTE_BUILD));
   1438 
   1439     /* If there is no backslash and no ampersand, build trivial attribute
   1440      * without any malloc(). */
   1441     is_trivial = TRUE;
   1442     for(raw_off = 0; raw_off < raw_size; raw_off++) {
   1443         if(ISANYOF3_(raw_text[raw_off], _T('\\'), _T('&'), _T('\0'))) {
   1444             is_trivial = FALSE;
   1445             break;
   1446         }
   1447     }
   1448 
   1449     if(is_trivial) {
   1450         build->text = (CHAR*) (raw_size ? raw_text : NULL);
   1451         build->substr_types = build->trivial_types;
   1452         build->substr_offsets = build->trivial_offsets;
   1453         build->substr_count = 1;
   1454         build->substr_alloc = 0;
   1455         build->trivial_types[0] = MD_TEXT_NORMAL;
   1456         build->trivial_offsets[0] = 0;
   1457         build->trivial_offsets[1] = raw_size;
   1458         off = raw_size;
   1459     } else {
   1460         build->text = (CHAR*) malloc(raw_size * sizeof(CHAR));
   1461         if(build->text == NULL) {
   1462             MD_LOG("malloc() failed.");
   1463             goto abort;
   1464         }
   1465 
   1466         raw_off = 0;
   1467         off = 0;
   1468 
   1469         while(raw_off < raw_size) {
   1470             if(raw_text[raw_off] == _T('\0')) {
   1471                 MD_CHECK(md_build_attr_append_substr(ctx, build, MD_TEXT_NULLCHAR, off));
   1472                 memcpy(build->text + off, raw_text + raw_off, 1);
   1473                 off++;
   1474                 raw_off++;
   1475                 continue;
   1476             }
   1477 
   1478             if(raw_text[raw_off] == _T('&')) {
   1479                 OFF ent_end;
   1480 
   1481                 if(md_is_entity_str(ctx, raw_text, raw_off, raw_size, &ent_end)) {
   1482                     MD_CHECK(md_build_attr_append_substr(ctx, build, MD_TEXT_ENTITY, off));
   1483                     memcpy(build->text + off, raw_text + raw_off, ent_end - raw_off);
   1484                     off += ent_end - raw_off;
   1485                     raw_off = ent_end;
   1486                     continue;
   1487                 }
   1488             }
   1489 
   1490             if(build->substr_count == 0  ||  build->substr_types[build->substr_count-1] != MD_TEXT_NORMAL)
   1491                 MD_CHECK(md_build_attr_append_substr(ctx, build, MD_TEXT_NORMAL, off));
   1492 
   1493             if(!(flags & MD_BUILD_ATTR_NO_ESCAPES)  &&
   1494                raw_text[raw_off] == _T('\\')  &&  raw_off+1 < raw_size  &&
   1495                (ISPUNCT_(raw_text[raw_off+1]) || ISNEWLINE_(raw_text[raw_off+1])))
   1496                 raw_off++;
   1497 
   1498             build->text[off++] = raw_text[raw_off++];
   1499         }
   1500         build->substr_offsets[build->substr_count] = off;
   1501     }
   1502 
   1503     attr->text = build->text;
   1504     attr->size = off;
   1505     attr->substr_offsets = build->substr_offsets;
   1506     attr->substr_types = build->substr_types;
   1507     return 0;
   1508 
   1509 abort:
   1510     md_free_attribute(ctx, build);
   1511     return -1;
   1512 }
   1513 
   1514 
   1515 /*********************************************
   1516  ***  Dictionary of Reference Definitions  ***
   1517  *********************************************/
   1518 
   1519 #define MD_FNV1A_BASE       2166136261U
   1520 #define MD_FNV1A_PRIME      16777619U
   1521 
   1522 static inline unsigned
   1523 md_fnv1a(unsigned base, const void* data, size_t n)
   1524 {
   1525     const unsigned char* buf = (const unsigned char*) data;
   1526     unsigned hash = base;
   1527     size_t i;
   1528 
   1529     for(i = 0; i < n; i++) {
   1530         hash ^= buf[i];
   1531         hash *= MD_FNV1A_PRIME;
   1532     }
   1533 
   1534     return hash;
   1535 }
   1536 
   1537 
   1538 struct MD_REF_DEF_tag {
   1539     CHAR* label;
   1540     CHAR* title;
   1541     unsigned hash;
   1542     SZ label_size;
   1543     SZ title_size;
   1544     OFF dest_beg;
   1545     OFF dest_end;
   1546     unsigned char label_needs_free : 1;
   1547     unsigned char title_needs_free : 1;
   1548 };
   1549 
   1550 /* Label equivalence is quite complicated with regards to whitespace and case
   1551  * folding. This complicates computing a hash of it as well as direct comparison
   1552  * of two labels. */
   1553 
   1554 static unsigned
   1555 md_link_label_hash(const CHAR* label, SZ size)
   1556 {
   1557     unsigned hash = MD_FNV1A_BASE;
   1558     OFF off;
   1559     unsigned codepoint;
   1560     int is_whitespace = FALSE;
   1561 
   1562     off = md_skip_unicode_whitespace(label, 0, size);
   1563     while(off < size) {
   1564         SZ char_size;
   1565 
   1566         codepoint = md_decode_unicode(label, off, size, &char_size);
   1567         is_whitespace = ISUNICODEWHITESPACE_(codepoint) || ISNEWLINE_(label[off]);
   1568 
   1569         if(is_whitespace) {
   1570             codepoint = ' ';
   1571             hash = md_fnv1a(hash, &codepoint, sizeof(unsigned));
   1572             off = md_skip_unicode_whitespace(label, off, size);
   1573         } else {
   1574             MD_UNICODE_FOLD_INFO fold_info;
   1575 
   1576             md_get_unicode_fold_info(codepoint, &fold_info);
   1577             hash = md_fnv1a(hash, fold_info.codepoints, fold_info.n_codepoints * sizeof(unsigned));
   1578             off += char_size;
   1579         }
   1580     }
   1581 
   1582     return hash;
   1583 }
   1584 
   1585 static OFF
   1586 md_link_label_cmp_load_fold_info(const CHAR* label, OFF off, SZ size,
   1587                                  MD_UNICODE_FOLD_INFO* fold_info)
   1588 {
   1589     unsigned codepoint;
   1590     SZ char_size;
   1591 
   1592     if(off >= size) {
   1593         /* Treat end of a link label as a whitespace. */
   1594         goto whitespace;
   1595     }
   1596 
   1597     codepoint = md_decode_unicode(label, off, size, &char_size);
   1598     off += char_size;
   1599     if(ISUNICODEWHITESPACE_(codepoint)) {
   1600         /* Treat all whitespace as equivalent */
   1601         goto whitespace;
   1602     }
   1603 
   1604     /* Get real folding info. */
   1605     md_get_unicode_fold_info(codepoint, fold_info);
   1606     return off;
   1607 
   1608 whitespace:
   1609     fold_info->codepoints[0] = _T(' ');
   1610     fold_info->n_codepoints = 1;
   1611     return md_skip_unicode_whitespace(label, off, size);
   1612 }
   1613 
   1614 static int
   1615 md_link_label_cmp(const CHAR* a_label, SZ a_size, const CHAR* b_label, SZ b_size)
   1616 {
   1617     OFF a_off;
   1618     OFF b_off;
   1619     MD_UNICODE_FOLD_INFO a_fi = { { 0 }, 0 };
   1620     MD_UNICODE_FOLD_INFO b_fi = { { 0 }, 0 };
   1621     OFF a_fi_off = 0;
   1622     OFF b_fi_off = 0;
   1623     int cmp;
   1624 
   1625     a_off = md_skip_unicode_whitespace(a_label, 0, a_size);
   1626     b_off = md_skip_unicode_whitespace(b_label, 0, b_size);
   1627     while(a_off < a_size || a_fi_off < a_fi.n_codepoints ||
   1628           b_off < b_size || b_fi_off < b_fi.n_codepoints)
   1629     {
   1630         /* If needed, load fold info for next char. */
   1631         if(a_fi_off >= a_fi.n_codepoints) {
   1632             a_fi_off = 0;
   1633             a_off = md_link_label_cmp_load_fold_info(a_label, a_off, a_size, &a_fi);
   1634         }
   1635         if(b_fi_off >= b_fi.n_codepoints) {
   1636             b_fi_off = 0;
   1637             b_off = md_link_label_cmp_load_fold_info(b_label, b_off, b_size, &b_fi);
   1638         }
   1639 
   1640         cmp = b_fi.codepoints[b_fi_off] - a_fi.codepoints[a_fi_off];
   1641         if(cmp != 0)
   1642             return cmp;
   1643 
   1644         a_fi_off++;
   1645         b_fi_off++;
   1646     }
   1647 
   1648     return 0;
   1649 }
   1650 
   1651 typedef struct MD_REF_DEF_LIST_tag MD_REF_DEF_LIST;
   1652 struct MD_REF_DEF_LIST_tag {
   1653     int n_ref_defs;
   1654     int alloc_ref_defs;
   1655     MD_REF_DEF* ref_defs[];  /* Valid items always  point into ctx->ref_defs[] */
   1656 };
   1657 
   1658 static int
   1659 md_ref_def_cmp(const void* a, const void* b)
   1660 {
   1661     const MD_REF_DEF* a_ref = *(const MD_REF_DEF**)a;
   1662     const MD_REF_DEF* b_ref = *(const MD_REF_DEF**)b;
   1663 
   1664     if(a_ref->hash < b_ref->hash)
   1665         return -1;
   1666     else if(a_ref->hash > b_ref->hash)
   1667         return +1;
   1668     else
   1669         return md_link_label_cmp(a_ref->label, a_ref->label_size, b_ref->label, b_ref->label_size);
   1670 }
   1671 
   1672 static int
   1673 md_ref_def_cmp_for_sort(const void* a, const void* b)
   1674 {
   1675     int cmp;
   1676 
   1677     cmp = md_ref_def_cmp(a, b);
   1678 
   1679     /* Ensure stability of the sorting. */
   1680     if(cmp == 0) {
   1681         const MD_REF_DEF* a_ref = *(const MD_REF_DEF**)a;
   1682         const MD_REF_DEF* b_ref = *(const MD_REF_DEF**)b;
   1683 
   1684         if(a_ref < b_ref)
   1685             cmp = -1;
   1686         else if(a_ref > b_ref)
   1687             cmp = +1;
   1688         else
   1689             cmp = 0;
   1690     }
   1691 
   1692     return cmp;
   1693 }
   1694 
   1695 static int
   1696 md_build_ref_def_hashtable(MD_CTX* ctx)
   1697 {
   1698     int i, j;
   1699 
   1700     if(ctx->n_ref_defs == 0)
   1701         return 0;
   1702 
   1703     ctx->ref_def_hashtable_size = (ctx->n_ref_defs * 5) / 4;
   1704     ctx->ref_def_hashtable = malloc(ctx->ref_def_hashtable_size * sizeof(void*));
   1705     if(ctx->ref_def_hashtable == NULL) {
   1706         MD_LOG("malloc() failed.");
   1707         goto abort;
   1708     }
   1709     memset(ctx->ref_def_hashtable, 0, ctx->ref_def_hashtable_size * sizeof(void*));
   1710 
   1711     /* Each member of ctx->ref_def_hashtable[] can be:
   1712      *  -- NULL,
   1713      *  -- pointer to the MD_REF_DEF in ctx->ref_defs[], or
   1714      *  -- pointer to a MD_REF_DEF_LIST, which holds multiple pointers to
   1715      *     such MD_REF_DEFs.
   1716      */
   1717     for(i = 0; i < ctx->n_ref_defs; i++) {
   1718         MD_REF_DEF* def = &ctx->ref_defs[i];
   1719         void* bucket;
   1720         MD_REF_DEF_LIST* list;
   1721 
   1722         def->hash = md_link_label_hash(def->label, def->label_size);
   1723         bucket = ctx->ref_def_hashtable[def->hash % ctx->ref_def_hashtable_size];
   1724 
   1725         if(bucket == NULL) {
   1726             /* The bucket is empty. Make it just point to the def. */
   1727             ctx->ref_def_hashtable[def->hash % ctx->ref_def_hashtable_size] = def;
   1728             continue;
   1729         }
   1730 
   1731         if(ctx->ref_defs <= (MD_REF_DEF*) bucket  &&  (MD_REF_DEF*) bucket < ctx->ref_defs + ctx->n_ref_defs) {
   1732             /* The bucket already contains one ref. def. Lets see whether it
   1733              * is the same label (ref. def. duplicate) or different one
   1734              * (hash conflict). */
   1735             MD_REF_DEF* old_def = (MD_REF_DEF*) bucket;
   1736 
   1737             if(md_link_label_cmp(def->label, def->label_size, old_def->label, old_def->label_size) == 0) {
   1738                 /* Duplicate label: Ignore this ref. def. */
   1739                 continue;
   1740             }
   1741 
   1742             /* Make the bucket complex, i.e. able to hold more ref. defs. */
   1743             list = (MD_REF_DEF_LIST*) malloc(sizeof(MD_REF_DEF_LIST) + 2 * sizeof(MD_REF_DEF*));
   1744             if(list == NULL) {
   1745                 MD_LOG("malloc() failed.");
   1746                 goto abort;
   1747             }
   1748             list->ref_defs[0] = old_def;
   1749             list->ref_defs[1] = def;
   1750             list->n_ref_defs = 2;
   1751             list->alloc_ref_defs = 2;
   1752             ctx->ref_def_hashtable[def->hash % ctx->ref_def_hashtable_size] = list;
   1753             continue;
   1754         }
   1755 
   1756         /* Append the def to the complex bucket list.
   1757          *
   1758          * Note in this case we ignore potential duplicates to avoid expensive
   1759          * iterating over the complex bucket. Below, we revisit all the complex
   1760          * buckets and handle it more cheaply after the complex bucket contents
   1761          * is sorted. */
   1762         list = (MD_REF_DEF_LIST*) bucket;
   1763         if(list->n_ref_defs >= list->alloc_ref_defs) {
   1764             int alloc_ref_defs = list->alloc_ref_defs + list->alloc_ref_defs / 2;
   1765             MD_REF_DEF_LIST* list_tmp = (MD_REF_DEF_LIST*) realloc(list,
   1766                         sizeof(MD_REF_DEF_LIST) + alloc_ref_defs * sizeof(MD_REF_DEF*));
   1767             if(list_tmp == NULL) {
   1768                 MD_LOG("realloc() failed.");
   1769                 goto abort;
   1770             }
   1771             list = list_tmp;
   1772             list->alloc_ref_defs = alloc_ref_defs;
   1773             ctx->ref_def_hashtable[def->hash % ctx->ref_def_hashtable_size] = list;
   1774         }
   1775 
   1776         list->ref_defs[list->n_ref_defs] = def;
   1777         list->n_ref_defs++;
   1778     }
   1779 
   1780     /* Sort the complex buckets so we can use bsearch() with them. */
   1781     for(i = 0; i < ctx->ref_def_hashtable_size; i++) {
   1782         void* bucket = ctx->ref_def_hashtable[i];
   1783         MD_REF_DEF_LIST* list;
   1784 
   1785         if(bucket == NULL)
   1786             continue;
   1787         if(ctx->ref_defs <= (MD_REF_DEF*) bucket  &&  (MD_REF_DEF*) bucket < ctx->ref_defs + ctx->n_ref_defs)
   1788             continue;
   1789 
   1790         list = (MD_REF_DEF_LIST*) bucket;
   1791         qsort(list->ref_defs, list->n_ref_defs, sizeof(MD_REF_DEF*), md_ref_def_cmp_for_sort);
   1792 
   1793         /* Disable all duplicates in the complex bucket by forcing all such
   1794          * records to point to the 1st such ref. def. I.e. no matter which
   1795          * record is found during the lookup, it will always point to the right
   1796          * ref. def. in ctx->ref_defs[]. */
   1797         for(j = 1; j < list->n_ref_defs; j++) {
   1798             if(md_ref_def_cmp(&list->ref_defs[j-1], &list->ref_defs[j]) == 0)
   1799                 list->ref_defs[j] = list->ref_defs[j-1];
   1800         }
   1801     }
   1802 
   1803     return 0;
   1804 
   1805 abort:
   1806     return -1;
   1807 }
   1808 
   1809 static void
   1810 md_free_ref_def_hashtable(MD_CTX* ctx)
   1811 {
   1812     if(ctx->ref_def_hashtable != NULL) {
   1813         int i;
   1814 
   1815         for(i = 0; i < ctx->ref_def_hashtable_size; i++) {
   1816             void* bucket = ctx->ref_def_hashtable[i];
   1817             if(bucket == NULL)
   1818                 continue;
   1819             if(ctx->ref_defs <= (MD_REF_DEF*) bucket  &&  (MD_REF_DEF*) bucket < ctx->ref_defs + ctx->n_ref_defs)
   1820                 continue;
   1821             free(bucket);
   1822         }
   1823 
   1824         free(ctx->ref_def_hashtable);
   1825     }
   1826 }
   1827 
   1828 static const MD_REF_DEF*
   1829 md_lookup_ref_def(MD_CTX* ctx, const CHAR* label, SZ label_size)
   1830 {
   1831     unsigned hash;
   1832     void* bucket;
   1833 
   1834     if(ctx->ref_def_hashtable_size == 0)
   1835         return NULL;
   1836 
   1837     hash = md_link_label_hash(label, label_size);
   1838     bucket = ctx->ref_def_hashtable[hash % ctx->ref_def_hashtable_size];
   1839 
   1840     if(bucket == NULL) {
   1841         return NULL;
   1842     } else if(ctx->ref_defs <= (MD_REF_DEF*) bucket  &&  (MD_REF_DEF*) bucket < ctx->ref_defs + ctx->n_ref_defs) {
   1843         const MD_REF_DEF* def = (MD_REF_DEF*) bucket;
   1844 
   1845         if(md_link_label_cmp(def->label, def->label_size, label, label_size) == 0)
   1846             return def;
   1847         else
   1848             return NULL;
   1849     } else {
   1850         MD_REF_DEF_LIST* list = (MD_REF_DEF_LIST*) bucket;
   1851         MD_REF_DEF key_buf;
   1852         const MD_REF_DEF* key = &key_buf;
   1853         const MD_REF_DEF** ret;
   1854 
   1855         key_buf.label = (CHAR*) label;
   1856         key_buf.label_size = label_size;
   1857         key_buf.hash = md_link_label_hash(key_buf.label, key_buf.label_size);
   1858 
   1859         ret = (const MD_REF_DEF**) bsearch(&key, list->ref_defs,
   1860                     list->n_ref_defs, sizeof(MD_REF_DEF*), md_ref_def_cmp);
   1861         if(ret != NULL)
   1862             return *ret;
   1863         else
   1864             return NULL;
   1865     }
   1866 }
   1867 
   1868 
   1869 /***************************
   1870  ***  Recognizing Links  ***
   1871  ***************************/
   1872 
   1873 /* Note this code is partially shared between processing inlines and blocks
   1874  * as reference definitions and links share some helper parser functions.
   1875  */
   1876 
   1877 typedef struct MD_LINK_ATTR_tag MD_LINK_ATTR;
   1878 struct MD_LINK_ATTR_tag {
   1879     OFF dest_beg;
   1880     OFF dest_end;
   1881 
   1882     CHAR* title;
   1883     SZ title_size;
   1884     int title_needs_free;
   1885 };
   1886 
   1887 
   1888 static int
   1889 md_is_link_label(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg,
   1890                  OFF* p_end, int* p_beg_line_index, int* p_end_line_index,
   1891                  OFF* p_contents_beg, OFF* p_contents_end)
   1892 {
   1893     OFF off = beg;
   1894     OFF contents_beg = 0;
   1895     OFF contents_end = 0;
   1896     int line_index = 0;
   1897     int len = 0;
   1898 
   1899     if(CH(off) != _T('['))
   1900         return FALSE;
   1901     off++;
   1902 
   1903     while(1) {
   1904         OFF line_end = lines[line_index].end;
   1905 
   1906         while(off < line_end) {
   1907             if(CH(off) == _T('\\')  &&  off+1 < ctx->size  &&  (ISPUNCT(off+1) || ISNEWLINE(off+1))) {
   1908                 if(contents_end == 0) {
   1909                     contents_beg = off;
   1910                     *p_beg_line_index = line_index;
   1911                 }
   1912                 contents_end = off + 2;
   1913                 off += 2;
   1914             } else if(CH(off) == _T('[')) {
   1915                 return FALSE;
   1916             } else if(CH(off) == _T(']')) {
   1917                 if(contents_beg < contents_end) {
   1918                     /* Success. */
   1919                     *p_contents_beg = contents_beg;
   1920                     *p_contents_end = contents_end;
   1921                     *p_end = off+1;
   1922                     *p_end_line_index = line_index;
   1923                     return TRUE;
   1924                 } else {
   1925                     /* Link label must have some non-whitespace contents. */
   1926                     return FALSE;
   1927                 }
   1928             } else {
   1929                 unsigned codepoint;
   1930                 SZ char_size;
   1931 
   1932                 codepoint = md_decode_unicode(ctx->text, off, ctx->size, &char_size);
   1933                 if(!ISUNICODEWHITESPACE_(codepoint)) {
   1934                     if(contents_end == 0) {
   1935                         contents_beg = off;
   1936                         *p_beg_line_index = line_index;
   1937                     }
   1938                     contents_end = off + char_size;
   1939                 }
   1940 
   1941                 off += char_size;
   1942             }
   1943 
   1944             len++;
   1945             if(len > 999)
   1946                 return FALSE;
   1947         }
   1948 
   1949         line_index++;
   1950         len++;
   1951         if(line_index < n_lines)
   1952             off = lines[line_index].beg;
   1953         else
   1954             break;
   1955     }
   1956 
   1957     return FALSE;
   1958 }
   1959 
   1960 static int
   1961 md_is_link_destination_A(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end,
   1962                          OFF* p_contents_beg, OFF* p_contents_end)
   1963 {
   1964     OFF off = beg;
   1965 
   1966     if(off >= max_end  ||  CH(off) != _T('<'))
   1967         return FALSE;
   1968     off++;
   1969 
   1970     while(off < max_end) {
   1971         if(CH(off) == _T('\\')  &&  off+1 < max_end  &&  ISPUNCT(off+1)) {
   1972             off += 2;
   1973             continue;
   1974         }
   1975 
   1976         if(ISNEWLINE(off)  ||  CH(off) == _T('<'))
   1977             return FALSE;
   1978 
   1979         if(CH(off) == _T('>')) {
   1980             /* Success. */
   1981             *p_contents_beg = beg+1;
   1982             *p_contents_end = off;
   1983             *p_end = off+1;
   1984             return TRUE;
   1985         }
   1986 
   1987         off++;
   1988     }
   1989 
   1990     return FALSE;
   1991 }
   1992 
   1993 static int
   1994 md_is_link_destination_B(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end,
   1995                          OFF* p_contents_beg, OFF* p_contents_end)
   1996 {
   1997     OFF off = beg;
   1998     int parenthesis_level = 0;
   1999 
   2000     while(off < max_end) {
   2001         if(CH(off) == _T('\\')  &&  off+1 < max_end  &&  ISPUNCT(off+1)) {
   2002             off += 2;
   2003             continue;
   2004         }
   2005 
   2006         if(ISWHITESPACE(off) || ISCNTRL(off))
   2007             break;
   2008 
   2009         /* Link destination may include balanced pairs of unescaped '(' ')'.
   2010          * Note we limit the maximal nesting level by 32 to protect us from
   2011          * https://github.com/jgm/cmark/issues/214 */
   2012         if(CH(off) == _T('(')) {
   2013             parenthesis_level++;
   2014             if(parenthesis_level > 32)
   2015                 return FALSE;
   2016         } else if(CH(off) == _T(')')) {
   2017             if(parenthesis_level == 0)
   2018                 break;
   2019             parenthesis_level--;
   2020         }
   2021 
   2022         off++;
   2023     }
   2024 
   2025     if(parenthesis_level != 0  ||  off == beg)
   2026         return FALSE;
   2027 
   2028     /* Success. */
   2029     *p_contents_beg = beg;
   2030     *p_contents_end = off;
   2031     *p_end = off;
   2032     return TRUE;
   2033 }
   2034 
   2035 static inline int
   2036 md_is_link_destination(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end,
   2037                        OFF* p_contents_beg, OFF* p_contents_end)
   2038 {
   2039     if(CH(beg) == _T('<'))
   2040         return md_is_link_destination_A(ctx, beg, max_end, p_end, p_contents_beg, p_contents_end);
   2041     else
   2042         return md_is_link_destination_B(ctx, beg, max_end, p_end, p_contents_beg, p_contents_end);
   2043 }
   2044 
   2045 static int
   2046 md_is_link_title(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg,
   2047                  OFF* p_end, int* p_beg_line_index, int* p_end_line_index,
   2048                  OFF* p_contents_beg, OFF* p_contents_end)
   2049 {
   2050     OFF off = beg;
   2051     CHAR closer_char;
   2052     int line_index = 0;
   2053 
   2054     /* White space with up to one line break. */
   2055     while(off < lines[line_index].end  &&  ISWHITESPACE(off))
   2056         off++;
   2057     if(off >= lines[line_index].end) {
   2058         line_index++;
   2059         if(line_index >= n_lines)
   2060             return FALSE;
   2061         off = lines[line_index].beg;
   2062     }
   2063     if(off == beg)
   2064         return FALSE;
   2065 
   2066     *p_beg_line_index = line_index;
   2067 
   2068     /* First char determines how to detect end of it. */
   2069     switch(CH(off)) {
   2070         case _T('"'):   closer_char = _T('"'); break;
   2071         case _T('\''):  closer_char = _T('\''); break;
   2072         case _T('('):   closer_char = _T(')'); break;
   2073         default:        return FALSE;
   2074     }
   2075     off++;
   2076 
   2077     *p_contents_beg = off;
   2078 
   2079     while(line_index < n_lines) {
   2080         OFF line_end = lines[line_index].end;
   2081 
   2082         while(off < line_end) {
   2083             if(CH(off) == _T('\\')  &&  off+1 < ctx->size  &&  (ISPUNCT(off+1) || ISNEWLINE(off+1))) {
   2084                 off++;
   2085             } else if(CH(off) == closer_char) {
   2086                 /* Success. */
   2087                 *p_contents_end = off;
   2088                 *p_end = off+1;
   2089                 *p_end_line_index = line_index;
   2090                 return TRUE;
   2091             } else if(closer_char == _T(')')  &&  CH(off) == _T('(')) {
   2092                 /* ()-style title cannot contain (unescaped '(')) */
   2093                 return FALSE;
   2094             }
   2095 
   2096             off++;
   2097         }
   2098 
   2099         line_index++;
   2100     }
   2101 
   2102     return FALSE;
   2103 }
   2104 
   2105 /* Returns 0 if it is not a reference definition.
   2106  *
   2107  * Returns N > 0 if it is a reference definition. N then corresponds to the
   2108  * number of lines forming it). In this case the definition is stored for
   2109  * resolving any links referring to it.
   2110  *
   2111  * Returns -1 in case of an error (out of memory).
   2112  */
   2113 static int
   2114 md_is_link_reference_definition(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
   2115 {
   2116     OFF label_contents_beg;
   2117     OFF label_contents_end;
   2118     int label_contents_line_index = -1;
   2119     int label_is_multiline = FALSE;
   2120     OFF dest_contents_beg;
   2121     OFF dest_contents_end;
   2122     OFF title_contents_beg;
   2123     OFF title_contents_end;
   2124     int title_contents_line_index;
   2125     int title_is_multiline = FALSE;
   2126     OFF off;
   2127     int line_index = 0;
   2128     int tmp_line_index;
   2129     MD_REF_DEF* def = NULL;
   2130     int ret = 0;
   2131 
   2132     /* Link label. */
   2133     if(!md_is_link_label(ctx, lines, n_lines, lines[0].beg,
   2134                 &off, &label_contents_line_index, &line_index,
   2135                 &label_contents_beg, &label_contents_end))
   2136         return FALSE;
   2137     label_is_multiline = (label_contents_line_index != line_index);
   2138 
   2139     /* Colon. */
   2140     if(off >= lines[line_index].end  ||  CH(off) != _T(':'))
   2141         return FALSE;
   2142     off++;
   2143 
   2144     /* Optional white space with up to one line break. */
   2145     while(off < lines[line_index].end  &&  ISWHITESPACE(off))
   2146         off++;
   2147     if(off >= lines[line_index].end) {
   2148         line_index++;
   2149         if(line_index >= n_lines)
   2150             return FALSE;
   2151         off = lines[line_index].beg;
   2152     }
   2153 
   2154     /* Link destination. */
   2155     if(!md_is_link_destination(ctx, off, lines[line_index].end,
   2156                 &off, &dest_contents_beg, &dest_contents_end))
   2157         return FALSE;
   2158 
   2159     /* (Optional) title. Note we interpret it as an title only if nothing
   2160      * more follows on its last line. */
   2161     if(md_is_link_title(ctx, lines + line_index, n_lines - line_index, off,
   2162                 &off, &title_contents_line_index, &tmp_line_index,
   2163                 &title_contents_beg, &title_contents_end)
   2164         &&  off >= lines[line_index + tmp_line_index].end)
   2165     {
   2166         title_is_multiline = (tmp_line_index != title_contents_line_index);
   2167         title_contents_line_index += line_index;
   2168         line_index += tmp_line_index;
   2169     } else {
   2170         /* Not a title. */
   2171         title_is_multiline = FALSE;
   2172         title_contents_beg = off;
   2173         title_contents_end = off;
   2174         title_contents_line_index = 0;
   2175     }
   2176 
   2177     /* Nothing more can follow on the last line. */
   2178     if(off < lines[line_index].end)
   2179         return FALSE;
   2180 
   2181     /* So, it _is_ a reference definition. Remember it. */
   2182     if(ctx->n_ref_defs >= ctx->alloc_ref_defs) {
   2183         MD_REF_DEF* new_defs;
   2184 
   2185         ctx->alloc_ref_defs = (ctx->alloc_ref_defs > 0
   2186                 ? ctx->alloc_ref_defs + ctx->alloc_ref_defs / 2
   2187                 : 16);
   2188         new_defs = (MD_REF_DEF*) realloc(ctx->ref_defs, ctx->alloc_ref_defs * sizeof(MD_REF_DEF));
   2189         if(new_defs == NULL) {
   2190             MD_LOG("realloc() failed.");
   2191             goto abort;
   2192         }
   2193 
   2194         ctx->ref_defs = new_defs;
   2195     }
   2196     def = &ctx->ref_defs[ctx->n_ref_defs];
   2197     memset(def, 0, sizeof(MD_REF_DEF));
   2198 
   2199     if(label_is_multiline) {
   2200         MD_CHECK(md_merge_lines_alloc(ctx, label_contents_beg, label_contents_end,
   2201                     lines + label_contents_line_index, n_lines - label_contents_line_index,
   2202                     _T(' '), &def->label, &def->label_size));
   2203         def->label_needs_free = TRUE;
   2204     } else {
   2205         def->label = (CHAR*) STR(label_contents_beg);
   2206         def->label_size = label_contents_end - label_contents_beg;
   2207     }
   2208 
   2209     if(title_is_multiline) {
   2210         MD_CHECK(md_merge_lines_alloc(ctx, title_contents_beg, title_contents_end,
   2211                     lines + title_contents_line_index, n_lines - title_contents_line_index,
   2212                     _T('\n'), &def->title, &def->title_size));
   2213         def->title_needs_free = TRUE;
   2214     } else {
   2215         def->title = (CHAR*) STR(title_contents_beg);
   2216         def->title_size = title_contents_end - title_contents_beg;
   2217     }
   2218 
   2219     def->dest_beg = dest_contents_beg;
   2220     def->dest_end = dest_contents_end;
   2221 
   2222     /* Success. */
   2223     ctx->n_ref_defs++;
   2224     return line_index + 1;
   2225 
   2226 abort:
   2227     /* Failure. */
   2228     if(def != NULL  &&  def->label_needs_free)
   2229         free(def->label);
   2230     if(def != NULL  &&  def->title_needs_free)
   2231         free(def->title);
   2232     return ret;
   2233 }
   2234 
   2235 static int
   2236 md_is_link_reference(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
   2237                      OFF beg, OFF end, MD_LINK_ATTR* attr)
   2238 {
   2239     const MD_REF_DEF* def;
   2240     const MD_LINE* beg_line;
   2241     int is_multiline;
   2242     CHAR* label;
   2243     SZ label_size;
   2244     int ret;
   2245 
   2246     MD_ASSERT(CH(beg) == _T('[') || CH(beg) == _T('!'));
   2247     MD_ASSERT(CH(end-1) == _T(']'));
   2248 
   2249     beg += (CH(beg) == _T('!') ? 2 : 1);
   2250     end--;
   2251 
   2252     /* Find lines corresponding to the beg and end positions. */
   2253     beg_line = md_lookup_line(beg, lines, n_lines);
   2254     is_multiline = (end > beg_line->end);
   2255 
   2256     if(is_multiline) {
   2257         MD_CHECK(md_merge_lines_alloc(ctx, beg, end, beg_line,
   2258                  (int)(n_lines - (beg_line - lines)), _T(' '), &label, &label_size));
   2259     } else {
   2260         label = (CHAR*) STR(beg);
   2261         label_size = end - beg;
   2262     }
   2263 
   2264     def = md_lookup_ref_def(ctx, label, label_size);
   2265     if(def != NULL) {
   2266         attr->dest_beg = def->dest_beg;
   2267         attr->dest_end = def->dest_end;
   2268         attr->title = def->title;
   2269         attr->title_size = def->title_size;
   2270         attr->title_needs_free = FALSE;
   2271     }
   2272 
   2273     if(is_multiline)
   2274         free(label);
   2275 
   2276     ret = (def != NULL);
   2277 
   2278 abort:
   2279     return ret;
   2280 }
   2281 
   2282 static int
   2283 md_is_inline_link_spec(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
   2284                        OFF beg, OFF* p_end, MD_LINK_ATTR* attr)
   2285 {
   2286     int line_index = 0;
   2287     int tmp_line_index;
   2288     OFF title_contents_beg;
   2289     OFF title_contents_end;
   2290     int title_contents_line_index;
   2291     int title_is_multiline;
   2292     OFF off = beg;
   2293     int ret = FALSE;
   2294 
   2295     while(off >= lines[line_index].end)
   2296         line_index++;
   2297 
   2298     MD_ASSERT(CH(off) == _T('('));
   2299     off++;
   2300 
   2301     /* Optional white space with up to one line break. */
   2302     while(off < lines[line_index].end  &&  ISWHITESPACE(off))
   2303         off++;
   2304     if(off >= lines[line_index].end  &&  (off >= ctx->size  ||  ISNEWLINE(off))) {
   2305         line_index++;
   2306         if(line_index >= n_lines)
   2307             return FALSE;
   2308         off = lines[line_index].beg;
   2309     }
   2310 
   2311     /* Link destination may be omitted, but only when not also having a title. */
   2312     if(off < ctx->size  &&  CH(off) == _T(')')) {
   2313         attr->dest_beg = off;
   2314         attr->dest_end = off;
   2315         attr->title = NULL;
   2316         attr->title_size = 0;
   2317         attr->title_needs_free = FALSE;
   2318         off++;
   2319         *p_end = off;
   2320         return TRUE;
   2321     }
   2322 
   2323     /* Link destination. */
   2324     if(!md_is_link_destination(ctx, off, lines[line_index].end,
   2325                         &off, &attr->dest_beg, &attr->dest_end))
   2326         return FALSE;
   2327 
   2328     /* (Optional) title. */
   2329     if(md_is_link_title(ctx, lines + line_index, n_lines - line_index, off,
   2330                 &off, &title_contents_line_index, &tmp_line_index,
   2331                 &title_contents_beg, &title_contents_end))
   2332     {
   2333         title_is_multiline = (tmp_line_index != title_contents_line_index);
   2334         title_contents_line_index += line_index;
   2335         line_index += tmp_line_index;
   2336     } else {
   2337         /* Not a title. */
   2338         title_is_multiline = FALSE;
   2339         title_contents_beg = off;
   2340         title_contents_end = off;
   2341         title_contents_line_index = 0;
   2342     }
   2343 
   2344     /* Optional whitespace followed with final ')'. */
   2345     while(off < lines[line_index].end  &&  ISWHITESPACE(off))
   2346         off++;
   2347     if (off >= lines[line_index].end  &&  (off >= ctx->size || ISNEWLINE(off))) {
   2348         line_index++;
   2349         if(line_index >= n_lines)
   2350             return FALSE;
   2351         off = lines[line_index].beg;
   2352     }
   2353     if(CH(off) != _T(')'))
   2354         goto abort;
   2355     off++;
   2356 
   2357     if(title_contents_beg >= title_contents_end) {
   2358         attr->title = NULL;
   2359         attr->title_size = 0;
   2360         attr->title_needs_free = FALSE;
   2361     } else if(!title_is_multiline) {
   2362         attr->title = (CHAR*) STR(title_contents_beg);
   2363         attr->title_size = title_contents_end - title_contents_beg;
   2364         attr->title_needs_free = FALSE;
   2365     } else {
   2366         MD_CHECK(md_merge_lines_alloc(ctx, title_contents_beg, title_contents_end,
   2367                     lines + title_contents_line_index, n_lines - title_contents_line_index,
   2368                     _T('\n'), &attr->title, &attr->title_size));
   2369         attr->title_needs_free = TRUE;
   2370     }
   2371 
   2372     *p_end = off;
   2373     ret = TRUE;
   2374 
   2375 abort:
   2376     return ret;
   2377 }
   2378 
   2379 static void
   2380 md_free_ref_defs(MD_CTX* ctx)
   2381 {
   2382     int i;
   2383 
   2384     for(i = 0; i < ctx->n_ref_defs; i++) {
   2385         MD_REF_DEF* def = &ctx->ref_defs[i];
   2386 
   2387         if(def->label_needs_free)
   2388             free(def->label);
   2389         if(def->title_needs_free)
   2390             free(def->title);
   2391     }
   2392 
   2393     free(ctx->ref_defs);
   2394 }
   2395 
   2396 
   2397 /******************************************
   2398  ***  Processing Inlines (a.k.a Spans)  ***
   2399  ******************************************/
   2400 
   2401 /* We process inlines in few phases:
   2402  *
   2403  * (1) We go through the block text and collect all significant characters
   2404  *     which may start/end a span or some other significant position into
   2405  *     ctx->marks[]. Core of this is what md_collect_marks() does.
   2406  *
   2407  *     We also do some very brief preliminary context-less analysis, whether
   2408  *     it might be opener or closer (e.g. of an emphasis span).
   2409  *
   2410  *     This speeds the other steps as we do not need to re-iterate over all
   2411  *     characters anymore.
   2412  *
 * (2) We analyze each potential mark type, in order of precedence.
   2414  *
   2415  *     In each md_analyze_XXX() function, we re-iterate list of the marks,
   2416  *     skipping already resolved regions (in preceding precedences) and try to
   2417  *     resolve them.
   2418  *
   2419  * (2.1) For trivial marks, which are single (e.g. HTML entity), we just mark
   2420  *       them as resolved.
   2421  *
   2422  * (2.2) For range-type marks, we analyze whether the mark could be closer
   2423  *       and, if yes, whether there is some preceding opener it could satisfy.
   2424  *
   2425  *       If not we check whether it could be really an opener and if yes, we
   2426  *       remember it so subsequent closers may resolve it.
   2427  *
   2428  * (3) Finally, when all marks were analyzed, we render the block contents
   2429  *     by calling MD_RENDERER::text() callback, interrupting by ::enter_span()
   2430  *     or ::close_span() whenever we reach a resolved mark.
   2431  */
   2432 
   2433 
/* The mark structure.
 *
 * A mark records one potentially significant position in the text of the
 * block being processed. Its member 'ch' tells what it may be:
 *
 * '\\': Maybe escape sequence.
 * '\0': NUL character.
 *  '*': Maybe (strong) emphasis start/end.
 *  '_': Maybe (strong) emphasis start/end.
 *  '~': Maybe strikethrough start/end (needs MD_FLAG_STRIKETHROUGH).
 *  '`': Maybe code span start/end.
 *  '&': Maybe start of entity.
 *  ';': Maybe end of entity.
 *  '<': Maybe start of raw HTML or autolink.
 *  '>': Maybe end of raw HTML or autolink.
 *  '[': Maybe start of link label or link text.
 *  '!': Equivalent of '[' for image.
 *  ']': Maybe end of link label or link text.
 *  '@': Maybe permissive e-mail auto-link (needs MD_FLAG_PERMISSIVEEMAILAUTOLINKS).
 *  ':': Maybe permissive URL auto-link (needs MD_FLAG_PERMISSIVEURLAUTOLINKS).
 *  '.': Maybe permissive WWW auto-link (needs MD_FLAG_PERMISSIVEWWWAUTOLINKS).
 *  'D': Dummy mark, it reserves a space for splitting a previous mark
 *       (e.g. emphasis) or to make more space for storing some special data
 *       related to the preceding mark (e.g. link).
 *
 * NOTE(review): md_mark_chain() and md_build_mark_char_map() additionally
 * handle '|', '-', '%', '^' and '$', and md_mark_chain() maps '!' to
 * CONCEAL_OPENERS rather than to BRACKET_OPENERS. The list above appears not
 * to have been updated for those; confirm and extend it.
 *
 * Note that not all instances of these chars in the text imply creation of the
 * structure. Only those which have (or may have, after we see more context)
 * the special meaning.
 *
 * (Keep this struct as small as possible to fit as many of them as possible
 * into a CPU cache line.)
 */
struct MD_MARK_tag {
    /* Offsets of the beginning and the end of the mark in the text.
     *
     * These must stay the first members: for dummy marks,
     * md_mark_store_ptr() and md_mark_get_ptr() reuse the leading bytes of
     * the structure for storing a pointer. */
    OFF beg;
    OFF end;

    /* For unresolved openers, 'prev' and 'next' form the chain of open openers
     * of given type 'ch'.
     *
     * During resolving, we disconnect from the chain and point to the
     * corresponding counterpart so opener points to its closer and vice versa.
     *
     * Both are indexes into ctx->marks[]; -1 means "none".
     */
    int prev;
    int next;

    /* The kind of the mark; see the list above. */
    CHAR ch;

    /* Combination of the MD_MARK_xxx flags. */
    unsigned char flags;
};
   2478 
/* Mark flags (these apply to ALL mark types).
 *
 * They are stored in MD_MARK::flags, which is an unsigned char, so only the
 * bits 0x01 ... 0x80 are available. */
#define MD_MARK_POTENTIAL_OPENER            0x01  /* Maybe opener. */
#define MD_MARK_POTENTIAL_CLOSER            0x02  /* Maybe closer. */
#define MD_MARK_OPENER                      0x04  /* Definitely opener. */
#define MD_MARK_CLOSER                      0x08  /* Definitely closer. */
#define MD_MARK_RESOLVED                    0x10  /* Resolved in any definite way. */

/* Mark flags specific for various mark types (so they can share bits).
 *
 * The MD_MARK_EMPH_MOD3_x values are not independent bits: they are three
 * distinct non-zero values of the two-bit field MD_MARK_EMPH_MOD3_MASK. */
#define MD_MARK_EMPH_INTRAWORD              0x20  /* Helper for the "rule of 3". */
#define MD_MARK_EMPH_MOD3_0                 0x40
#define MD_MARK_EMPH_MOD3_1                 0x80
#define MD_MARK_EMPH_MOD3_2                 (0x40 | 0x80)
#define MD_MARK_EMPH_MOD3_MASK              (0x40 | 0x80)
#define MD_MARK_AUTOLINK                    0x20  /* Distinguisher for '<', '>'. */
#define MD_MARK_VALIDPERMISSIVEAUTOLINK     0x20  /* For permissive autolinks. */
#define MD_MARK_HASNESTEDBRACKETS           0x20  /* For '[' to rule out invalid link labels early */
   2495 
   2496 static MD_MARKCHAIN*
   2497 md_asterisk_chain(MD_CTX* ctx, unsigned flags)
   2498 {
   2499     switch(flags & (MD_MARK_EMPH_INTRAWORD | MD_MARK_EMPH_MOD3_MASK)) {
   2500         case MD_MARK_EMPH_INTRAWORD | MD_MARK_EMPH_MOD3_0:  return &ASTERISK_OPENERS_intraword_mod3_0;
   2501         case MD_MARK_EMPH_INTRAWORD | MD_MARK_EMPH_MOD3_1:  return &ASTERISK_OPENERS_intraword_mod3_1;
   2502         case MD_MARK_EMPH_INTRAWORD | MD_MARK_EMPH_MOD3_2:  return &ASTERISK_OPENERS_intraword_mod3_2;
   2503         case MD_MARK_EMPH_MOD3_0:                           return &ASTERISK_OPENERS_extraword_mod3_0;
   2504         case MD_MARK_EMPH_MOD3_1:                           return &ASTERISK_OPENERS_extraword_mod3_1;
   2505         case MD_MARK_EMPH_MOD3_2:                           return &ASTERISK_OPENERS_extraword_mod3_2;
   2506         default:                                            MD_UNREACHABLE();
   2507     }
   2508     return NULL;
   2509 }
   2510 
   2511 static MD_MARKCHAIN*
   2512 md_mark_chain(MD_CTX* ctx, int mark_index)
   2513 {
   2514     MD_MARK* mark = &ctx->marks[mark_index];
   2515 
   2516     switch(mark->ch) {
   2517         case _T('*'):   return md_asterisk_chain(ctx, mark->flags);
   2518         case _T('_'):   return &UNDERSCORE_OPENERS;
   2519         case _T('~'):   return (mark->end - mark->beg == 1) ? &TILDE_OPENERS_1 : &TILDE_OPENERS_2;
   2520         /* case _T('!'):   MD_FALLTHROUGH(); */
   2521         case _T('['):   return &BRACKET_OPENERS;
   2522         case _T('|'):   return &TABLECELLBOUNDARIES;
   2523         case _T('-'):   return &FAINT_OPENERS;
   2524         case _T('%'):   return &INVERSE_OPENERS;
   2525         case _T('!'):   return &CONCEAL_OPENERS;
   2526         case _T('^'):   return &BLINK_OPENERS;
   2527         default:        return NULL;
   2528     }
   2529 }
   2530 
   2531 static MD_MARK*
   2532 md_push_mark(MD_CTX* ctx)
   2533 {
   2534     if(ctx->n_marks >= ctx->alloc_marks) {
   2535         MD_MARK* new_marks;
   2536 
   2537         ctx->alloc_marks = (ctx->alloc_marks > 0
   2538                 ? ctx->alloc_marks + ctx->alloc_marks / 2
   2539                 : 64);
   2540         new_marks = realloc(ctx->marks, ctx->alloc_marks * sizeof(MD_MARK));
   2541         if(new_marks == NULL) {
   2542             MD_LOG("realloc() failed.");
   2543             return NULL;
   2544         }
   2545 
   2546         ctx->marks = new_marks;
   2547     }
   2548 
   2549     return &ctx->marks[ctx->n_marks++];
   2550 }
   2551 
/* Helper macros for appending a new mark via md_push_mark().
 *
 * Both expect the calling function to provide a variable 'ctx', a variable
 * 'mark' (it receives the pointer to the new mark), an integer variable
 * 'ret' and a label 'abort': if md_push_mark() fails, ret is set to -1 and
 * the control jumps to the label.
 *
 * PUSH_MARK_() leaves the new mark uninitialized.
 */
#define PUSH_MARK_()                                                    \
        do {                                                            \
            mark = md_push_mark(ctx);                                   \
            if(mark == NULL) {                                          \
                ret = -1;                                               \
                goto abort;                                             \
            }                                                           \
        } while(0)

/* PUSH_MARK() additionally initializes the new mark with the given
 * character, offsets and flags, and with no links to other marks.
 * Note the character is converted to char before being stored. */
#define PUSH_MARK(ch_, beg_, end_, flags_)                              \
        do {                                                            \
            PUSH_MARK_();                                               \
            mark->beg = (beg_);                                         \
            mark->end = (end_);                                         \
            mark->prev = -1;                                            \
            mark->next = -1;                                            \
            mark->ch = (char)(ch_);                                     \
            mark->flags = (flags_);                                     \
        } while(0)
   2571 
   2572 
   2573 static void
   2574 md_mark_chain_append(MD_CTX* ctx, MD_MARKCHAIN* chain, int mark_index)
   2575 {
   2576     if(chain->tail >= 0)
   2577         ctx->marks[chain->tail].next = mark_index;
   2578     else
   2579         chain->head = mark_index;
   2580 
   2581     ctx->marks[mark_index].prev = chain->tail;
   2582     ctx->marks[mark_index].next = -1;
   2583     chain->tail = mark_index;
   2584 }
   2585 
   2586 /* Sometimes, we need to store a pointer into the mark. It is quite rare
   2587  * so we do not bother to make MD_MARK use union, and it can only happen
   2588  * for dummy marks. */
   2589 static inline void
   2590 md_mark_store_ptr(MD_CTX* ctx, int mark_index, void* ptr)
   2591 {
   2592     MD_MARK* mark = &ctx->marks[mark_index];
   2593     MD_ASSERT(mark->ch == 'D');
   2594 
   2595     /* Check only members beg and end are misused for this. */
   2596     MD_ASSERT(sizeof(void*) <= 2 * sizeof(OFF));
   2597     memcpy(mark, &ptr, sizeof(void*));
   2598 }
   2599 
   2600 static inline void*
   2601 md_mark_get_ptr(MD_CTX* ctx, int mark_index)
   2602 {
   2603     void* ptr;
   2604     MD_MARK* mark = &ctx->marks[mark_index];
   2605     MD_ASSERT(mark->ch == 'D');
   2606     memcpy(&ptr, mark, sizeof(void*));
   2607     return ptr;
   2608 }
   2609 
   2610 static void
   2611 md_resolve_range(MD_CTX* ctx, MD_MARKCHAIN* chain, int opener_index, int closer_index)
   2612 {
   2613     MD_MARK* opener = &ctx->marks[opener_index];
   2614     MD_MARK* closer = &ctx->marks[closer_index];
   2615 
   2616     /* Remove opener from the list of openers. */
   2617     if(chain != NULL) {
   2618         if(opener->prev >= 0)
   2619             ctx->marks[opener->prev].next = opener->next;
   2620         else
   2621             chain->head = opener->next;
   2622 
   2623         if(opener->next >= 0)
   2624             ctx->marks[opener->next].prev = opener->prev;
   2625         else
   2626             chain->tail = opener->prev;
   2627     }
   2628 
   2629     /* Interconnect opener and closer and mark both as resolved. */
   2630     opener->next = closer_index;
   2631     opener->flags |= MD_MARK_OPENER | MD_MARK_RESOLVED;
   2632     closer->prev = opener_index;
   2633     closer->flags |= MD_MARK_CLOSER | MD_MARK_RESOLVED;
   2634 }
   2635 
   2636 
/* Values for the parameter 'how' of md_rollback(). */
#define MD_ROLLBACK_ALL         0
#define MD_ROLLBACK_CROSSING    1

/* In the range ctx->marks[opener_index] ... [closer_index], undo some or all
 * resolvings accordingly to these rules:
 *
 * (1) All openers BEFORE the range corresponding to any closer inside the
 *     range are un-resolved and they are re-added to their respective chains
 *     of unresolved openers. This ensures we can reuse the opener for closers
 *     AFTER the range.
 *
 * (2) If 'how' is MD_ROLLBACK_ALL, then ALL resolved marks inside the range
 *     are discarded.
 *
 * (3) If 'how' is MD_ROLLBACK_CROSSING, only closers with openers handled
 *     in (1) are discarded. I.e. pairs of openers and closers which are both
 *     inside the range are retained as well as any unpaired marks.
 *
 * The marks at opener_index and closer_index themselves are not touched by
 * the second phase below; only the marks strictly between them are visited.
 */
static void
md_rollback(MD_CTX* ctx, int opener_index, int closer_index, int how)
{
    int i;
    int mark_index;

    /* Cut all unresolved openers at the mark index. */
    for(i = OPENERS_CHAIN_FIRST; i < OPENERS_CHAIN_LAST+1; i++) {
        MD_MARKCHAIN* chain = &ctx->mark_chains[i];

        /* Walk the tail backwards until it gets before opener_index. If the
         * opener itself is met, it is the last mark removed. (Presumably
         * a guard against following the prev link any further than that;
         * confirm.) */
        while(chain->tail >= opener_index) {
            int same = chain->tail == opener_index;
            chain->tail = ctx->marks[chain->tail].prev;
            if (same) break;
        }

        /* Terminate the shortened chain, or mark it as empty. */
        if(chain->tail >= 0)
            ctx->marks[chain->tail].next = -1;
        else
            chain->head = -1;
    }

    /* Go backwards so that unresolved openers are re-added into their
     * respective chains, in the right order. */
    mark_index = closer_index - 1;
    while(mark_index > opener_index) {
        MD_MARK* mark = &ctx->marks[mark_index];
        /* Remember the flags as they may get reset below but are still
         * needed for deciding how far to jump. */
        int mark_flags = mark->flags;
        int discard_flag = (how == MD_ROLLBACK_ALL);

        if(mark->flags & MD_MARK_CLOSER) {
            int mark_opener_index = mark->prev;

            /* Undo opener BEFORE the range. */
            if(mark_opener_index < opener_index) {
                MD_MARK* mark_opener = &ctx->marks[mark_opener_index];
                MD_MARKCHAIN* chain;

                mark_opener->flags &= ~(MD_MARK_OPENER | MD_MARK_CLOSER | MD_MARK_RESOLVED);
                /* NOTE(review): The chain is looked up for opener_index, i.e.
                 * for the opener of the whole range, while the mark appended
                 * to it is mark_opener_index. The description above talks
                 * about "their respective chains"; verify whether
                 * mark_opener_index was meant here. */
                chain = md_mark_chain(ctx, opener_index);
                if(chain != NULL) {
                    md_mark_chain_append(ctx, chain, mark_opener_index);
                    discard_flag = 1;
                }
            }
        }

        /* And reset our flags. */
        if(discard_flag) {
            /* Make zero-length closer a dummy mark as that's how it was born */
            if((mark->flags & MD_MARK_CLOSER)  &&  mark->beg == mark->end)
                mark->ch = 'D';

            mark->flags &= ~(MD_MARK_OPENER | MD_MARK_CLOSER | MD_MARK_RESOLVED);
        }

        /* Jump as far as we can over unresolved or non-interesting marks. */
        switch(how) {
            case MD_ROLLBACK_CROSSING:
                if((mark_flags & MD_MARK_CLOSER)  &&  mark->prev > opener_index) {
                    /* If we are closer with opener INSIDE the range, there may
                     * not be any other crosser inside the subrange. */
                    mark_index = mark->prev;
                    break;
                }
                MD_FALLTHROUGH();
            default:
                mark_index--;
                break;
        }
    }
}
   2727 
   2728 static void
   2729 md_build_mark_char_map(MD_CTX* ctx)
   2730 {
   2731     memset(ctx->mark_char_map, 0, sizeof(ctx->mark_char_map));
   2732 
   2733     ctx->mark_char_map['\\'] = 1;
   2734     ctx->mark_char_map['^'] = 1;
   2735     ctx->mark_char_map['%'] = 1;
   2736     ctx->mark_char_map['-'] = 1;
   2737     ctx->mark_char_map['*'] = 1;
   2738     ctx->mark_char_map['_'] = 1;
   2739     ctx->mark_char_map['`'] = 1;
   2740     ctx->mark_char_map['&'] = 1;
   2741     ctx->mark_char_map[';'] = 1;
   2742     ctx->mark_char_map['<'] = 1;
   2743     ctx->mark_char_map['>'] = 1;
   2744     ctx->mark_char_map['['] = 1;
   2745     ctx->mark_char_map['!'] = 1;
   2746     ctx->mark_char_map[']'] = 1;
   2747     ctx->mark_char_map['\0'] = 1;
   2748 
   2749     if(ctx->parser.flags & MD_FLAG_STRIKETHROUGH)
   2750         ctx->mark_char_map['~'] = 1;
   2751 
   2752     if(ctx->parser.flags & MD_FLAG_LATEXMATHSPANS)
   2753         ctx->mark_char_map['$'] = 1;
   2754 
   2755     if(ctx->parser.flags & MD_FLAG_PERMISSIVEEMAILAUTOLINKS)
   2756         ctx->mark_char_map['@'] = 1;
   2757 
   2758     if(ctx->parser.flags & MD_FLAG_PERMISSIVEURLAUTOLINKS)
   2759         ctx->mark_char_map[':'] = 1;
   2760 
   2761     if(ctx->parser.flags & MD_FLAG_PERMISSIVEWWWAUTOLINKS)
   2762         ctx->mark_char_map['.'] = 1;
   2763 
   2764     if((ctx->parser.flags & MD_FLAG_TABLES) || (ctx->parser.flags & MD_FLAG_WIKILINKS))
   2765         ctx->mark_char_map['|'] = 1;
   2766 
   2767     if(ctx->parser.flags & MD_FLAG_COLLAPSEWHITESPACE) {
   2768         int i;
   2769 
   2770         for(i = 0; i < (int) sizeof(ctx->mark_char_map); i++) {
   2771             if(ISWHITESPACE_(i))
   2772                 ctx->mark_char_map[i] = 1;
   2773         }
   2774     }
   2775 }
   2776 
   2777 /* We limit code span marks to lower than 32 backticks. This solves the
   2778  * pathologic case of too many openers, each of different length: Their
   2779  * resolving would be then O(n^2). */
   2780 #define CODESPAN_MARK_MAXLEN    32
   2781 
   2782 static int
   2783 md_is_code_span(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg,
   2784                 OFF* p_opener_beg, OFF* p_opener_end,
   2785                 OFF* p_closer_beg, OFF* p_closer_end,
   2786                 OFF last_potential_closers[CODESPAN_MARK_MAXLEN],
   2787                 int* p_reached_paragraph_end)
   2788 {
   2789     OFF opener_beg = beg;
   2790     OFF opener_end;
   2791     OFF closer_beg;
   2792     OFF closer_end;
   2793     SZ mark_len;
   2794     OFF line_end;
   2795     int has_space_after_opener = FALSE;
   2796     int has_eol_after_opener = FALSE;
   2797     int has_space_before_closer = FALSE;
   2798     int has_eol_before_closer = FALSE;
   2799     int has_only_space = TRUE;
   2800     int line_index = 0;
   2801 
   2802     line_end = lines[0].end;
   2803     opener_end = opener_beg;
   2804     while(opener_end < line_end  &&  CH(opener_end) == _T('`'))
   2805         opener_end++;
   2806     has_space_after_opener = (opener_end < line_end && CH(opener_end) == _T(' '));
   2807     has_eol_after_opener = (opener_end == line_end);
   2808 
   2809     /* The caller needs to know end of the opening mark even if we fail. */
   2810     *p_opener_end = opener_end;
   2811 
   2812     mark_len = opener_end - opener_beg;
   2813     if(mark_len > CODESPAN_MARK_MAXLEN)
   2814         return FALSE;
   2815 
   2816     /* Check whether we already know there is no closer of this length.
   2817      * If so, re-scan does no sense. This fixes issue #59. */
   2818     if(last_potential_closers[mark_len-1] >= lines[n_lines-1].end  ||
   2819        (*p_reached_paragraph_end  &&  last_potential_closers[mark_len-1] < opener_end))
   2820         return FALSE;
   2821 
   2822     closer_beg = opener_end;
   2823     closer_end = opener_end;
   2824 
   2825     /* Find closer mark. */
   2826     while(TRUE) {
   2827         while(closer_beg < line_end  &&  CH(closer_beg) != _T('`')) {
   2828             if(CH(closer_beg) != _T(' '))
   2829                 has_only_space = FALSE;
   2830             closer_beg++;
   2831         }
   2832         closer_end = closer_beg;
   2833         while(closer_end < line_end  &&  CH(closer_end) == _T('`'))
   2834             closer_end++;
   2835 
   2836         if(closer_end - closer_beg == mark_len) {
   2837             /* Success. */
   2838             has_space_before_closer = (closer_beg > lines[line_index].beg && CH(closer_beg-1) == _T(' '));
   2839             has_eol_before_closer = (closer_beg == lines[line_index].beg);
   2840             break;
   2841         }
   2842 
   2843         if(closer_end - closer_beg > 0) {
   2844             /* We have found a back-tick which is not part of the closer. */
   2845             has_only_space = FALSE;
   2846 
   2847             /* But if we eventually fail, remember it as a potential closer
   2848              * of its own length for future attempts. This mitigates needs for
   2849              * rescans. */
   2850             if(closer_end - closer_beg < CODESPAN_MARK_MAXLEN) {
   2851                 if(closer_beg > last_potential_closers[closer_end - closer_beg - 1])
   2852                     last_potential_closers[closer_end - closer_beg - 1] = closer_beg;
   2853             }
   2854         }
   2855 
   2856         if(closer_end >= line_end) {
   2857             line_index++;
   2858             if(line_index >= n_lines) {
   2859                 /* Reached end of the paragraph and still nothing. */
   2860                 *p_reached_paragraph_end = TRUE;
   2861                 return FALSE;
   2862             }
   2863             /* Try on the next line. */
   2864             line_end = lines[line_index].end;
   2865             closer_beg = lines[line_index].beg;
   2866         } else {
   2867             closer_beg = closer_end;
   2868         }
   2869     }
   2870 
   2871     /* If there is a space or a new line both after and before the opener
   2872      * (and if the code span is not made of spaces only), consume one initial
   2873      * and one trailing space as part of the marks. */
   2874     if(!has_only_space  &&
   2875        (has_space_after_opener || has_eol_after_opener)  &&
   2876        (has_space_before_closer || has_eol_before_closer))
   2877     {
   2878         if(has_space_after_opener)
   2879             opener_end++;
   2880         else
   2881             opener_end = lines[1].beg;
   2882 
   2883         if(has_space_before_closer)
   2884             closer_beg--;
   2885         else {
   2886             closer_beg = lines[line_index-1].end;
   2887             /* We need to eat the preceding "\r\n" but not any line trailing
   2888              * spaces. */
   2889             while(closer_beg < ctx->size  &&  ISBLANK(closer_beg))
   2890                 closer_beg++;
   2891         }
   2892     }
   2893 
   2894     *p_opener_beg = opener_beg;
   2895     *p_opener_end = opener_end;
   2896     *p_closer_beg = closer_beg;
   2897     *p_closer_end = closer_end;
   2898     return TRUE;
   2899 }
   2900 
   2901 /* detect anchors with syntax: [|anchorId] */
   2902 static int
   2903 md_is_anchor_span(MD_CTX* ctx, const MD_LINE* lines, OFF off, OFF* p_closer_beg)
   2904 {
   2905     OFF line_end = lines[0].end;
   2906     // Smallest anchor is [|x]
   2907     // An anchor must be on a single line
   2908     if (off+4 >= line_end)
   2909         return FALSE;
   2910     off += 2;
   2911 
   2912     // Find closer mark
   2913     int opener_end = off;
   2914     while (off < line_end) {
   2915         if (CH(off) == _T(']')) {
   2916             // Check if there an id for the anchor
   2917             if (off == opener_end)
   2918                 return FALSE;
   2919             *p_closer_beg = off;
   2920             return TRUE;
   2921         }
   2922         off++;
   2923     }
   2924     return FALSE;
   2925 }
   2926 
/* IS_MARK_CHAR(off): Non-zero if the character at offset `off` has a non-zero
 * entry in ctx->mark_char_map[]. The macro refers to `ctx` directly, so a
 * variable of that name must be in scope wherever it is used. */
#ifdef MD4C_USE_UTF16
    /* For UTF-16, mark_char_map[] covers only ASCII. Characters whose value
     * does not fit into the map are never treated as mark characters; the
     * range check comes first so the map is not indexed out of bounds. */
    #define IS_MARK_CHAR(off)   ((CH(off) < SIZEOF_ARRAY(ctx->mark_char_map))  &&  \
                                (ctx->mark_char_map[(unsigned char) CH(off)]))
#else
    /* For 8-bit encodings, mark_char_map[] covers all 256 elements. */
    #define IS_MARK_CHAR(off)   (ctx->mark_char_map[(unsigned char) CH(off)])
#endif
   2935 
   2936 /* detect faint effect: -text text- */
   2937 static int
   2938 md_is_faint_span(MD_CTX* ctx, const MD_LINE* lines, OFF beg, OFF* p_closer_beg)
   2939 {
   2940     OFF tmp;
   2941     OFF line_end;
   2942 
   2943     line_end = lines[0].end;
   2944     if (beg+2 >= line_end)
   2945         return FALSE;
   2946     if (ISUNICODEWHITESPACE(beg+1))
   2947         return FALSE;
   2948     tmp = beg+2;
   2949     while (tmp < line_end) {
   2950         if (CH(tmp) == _T('-') && (tmp+1 == line_end || ISUNICODEWHITESPACE(tmp+1) || IS_MARK_CHAR(tmp+1))
   2951             && (!ISUNICODEWHITESPACE(tmp-1))) {
   2952             *p_closer_beg = tmp;
   2953             return TRUE;
   2954         }
   2955         tmp++;
   2956     }
   2957 
   2958     return FALSE;
   2959 }
   2960 
   2961 /* detect inverse effect: %text text% */
   2962 static int
   2963 md_is_inverse_span(MD_CTX* ctx, const MD_LINE* lines, OFF beg, OFF* p_closer_beg)
   2964 {
   2965     OFF tmp;
   2966     OFF line_end;
   2967 
   2968     line_end = lines[0].end;
   2969     if (beg+2 >= line_end)
   2970         return FALSE;
   2971     if (ISUNICODEWHITESPACE(beg+1))
   2972         return FALSE;
   2973     tmp = beg+2;
   2974     while (tmp < line_end) {
   2975         if (CH(tmp) == _T('%') && (tmp+1 == line_end || ISUNICODEWHITESPACE(tmp+1) || IS_MARK_CHAR(tmp+1))
   2976             && (!ISUNICODEWHITESPACE(tmp-1))) {
   2977             *p_closer_beg = tmp;
   2978             return TRUE;
   2979         }
   2980         tmp++;
   2981     }
   2982 
   2983     return FALSE;
   2984 }
   2985 
   2986 /* detect conceal effect: !text text! */
   2987 static int
   2988 md_is_conceal_span(MD_CTX* ctx, const MD_LINE* lines, OFF beg, OFF* p_closer_beg)
   2989 {
   2990     OFF tmp;
   2991     OFF line_end;
   2992 
   2993     line_end = lines[0].end;
   2994     if (beg+2 >= line_end)
   2995         return FALSE;
   2996     if (ISUNICODEWHITESPACE(beg+1))
   2997         return FALSE;
   2998     tmp = beg+2;
   2999     while (tmp < line_end) {
   3000         if (CH(tmp) == _T('!') && (tmp+1 == line_end || ISUNICODEWHITESPACE(tmp+1) || IS_MARK_CHAR(tmp+1))
   3001             && (!ISUNICODEWHITESPACE(tmp-1))) {
   3002             *p_closer_beg = tmp;
   3003             return TRUE;
   3004         }
   3005         tmp++;
   3006     }
   3007 
   3008     return FALSE;
   3009 }
   3010 
   3011 /* detect blink effect: ^text text^ */
   3012 static int
   3013 md_is_blink_span(MD_CTX* ctx, const MD_LINE* lines, OFF beg, OFF* p_closer_beg)
   3014 {
   3015     OFF tmp;
   3016     OFF line_end;
   3017 
   3018     line_end = lines[0].end;
   3019     if (beg+2 >= line_end)
   3020         return FALSE;
   3021     if (ISUNICODEWHITESPACE(beg+1))
   3022         return FALSE;
   3023     tmp = beg+2;
   3024     while (tmp < line_end) {
   3025         if (CH(tmp) == _T('^') && (tmp+1 == line_end || ISUNICODEWHITESPACE(tmp+1) || IS_MARK_CHAR(tmp+1))
   3026             && (!ISUNICODEWHITESPACE(tmp-1))) {
   3027             *p_closer_beg = tmp;
   3028             return TRUE;
   3029         }
   3030         tmp++;
   3031     }
   3032 
   3033     return FALSE;
   3034 }
   3035 
   3036 static int
   3037 md_is_autolink_uri(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end)
   3038 {
   3039     OFF off = beg+1;
   3040 
   3041     MD_ASSERT(CH(beg) == _T('<'));
   3042 
   3043     /* Check for scheme. */
   3044     if(off >= max_end  ||  !ISASCII(off))
   3045         return FALSE;
   3046     off++;
   3047     while(1) {
   3048         if(off >= max_end)
   3049             return FALSE;
   3050         if(off - beg > 32)
   3051             return FALSE;
   3052         if(CH(off) == _T(':')  &&  off - beg >= 3)
   3053             break;
   3054         if(!ISALNUM(off) && CH(off) != _T('+') && CH(off) != _T('-') && CH(off) != _T('.'))
   3055             return FALSE;
   3056         off++;
   3057     }
   3058 
   3059     /* Check the path after the scheme. */
   3060     while(off < max_end  &&  CH(off) != _T('>')) {
   3061         if(ISWHITESPACE(off) || ISCNTRL(off) || CH(off) == _T('<'))
   3062             return FALSE;
   3063         off++;
   3064     }
   3065 
   3066     if(off >= max_end)
   3067         return FALSE;
   3068 
   3069     MD_ASSERT(CH(off) == _T('>'));
   3070     *p_end = off+1;
   3071     return TRUE;
   3072 }
   3073 
   3074 static int
   3075 md_is_autolink_email(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end)
   3076 {
   3077     OFF off = beg + 1;
   3078     int label_len;
   3079 
   3080     MD_ASSERT(CH(beg) == _T('<'));
   3081 
   3082     /* The code should correspond to this regexp:
   3083             /^[a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+
   3084             @[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?
   3085             (?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$/
   3086      */
   3087 
   3088     /* Username (before '@'). */
   3089     while(off < max_end  &&  (ISALNUM(off) || ISANYOF(off, _T(".!#$%&'*+/=?^_`{|}~-"))))
   3090         off++;
   3091     if(off <= beg+1)
   3092         return FALSE;
   3093 
   3094     /* '@' */
   3095     if(off >= max_end  ||  CH(off) != _T('@'))
   3096         return FALSE;
   3097     off++;
   3098 
   3099     /* Labels delimited with '.'; each label is sequence of 1 - 63 alnum
   3100      * characters or '-', but '-' is not allowed as first or last char. */
   3101     label_len = 0;
   3102     while(off < max_end) {
   3103         if(ISALNUM(off))
   3104             label_len++;
   3105         else if(CH(off) == _T('-')  &&  label_len > 0)
   3106             label_len++;
   3107         else if(CH(off) == _T('.')  &&  label_len > 0  &&  CH(off-1) != _T('-'))
   3108             label_len = 0;
   3109         else
   3110             break;
   3111 
   3112         if(label_len > 63)
   3113             return FALSE;
   3114 
   3115         off++;
   3116     }
   3117 
   3118     if(label_len <= 0  || off >= max_end  ||  CH(off) != _T('>') ||  CH(off-1) == _T('-'))
   3119         return FALSE;
   3120 
   3121     *p_end = off+1;
   3122     return TRUE;
   3123 }
   3124 
   3125 static int
   3126 md_is_autolink(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end, int* p_missing_mailto)
   3127 {
   3128     if(md_is_autolink_uri(ctx, beg, max_end, p_end)) {
   3129         *p_missing_mailto = FALSE;
   3130         return TRUE;
   3131     }
   3132 
   3133     if(md_is_autolink_email(ctx, beg, max_end, p_end)) {
   3134         *p_missing_mailto = TRUE;
   3135         return TRUE;
   3136     }
   3137 
   3138     return FALSE;
   3139 }
   3140 
/* Scan the given lines and push a mark (via PUSH_MARK) for every character
 * sequence which may have a special meaning in inline context.
 *
 * Some constructs are fully recognized right here and pushed as already
 * resolved marks: backslash escapes, code spans, faint/inverse/conceal/blink
 * spans, raw HTML, "<...>" autolinks, anchors, non-trivial whitespace and
 * NULL characters. Others (emphasis, entities, brackets, permissive
 * autolinks, '|', strikethrough, equations) are only pushed as potential
 * openers and/or closers.
 *
 * lines, n_lines: The lines to scan.
 * table_mode:     If non-zero, '|' is pushed as a mark. (It is pushed also if
 *                 MD_FLAG_WIKILINKS is set in the parser flags.)
 *
 * Returns the value of `ret`, which is initialized to 0.
 *
 * NOTE(review): PUSH_MARK is defined outside of this function. The local
 * `mark` and the label `abort` are never referenced directly here, so
 * presumably PUSH_MARK uses them (together with `ret`) to report a failure
 * -- confirm against its definition.
 */
static int
md_collect_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines, int table_mode)
{
    const MD_LINE* line_term = lines + n_lines;
    const MD_LINE* line;
    int ret = 0;
    MD_MARK* mark;
    /* State shared by all md_is_code_span() calls made from this function. */
    OFF codespan_last_potential_closers[CODESPAN_MARK_MAXLEN] = { 0 };
    int codespan_scanned_till_paragraph_end = FALSE;

    for(line = lines; line < line_term; line++) {
        OFF off = line->beg;
        OFF line_end = line->end;

        while(TRUE) {
            CHAR ch;

            /* Optimization: Use some loop unrolling. */
            while(off + 3 < line_end  &&  !IS_MARK_CHAR(off+0)  &&  !IS_MARK_CHAR(off+1)
                                      &&  !IS_MARK_CHAR(off+2)  &&  !IS_MARK_CHAR(off+3))
                off += 4;
            while(off < line_end  &&  !IS_MARK_CHAR(off+0))
                off++;

            /* No more mark characters on this line. */
            if(off >= line_end)
                break;

            ch = CH(off);

            /* A backslash escape.
             * It can go beyond line->end as it may involve escaped new
             * line to form a hard break. */
            if(ch == _T('\\')  &&  off+1 < ctx->size  &&  (ISPUNCT(off+1) || ISNEWLINE(off+1))) {
                /* Hard-break cannot be on the last line of the block. */
                if(!ISNEWLINE(off+1)  ||  line+1 < line_term)
                    PUSH_MARK(ch, off, off+2, MD_MARK_RESOLVED);
                off += 2;
                continue;
            }

            /* A potential (string) emphasis start/end. */
            if(ch == _T('*')  ||  ch == _T('_')) {
                OFF tmp = off+1;
                int left_level;     /* What precedes: 0 = whitespace; 1 = punctuation; 2 = other char. */
                int right_level;    /* What follows: 0 = whitespace; 1 = punctuation; 2 = other char. */

                /* Find the end of the run of the same characters. */
                while(tmp < line_end  &&  CH(tmp) == ch)
                    tmp++;

                if(off == line->beg  ||  ISUNICODEWHITESPACEBEFORE(off))
                    left_level = 0;
                else if(ISUNICODEPUNCTBEFORE(off))
                    left_level = 1;
                else
                    left_level = 2;

                if(tmp == line_end  ||  ISUNICODEWHITESPACE(tmp))
                    right_level = 0;
                else if(ISUNICODEPUNCT(tmp))
                    right_level = 1;
                else
                    right_level = 2;

                /* Intra-word underscore doesn't have special meaning. */
                if(ch == _T('_')  &&  left_level == 2  &&  right_level == 2) {
                    left_level = 0;
                    right_level = 0;
                }

                /* A run surrounded with whitespace from both sides is no mark. */
                if(left_level != 0  ||  right_level != 0) {
                    unsigned flags = 0;

                    if(left_level > 0  &&  left_level >= right_level)
                        flags |= MD_MARK_POTENTIAL_CLOSER;
                    if(right_level > 0  &&  right_level >= left_level)
                        flags |= MD_MARK_POTENTIAL_OPENER;
                    if(left_level == 2  &&  right_level == 2)
                        flags |= MD_MARK_EMPH_INTRAWORD;

                    /* For "the rule of three" we need to remember the original
                     * size of the mark (modulo three), before we potentially
                     * split the mark when being later resolved partially by some
                     * shorter closer. */
                    switch((tmp - off) % 3) {
                        case 0: flags |= MD_MARK_EMPH_MOD3_0; break;
                        case 1: flags |= MD_MARK_EMPH_MOD3_1; break;
                        case 2: flags |= MD_MARK_EMPH_MOD3_2; break;
                    }

                    PUSH_MARK(ch, off, tmp, flags);

                    /* During resolving, multiple asterisks may have to be
                     * split into independent span start/ends. Consider e.g.
                     * "**foo* bar*". Therefore we push also some empty dummy
                     * marks to have enough space for that. */
                    off++;
                    while(off < tmp) {
                        PUSH_MARK('D', off, off, 0);
                        off++;
                    }
                    continue;
                }

                off = tmp;
                continue;
            }

            /* A potential code span start/end. */
            if(ch == _T('`')) {
                OFF opener_beg, opener_end;
                OFF closer_beg, closer_end;
                int is_code_span;

                is_code_span = md_is_code_span(ctx, line, line_term - line, off,
                                    &opener_beg, &opener_end, &closer_beg, &closer_end,
                                    codespan_last_potential_closers,
                                    &codespan_scanned_till_paragraph_end);
                if(is_code_span) {
                    /* Push the opener and the closer as an interconnected,
                     * already resolved pair. */
                    PUSH_MARK(_T('`'), opener_beg, opener_end, MD_MARK_OPENER | MD_MARK_RESOLVED);
                    PUSH_MARK(_T('`'), closer_beg, closer_end, MD_MARK_CLOSER | MD_MARK_RESOLVED);
                    ctx->marks[ctx->n_marks-2].next = ctx->n_marks-1;
                    ctx->marks[ctx->n_marks-1].prev = ctx->n_marks-2;

                    /* The contents of the code span is not scanned for marks. */
                    off = closer_end;

                    /* Advance the current line accordingly. */
                    if(off > line_end) {
                        line = md_lookup_line(off, line, line_term - line);
                        line_end = line->end;
                    }
                    continue;
                }

                off = opener_end;
                continue;
            }

            /* A potential faint span start/end. */
            if(ch == _T('-')) {
                OFF closer_beg;
                int is_faint_span;

                /* The opener has to be at the line beginning or it has to be
                 * preceded by a whitespace, a punctuation or a mark char. */
                if (off == line->beg  ||  ISUNICODEWHITESPACEBEFORE(off) || ISUNICODEPUNCTBEFORE(off)
                    || IS_MARK_CHAR(off-1)) {

                    is_faint_span = md_is_faint_span(ctx, line, off, &closer_beg);
                    if(is_faint_span) {
                        /* NOTE(review): The closer mark is pushed right away
                         * while the scanning resumes at off+1. Hence any marks
                         * found inside the span are pushed after the closer
                         * (i.e. not in the order of their offsets), and the
                         * closing character itself is visited again by this
                         * very branch later. Confirm the later stages cope
                         * with that. The same applies to the inverse, conceal
                         * and blink branches below. */
                        PUSH_MARK(_T('-'), off, off+1, MD_MARK_OPENER | MD_MARK_RESOLVED);
                        PUSH_MARK(_T('-'), closer_beg, closer_beg+1, MD_MARK_CLOSER | MD_MARK_RESOLVED);
                        ctx->marks[ctx->n_marks-2].next = ctx->n_marks-1;
                        ctx->marks[ctx->n_marks-1].prev = ctx->n_marks-2;
                    }
                }
                off++;
                continue;
            }

            /* A potential inverse span start/end. */
            if(ch == _T('%')) {
                OFF closer_beg;
                int is_inverse_span;

                /* Same precondition for the opener as for the faint span. */
                if (off == line->beg  ||  ISUNICODEWHITESPACEBEFORE(off) || ISUNICODEPUNCTBEFORE(off)
                    || IS_MARK_CHAR(off-1)) {

                    is_inverse_span = md_is_inverse_span(ctx, line, off, &closer_beg);
                    if(is_inverse_span) {
                        PUSH_MARK(_T('%'), off, off+1, MD_MARK_OPENER | MD_MARK_RESOLVED);
                        PUSH_MARK(_T('%'), closer_beg, closer_beg+1, MD_MARK_CLOSER | MD_MARK_RESOLVED);
                        ctx->marks[ctx->n_marks-2].next = ctx->n_marks-1;
                        ctx->marks[ctx->n_marks-1].prev = ctx->n_marks-2;

                    }
                }
                off++;
                continue;
            }

            /* A potential conceal span start/end.
             *
             * NOTE(review): This branch handles every '!' and always ends
             * with `continue`. Therefore the `ch == _T('!')` part of the
             * condition in the "potential link or its part" branch below can
             * never be true in this function, i.e. "![" is never pushed as
             * a single image opener mark. Confirm whether that is intended. */
            if(ch == _T('!')) {
                OFF closer_beg;
                int is_conceal_span;

                /* Same precondition for the opener as for the faint span. */
                if (off == line->beg  ||  ISUNICODEWHITESPACEBEFORE(off) || ISUNICODEPUNCTBEFORE(off)
                    || IS_MARK_CHAR(off-1)) {

                    is_conceal_span = md_is_conceal_span(ctx, line, off, &closer_beg);
                    if(is_conceal_span) {
                        PUSH_MARK(_T('!'), off, off+1, MD_MARK_OPENER | MD_MARK_RESOLVED);
                        PUSH_MARK(_T('!'), closer_beg, closer_beg+1, MD_MARK_CLOSER | MD_MARK_RESOLVED);
                        ctx->marks[ctx->n_marks-2].next = ctx->n_marks-1;
                        ctx->marks[ctx->n_marks-1].prev = ctx->n_marks-2;

                    }
                }
                off++;
                continue;
            }

            /* A potential blink span start/end. */
            if(ch == _T('^')) {
                OFF closer_beg;
                int is_blink_span;

                /* Same precondition for the opener as for the faint span. */
                if (off == line->beg  ||  ISUNICODEWHITESPACEBEFORE(off) || ISUNICODEPUNCTBEFORE(off)
                    || IS_MARK_CHAR(off-1)) {

                    is_blink_span = md_is_blink_span(ctx, line, off, &closer_beg);
                    if(is_blink_span) {
                        PUSH_MARK(_T('^'), off, off+1, MD_MARK_OPENER | MD_MARK_RESOLVED);
                        PUSH_MARK(_T('^'), closer_beg, closer_beg+1, MD_MARK_CLOSER | MD_MARK_RESOLVED);
                        ctx->marks[ctx->n_marks-2].next = ctx->n_marks-1;
                        ctx->marks[ctx->n_marks-1].prev = ctx->n_marks-2;

                    }
                }
                off++;
                continue;
            }

            /* A potential entity start. */
            if(ch == _T('&')) {
                PUSH_MARK(ch, off, off+1, MD_MARK_POTENTIAL_OPENER);
                off++;
                continue;
            }

            /* A potential entity end. */
            if(ch == _T(';')) {
                /* We surely cannot be entity unless the previous mark is '&'. */
                if(ctx->n_marks > 0  &&  ctx->marks[ctx->n_marks-1].ch == _T('&'))
                    PUSH_MARK(ch, off, off+1, MD_MARK_POTENTIAL_CLOSER);

                off++;
                continue;
            }

            /* A potential autolink or raw HTML start/end. */
            if(ch == _T('<')) {
                int is_autolink;
                OFF autolink_end;
                int missing_mailto;

                if(!(ctx->parser.flags & MD_FLAG_NOHTMLSPANS)) {
                    int is_html;
                    OFF html_end;

                    /* Given the nature of the raw HTML, we have to recognize
                     * it here. Doing so later in md_analyze_lt_gt() could
                     * open can of worms of quadratic complexity. */
                    is_html = md_is_html_any(ctx, line, line_term - line, off,
                                    lines[n_lines-1].end, &html_end);
                    if(is_html) {
                        /* Both marks are empty; they only delimit the raw
                         * HTML span. */
                        PUSH_MARK(_T('<'), off, off, MD_MARK_OPENER | MD_MARK_RESOLVED);
                        PUSH_MARK(_T('>'), html_end, html_end, MD_MARK_CLOSER | MD_MARK_RESOLVED);
                        ctx->marks[ctx->n_marks-2].next = ctx->n_marks-1;
                        ctx->marks[ctx->n_marks-1].prev = ctx->n_marks-2;
                        off = html_end;

                        /* Advance the current line accordingly. */
                        if(off > line_end) {
                            line = md_lookup_line(off, line, line_term - line);
                            line_end = line->end;
                        }
                        continue;
                    }
                }

                is_autolink = md_is_autolink(ctx, off, lines[n_lines-1].end,
                                    &autolink_end, &missing_mailto);
                if(is_autolink) {
                    /* The opener char tells the two kinds of autolink apart:
                     * '@' for an e-mail (missing "mailto:"), '<' for an URI. */
                    PUSH_MARK((missing_mailto ? _T('@') : _T('<')), off, off+1,
                                MD_MARK_OPENER | MD_MARK_RESOLVED | MD_MARK_AUTOLINK);
                    PUSH_MARK(_T('>'), autolink_end-1, autolink_end,
                                MD_MARK_CLOSER | MD_MARK_RESOLVED | MD_MARK_AUTOLINK);
                    ctx->marks[ctx->n_marks-2].next = ctx->n_marks-1;
                    ctx->marks[ctx->n_marks-1].prev = ctx->n_marks-2;
                    off = autolink_end;
                    continue;
                }

                off++;
                continue;
            }

            /* A potential anchor */
            if(ch == _T('[') && off+1 < line_end && CH(off+1) == _T('|')) {
                OFF closer_beg;
                int is_anchor_span = md_is_anchor_span(ctx, line, off, &closer_beg);
                if (is_anchor_span) {
                    /* The opener mark covers both characters of "[|". */
                    PUSH_MARK(_T('['), off, off+2, MD_MARK_OPENER | MD_MARK_RESOLVED);
                    PUSH_MARK(_T(']'), closer_beg, closer_beg+1, MD_MARK_CLOSER | MD_MARK_RESOLVED);
                    ctx->marks[ctx->n_marks-2].next = ctx->n_marks-1;
                    ctx->marks[ctx->n_marks-1].prev = ctx->n_marks-2;
                    off = closer_beg+1;
                    continue;
                }
                /* Not an anchor: fall through and treat the '[' as a potential
                 * link opener below. */
            }

            /* A potential link or its part. */
            if(ch == _T('[')  ||  (ch == _T('!') && off+1 < line_end && CH(off+1) == _T('['))) {
                OFF tmp = (ch == _T('[') ? off+1 : off+2);
                PUSH_MARK(ch, off, tmp, MD_MARK_POTENTIAL_OPENER);
                off = tmp;
                /* Two dummies to make enough place for data we need if it is
                 * a link. */
                PUSH_MARK('D', off, off, 0);
                PUSH_MARK('D', off, off, 0);
                continue;
            }
            if(ch == _T(']')) {
                PUSH_MARK(ch, off, off+1, MD_MARK_POTENTIAL_CLOSER);
                off++;
                continue;
            }

            /* A potential permissive e-mail autolink. */
            if(ch == _T('@')) {
                /* Require an alphanumeric character on both sides of '@' and
                 * some more characters following on the line. */
                if(line->beg + 1 <= off  &&  ISALNUM(off-1)  &&
                    off + 3 < line->end  &&  ISALNUM(off+1))
                {
                    PUSH_MARK(ch, off, off+1, MD_MARK_POTENTIAL_OPENER);
                    /* Push a dummy as a reserve for a closer. */
                    PUSH_MARK('D', off, off, 0);
                }

                off++;
                continue;
            }

            /* A potential permissive URL autolink. */
            if(ch == _T(':')) {
                static struct {
                    const CHAR* scheme;
                    SZ scheme_size;
                    const CHAR* suffix;
                    SZ suffix_size;
                } scheme_map[] = {
                    /* In the order from the most frequently used, arguably. */
                    { _T("https"), 5,   _T("//"), 2 },
                    { _T("gemini"), 6,   _T("//"), 2 },
                    { _T("http"), 4,    _T("//"), 2 },
                    { _T("gopher"), 6,   _T("//"), 2 },
                    { _T("spartan"), 7,    _T("//"), 2 },
                    { _T("ftp"), 3,     _T("//"), 2 }
                };
                int scheme_index;

                for(scheme_index = 0; scheme_index < (int) SIZEOF_ARRAY(scheme_map); scheme_index++) {
                    const CHAR* scheme = scheme_map[scheme_index].scheme;
                    const SZ scheme_size = scheme_map[scheme_index].scheme_size;
                    const CHAR* suffix = scheme_map[scheme_index].suffix;
                    const SZ suffix_size = scheme_map[scheme_index].suffix_size;

                    /* The scheme has to precede the colon on the same line,
                     * it has to start at the line beginning or after
                     * a whitespace or one of "*_~([", and the suffix plus
                     * at least one more character have to follow. */
                    if(line->beg + scheme_size <= off  &&  md_ascii_eq(STR(off-scheme_size), scheme, scheme_size)  &&
                        (line->beg + scheme_size == off || ISWHITESPACE(off-scheme_size-1) || ISANYOF(off-scheme_size-1, _T("*_~([")))  &&
                        off + 1 + suffix_size < line->end  &&  md_ascii_eq(STR(off+1), suffix, suffix_size))
                    {
                        PUSH_MARK(ch, off-scheme_size, off+1+suffix_size, MD_MARK_POTENTIAL_OPENER);
                        /* Push a dummy as a reserve for a closer. */
                        PUSH_MARK('D', off, off, 0);
                        off += 1 + suffix_size;
                        break;
                    }
                }

                /* Executed both after a match (on top of the increment done
                 * above) and when no scheme matched. */
                off++;
                continue;
            }

            /* A potential permissive WWW autolink. */
            if(ch == _T('.')) {
                /* "www" has to precede the dot on the same line, under the
                 * same conditions for the preceding character as above. */
                if(line->beg + 3 <= off  &&  md_ascii_eq(STR(off-3), _T("www"), 3)  &&
                    (line->beg + 3 == off || ISWHITESPACE(off-4) || ISANYOF(off-4, _T("*_~([")))  &&
                    off + 1 < line_end)
                {
                    PUSH_MARK(ch, off-3, off+1, MD_MARK_POTENTIAL_OPENER);
                    /* Push a dummy as a reserve for a closer. */
                    PUSH_MARK('D', off, off, 0);
                    off++;
                    continue;
                }

                off++;
                continue;
            }

            /* A potential table cell boundary or wiki link label delimiter. */
            if((table_mode || ctx->parser.flags & MD_FLAG_WIKILINKS) && ch == _T('|')) {
                PUSH_MARK(ch, off, off+1, 0);
                off++;
                continue;
            }

            /* A potential strikethrough start/end. */
            if(ch == _T('~')) {
                OFF tmp = off+1;

                while(tmp < line_end  &&  CH(tmp) == _T('~'))
                    tmp++;

                /* Only runs of one or two tildes can make a mark. */
                if(tmp - off < 3) {
                    unsigned flags = 0;

                    if(tmp < line_end  &&  !ISUNICODEWHITESPACE(tmp))
                        flags |= MD_MARK_POTENTIAL_OPENER;
                    if(off > line->beg  &&  !ISUNICODEWHITESPACEBEFORE(off))
                        flags |= MD_MARK_POTENTIAL_CLOSER;
                    if(flags != 0)
                        PUSH_MARK(ch, off, tmp, flags);
                }

                off = tmp;
                continue;
            }

            /* A potential equation start/end */
            if(ch == _T('$')) {
                /* We can have at most two consecutive $ signs,
                 * where two dollar signs signify a display equation. */
                OFF tmp = off+1;

                while(tmp < line_end && CH(tmp) == _T('$'))
                    tmp++;

                if (tmp - off <= 2)
                    PUSH_MARK(ch, off, tmp, MD_MARK_POTENTIAL_OPENER | MD_MARK_POTENTIAL_CLOSER);
                off = tmp;
                continue;
            }

            /* Turn non-trivial whitespace into single space. */
            if(ISWHITESPACE_(ch)) {
                OFF tmp = off+1;

                while(tmp < line_end  &&  ISWHITESPACE(tmp))
                    tmp++;

                /* A lone ordinary space needs no mark. */
                if(tmp - off > 1  ||  ch != _T(' '))
                    PUSH_MARK(ch, off, tmp, MD_MARK_RESOLVED);

                off = tmp;
                continue;
            }

            /* NULL character. */
            if(ch == _T('\0')) {
                PUSH_MARK(ch, off, off+1, MD_MARK_RESOLVED);
                off++;
                continue;
            }

            /* A mark character which is not handled by any branch above. */
            off++;
        }
    }

    /* Add a dummy mark at the end of the mark vector to simplify
     * process_inlines(). */
    PUSH_MARK(127, ctx->size, ctx->size, MD_MARK_RESOLVED);

abort:
    return ret;
}
   3605 
   3606 static void
   3607 md_analyze_bracket(MD_CTX* ctx, int mark_index)
   3608 {
   3609     /* We cannot really resolve links here as for that we would need
   3610      * more context. E.g. a following pair of brackets (reference link),
   3611      * or enclosing pair of brackets (if the inner is the link, the outer
   3612      * one cannot be.)
   3613      *
   3614      * Therefore we here only construct a list of '[' ']' pairs ordered by
   3615      * position of the closer. This allows us to analyze what is or is not
   3616      * link in the right order, from inside to outside in case of nested
   3617      * brackets.
   3618      *
   3619      * The resolving itself is deferred to md_resolve_links().
   3620      */
   3621 
   3622     MD_MARK* mark = &ctx->marks[mark_index];
   3623 
   3624     if(mark->flags & MD_MARK_POTENTIAL_OPENER) {
   3625         if(BRACKET_OPENERS.head != -1)
   3626             ctx->marks[BRACKET_OPENERS.tail].flags |= MD_MARK_HASNESTEDBRACKETS;
   3627 
   3628         md_mark_chain_append(ctx, &BRACKET_OPENERS, mark_index);
   3629         return;
   3630     }
   3631 
   3632     if(BRACKET_OPENERS.tail >= 0) {
   3633         /* Pop the opener from the chain. */
   3634         int opener_index = BRACKET_OPENERS.tail;
   3635         MD_MARK* opener = &ctx->marks[opener_index];
   3636         if(opener->prev >= 0)
   3637             ctx->marks[opener->prev].next = -1;
   3638         else
   3639             BRACKET_OPENERS.head = -1;
   3640         BRACKET_OPENERS.tail = opener->prev;
   3641 
   3642         /* Interconnect the opener and closer. */
   3643         opener->next = mark_index;
   3644         mark->prev = opener_index;
   3645 
   3646         /* Add the pair into chain of potential links for md_resolve_links().
   3647          * Note we misuse opener->prev for this as opener->next points to its
   3648          * closer. */
   3649         if(ctx->unresolved_link_tail >= 0)
   3650             ctx->marks[ctx->unresolved_link_tail].prev = opener_index;
   3651         else
   3652             ctx->unresolved_link_head = opener_index;
   3653         ctx->unresolved_link_tail = opener_index;
   3654         opener->prev = -1;
   3655     }
   3656 }
   3657 
   3658 /* Forward declaration. */
   3659 static void md_analyze_link_contents(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
   3660                                      int mark_beg, int mark_end);
   3661 
   3662 static int
   3663 md_resolve_links(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
   3664 {
   3665     int opener_index = ctx->unresolved_link_head;
   3666     OFF last_link_beg = 0;
   3667     OFF last_link_end = 0;
   3668     OFF last_img_beg = 0;
   3669     OFF last_img_end = 0;
   3670 
   3671     while(opener_index >= 0) {
   3672         MD_MARK* opener = &ctx->marks[opener_index];
   3673         int closer_index = opener->next;
   3674         MD_MARK* closer = &ctx->marks[closer_index];
   3675         int next_index = opener->prev;
   3676         MD_MARK* next_opener;
   3677         MD_MARK* next_closer;
   3678         MD_LINK_ATTR attr;
   3679         int is_link = FALSE;
   3680 
   3681         if(next_index >= 0) {
   3682             next_opener = &ctx->marks[next_index];
   3683             next_closer = &ctx->marks[next_opener->next];
   3684         } else {
   3685             next_opener = NULL;
   3686             next_closer = NULL;
   3687         }
   3688 
   3689         /* If nested ("[ [ ] ]"), we need to make sure that:
   3690          *   - The outer does not end inside of (...) belonging to the inner.
   3691          *   - The outer cannot be link if the inner is link (i.e. not image).
   3692          *
   3693          * (Note we here analyze from inner to outer as the marks are ordered
   3694          * by closer->beg.)
   3695          */
   3696         if((opener->beg < last_link_beg  &&  closer->end < last_link_end)  ||
   3697            (opener->beg < last_img_beg  &&  closer->end < last_img_end)  ||
   3698            (opener->beg < last_link_end  &&  opener->ch == '['))
   3699         {
   3700             opener_index = next_index;
   3701             continue;
   3702         }
   3703 
   3704         /* Recognize and resolve wiki links.
   3705          * Wiki-links maybe '[[destination]]' or '[[destination|label]]'.
   3706          */
   3707         if ((ctx->parser.flags & MD_FLAG_WIKILINKS) &&
   3708             (opener->end - opener->beg == 1) &&         /* not image */
   3709             next_opener != NULL &&                      /* double '[' opener */
   3710             next_opener->ch == '[' &&
   3711             (next_opener->beg == opener->beg - 1) &&
   3712             (next_opener->end - next_opener->beg == 1) &&
   3713             next_closer != NULL &&                      /* double ']' closer */
   3714             next_closer->ch == ']' &&
   3715             (next_closer->beg == closer->beg + 1) &&
   3716             (next_closer->end - next_closer->beg == 1))
   3717         {
   3718             MD_MARK* delim = NULL;
   3719             int delim_index;
   3720             OFF dest_beg, dest_end;
   3721 
   3722             is_link = TRUE;
   3723 
   3724             /* We don't allow destination to be longer than 100 characters.
   3725              * Lets scan to see whether there is '|'. (If not then the whole
   3726              * wiki-link has to be below the 100 characters.) */
   3727             delim_index = opener_index + 1;
   3728             while(delim_index < closer_index) {
   3729                 MD_MARK* m = &ctx->marks[delim_index];
   3730                 if(m->ch == '|') {
   3731                     delim = m;
   3732                     break;
   3733                 }
   3734                 if(m->ch != 'D'  &&  m->beg - opener->end > 100)
   3735                     break;
   3736                 delim_index++;
   3737             }
   3738             dest_beg = opener->end;
   3739             dest_end = (delim != NULL) ? delim->beg : closer->beg;
   3740             if(dest_end - dest_beg == 0 || dest_end - dest_beg > 100)
   3741                 is_link = FALSE;
   3742 
   3743             /* There may not be any new line in the destination. */
   3744             if(is_link) {
   3745                 OFF off;
   3746                 for(off = dest_beg; off < dest_end; off++) {
   3747                     if(ISNEWLINE(off)) {
   3748                         is_link = FALSE;
   3749                         break;
   3750                     }
   3751                 }
   3752             }
   3753 
   3754             if(is_link) {
   3755                 if(delim != NULL) {
   3756                     if(delim->end < closer->beg) {
   3757                         md_rollback(ctx, opener_index, delim_index, MD_ROLLBACK_ALL);
   3758                         md_rollback(ctx, delim_index, closer_index, MD_ROLLBACK_CROSSING);
   3759                         delim->flags |= MD_MARK_RESOLVED;
   3760                         opener->end = delim->beg;
   3761                     } else {
   3762                         /* The pipe is just before the closer: [[foo|]] */
   3763                         md_rollback(ctx, opener_index, closer_index, MD_ROLLBACK_ALL);
   3764                         closer->beg = delim->beg;
   3765                         delim = NULL;
   3766                     }
   3767                 }
   3768 
   3769                 opener->beg = next_opener->beg;
   3770                 opener->next = closer_index;
   3771                 opener->flags |= MD_MARK_OPENER | MD_MARK_RESOLVED;
   3772 
   3773                 closer->end = next_closer->end;
   3774                 closer->prev = opener_index;
   3775                 closer->flags |= MD_MARK_CLOSER | MD_MARK_RESOLVED;
   3776 
   3777                 last_link_beg = opener->beg;
   3778                 last_link_end = closer->end;
   3779 
   3780                 if(delim != NULL)
   3781                     md_analyze_link_contents(ctx, lines, n_lines, delim_index+1, closer_index);
   3782 
   3783                 opener_index = next_opener->prev;
   3784                 continue;
   3785             }
   3786         }
   3787 
   3788         if(next_opener != NULL  &&  next_opener->beg == closer->end) {
   3789             if(next_closer->beg > closer->end + 1) {
   3790                 /* Might be full reference link. */
   3791                 if(!(next_opener->flags & MD_MARK_HASNESTEDBRACKETS))
   3792                     is_link = md_is_link_reference(ctx, lines, n_lines, next_opener->beg, next_closer->end, &attr);
   3793             } else {
   3794                 /* Might be shortcut reference link. */
   3795                 if(!(opener->flags & MD_MARK_HASNESTEDBRACKETS))
   3796                     is_link = md_is_link_reference(ctx, lines, n_lines, opener->beg, closer->end, &attr);
   3797             }
   3798 
   3799             if(is_link < 0)
   3800                 return -1;
   3801 
   3802             if(is_link) {
   3803                 /* Eat the 2nd "[...]". */
   3804                 closer->end = next_closer->end;
   3805 
   3806                 /* Do not analyze the label as a standalone link in the next
   3807                  * iteration. */
   3808                 next_index = ctx->marks[next_index].prev;
   3809             }
   3810         } else {
   3811             if(closer->end < ctx->size  &&  CH(closer->end) == _T('(')) {
   3812                 /* Might be inline link. */
   3813                 OFF inline_link_end = UINT_MAX;
   3814 
   3815                 is_link = md_is_inline_link_spec(ctx, lines, n_lines, closer->end, &inline_link_end, &attr);
   3816                 if(is_link < 0)
   3817                     return -1;
   3818 
   3819                 /* Check the closing ')' is not inside an already resolved range
   3820                  * (i.e. a range with a higher priority), e.g. a code span. */
   3821                 if(is_link) {
   3822                     int i = closer_index + 1;
   3823 
   3824                     while(i < ctx->n_marks) {
   3825                         MD_MARK* mark = &ctx->marks[i];
   3826 
   3827                         if(mark->beg >= inline_link_end)
   3828                             break;
   3829                         if((mark->flags & (MD_MARK_OPENER | MD_MARK_RESOLVED)) == (MD_MARK_OPENER | MD_MARK_RESOLVED)) {
   3830                             if(ctx->marks[mark->next].beg >= inline_link_end) {
   3831                                 /* Cancel the link status. */
   3832                                 if(attr.title_needs_free)
   3833                                     free(attr.title);
   3834                                 is_link = FALSE;
   3835                                 break;
   3836                             }
   3837 
   3838                             i = mark->next + 1;
   3839                         } else {
   3840                             i++;
   3841                         }
   3842                     }
   3843                 }
   3844 
   3845                 if(is_link) {
   3846                     /* Eat the "(...)" */
   3847                     closer->end = inline_link_end;
   3848                 }
   3849             }
   3850 
   3851             if(!is_link) {
   3852                 /* Might be collapsed reference link. */
   3853                 if(!(opener->flags & MD_MARK_HASNESTEDBRACKETS))
   3854                     is_link = md_is_link_reference(ctx, lines, n_lines, opener->beg, closer->end, &attr);
   3855                 if(is_link < 0)
   3856                     return -1;
   3857             }
   3858         }
   3859 
   3860         if(is_link) {
   3861             /* Resolve the brackets as a link. */
   3862             opener->flags |= MD_MARK_OPENER | MD_MARK_RESOLVED;
   3863             closer->flags |= MD_MARK_CLOSER | MD_MARK_RESOLVED;
   3864 
   3865             /* If it is a link, we store the destination and title in the two
   3866              * dummy marks after the opener. */
   3867             MD_ASSERT(ctx->marks[opener_index+1].ch == 'D');
   3868             ctx->marks[opener_index+1].beg = attr.dest_beg;
   3869             ctx->marks[opener_index+1].end = attr.dest_end;
   3870 
   3871             MD_ASSERT(ctx->marks[opener_index+2].ch == 'D');
   3872             md_mark_store_ptr(ctx, opener_index+2, attr.title);
   3873             /* The title might or might not have been allocated for us. */
   3874             if(attr.title_needs_free)
   3875                 md_mark_chain_append(ctx, &PTR_CHAIN, opener_index+2);
   3876             ctx->marks[opener_index+2].prev = attr.title_size;
   3877 
   3878             if(opener->ch == '[') {
   3879                 last_link_beg = opener->beg;
   3880                 last_link_end = closer->end;
   3881             } else {
   3882                 last_img_beg = opener->beg;
   3883                 last_img_end = closer->end;
   3884             }
   3885 
   3886             md_analyze_link_contents(ctx, lines, n_lines, opener_index+1, closer_index);
   3887 
   3888             /* If the link text is formed by nothing but permissive autolink,
   3889              * suppress the autolink.
   3890              * See https://github.com/mity/md4c/issues/152 for more info. */
   3891             if(ctx->parser.flags & MD_FLAG_PERMISSIVEAUTOLINKS) {
   3892                 MD_MARK* first_nested;
   3893                 MD_MARK* last_nested;
   3894 
   3895                 first_nested = opener + 1;
   3896                 while(first_nested->ch == _T('D')  &&  first_nested < closer)
   3897                     first_nested++;
   3898 
   3899                 last_nested = closer - 1;
   3900                 while(first_nested->ch == _T('D')  &&  last_nested > opener)
   3901                     last_nested--;
   3902 
   3903                 if((first_nested->flags & MD_MARK_RESOLVED)  &&
   3904                    first_nested->beg == opener->end  &&
   3905                    ISANYOF_(first_nested->ch, _T("@:."))  &&
   3906                    first_nested->next == (last_nested - ctx->marks)  &&
   3907                    last_nested->end == closer->beg)
   3908                 {
   3909                     first_nested->ch = _T('D');
   3910                     first_nested->flags &= ~MD_MARK_RESOLVED;
   3911                     last_nested->ch = _T('D');
   3912                     last_nested->flags &= ~MD_MARK_RESOLVED;
   3913                 }
   3914             }
   3915         }
   3916 
   3917         opener_index = next_index;
   3918     }
   3919 
   3920     return 0;
   3921 }
   3922 
   3923 /* Analyze whether the mark '&' starts a HTML entity.
   3924  * If so, update its flags as well as flags of corresponding closer ';'. */
   3925 static void
   3926 md_analyze_entity(MD_CTX* ctx, int mark_index)
   3927 {
   3928     MD_MARK* opener = &ctx->marks[mark_index];
   3929     MD_MARK* closer;
   3930     OFF off;
   3931 
   3932     /* Cannot be entity if there is no closer as the next mark.
   3933      * (Any other mark between would mean strange character which cannot be
   3934      * part of the entity.
   3935      *
   3936      * So we can do all the work on '&' and do not call this later for the
   3937      * closing mark ';'.
   3938      */
   3939     if(mark_index + 1 >= ctx->n_marks)
   3940         return;
   3941     closer = &ctx->marks[mark_index+1];
   3942     if(closer->ch != ';')
   3943         return;
   3944 
   3945     if(md_is_entity(ctx, opener->beg, closer->end, &off)) {
   3946         MD_ASSERT(off == closer->end);
   3947 
   3948         md_resolve_range(ctx, NULL, mark_index, mark_index+1);
   3949         opener->end = closer->end;
   3950     }
   3951 }
   3952 
   3953 static void
   3954 md_analyze_table_cell_boundary(MD_CTX* ctx, int mark_index)
   3955 {
   3956     MD_MARK* mark = &ctx->marks[mark_index];
   3957     mark->flags |= MD_MARK_RESOLVED;
   3958 
   3959     md_mark_chain_append(ctx, &TABLECELLBOUNDARIES, mark_index);
   3960     ctx->n_table_cell_boundaries++;
   3961 }
   3962 
   3963 /* Split a longer mark into two. The new mark takes the given count of
   3964  * characters. May only be called if an adequate number of dummy 'D' marks
   3965  * follows.
   3966  */
   3967 static int
   3968 md_split_emph_mark(MD_CTX* ctx, int mark_index, SZ n)
   3969 {
   3970     MD_MARK* mark = &ctx->marks[mark_index];
   3971     int new_mark_index = mark_index + (mark->end - mark->beg - n);
   3972     MD_MARK* dummy = &ctx->marks[new_mark_index];
   3973 
   3974     MD_ASSERT(mark->end - mark->beg > n);
   3975     MD_ASSERT(dummy->ch == 'D');
   3976 
   3977     memcpy(dummy, mark, sizeof(MD_MARK));
   3978     mark->end -= n;
   3979     dummy->beg = mark->end;
   3980 
   3981     return new_mark_index;
   3982 }
   3983 
   3984 static void
   3985 md_analyze_emph(MD_CTX* ctx, int mark_index)
   3986 {
   3987     MD_MARK* mark = &ctx->marks[mark_index];
   3988     MD_MARKCHAIN* chain = md_mark_chain(ctx, mark_index);
   3989 
   3990     /* If we can be a closer, try to resolve with the preceding opener. */
   3991     if(mark->flags & MD_MARK_POTENTIAL_CLOSER) {
   3992         MD_MARK* opener = NULL;
   3993         int opener_index = 0;
   3994 
   3995         if(mark->ch == _T('*')) {
   3996             MD_MARKCHAIN* opener_chains[6];
   3997             int i, n_opener_chains;
   3998             unsigned flags = mark->flags;
   3999 
   4000             /* Apply the "rule of three". */
   4001             n_opener_chains = 0;
   4002             opener_chains[n_opener_chains++] = &ASTERISK_OPENERS_intraword_mod3_0;
   4003             if((flags & MD_MARK_EMPH_MOD3_MASK) != MD_MARK_EMPH_MOD3_2)
   4004                 opener_chains[n_opener_chains++] = &ASTERISK_OPENERS_intraword_mod3_1;
   4005             if((flags & MD_MARK_EMPH_MOD3_MASK) != MD_MARK_EMPH_MOD3_1)
   4006                 opener_chains[n_opener_chains++] = &ASTERISK_OPENERS_intraword_mod3_2;
   4007             opener_chains[n_opener_chains++] = &ASTERISK_OPENERS_extraword_mod3_0;
   4008             if(!(flags & MD_MARK_EMPH_INTRAWORD)  ||  (flags & MD_MARK_EMPH_MOD3_MASK) != MD_MARK_EMPH_MOD3_2)
   4009                 opener_chains[n_opener_chains++] = &ASTERISK_OPENERS_extraword_mod3_1;
   4010             if(!(flags & MD_MARK_EMPH_INTRAWORD)  ||  (flags & MD_MARK_EMPH_MOD3_MASK) != MD_MARK_EMPH_MOD3_1)
   4011                 opener_chains[n_opener_chains++] = &ASTERISK_OPENERS_extraword_mod3_2;
   4012 
   4013             /* Opener is the most recent mark from the allowed chains. */
   4014             for(i = 0; i < n_opener_chains; i++) {
   4015                 if(opener_chains[i]->tail >= 0) {
   4016                     int tmp_index = opener_chains[i]->tail;
   4017                     MD_MARK* tmp_mark = &ctx->marks[tmp_index];
   4018                     if(opener == NULL  ||  tmp_mark->end > opener->end) {
   4019                         opener_index = tmp_index;
   4020                         opener = tmp_mark;
   4021                     }
   4022                 }
   4023             }
   4024         } else {
   4025             /* Simple emph. mark */
   4026             if(chain->tail >= 0) {
   4027                 opener_index = chain->tail;
   4028                 opener = &ctx->marks[opener_index];
   4029             }
   4030         }
   4031 
   4032         /* Resolve, if we have found matching opener. */
   4033         if(opener != NULL) {
   4034             SZ opener_size = opener->end - opener->beg;
   4035             SZ closer_size = mark->end - mark->beg;
   4036             MD_MARKCHAIN* opener_chain = md_mark_chain(ctx, opener_index);
   4037 
   4038             if(opener_size > closer_size) {
   4039                 opener_index = md_split_emph_mark(ctx, opener_index, closer_size);
   4040                 md_mark_chain_append(ctx, opener_chain, opener_index);
   4041             } else if(opener_size < closer_size) {
   4042                 md_split_emph_mark(ctx, mark_index, closer_size - opener_size);
   4043             }
   4044 
   4045             md_rollback(ctx, opener_index, mark_index, MD_ROLLBACK_CROSSING);
   4046             md_resolve_range(ctx, opener_chain, opener_index, mark_index);
   4047             return;
   4048         }
   4049     }
   4050 
   4051     /* If we could not resolve as closer, we may be yet be an opener. */
   4052     if(mark->flags & MD_MARK_POTENTIAL_OPENER)
   4053         md_mark_chain_append(ctx, chain, mark_index);
   4054 }
   4055 
   4056 static void
   4057 md_analyze_tilde(MD_CTX* ctx, int mark_index)
   4058 {
   4059     MD_MARK* mark = &ctx->marks[mark_index];
   4060     MD_MARKCHAIN* chain = md_mark_chain(ctx, mark_index);
   4061 
   4062     /* We attempt to be Github Flavored Markdown compatible here. GFM accepts
   4063      * only tildes sequences of length 1 and 2, and the length of the opener
   4064      * and closer has to match. */
   4065 
   4066     if((mark->flags & MD_MARK_POTENTIAL_CLOSER)  &&  chain->head >= 0) {
   4067         int opener_index = chain->head;
   4068 
   4069         md_rollback(ctx, opener_index, mark_index, MD_ROLLBACK_CROSSING);
   4070         md_resolve_range(ctx, chain, opener_index, mark_index);
   4071         return;
   4072     }
   4073 
   4074     if(mark->flags & MD_MARK_POTENTIAL_OPENER)
   4075         md_mark_chain_append(ctx, chain, mark_index);
   4076 }
   4077 
   4078 static void
   4079 md_analyze_dollar(MD_CTX* ctx, int mark_index)
   4080 {
   4081     /* This should mimic the way inline equations work in LaTeX, so there
   4082      * can only ever be one item in the chain (i.e. the dollars can't be
   4083      * nested). This is basically the same as the md_analyze_tilde function,
   4084      * except that we require matching openers and closers to be of the same
   4085      * length.
   4086      *
   4087      * E.g.: $abc$$def$$ => abc (display equation) def (end equation) */
   4088     if(DOLLAR_OPENERS.head >= 0) {
   4089         /* If the potential closer has a non-matching number of $, discard */
   4090         MD_MARK* open = &ctx->marks[DOLLAR_OPENERS.head];
   4091         MD_MARK* close = &ctx->marks[mark_index];
   4092 
   4093         int opener_index = DOLLAR_OPENERS.head;
   4094         md_rollback(ctx, opener_index, mark_index, MD_ROLLBACK_ALL);
   4095         if (open->end - open->beg == close->end - close->beg) {
   4096             /* We are the matching closer */
   4097             md_resolve_range(ctx, &DOLLAR_OPENERS, opener_index, mark_index);
   4098             return;
   4099         }
   4100     }
   4101 
   4102     md_mark_chain_append(ctx, &DOLLAR_OPENERS, mark_index);
   4103 }
   4104 
   4105 static void
   4106 md_analyze_permissive_url_autolink(MD_CTX* ctx, int mark_index)
   4107 {
   4108     MD_MARK* opener = &ctx->marks[mark_index];
   4109     int closer_index = mark_index + 1;
   4110     MD_MARK* closer = &ctx->marks[closer_index];
   4111     MD_MARK* next_resolved_mark;
   4112     OFF off = opener->end;
   4113     int n_dots = FALSE;
   4114     int has_underscore_in_last_seg = FALSE;
   4115     int has_underscore_in_next_to_last_seg = FALSE;
   4116     int n_opened_parenthesis = 0;
   4117     int n_excess_parenthesis = 0;
   4118 
   4119     /* Check for domain. */
   4120     while(off < ctx->size) {
   4121         if(ISALNUM(off) || CH(off) == _T('-')) {
   4122             off++;
   4123         } else if(CH(off) == _T('.')) {
   4124             /* We must see at least one period. */
   4125             n_dots++;
   4126             has_underscore_in_next_to_last_seg = has_underscore_in_last_seg;
   4127             has_underscore_in_last_seg = FALSE;
   4128             off++;
   4129         } else if(CH(off) == _T('_')) {
   4130             /* No underscore may be present in the last two domain segments. */
   4131             has_underscore_in_last_seg = TRUE;
   4132             off++;
   4133         } else {
   4134             break;
   4135         }
   4136     }
   4137     if(off > opener->end  &&  CH(off-1) == _T('.')) {
   4138         off--;
   4139         n_dots--;
   4140     }
   4141     if(off <= opener->end || n_dots == 0 || has_underscore_in_next_to_last_seg || has_underscore_in_last_seg)
   4142         return;
   4143 
   4144     /* Check for path. */
   4145     next_resolved_mark = closer + 1;
   4146     while(next_resolved_mark->ch == 'D' || !(next_resolved_mark->flags & MD_MARK_RESOLVED))
   4147         next_resolved_mark++;
   4148     while(off < next_resolved_mark->beg  &&  CH(off) != _T('<')  &&  !ISWHITESPACE(off)  &&  !ISNEWLINE(off)) {
   4149         /* Parenthesis must be balanced. */
   4150         if(CH(off) == _T('(')) {
   4151             n_opened_parenthesis++;
   4152         } else if(CH(off) == _T(')')) {
   4153             if(n_opened_parenthesis > 0)
   4154                 n_opened_parenthesis--;
   4155             else
   4156                 n_excess_parenthesis++;
   4157         }
   4158 
   4159         off++;
   4160     }
   4161 
   4162     /* Trim a trailing punctuation from the end. */
   4163     while(TRUE) {
   4164         if(ISANYOF(off-1, _T("?!.,:*_~"))) {
   4165             off--;
   4166         } else if(CH(off-1) == ')'  &&  n_excess_parenthesis > 0) {
   4167             /* Unmatched ')' can be in an interior of the path but not at the
   4168              * of it, so the auto-link may be safely nested in a parenthesis
   4169              * pair. */
   4170             off--;
   4171             n_excess_parenthesis--;
   4172         } else {
   4173             break;
   4174         }
   4175     }
   4176 
   4177     /* Ok. Lets call it an auto-link. Adapt opener and create closer to zero
   4178      * length so all the contents becomes the link text. */
   4179     MD_ASSERT(closer->ch == 'D' ||
   4180               ((ctx->parser.flags & MD_FLAG_PERMISSIVEWWWAUTOLINKS) &&
   4181                (closer->ch == '.' || closer->ch == ':' || closer->ch == '@')));
   4182     opener->end = opener->beg;
   4183     closer->ch = opener->ch;
   4184     closer->beg = off;
   4185     closer->end = off;
   4186     md_resolve_range(ctx, NULL, mark_index, closer_index);
   4187 }
   4188 
   4189 /* The permissive autolinks do not have to be enclosed in '<' '>' but we
   4190  * instead impose stricter rules what is understood as an e-mail address
   4191  * here. Actually any non-alphanumeric characters with exception of '.'
   4192  * are prohibited both in username and after '@'. */
   4193 static void
   4194 md_analyze_permissive_email_autolink(MD_CTX* ctx, int mark_index)
   4195 {
   4196     MD_MARK* opener = &ctx->marks[mark_index];
   4197     int closer_index;
   4198     MD_MARK* closer;
   4199     OFF beg = opener->beg;
   4200     OFF end = opener->end;
   4201     int dot_count = 0;
   4202 
   4203     MD_ASSERT(opener->ch == _T('@'));
   4204 
   4205     /* Scan for name before '@'. */
   4206     while(beg > 0  &&  (ISALNUM(beg-1) || ISANYOF(beg-1, _T(".-_+"))))
   4207         beg--;
   4208 
   4209     /* Scan for domain after '@'. */
   4210     while(end < ctx->size  &&  (ISALNUM(end) || ISANYOF(end, _T(".-_")))) {
   4211         if(CH(end) == _T('.'))
   4212             dot_count++;
   4213         end++;
   4214     }
   4215     if(CH(end-1) == _T('.')) {  /* Final '.' not part of it. */
   4216         dot_count--;
   4217         end--;
   4218     }
   4219     else if(ISANYOF2(end-1, _T('-'), _T('_'))) /* These are forbidden at the end. */
   4220         return;
   4221     if(CH(end-1) == _T('@')  ||  dot_count == 0)
   4222         return;
   4223 
   4224     /* Ok. Lets call it auto-link. Adapt opener and create closer to zero
   4225      * length so all the contents becomes the link text. */
   4226     closer_index = mark_index + 1;
   4227     closer = &ctx->marks[closer_index];
   4228     if (closer->ch != 'D') return;
   4229 
   4230     opener->beg = beg;
   4231     opener->end = beg;
   4232     closer->ch = opener->ch;
   4233     closer->beg = end;
   4234     closer->end = end;
   4235     md_resolve_range(ctx, NULL, mark_index, closer_index);
   4236 }
   4237 
   4238 static inline void
   4239 md_analyze_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
   4240                  int mark_beg, int mark_end, const CHAR* mark_chars)
   4241 {
   4242     int i = mark_beg;
   4243     MD_UNUSED(lines);
   4244     MD_UNUSED(n_lines);
   4245 
   4246     while(i < mark_end) {
   4247         MD_MARK* mark = &ctx->marks[i];
   4248 
   4249         /* Skip resolved spans. */
   4250         if(mark->flags & MD_MARK_RESOLVED) {
   4251             if(mark->flags & MD_MARK_OPENER) {
   4252                 MD_ASSERT(i < mark->next);
   4253                 i = mark->next + 1;
   4254             } else {
   4255                 i++;
   4256             }
   4257             continue;
   4258         }
   4259 
   4260         /* Skip marks we do not want to deal with. */
   4261         if(!ISANYOF_(mark->ch, mark_chars)) {
   4262             i++;
   4263             continue;
   4264         }
   4265 
   4266         /* Analyze the mark. */
   4267         switch(mark->ch) {
   4268             case '[':   /* Pass through. */
   4269             case '!':   /* Pass through. */
   4270             case ']':   md_analyze_bracket(ctx, i); break;
   4271             case '&':   md_analyze_entity(ctx, i); break;
   4272             case '|':   md_analyze_table_cell_boundary(ctx, i); break;
   4273             case '_':   /* Pass through. */
   4274             case '*':   md_analyze_emph(ctx, i); break;
   4275             case '~':   md_analyze_tilde(ctx, i); break;
   4276             case '$':   md_analyze_dollar(ctx, i); break;
   4277             case '.':   /* Pass through. */
   4278             case ':':   md_analyze_permissive_url_autolink(ctx, i); break;
   4279             case '@':   md_analyze_permissive_email_autolink(ctx, i); break;
   4280         }
   4281 
   4282         i++;
   4283     }
   4284 }
   4285 
   4286 /* Analyze marks (build ctx->marks). */
   4287 static int
   4288 md_analyze_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines, int table_mode)
   4289 {
   4290     int ret;
   4291 
   4292     /* Reset the previously collected stack of marks. */
   4293     ctx->n_marks = 0;
   4294 
   4295     /* Collect all marks. */
   4296     MD_CHECK(md_collect_marks(ctx, lines, n_lines, table_mode));
   4297 
   4298     /* (1) Links. */
   4299     md_analyze_marks(ctx, lines, n_lines, 0, ctx->n_marks, _T("[]!"));
   4300     MD_CHECK(md_resolve_links(ctx, lines, n_lines));
   4301     BRACKET_OPENERS.head = -1;
   4302     BRACKET_OPENERS.tail = -1;
   4303     ctx->unresolved_link_head = -1;
   4304     ctx->unresolved_link_tail = -1;
   4305 
   4306     if(table_mode) {
   4307         /* (2) Analyze table cell boundaries.
   4308          * Note we reset TABLECELLBOUNDARIES chain prior to the call md_analyze_marks(),
   4309          * not after, because caller may need it. */
   4310         MD_ASSERT(n_lines == 1);
   4311         TABLECELLBOUNDARIES.head = -1;
   4312         TABLECELLBOUNDARIES.tail = -1;
   4313         ctx->n_table_cell_boundaries = 0;
   4314         md_analyze_marks(ctx, lines, n_lines, 0, ctx->n_marks, _T("|"));
   4315         return ret;
   4316     }
   4317 
   4318     /* (3) Emphasis and strong emphasis; permissive autolinks. */
   4319     md_analyze_link_contents(ctx, lines, n_lines, 0, ctx->n_marks);
   4320 
   4321 abort:
   4322     return ret;
   4323 }
   4324 
   4325 static void
   4326 md_analyze_link_contents(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
   4327                          int mark_beg, int mark_end)
   4328 {
   4329     int i;
   4330 
   4331     md_analyze_marks(ctx, lines, n_lines, mark_beg, mark_end, _T("&"));
   4332     md_analyze_marks(ctx, lines, n_lines, mark_beg, mark_end, _T("*_~$@:."));
   4333 
   4334     for(i = OPENERS_CHAIN_FIRST; i <= OPENERS_CHAIN_LAST; i++) {
   4335         ctx->mark_chains[i].head = -1;
   4336         ctx->mark_chains[i].tail = -1;
   4337     }
   4338 }
   4339 
   4340 static int
   4341 md_enter_leave_span_a(MD_CTX* ctx, int enter, MD_SPANTYPE type,
   4342                       const CHAR* dest, SZ dest_size, int prohibit_escapes_in_dest,
   4343                       const CHAR* title, SZ title_size)
   4344 {
   4345     MD_ATTRIBUTE_BUILD href_build = { 0 };
   4346     MD_ATTRIBUTE_BUILD title_build = { 0 };
   4347     MD_SPAN_A_DETAIL det;
   4348     int ret = 0;
   4349 
   4350     /* Note we here rely on fact that MD_SPAN_A_DETAIL and
   4351      * MD_SPAN_IMG_DETAIL are binary-compatible. */
   4352     memset(&det, 0, sizeof(MD_SPAN_A_DETAIL));
   4353     MD_CHECK(md_build_attribute(ctx, dest, dest_size,
   4354                     (prohibit_escapes_in_dest ? MD_BUILD_ATTR_NO_ESCAPES : 0),
   4355                     &det.href, &href_build));
   4356     MD_CHECK(md_build_attribute(ctx, title, title_size, 0, &det.title, &title_build));
   4357 
   4358     if(enter)
   4359         MD_ENTER_SPAN(type, &det);
   4360     else
   4361         MD_LEAVE_SPAN(type, &det);
   4362 
   4363 abort:
   4364     md_free_attribute(ctx, &href_build);
   4365     md_free_attribute(ctx, &title_build);
   4366     return ret;
   4367 }
   4368 
   4369 static int
   4370 md_enter_leave_span_wikilink(MD_CTX* ctx, int enter, const CHAR* target, SZ target_size)
   4371 {
   4372     MD_ATTRIBUTE_BUILD target_build = { 0 };
   4373     MD_SPAN_WIKILINK_DETAIL det;
   4374     int ret = 0;
   4375 
   4376     memset(&det, 0, sizeof(MD_SPAN_WIKILINK_DETAIL));
   4377     MD_CHECK(md_build_attribute(ctx, target, target_size, 0, &det.target, &target_build));
   4378 
   4379     if (enter)
   4380         MD_ENTER_SPAN(MD_SPAN_WIKILINK, &det);
   4381     else
   4382         MD_LEAVE_SPAN(MD_SPAN_WIKILINK, &det);
   4383 
   4384 abort:
   4385     md_free_attribute(ctx, &target_build);
   4386     return ret;
   4387 }
   4388 
   4389 
   4390 /* Render the output, accordingly to the analyzed ctx->marks. */
   4391 static int
   4392 md_process_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
   4393 {
   4394     MD_TEXTTYPE text_type;
   4395     const MD_LINE* line = lines;
   4396     MD_MARK* prev_mark = NULL;
   4397     MD_MARK* mark;
   4398     OFF off = lines[0].beg;
   4399     OFF end = lines[n_lines-1].end;
   4400     int enforce_hardbreak = 0;
   4401     int ret = 0;
   4402 
   4403     /* Find first resolved mark. Note there is always at least one resolved
   4404      * mark,  the dummy last one after the end of the latest line we actually
   4405      * never really reach. This saves us of a lot of special checks and cases
   4406      * in this function. */
   4407     mark = ctx->marks;
   4408     while(!(mark->flags & MD_MARK_RESOLVED))
   4409         mark++;
   4410 
   4411     text_type = MD_TEXT_NORMAL;
   4412 
   4413     while(1) {
   4414         /* Process the text up to the next mark or end-of-line. */
   4415         OFF tmp = (line->end < mark->beg ? line->end : mark->beg);
   4416         if(tmp > off) {
   4417             MD_TEXT(text_type, STR(off), tmp - off);
   4418             off = tmp;
   4419         }
   4420 
   4421         /* If reached the mark, process it and move to next one. */
   4422         if(off >= mark->beg) {
   4423             switch(mark->ch) {
   4424                 case '\\':      /* Backslash escape. */
   4425                     if(ISNEWLINE(mark->beg+1))
   4426                         enforce_hardbreak = 1;
   4427                     else
   4428                         MD_TEXT(text_type, STR(mark->beg+1), 1);
   4429                     break;
   4430 
   4431                 case ' ':       /* Non-trivial space. */
   4432                     MD_TEXT(text_type, _T(" "), 1);
   4433                     break;
   4434 
   4435                 case '`':       /* Code span. */
   4436                     if(mark->flags & MD_MARK_OPENER) {
   4437                         MD_ENTER_SPAN(MD_SPAN_CODE, NULL);
   4438                         text_type = MD_TEXT_CODE;
   4439                     } else {
   4440                         MD_LEAVE_SPAN(MD_SPAN_CODE, NULL);
   4441                         text_type = MD_TEXT_NORMAL;
   4442                     }
   4443                     break;
   4444 
   4445                 case '-': /* faint */
   4446                     if(mark->flags & MD_MARK_OPENER) {
   4447                         MD_ENTER_SPAN(MD_SPAN_FNT, NULL);
   4448                     } else {
   4449                         MD_LEAVE_SPAN(MD_SPAN_FNT, NULL);
   4450                     }
   4451                     break;
   4452 
   4453                 case '%': /* inverse */
   4454                     if(mark->flags & MD_MARK_OPENER) {
   4455                         MD_ENTER_SPAN(MD_SPAN_INV, NULL);
   4456                     } else {
   4457                         MD_LEAVE_SPAN(MD_SPAN_INV, NULL);
   4458                     }
   4459                     break;
   4460 
   4461                 case '^': /* blink */
   4462                     if(mark->flags & MD_MARK_OPENER) {
   4463                         MD_ENTER_SPAN(MD_SPAN_BLI, NULL);
   4464                     } else {
   4465                         MD_LEAVE_SPAN(MD_SPAN_BLI, NULL);
   4466                     }
   4467                     break;
   4468 
   4469                 case '_':       /* Underline (or emphasis if we fall through). */
   4470                     if(ctx->parser.flags & MD_FLAG_UNDERLINE) {
   4471                         if(mark->flags & MD_MARK_OPENER) {
   4472                             /* while(off < mark->end) { */
   4473                             /*     MD_ENTER_SPAN(MD_SPAN_U, NULL); */
   4474                             /*     off++; */
   4475                             /* } */
   4476                             if((mark->end - off) % 2) {
   4477                                 MD_ENTER_SPAN(MD_SPAN_U, NULL);
   4478                                 off++;
   4479                             }
   4480                             while(off + 1 < mark->end) {
   4481                                 MD_ENTER_SPAN(MD_SPAN_STRONG, NULL);
   4482                                 off += 2;
   4483                             }
   4484                         } else {
   4485                             /* while(off < mark->end) { */
   4486                             /*     MD_LEAVE_SPAN(MD_SPAN_U, NULL); */
   4487                             /*     off++; */
   4488                             /* } */
   4489                             while(off + 1 < mark->end) {
   4490                                 MD_LEAVE_SPAN(MD_SPAN_STRONG, NULL);
   4491                                 off += 2;
   4492                             }
   4493                             if((mark->end - off) % 2) {
   4494                                 MD_LEAVE_SPAN(MD_SPAN_U, NULL);
   4495                                 off++;
   4496                             }
   4497                         }
   4498                         break;
   4499                     }
   4500                     MD_FALLTHROUGH();
   4501 
   4502                 case '*':       /* Emphasis, strong emphasis. */
   4503                     if(mark->flags & MD_MARK_OPENER) {
   4504                         if((mark->end - off) % 2) {
   4505                             MD_ENTER_SPAN(MD_SPAN_EM, NULL);
   4506                             off++;
   4507                         }
   4508                         while(off + 1 < mark->end) {
   4509                             MD_ENTER_SPAN(MD_SPAN_STRONG, NULL);
   4510                             off += 2;
   4511                         }
   4512                     } else {
   4513                         while(off + 1 < mark->end) {
   4514                             MD_LEAVE_SPAN(MD_SPAN_STRONG, NULL);
   4515                             off += 2;
   4516                         }
   4517                         if((mark->end - off) % 2) {
   4518                             MD_LEAVE_SPAN(MD_SPAN_EM, NULL);
   4519                             off++;
   4520                         }
   4521                     }
   4522                     break;
   4523 
   4524                 case '~': /* crossed */
   4525                     if(mark->flags & MD_MARK_OPENER)
   4526                         MD_ENTER_SPAN(MD_SPAN_DEL, NULL);
   4527                     else
   4528                         MD_LEAVE_SPAN(MD_SPAN_DEL, NULL);
   4529                     break;
   4530 
   4531                 case '$':
   4532                     if(mark->flags & MD_MARK_OPENER) {
   4533                         MD_ENTER_SPAN((mark->end - off) % 2 ? MD_SPAN_LATEXMATH : MD_SPAN_LATEXMATH_DISPLAY, NULL);
   4534                         text_type = MD_TEXT_LATEXMATH;
   4535                     } else {
   4536                         MD_LEAVE_SPAN((mark->end - off) % 2 ? MD_SPAN_LATEXMATH : MD_SPAN_LATEXMATH_DISPLAY, NULL);
   4537                         text_type = MD_TEXT_NORMAL;
   4538                     }
   4539                     break;
   4540 
   4541                 case '!': /* conceal/hidden */
   4542                     if (mark->prev == -1) {
   4543                         if (mark->flags & MD_MARK_OPENER) {
   4544                             MD_ENTER_SPAN(MD_SPAN_COC, NULL);
   4545                             break;
   4546                         }
   4547                     }
   4548                     else {
   4549                         if (ctx->marks[mark->prev].ch == '!' && !(mark->flags & MD_MARK_OPENER)) {
   4550                             MD_LEAVE_SPAN(MD_SPAN_COC, NULL);
   4551                             break;
   4552                         }
   4553                     }
   4554                 case '[':       /* Link, wiki link, image, anchor. */
   4555                 case ']':
   4556                 {
   4557                     const MD_MARK* opener = (mark->ch != ']' ? mark : &ctx->marks[mark->prev]);
   4558                     const MD_MARK* closer = &ctx->marks[opener->next];
   4559                     const MD_MARK* dest_mark;
   4560                     const MD_MARK* title_mark;
   4561 
   4562                     if ((opener->ch == '[' && closer->ch == ']') &&
   4563                         opener->end - opener->beg >= 2 &&
   4564                         closer->end - closer->beg >= 2)
   4565                     {
   4566                         int has_label = (opener->end - opener->beg > 2);
   4567                         SZ target_sz;
   4568 
   4569                         if(has_label)
   4570                             target_sz = opener->end - (opener->beg+2);
   4571                         else
   4572                             target_sz = closer->beg - opener->end;
   4573 
   4574                         MD_CHECK(md_enter_leave_span_wikilink(ctx, (mark->ch != ']'),
   4575                                  has_label ? STR(opener->beg+2) : STR(opener->end),
   4576                                  target_sz));
   4577 
   4578                         break;
   4579                     }
   4580 
   4581                     if ((opener->ch == '[' && closer->ch == ']') &&
   4582                         opener->end - opener->beg == 2 &&
   4583                         closer->end - closer->beg == 1 &&
   4584                         CH(opener->beg+1) == _T('|'))
   4585                     {
   4586                         if(mark->flags & MD_MARK_OPENER) {
   4587                             MD_ENTER_SPAN(MD_SPAN_ANCHOR, NULL);
   4588                         } else {
   4589                             MD_LEAVE_SPAN(MD_SPAN_ANCHOR, NULL);
   4590                         }
   4591                     }
   4592 
   4593                     dest_mark = opener+1;
   4594                     MD_ASSERT(dest_mark->ch == 'D');
   4595                     title_mark = opener+2;
   4596                     if (title_mark->ch != 'D') break;
   4597 
   4598                     MD_CHECK(md_enter_leave_span_a(ctx, (mark->ch != ']'),
   4599                                 (opener->ch == '!' ? MD_SPAN_IMG : MD_SPAN_A),
   4600                                 STR(dest_mark->beg), dest_mark->end - dest_mark->beg, FALSE,
   4601                                 md_mark_get_ptr(ctx, (int)(title_mark - ctx->marks)),
   4602 								title_mark->prev));
   4603 
   4604                     /* link/image closer may span multiple lines. */
   4605                     if(mark->ch == ']') {
   4606                         while(mark->end > line->end)
   4607                             line++;
   4608                     }
   4609 
   4610                     break;
   4611                 }
   4612 
   4613                 case '<':
   4614                 case '>':       /* Autolink or raw HTML. */
   4615                     if(!(mark->flags & MD_MARK_AUTOLINK)) {
   4616                         /* Raw HTML. */
   4617                         if(mark->flags & MD_MARK_OPENER)
   4618                             text_type = MD_TEXT_HTML;
   4619                         else
   4620                             text_type = MD_TEXT_NORMAL;
   4621                         break;
   4622                     }
   4623                     /* Pass through, if auto-link. */
   4624                     MD_FALLTHROUGH();
   4625 
   4626                 case '@':       /* Permissive e-mail autolink. */
   4627                 case ':':       /* Permissive URL autolink. */
   4628                 case '.':       /* Permissive WWW autolink. */
   4629                 {
   4630                     MD_MARK* opener = ((mark->flags & MD_MARK_OPENER) ? mark : &ctx->marks[mark->prev]);
   4631                     MD_MARK* closer = &ctx->marks[opener->next];
   4632                     const CHAR* dest = STR(opener->end);
   4633                     SZ dest_size = closer->beg - opener->end;
   4634 
   4635                     /* For permissive auto-links we do not know closer mark
   4636                      * position at the time of md_collect_marks(), therefore
   4637                      * it can be out-of-order in ctx->marks[].
   4638                      *
   4639                      * With this flag, we make sure that we output the closer
   4640                      * only if we processed the opener. */
   4641                     if(mark->flags & MD_MARK_OPENER)
   4642                         closer->flags |= MD_MARK_VALIDPERMISSIVEAUTOLINK;
   4643 
   4644                     if(opener->ch == '@' || opener->ch == '.') {
   4645                         dest_size += 7;
   4646                         MD_TEMP_BUFFER(dest_size * sizeof(CHAR));
   4647                         memcpy(ctx->buffer,
   4648                                 (opener->ch == '@' ? _T("mailto:") : _T("http://")),
   4649                                 7 * sizeof(CHAR));
   4650                         memcpy(ctx->buffer + 7, dest, (dest_size-7) * sizeof(CHAR));
   4651                         dest = ctx->buffer;
   4652                     }
   4653 
   4654                     if(closer->flags & MD_MARK_VALIDPERMISSIVEAUTOLINK)
   4655                         MD_CHECK(md_enter_leave_span_a(ctx, (mark->flags & MD_MARK_OPENER),
   4656                                     MD_SPAN_A, dest, dest_size, TRUE, NULL, 0));
   4657                     break;
   4658                 }
   4659 
   4660                 case '&':       /* Entity. */
   4661                     MD_TEXT(MD_TEXT_ENTITY, STR(mark->beg), mark->end - mark->beg);
   4662                     break;
   4663 
   4664                 case '\0':
   4665                     MD_TEXT(MD_TEXT_NULLCHAR, _T(""), 1);
   4666                     break;
   4667 
   4668                 case 127:
   4669                     goto abort;
   4670             }
   4671 
   4672             off = mark->end;
   4673 
   4674             /* Move to next resolved mark. */
   4675             prev_mark = mark;
   4676             mark++;
   4677             while(!(mark->flags & MD_MARK_RESOLVED)  ||  mark->beg < off)
   4678                 mark++;
   4679         }
   4680 
   4681         /* If reached end of line, move to next one. */
   4682         if(off >= line->end) {
   4683             /* If it is the last line, we are done. */
   4684             if(off >= end)
   4685                 break;
   4686 
   4687             if(text_type == MD_TEXT_CODE || text_type == MD_TEXT_LATEXMATH) {
   4688                 OFF tmp;
   4689 
   4690                 MD_ASSERT(prev_mark != NULL);
   4691                 MD_ASSERT(ISANYOF2_(prev_mark->ch, '`', '$')  &&  (prev_mark->flags & MD_MARK_OPENER));
   4692                 MD_ASSERT(ISANYOF2_(mark->ch, '`', '$')  &&  (mark->flags & MD_MARK_CLOSER));
   4693 
   4694                 /* Inside a code span, trailing line whitespace has to be
   4695                  * outputted. */
   4696                 tmp = off;
   4697                 while(off < ctx->size  &&  ISBLANK(off))
   4698                     off++;
   4699                 if(off > tmp)
   4700                     MD_TEXT(text_type, STR(tmp), off-tmp);
   4701 
   4702                 /* and new lines are transformed into single spaces. */
   4703                 if(prev_mark->end < off  &&  off < mark->beg)
   4704                     MD_TEXT(text_type, _T(" "), 1);
   4705             } else if(text_type == MD_TEXT_HTML) {
   4706                 /* Inside raw HTML, we output the new line verbatim, including
   4707                  * any trailing spaces. */
   4708                 OFF tmp = off;
   4709 
   4710                 while(tmp < end  &&  ISBLANK(tmp))
   4711                     tmp++;
   4712                 if(tmp > off)
   4713                     MD_TEXT(MD_TEXT_HTML, STR(off), tmp - off);
   4714                 MD_TEXT(MD_TEXT_HTML, _T("\n"), 1);
   4715             } else {
   4716                 /* Output soft or hard line break. */
   4717                 MD_TEXTTYPE break_type = MD_TEXT_SOFTBR;
   4718 
   4719                 if(text_type == MD_TEXT_NORMAL) {
   4720                     if(enforce_hardbreak)
   4721                         break_type = MD_TEXT_BR;
   4722                     else if((CH(line->end) == _T(' ') && CH(line->end+1) == _T(' ')))
   4723                         break_type = MD_TEXT_BR;
   4724                 }
   4725 
   4726                 MD_TEXT(break_type, _T("\n"), 1);
   4727             }
   4728 
   4729             /* Move to the next line. */
   4730             line++;
   4731             off = line->beg;
   4732 
   4733             enforce_hardbreak = 0;
   4734         }
   4735     }
   4736 
   4737 abort:
   4738     return ret;
   4739 }
   4740 
   4741 
   4742 /***************************
   4743  ***  Processing Tables  ***
   4744  ***************************/
   4745 
   4746 static void
   4747 md_analyze_table_alignment(MD_CTX* ctx, OFF beg, OFF end, MD_ALIGN* align, int n_align)
   4748 {
   4749     static const MD_ALIGN align_map[] = { MD_ALIGN_DEFAULT, MD_ALIGN_LEFT, MD_ALIGN_RIGHT, MD_ALIGN_CENTER };
   4750     OFF off = beg;
   4751 
   4752     while(n_align > 0) {
   4753         int index = 0;  /* index into align_map[] */
   4754 
   4755         while(CH(off) != _T('-'))
   4756             off++;
   4757         if(off > beg  &&  CH(off-1) == _T(':'))
   4758             index |= 1;
   4759         while(off < end  &&  CH(off) == _T('-'))
   4760             off++;
   4761         if(off < end  &&  CH(off) == _T(':'))
   4762             index |= 2;
   4763 
   4764         *align = align_map[index];
   4765         align++;
   4766         n_align--;
   4767     }
   4768 
   4769 }
   4770 
   4771 /* Forward declaration. */
   4772 static int md_process_normal_block_contents(MD_CTX* ctx, const MD_LINE* lines, int n_lines);
   4773 
   4774 static int
   4775 md_process_table_cell(MD_CTX* ctx, MD_BLOCKTYPE cell_type, MD_ALIGN align, OFF beg, OFF end)
   4776 {
   4777     MD_LINE line;
   4778     MD_BLOCK_TD_DETAIL det;
   4779     int ret = 0;
   4780 
   4781     while(beg < end  &&  ISWHITESPACE(beg))
   4782         beg++;
   4783     while(end > beg  &&  ISWHITESPACE(end-1))
   4784         end--;
   4785 
   4786     det.align = align;
   4787     line.beg = beg;
   4788     line.end = end;
   4789 
   4790     MD_ENTER_BLOCK(cell_type, &det);
   4791     MD_CHECK(md_process_normal_block_contents(ctx, &line, 1));
   4792     MD_LEAVE_BLOCK(cell_type, &det);
   4793 
   4794 abort:
   4795     return ret;
   4796 }
   4797 
   4798 static int
   4799 md_process_table_row(MD_CTX* ctx, MD_BLOCKTYPE cell_type, OFF beg, OFF end,
   4800                      const MD_ALIGN* align, int col_count)
   4801 {
   4802     MD_LINE line;
   4803     OFF* pipe_offs = NULL;
   4804     int i, j, k, n;
   4805     int ret = 0;
   4806 
   4807     line.beg = beg;
   4808     line.end = end;
   4809 
   4810     /* Break the line into table cells by identifying pipe characters who
   4811      * form the cell boundary. */
   4812     MD_CHECK(md_analyze_inlines(ctx, &line, 1, TRUE));
   4813 
   4814     /* We have to remember the cell boundaries in local buffer because
   4815      * ctx->marks[] shall be reused during cell contents processing. */
   4816     n = ctx->n_table_cell_boundaries + 2;
   4817     pipe_offs = (OFF*) malloc(n * sizeof(OFF));
   4818     if(pipe_offs == NULL) {
   4819         MD_LOG("malloc() failed.");
   4820         ret = -1;
   4821         goto abort;
   4822     }
   4823     j = 0;
   4824     pipe_offs[j++] = beg;
   4825     for(i = TABLECELLBOUNDARIES.head; i >= 0; i = ctx->marks[i].next) {
   4826         MD_MARK* mark = &ctx->marks[i];
   4827         pipe_offs[j++] = mark->end;
   4828     }
   4829     pipe_offs[j++] = end+1;
   4830 
   4831     /* Process cells. */
   4832     MD_ENTER_BLOCK(MD_BLOCK_TR, NULL);
   4833     k = 0;
   4834     for(i = 0; i < j-1  &&  k < col_count; i++) {
   4835         if(pipe_offs[i] < pipe_offs[i+1]-1)
   4836             MD_CHECK(md_process_table_cell(ctx, cell_type, align[k++], pipe_offs[i], pipe_offs[i+1]-1));
   4837     }
   4838     /* Make sure we call enough table cells even if the current table contains
   4839      * too few of them. */
   4840     while(k < col_count)
   4841         MD_CHECK(md_process_table_cell(ctx, cell_type, align[k++], 0, 0));
   4842     MD_LEAVE_BLOCK(MD_BLOCK_TR, NULL);
   4843 
   4844 abort:
   4845     free(pipe_offs);
   4846 
   4847     /* Free any temporary memory blocks stored within some dummy marks. */
   4848     for(i = PTR_CHAIN.head; i >= 0; i = ctx->marks[i].next)
   4849         free(md_mark_get_ptr(ctx, i));
   4850     PTR_CHAIN.head = -1;
   4851     PTR_CHAIN.tail = -1;
   4852 
   4853     return ret;
   4854 }
   4855 
   4856 static int
   4857 md_process_table_block_contents(MD_CTX* ctx, int col_count, const MD_LINE* lines, int n_lines)
   4858 {
   4859     MD_ALIGN* align;
   4860     int i;
   4861     int ret = 0;
   4862 
   4863     /* At least two lines have to be present: The column headers and the line
   4864      * with the underlines. */
   4865     MD_ASSERT(n_lines >= 2);
   4866 
   4867     align = malloc(col_count * sizeof(MD_ALIGN));
   4868     if(align == NULL) {
   4869         MD_LOG("malloc() failed.");
   4870         ret = -1;
   4871         goto abort;
   4872     }
   4873 
   4874     md_analyze_table_alignment(ctx, lines[1].beg, lines[1].end, align, col_count);
   4875 
   4876     MD_ENTER_BLOCK(MD_BLOCK_THEAD, NULL);
   4877     MD_CHECK(md_process_table_row(ctx, MD_BLOCK_TH,
   4878                         lines[0].beg, lines[0].end, align, col_count));
   4879     MD_LEAVE_BLOCK(MD_BLOCK_THEAD, NULL);
   4880 
   4881     if(n_lines > 2) {
   4882         MD_ENTER_BLOCK(MD_BLOCK_TBODY, NULL);
   4883         for(i = 2; i < n_lines; i++) {
   4884             MD_CHECK(md_process_table_row(ctx, MD_BLOCK_TD,
   4885                      lines[i].beg, lines[i].end, align, col_count));
   4886         }
   4887         MD_LEAVE_BLOCK(MD_BLOCK_TBODY, NULL);
   4888     }
   4889 
   4890 abort:
   4891     free(align);
   4892     return ret;
   4893 }
   4894 
   4895 
   4896 /**************************
   4897  ***  Processing Block  ***
   4898  **************************/
   4899 
/* Bit flags kept in MD_BLOCK::flags.
 * NOTE(review): the exact meaning of the CONTAINER_* and SETEXT_HEADER bits
 * is established by code outside this section; the names suggest blocks
 * which open/close a container and headers written in setext style. */
#define MD_BLOCK_CONTAINER_OPENER   0x01
#define MD_BLOCK_CONTAINER_CLOSER   0x02
#define MD_BLOCK_CONTAINER          (MD_BLOCK_CONTAINER_OPENER | MD_BLOCK_CONTAINER_CLOSER)
/* Set on a list block which is loose; list blocks without it are reported
 * as tight. */
#define MD_BLOCK_LOOSE_LIST         0x04
#define MD_BLOCK_SETEXT_HEADER      0x08

/* Record describing one block. For leaf blocks, the lines of the block
 * (MD_LINE or MD_VERBATIMLINE) are stored right behind the record, at
 * (block + 1). */
struct MD_BLOCK_tag {
    MD_BLOCKTYPE type  :  8;
    /* Combination of the MD_BLOCK_xxxx flags above. */
    unsigned flags     :  8;

    /* MD_BLOCK_H:      Header level (1 - 6)
     * MD_BLOCK_CODE:   Non-zero if fenced, zero if indented.
     * MD_BLOCK_LI:     Task mark character (0 if not task list item, 'x', 'X' or ' ').
     * MD_BLOCK_TABLE:  Column count (as determined by the table underline).
     * MD_BLOCK_UL:     List mark character.
     * MD_BLOCK_OL:     List mark delimiter character.
     */
    unsigned data      : 16;

    /* Leaf blocks:     Count of lines (MD_LINE or MD_VERBATIMLINE) on the block.
     * MD_BLOCK_LI:     Task mark offset in the input doc.
     * MD_BLOCK_OL:     Start item number.
     */
    unsigned n_lines;
};
   4923 
/* Record describing one container; ctx->containers[] is a vector of these.
 *
 * During block processing the vector is reused to track which of the nested
 * lists are loose and which are tight (see is_loose).
 *
 * NOTE(review): the members without a comment are only used outside this
 * section; judging by their names they hold the container mark character,
 * the task list item state, the list start number, the indentation of the
 * mark and of the contents, and offsets of the related block record and
 * task mark. Confirm against the code maintaining them. */
struct MD_CONTAINER_tag {
    CHAR ch;
    /* Non-zero for a loose list; a paragraph in a tight list is output
     * without its enclosing MD_BLOCK_P block. */
    unsigned is_loose    : 8;
    unsigned is_task     : 8;
    unsigned start;
    unsigned mark_indent;
    unsigned contents_indent;
    OFF block_byte_off;
    OFF task_mark_off;
};
   4934 
   4935 
   4936 static int
   4937 md_process_normal_block_contents(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
   4938 {
   4939     int i;
   4940     int ret;
   4941 
   4942     MD_CHECK(md_analyze_inlines(ctx, lines, n_lines, FALSE));
   4943     MD_CHECK(md_process_inlines(ctx, lines, n_lines));
   4944 
   4945 abort:
   4946     /* Free any temporary memory blocks stored within some dummy marks. */
   4947     for(i = PTR_CHAIN.head; i >= 0; i = ctx->marks[i].next)
   4948         free(md_mark_get_ptr(ctx, i));
   4949     PTR_CHAIN.head = -1;
   4950     PTR_CHAIN.tail = -1;
   4951 
   4952     return ret;
   4953 }
   4954 
   4955 static int
   4956 md_process_verbatim_block_contents(MD_CTX* ctx, MD_TEXTTYPE text_type, const MD_VERBATIMLINE* lines, int n_lines)
   4957 {
   4958     static const CHAR indent_chunk_str[] = _T("                ");
   4959     static const SZ indent_chunk_size = SIZEOF_ARRAY(indent_chunk_str) - 1;
   4960 
   4961     int i;
   4962     int ret = 0;
   4963 
   4964     for(i = 0; i < n_lines; i++) {
   4965         const MD_VERBATIMLINE* line = &lines[i];
   4966         int indent = line->indent;
   4967 
   4968         MD_ASSERT(indent >= 0);
   4969 
   4970         /* Output code indentation. */
   4971         while(indent > (int) indent_chunk_size) {
   4972             MD_TEXT(text_type, indent_chunk_str, indent_chunk_size);
   4973             indent -= indent_chunk_size;
   4974         }
   4975         if(indent > 0)
   4976             MD_TEXT(text_type, indent_chunk_str, indent);
   4977 
   4978         /* Output the code line itself. */
   4979         MD_TEXT_INSECURE(text_type, STR(line->beg), line->end - line->beg);
   4980 
   4981         /* Enforce end-of-line. */
   4982         MD_TEXT(text_type, _T("\n"), 1);
   4983     }
   4984 
   4985 abort:
   4986     return ret;
   4987 }
   4988 
   4989 static int
   4990 md_process_code_block_contents(MD_CTX* ctx, int is_fenced, const MD_VERBATIMLINE* lines, int n_lines)
   4991 {
   4992     if(is_fenced) {
   4993         /* Skip the first line in case of fenced code: It is the fence.
   4994          * (Only the starting fence is present due to logic in md_analyze_line().) */
   4995         lines++;
   4996         n_lines--;
   4997     } else {
   4998         /* Ignore blank lines at start/end of indented code block. */
   4999         while(n_lines > 0  &&  lines[0].beg == lines[0].end) {
   5000             lines++;
   5001             n_lines--;
   5002         }
   5003         while(n_lines > 0  &&  lines[n_lines-1].beg == lines[n_lines-1].end) {
   5004             n_lines--;
   5005         }
   5006     }
   5007 
   5008     if(n_lines == 0)
   5009         return 0;
   5010 
   5011     return md_process_verbatim_block_contents(ctx, MD_TEXT_CODE, lines, n_lines);
   5012 }
   5013 
   5014 static int
   5015 md_setup_fenced_code_detail(MD_CTX* ctx, const MD_BLOCK* block, MD_BLOCK_CODE_DETAIL* det,
   5016                             MD_ATTRIBUTE_BUILD* info_build, MD_ATTRIBUTE_BUILD* lang_build)
   5017 {
   5018     const MD_VERBATIMLINE* fence_line = (const MD_VERBATIMLINE*)(block + 1);
   5019     OFF beg = fence_line->beg;
   5020     OFF end = fence_line->end;
   5021     OFF lang_end;
   5022     CHAR fence_ch = CH(fence_line->beg);
   5023     int ret = 0;
   5024 
   5025     /* Skip the fence itself. */
   5026     while(beg < ctx->size  &&  CH(beg) == fence_ch)
   5027         beg++;
   5028     /* Trim initial spaces. */
   5029     while(beg < ctx->size  &&  CH(beg) == _T(' '))
   5030         beg++;
   5031 
   5032     /* Trim trailing spaces. */
   5033     while(end > beg  &&  CH(end-1) == _T(' '))
   5034         end--;
   5035 
   5036     /* Build info string attribute. */
   5037     MD_CHECK(md_build_attribute(ctx, STR(beg), end - beg, 0, &det->info, info_build));
   5038 
   5039     /* Build info string attribute. */
   5040     lang_end = beg;
   5041     while(lang_end < end  &&  !ISWHITESPACE(lang_end))
   5042         lang_end++;
   5043     MD_CHECK(md_build_attribute(ctx, STR(beg), lang_end - beg, 0, &det->lang, lang_build));
   5044 
   5045     det->fence_char = fence_ch;
   5046 
   5047 abort:
   5048     return ret;
   5049 }
   5050 
   5051 static int
   5052 md_process_leaf_block(MD_CTX* ctx, const MD_BLOCK* block)
   5053 {
   5054     union {
   5055         MD_BLOCK_H_DETAIL header;
   5056         MD_BLOCK_CODE_DETAIL code;
   5057         MD_BLOCK_TABLE_DETAIL table;
   5058     } det;
   5059     MD_ATTRIBUTE_BUILD info_build;
   5060     MD_ATTRIBUTE_BUILD lang_build;
   5061     int is_in_tight_list;
   5062     int clean_fence_code_detail = FALSE;
   5063     int ret = 0;
   5064 
   5065     memset(&det, 0, sizeof(det));
   5066 
   5067     if(ctx->n_containers == 0)
   5068         is_in_tight_list = FALSE;
   5069     else
   5070         is_in_tight_list = !ctx->containers[ctx->n_containers-1].is_loose;
   5071 
   5072     switch(block->type) {
   5073         case MD_BLOCK_H:
   5074             det.header.level = block->data;
   5075             break;
   5076 
   5077         case MD_BLOCK_CODE:
   5078             /* For fenced code block, we may need to set the info string. */
   5079             if(block->data != 0) {
   5080                 memset(&det.code, 0, sizeof(MD_BLOCK_CODE_DETAIL));
   5081                 clean_fence_code_detail = TRUE;
   5082                 MD_CHECK(md_setup_fenced_code_detail(ctx, block, &det.code, &info_build, &lang_build));
   5083             }
   5084             break;
   5085 
   5086         case MD_BLOCK_TABLE:
   5087             det.table.col_count = block->data;
   5088             det.table.head_row_count = 1;
   5089             det.table.body_row_count = block->n_lines - 2;
   5090             break;
   5091 
   5092         default:
   5093             /* Noop. */
   5094             break;
   5095     }
   5096 
   5097     if(!is_in_tight_list  ||  block->type != MD_BLOCK_P)
   5098         MD_ENTER_BLOCK(block->type, (void*) &det);
   5099 
   5100     /* Process the block contents accordingly to is type. */
   5101     switch(block->type) {
   5102         case MD_BLOCK_HR:
   5103             /* noop */
   5104             break;
   5105 
   5106         case MD_BLOCK_CODE:
   5107             MD_CHECK(md_process_code_block_contents(ctx, (block->data != 0),
   5108                             (const MD_VERBATIMLINE*)(block + 1), block->n_lines));
   5109             break;
   5110 
   5111         case MD_BLOCK_HTML:
   5112             MD_CHECK(md_process_verbatim_block_contents(ctx, MD_TEXT_HTML,
   5113                             (const MD_VERBATIMLINE*)(block + 1), block->n_lines));
   5114             break;
   5115 
   5116         case MD_BLOCK_TABLE:
   5117             MD_CHECK(md_process_table_block_contents(ctx, block->data,
   5118                             (const MD_LINE*)(block + 1), block->n_lines));
   5119             break;
   5120 
   5121         default:
   5122             MD_CHECK(md_process_normal_block_contents(ctx,
   5123                             (const MD_LINE*)(block + 1), block->n_lines));
   5124             break;
   5125     }
   5126 
   5127     if(!is_in_tight_list  ||  block->type != MD_BLOCK_P)
   5128         MD_LEAVE_BLOCK(block->type, (void*) &det);
   5129 
   5130 abort:
   5131     if(clean_fence_code_detail) {
   5132         md_free_attribute(ctx, &info_build);
   5133         md_free_attribute(ctx, &lang_build);
   5134     }
   5135     return ret;
   5136 }
   5137 
   5138 static int
   5139 md_process_all_blocks(MD_CTX* ctx)
   5140 {
   5141     int byte_off = 0;
   5142     int ret = 0;
   5143 
   5144     /* ctx->containers now is not needed for detection of lists and list items
   5145      * so we reuse it for tracking what lists are loose or tight. We rely
   5146      * on the fact the vector is large enough to hold the deepest nesting
   5147      * level of lists. */
   5148     ctx->n_containers = 0;
   5149 
   5150     while(byte_off < ctx->n_block_bytes) {
   5151         MD_BLOCK* block = (MD_BLOCK*)((char*)ctx->block_bytes + byte_off);
   5152         union {
   5153             MD_BLOCK_UL_DETAIL ul;
   5154             MD_BLOCK_OL_DETAIL ol;
   5155             MD_BLOCK_LI_DETAIL li;
   5156         } det;
   5157 
   5158         switch(block->type) {
   5159             case MD_BLOCK_UL:
   5160                 det.ul.is_tight = (block->flags & MD_BLOCK_LOOSE_LIST) ? FALSE : TRUE;
   5161                 det.ul.mark = (CHAR) block->data;
   5162                 break;
   5163 
   5164             case MD_BLOCK_OL:
   5165                 det.ol.start = block->n_lines;
   5166                 det.ol.is_tight =  (block->flags & MD_BLOCK_LOOSE_LIST) ? FALSE : TRUE;
   5167                 det.ol.mark_delimiter = (CHAR) block->data;
   5168                 break;
   5169 
   5170             case MD_BLOCK_LI:
   5171                 det.li.is_task = (block->data != 0);
   5172                 det.li.task_mark = (CHAR) block->data;
   5173                 det.li.task_mark_offset = (OFF) block->n_lines;
   5174                 break;
   5175 
   5176             default:
   5177                 /* noop */
   5178                 break;
   5179         }
   5180 
   5181         if(block->flags & MD_BLOCK_CONTAINER) {
   5182             if(block->flags & MD_BLOCK_CONTAINER_CLOSER) {
   5183                 MD_LEAVE_BLOCK(block->type, &det);
   5184 
   5185                 if(block->type == MD_BLOCK_UL || block->type == MD_BLOCK_OL || block->type == MD_BLOCK_QUOTE)
   5186                     ctx->n_containers--;
   5187             }
   5188 
   5189             if(block->flags & MD_BLOCK_CONTAINER_OPENER) {
   5190                 MD_ENTER_BLOCK(block->type, &det);
   5191 
   5192                 if(block->type == MD_BLOCK_UL || block->type == MD_BLOCK_OL) {
   5193                     ctx->containers[ctx->n_containers].is_loose = (block->flags & MD_BLOCK_LOOSE_LIST);
   5194                     ctx->n_containers++;
   5195                 } else if(block->type == MD_BLOCK_QUOTE) {
   5196                     /* This causes that any text in a block quote, even if
   5197                      * nested inside a tight list item, is wrapped with
   5198                      * <p>...</p>. */
   5199                     ctx->containers[ctx->n_containers].is_loose = TRUE;
   5200                     ctx->n_containers++;
   5201                 }
   5202             }
   5203         } else {
   5204             MD_CHECK(md_process_leaf_block(ctx, block));
   5205 
   5206             if(block->type == MD_BLOCK_CODE || block->type == MD_BLOCK_HTML)
   5207                 byte_off += block->n_lines * sizeof(MD_VERBATIMLINE);
   5208             else
   5209                 byte_off += block->n_lines * sizeof(MD_LINE);
   5210         }
   5211 
   5212         byte_off += sizeof(MD_BLOCK);
   5213     }
   5214 
   5215     ctx->n_block_bytes = 0;
   5216 
   5217 abort:
   5218     return ret;
   5219 }
   5220 
   5221 
   5222 /************************************
   5223  ***  Grouping Lines into Blocks  ***
   5224  ************************************/
   5225 
   5226 static void*
   5227 md_push_block_bytes(MD_CTX* ctx, int n_bytes)
   5228 {
   5229     void* ptr;
   5230 
   5231     if(ctx->n_block_bytes + n_bytes > ctx->alloc_block_bytes) {
   5232         void* new_block_bytes;
   5233 
   5234         ctx->alloc_block_bytes = (ctx->alloc_block_bytes > 0
   5235                 ? ctx->alloc_block_bytes + ctx->alloc_block_bytes / 2
   5236                 : 512);
   5237         new_block_bytes = realloc(ctx->block_bytes, ctx->alloc_block_bytes);
   5238         if(new_block_bytes == NULL) {
   5239             MD_LOG("realloc() failed.");
   5240             return NULL;
   5241         }
   5242 
   5243         /* Fix the ->current_block after the reallocation. */
   5244         if(ctx->current_block != NULL) {
   5245             OFF off_current_block = (OFF) ((char*) ctx->current_block - (char*) ctx->block_bytes);
   5246             ctx->current_block = (MD_BLOCK*) ((char*) new_block_bytes + off_current_block);
   5247         }
   5248 
   5249         ctx->block_bytes = new_block_bytes;
   5250     }
   5251 
   5252     ptr = (char*)ctx->block_bytes + ctx->n_block_bytes;
   5253     ctx->n_block_bytes += n_bytes;
   5254     return ptr;
   5255 }
   5256 
   5257 static int
   5258 md_start_new_block(MD_CTX* ctx, const MD_LINE_ANALYSIS* line)
   5259 {
   5260     MD_BLOCK* block;
   5261 
   5262     MD_ASSERT(ctx->current_block == NULL);
   5263 
   5264     block = (MD_BLOCK*) md_push_block_bytes(ctx, sizeof(MD_BLOCK));
   5265     if(block == NULL)
   5266         return -1;
   5267 
   5268     switch(line->type) {
   5269         case MD_LINE_HR:
   5270             block->type = MD_BLOCK_HR;
   5271             break;
   5272 
   5273         case MD_LINE_ATXHEADER:
   5274         case MD_LINE_SETEXTHEADER:
   5275             block->type = MD_BLOCK_H;
   5276             break;
   5277 
   5278         case MD_LINE_FENCEDCODE:
   5279         case MD_LINE_INDENTEDCODE:
   5280             block->type = MD_BLOCK_CODE;
   5281             break;
   5282 
   5283         case MD_LINE_TEXT:
   5284             block->type = MD_BLOCK_P;
   5285             break;
   5286 
   5287         case MD_LINE_HTML:
   5288             block->type = MD_BLOCK_HTML;
   5289             break;
   5290 
   5291         case MD_LINE_BLANK:
   5292         case MD_LINE_SETEXTUNDERLINE:
   5293         case MD_LINE_TABLEUNDERLINE:
   5294         default:
   5295             MD_UNREACHABLE();
   5296             break;
   5297     }
   5298 
   5299     block->flags = 0;
   5300     block->data = line->data;
   5301     block->n_lines = 0;
   5302 
   5303     ctx->current_block = block;
   5304     return 0;
   5305 }
   5306 
   5307 /* Eat from start of current (textual) block any reference definitions and
   5308  * remember them so we can resolve any links referring to them.
   5309  *
   5310  * (Reference definitions can only be at start of it as they cannot break
   5311  * a paragraph.)
   5312  */
   5313 static int
   5314 md_consume_link_reference_definitions(MD_CTX* ctx)
   5315 {
   5316     MD_LINE* lines = (MD_LINE*) (ctx->current_block + 1);
   5317     int n_lines = ctx->current_block->n_lines;
   5318     int n = 0;
   5319 
   5320     /* Compute how many lines at the start of the block form one or more
   5321      * reference definitions. */
   5322     while(n < n_lines) {
   5323         int n_link_ref_lines;
   5324 
   5325         n_link_ref_lines = md_is_link_reference_definition(ctx,
   5326                                     lines + n, n_lines - n);
   5327         /* Not a reference definition? */
   5328         if(n_link_ref_lines == 0)
   5329             break;
   5330 
   5331         /* We fail if it is the ref. def. but it could not be stored due
   5332          * a memory allocation error. */
   5333         if(n_link_ref_lines < 0)
   5334             return -1;
   5335 
   5336         n += n_link_ref_lines;
   5337     }
   5338 
   5339     /* If there was at least one reference definition, we need to remove
   5340      * its lines from the block, or perhaps even the whole block. */
   5341     if(n > 0) {
   5342         if(n == n_lines) {
   5343             /* Remove complete block. */
   5344             ctx->n_block_bytes -= n * sizeof(MD_LINE);
   5345             ctx->n_block_bytes -= sizeof(MD_BLOCK);
   5346             ctx->current_block = NULL;
   5347         } else {
   5348             /* Remove just some initial lines from the block. */
   5349             memmove(lines, lines + n, (n_lines - n) * sizeof(MD_LINE));
   5350             ctx->current_block->n_lines -= n;
   5351             ctx->n_block_bytes -= n * sizeof(MD_LINE);
   5352         }
   5353     }
   5354 
   5355     return 0;
   5356 }
   5357 
   5358 static int
   5359 md_end_current_block(MD_CTX* ctx)
   5360 {
   5361     int ret = 0;
   5362 
   5363     if(ctx->current_block == NULL)
   5364         return ret;
   5365 
   5366     /* Check whether there is a reference definition. (We do this here instead
   5367      * of in md_analyze_line() because reference definition can take multiple
   5368      * lines.) */
   5369     if(ctx->current_block->type == MD_BLOCK_P  ||
   5370        (ctx->current_block->type == MD_BLOCK_H  &&  (ctx->current_block->flags & MD_BLOCK_SETEXT_HEADER)))
   5371     {
   5372         MD_LINE* lines = (MD_LINE*) (ctx->current_block + 1);
   5373         if(CH(lines[0].beg) == _T('[')) {
   5374             MD_CHECK(md_consume_link_reference_definitions(ctx));
   5375             if(ctx->current_block == NULL)
   5376                 return ret;
   5377         }
   5378     }
   5379 
   5380     if(ctx->current_block->type == MD_BLOCK_H  &&  (ctx->current_block->flags & MD_BLOCK_SETEXT_HEADER)) {
   5381         int n_lines = ctx->current_block->n_lines;
   5382 
   5383         if(n_lines > 1) {
   5384             /* Get rid of the underline. */
   5385             ctx->current_block->n_lines--;
   5386             ctx->n_block_bytes -= sizeof(MD_LINE);
   5387         } else {
   5388             /* Only the underline has left after eating the ref. defs.
   5389              * Keep the line as beginning of a new ordinary paragraph. */
   5390             ctx->current_block->type = MD_BLOCK_P;
   5391             return 0;
   5392         }
   5393     }
   5394 
   5395     /* Mark we are not building any block anymore. */
   5396     ctx->current_block = NULL;
   5397 
   5398 abort:
   5399     return ret;
   5400 }
   5401 
   5402 static int
   5403 md_add_line_into_current_block(MD_CTX* ctx, const MD_LINE_ANALYSIS* analysis)
   5404 {
   5405     MD_ASSERT(ctx->current_block != NULL);
   5406 
   5407     if(ctx->current_block->type == MD_BLOCK_CODE || ctx->current_block->type == MD_BLOCK_HTML) {
   5408         MD_VERBATIMLINE* line;
   5409 
   5410         line = (MD_VERBATIMLINE*) md_push_block_bytes(ctx, sizeof(MD_VERBATIMLINE));
   5411         if(line == NULL)
   5412             return -1;
   5413 
   5414         line->indent = analysis->indent;
   5415         line->beg = analysis->beg;
   5416         line->end = analysis->end;
   5417     } else {
   5418         MD_LINE* line;
   5419 
   5420         line = (MD_LINE*) md_push_block_bytes(ctx, sizeof(MD_LINE));
   5421         if(line == NULL)
   5422             return -1;
   5423 
   5424         line->beg = analysis->beg;
   5425         line->end = analysis->end;
   5426     }
   5427     ctx->current_block->n_lines++;
   5428 
   5429     return 0;
   5430 }
   5431 
   5432 static int
   5433 md_push_container_bytes(MD_CTX* ctx, MD_BLOCKTYPE type, unsigned start,
   5434                         unsigned data, unsigned flags)
   5435 {
   5436     MD_BLOCK* block;
   5437     int ret = 0;
   5438 
   5439     MD_CHECK(md_end_current_block(ctx));
   5440 
   5441     block = (MD_BLOCK*) md_push_block_bytes(ctx, sizeof(MD_BLOCK));
   5442     if(block == NULL)
   5443         return -1;
   5444 
   5445     block->type = type;
   5446     block->flags = flags;
   5447     block->data = data;
   5448     block->n_lines = start;
   5449 
   5450 abort:
   5451     return ret;
   5452 }
   5453 
   5454 
   5455 
   5456 /***********************
   5457  ***  Line Analysis  ***
   5458  ***********************/
   5459 
   5460 static int
   5461 md_is_hr_line(MD_CTX* ctx, OFF beg, OFF* p_end, OFF* p_killer)
   5462 {
   5463     OFF off = beg + 1;
   5464     int n = 1;
   5465 
   5466     while(off < ctx->size  &&  (CH(off) == CH(beg) || CH(off) == _T(' ') || CH(off) == _T('\t'))) {
   5467         if(CH(off) == CH(beg))
   5468             n++;
   5469         off++;
   5470     }
   5471 
   5472     if(n < 3) {
   5473         *p_killer = off;
   5474         return FALSE;
   5475     }
   5476 
   5477     /* Nothing else can be present on the line. */
   5478     if(off < ctx->size  &&  !ISNEWLINE(off)) {
   5479         *p_killer = off;
   5480         return FALSE;
   5481     }
   5482 
   5483     *p_end = off;
   5484     return TRUE;
   5485 }
   5486 
   5487 static int
   5488 md_is_atxheader_line(MD_CTX* ctx, OFF beg, OFF* p_beg, OFF* p_end, unsigned* p_level)
   5489 {
   5490     int n;
   5491     OFF off = beg + 1;
   5492 
   5493     while(off < ctx->size  &&  CH(off) == _T('#')  &&  off - beg < 7)
   5494         off++;
   5495     n = off - beg;
   5496 
   5497     if(n > 6)
   5498         return FALSE;
   5499     *p_level = n;
   5500 
   5501     if(!(ctx->parser.flags & MD_FLAG_PERMISSIVEATXHEADERS)  &&  off < ctx->size  &&
   5502        CH(off) != _T(' ')  &&  CH(off) != _T('\t')  &&  !ISNEWLINE(off))
   5503         return FALSE;
   5504 
   5505     while(off < ctx->size  &&  CH(off) == _T(' '))
   5506         off++;
   5507     *p_beg = off;
   5508     *p_end = off;
   5509     return TRUE;
   5510 }
   5511 
   5512 static int
   5513 md_is_setext_underline(MD_CTX* ctx, OFF beg, OFF* p_end, unsigned* p_level)
   5514 {
   5515     OFF off = beg + 1;
   5516 
   5517     while(off < ctx->size  &&  CH(off) == CH(beg))
   5518         off++;
   5519 
   5520     /* Optionally, space(s) can follow. */
   5521     while(off < ctx->size  &&  CH(off) == _T(' '))
   5522         off++;
   5523 
   5524     /* But nothing more is allowed on the line. */
   5525     if(off < ctx->size  &&  !ISNEWLINE(off))
   5526         return FALSE;
   5527 
   5528     *p_level = (CH(beg) == _T('=') ? 1 : 2);
   5529     *p_end = off;
   5530     return TRUE;
   5531 }
   5532 
   5533 static int
   5534 md_is_table_underline(MD_CTX* ctx, OFF beg, OFF* p_end, unsigned* p_col_count)
   5535 {
   5536     OFF off = beg;
   5537     int found_pipe = FALSE;
   5538     unsigned col_count = 0;
   5539 
   5540     if(off < ctx->size  &&  CH(off) == _T('|')) {
   5541         found_pipe = TRUE;
   5542         off++;
   5543         while(off < ctx->size  &&  ISWHITESPACE(off))
   5544             off++;
   5545     }
   5546 
   5547     while(1) {
   5548         int delimited = FALSE;
   5549 
   5550         /* Cell underline ("-----", ":----", "----:" or ":----:") */
   5551         if(off < ctx->size  &&  CH(off) == _T(':'))
   5552             off++;
   5553         if(off >= ctx->size  ||  CH(off) != _T('-'))
   5554             return FALSE;
   5555         while(off < ctx->size  &&  CH(off) == _T('-'))
   5556             off++;
   5557         if(off < ctx->size  &&  CH(off) == _T(':'))
   5558             off++;
   5559 
   5560         col_count++;
   5561 
   5562         /* Pipe delimiter (optional at the end of line). */
   5563         while(off < ctx->size  &&  ISWHITESPACE(off))
   5564             off++;
   5565         if(off < ctx->size  &&  CH(off) == _T('|')) {
   5566             delimited = TRUE;
   5567             found_pipe =  TRUE;
   5568             off++;
   5569             while(off < ctx->size  &&  ISWHITESPACE(off))
   5570                 off++;
   5571         }
   5572 
   5573         /* Success, if we reach end of line. */
   5574         if(off >= ctx->size  ||  ISNEWLINE(off))
   5575             break;
   5576 
   5577         if(!delimited)
   5578             return FALSE;
   5579     }
   5580 
   5581     if(!found_pipe)
   5582         return FALSE;
   5583 
   5584     *p_end = off;
   5585     *p_col_count = col_count;
   5586     return TRUE;
   5587 }
   5588 
   5589 static int
   5590 md_is_opening_code_fence(MD_CTX* ctx, OFF beg, OFF* p_end)
   5591 {
   5592     OFF off = beg;
   5593 
   5594     while(off < ctx->size && CH(off) == CH(beg))
   5595         off++;
   5596 
   5597     /* Fence must have at least three characters. */
   5598     if(off - beg < 3)
   5599         return FALSE;
   5600 
   5601     ctx->code_fence_length = off - beg;
   5602 
   5603     /* Optionally, space(s) can follow. */
   5604     while(off < ctx->size  &&  CH(off) == _T(' '))
   5605         off++;
   5606 
   5607     /* Optionally, an info string can follow. */
   5608     while(off < ctx->size  &&  !ISNEWLINE(off)) {
   5609         /* Backtick-based fence must not contain '`' in the info string. */
   5610         if(CH(beg) == _T('`')  &&  CH(off) == _T('`'))
   5611             return FALSE;
   5612         off++;
   5613     }
   5614 
   5615     *p_end = off;
   5616     return TRUE;
   5617 }
   5618 
   5619 static int
   5620 md_is_closing_code_fence(MD_CTX* ctx, CHAR ch, OFF beg, OFF* p_end)
   5621 {
   5622     OFF off = beg;
   5623     int ret = FALSE;
   5624 
   5625     /* Closing fence must have at least the same length and use same char as
   5626      * opening one. */
   5627     while(off < ctx->size  &&  CH(off) == ch)
   5628         off++;
   5629     if(off - beg < ctx->code_fence_length)
   5630         goto out;
   5631 
   5632     /* Optionally, space(s) can follow */
   5633     while(off < ctx->size  &&  CH(off) == _T(' '))
   5634         off++;
   5635 
   5636     /* But nothing more is allowed on the line. */
   5637     if(off < ctx->size  &&  !ISNEWLINE(off))
   5638         goto out;
   5639 
   5640     ret = TRUE;
   5641 
   5642 out:
   5643     /* Note we set *p_end even on failure: If we are not closing fence, caller
   5644      * would eat the line anyway without any parsing. */
   5645     *p_end = off;
   5646     return ret;
   5647 }
   5648 
   5649 /* Returns type of the raw HTML block, or FALSE if it is not HTML block.
   5650  * (Refer to CommonMark specification for details about the types.)
   5651  */
   5652 static int
   5653 md_is_html_block_start_condition(MD_CTX* ctx, OFF beg)
   5654 {
   5655     typedef struct TAG_tag TAG;
   5656     struct TAG_tag {
   5657         const CHAR* name;
   5658         unsigned len    : 8;
   5659     };
   5660 
   5661     /* Type 6 is started by a long list of allowed tags. We use two-level
   5662      * tree to speed-up the search. */
   5663 #ifdef X
   5664     #undef X
   5665 #endif
   5666 #define X(name)     { _T(name), (sizeof(name)-1) / sizeof(CHAR) }
   5667 #define Xend        { NULL, 0 }
   5668     static const TAG t1[] = { X("pre"), X("script"), X("style"), X("textarea"), Xend };
   5669 
   5670     static const TAG a6[] = { X("address"), X("article"), X("aside"), Xend };
   5671     static const TAG b6[] = { X("base"), X("basefont"), X("blockquote"), X("body"), Xend };
   5672     static const TAG c6[] = { X("caption"), X("center"), X("col"), X("colgroup"), Xend };
   5673     static const TAG d6[] = { X("dd"), X("details"), X("dialog"), X("dir"),
   5674                               X("div"), X("dl"), X("dt"), Xend };
   5675     static const TAG f6[] = { X("fieldset"), X("figcaption"), X("figure"), X("footer"),
   5676                               X("form"), X("frame"), X("frameset"), Xend };
   5677     static const TAG h6[] = { X("h1"), X("head"), X("header"), X("hr"), X("html"), Xend };
   5678     static const TAG i6[] = { X("iframe"), Xend };
   5679     static const TAG l6[] = { X("legend"), X("li"), X("link"), Xend };
   5680     static const TAG m6[] = { X("main"), X("menu"), X("menuitem"), Xend };
   5681     static const TAG n6[] = { X("nav"), X("noframes"), Xend };
   5682     static const TAG o6[] = { X("ol"), X("optgroup"), X("option"), Xend };
   5683     static const TAG p6[] = { X("p"), X("param"), Xend };
   5684     static const TAG s6[] = { X("section"), X("source"), X("summary"), Xend };
   5685     static const TAG t6[] = { X("table"), X("tbody"), X("td"), X("tfoot"), X("th"),
   5686                               X("thead"), X("title"), X("tr"), X("track"), Xend };
   5687     static const TAG u6[] = { X("ul"), Xend };
   5688     static const TAG xx[] = { Xend };
   5689 #undef X
   5690 
   5691     static const TAG* map6[26] = {
   5692         a6, b6, c6, d6, xx, f6, xx, h6, i6, xx, xx, l6, m6,
   5693         n6, o6, p6, xx, xx, s6, t6, u6, xx, xx, xx, xx, xx
   5694     };
   5695     OFF off = beg + 1;
   5696     int i;
   5697 
   5698     /* Check for type 1: <script, <pre, or <style */
   5699     for(i = 0; t1[i].name != NULL; i++) {
   5700         if(off + t1[i].len <= ctx->size) {
   5701             if(md_ascii_case_eq(STR(off), t1[i].name, t1[i].len))
   5702                 return 1;
   5703         }
   5704     }
   5705 
   5706     /* Check for type 2: <!-- */
   5707     if(off + 3 < ctx->size  &&  CH(off) == _T('!')  &&  CH(off+1) == _T('-')  &&  CH(off+2) == _T('-'))
   5708         return 2;
   5709 
   5710     /* Check for type 3: <? */
   5711     if(off < ctx->size  &&  CH(off) == _T('?'))
   5712         return 3;
   5713 
   5714     /* Check for type 4 or 5: <! */
   5715     if(off < ctx->size  &&  CH(off) == _T('!')) {
   5716         /* Check for type 4: <! followed by uppercase letter. */
   5717         if(off + 1 < ctx->size  &&  ISASCII(off+1))
   5718             return 4;
   5719 
   5720         /* Check for type 5: <![CDATA[ */
   5721         if(off + 8 < ctx->size) {
   5722             if(md_ascii_eq(STR(off), _T("![CDATA["), 8))
   5723                 return 5;
   5724         }
   5725     }
   5726 
   5727     /* Check for type 6: Many possible starting tags listed above. */
   5728     if(off + 1 < ctx->size  &&  (ISALPHA(off) || (CH(off) == _T('/') && ISALPHA(off+1)))) {
   5729         int slot;
   5730         const TAG* tags;
   5731 
   5732         if(CH(off) == _T('/'))
   5733             off++;
   5734 
   5735         slot = (ISUPPER(off) ? CH(off) - 'A' : CH(off) - 'a');
   5736         tags = map6[slot];
   5737 
   5738         for(i = 0; tags[i].name != NULL; i++) {
   5739             if(off + tags[i].len <= ctx->size) {
   5740                 if(md_ascii_case_eq(STR(off), tags[i].name, tags[i].len)) {
   5741                     OFF tmp = off + tags[i].len;
   5742                     if(tmp >= ctx->size)
   5743                         return 6;
   5744                     if(ISBLANK(tmp) || ISNEWLINE(tmp) || CH(tmp) == _T('>'))
   5745                         return 6;
   5746                     if(tmp+1 < ctx->size && CH(tmp) == _T('/') && CH(tmp+1) == _T('>'))
   5747                         return 6;
   5748                     break;
   5749                 }
   5750             }
   5751         }
   5752     }
   5753 
   5754     /* Check for type 7: any COMPLETE other opening or closing tag. */
   5755     if(off + 1 < ctx->size) {
   5756         OFF end;
   5757 
   5758         if(md_is_html_tag(ctx, NULL, 0, beg, ctx->size, &end)) {
   5759             /* Only optional whitespace and new line may follow. */
   5760             while(end < ctx->size  &&  ISWHITESPACE(end))
   5761                 end++;
   5762             if(end >= ctx->size  ||  ISNEWLINE(end))
   5763                 return 7;
   5764         }
   5765     }
   5766 
   5767     return FALSE;
   5768 }
   5769 
   5770 /* Case sensitive check whether there is a substring 'what' between 'beg'
   5771  * and end of line. */
   5772 static int
   5773 md_line_contains(MD_CTX* ctx, OFF beg, const CHAR* what, SZ what_len, OFF* p_end)
   5774 {
   5775     OFF i;
   5776     for(i = beg; i + what_len < ctx->size; i++) {
   5777         if(ISNEWLINE(i))
   5778             break;
   5779         if(memcmp(STR(i), what, what_len * sizeof(CHAR)) == 0) {
   5780             *p_end = i + what_len;
   5781             return TRUE;
   5782         }
   5783     }
   5784 
   5785     *p_end = i;
   5786     return FALSE;
   5787 }
   5788 
   5789 /* Returns type of HTML block end condition or FALSE if not an end condition.
   5790  *
   5791  * Note it fills p_end even when it is not end condition as the caller
   5792  * does not need to analyze contents of a raw HTML block.
   5793  */
   5794 static int
   5795 md_is_html_block_end_condition(MD_CTX* ctx, OFF beg, OFF* p_end)
   5796 {
   5797     switch(ctx->html_block_type) {
   5798         case 1:
   5799         {
   5800             OFF off = beg;
   5801 
   5802             while(off < ctx->size  &&  !ISNEWLINE(off)) {
   5803                 if(CH(off) == _T('<')) {
   5804                   #define FIND_TAG_END(string, length) \
   5805                     if(off + length <= ctx->size && \
   5806                        md_ascii_case_eq(STR(off), _T(string), length)) { \
   5807                         *p_end = off + length; \
   5808                         return TRUE; \
   5809                     }
   5810                   FIND_TAG_END("</script>", 9)
   5811                   FIND_TAG_END("</style>", 8)
   5812                   FIND_TAG_END("</pre>", 6)
   5813                   #undef FIND_TAG_END
   5814                 }
   5815 
   5816                 off++;
   5817             }
   5818             *p_end = off;
   5819             return FALSE;
   5820         }
   5821 
   5822         case 2:
   5823             return (md_line_contains(ctx, beg, _T("-->"), 3, p_end) ? 2 : FALSE);
   5824 
   5825         case 3:
   5826             return (md_line_contains(ctx, beg, _T("?>"), 2, p_end) ? 3 : FALSE);
   5827 
   5828         case 4:
   5829             return (md_line_contains(ctx, beg, _T(">"), 1, p_end) ? 4 : FALSE);
   5830 
   5831         case 5:
   5832             return (md_line_contains(ctx, beg, _T("]]>"), 3, p_end) ? 5 : FALSE);
   5833 
   5834         case 6:     /* Pass through */
   5835         case 7:
   5836             *p_end = beg;
   5837             return (beg >= ctx->size || ISNEWLINE(beg) ? ctx->html_block_type : FALSE);
   5838 
   5839         default:
   5840             MD_UNREACHABLE();
   5841     }
   5842     return FALSE;
   5843 }
   5844 
   5845 
   5846 static int
   5847 md_is_container_compatible(const MD_CONTAINER* pivot, const MD_CONTAINER* container)
   5848 {
   5849     /* Block quote has no "items" like lists. */
   5850     if(container->ch == _T('>'))
   5851         return FALSE;
   5852 
   5853     if(container->ch != pivot->ch)
   5854         return FALSE;
   5855     if(container->mark_indent > pivot->contents_indent)
   5856         return FALSE;
   5857 
   5858     return TRUE;
   5859 }
   5860 
   5861 static int
   5862 md_push_container(MD_CTX* ctx, const MD_CONTAINER* container)
   5863 {
   5864     if(ctx->n_containers >= ctx->alloc_containers) {
   5865         MD_CONTAINER* new_containers;
   5866 
   5867         ctx->alloc_containers = (ctx->alloc_containers > 0
   5868                 ? ctx->alloc_containers + ctx->alloc_containers / 2
   5869                 : 16);
   5870         new_containers = realloc(ctx->containers, ctx->alloc_containers * sizeof(MD_CONTAINER));
   5871         if(new_containers == NULL) {
   5872             MD_LOG("realloc() failed.");
   5873             return -1;
   5874         }
   5875 
   5876         ctx->containers = new_containers;
   5877     }
   5878 
   5879     memcpy(&ctx->containers[ctx->n_containers++], container, sizeof(MD_CONTAINER));
   5880     return 0;
   5881 }
   5882 
   5883 static int
   5884 md_enter_child_containers(MD_CTX* ctx, int n_children)
   5885 {
   5886     int i;
   5887     int ret = 0;
   5888 
   5889     for(i = ctx->n_containers - n_children; i < ctx->n_containers; i++) {
   5890         MD_CONTAINER* c = &ctx->containers[i];
   5891         int is_ordered_list = FALSE;
   5892 
   5893         switch(c->ch) {
   5894             case _T(')'):
   5895             case _T('.'):
   5896                 is_ordered_list = TRUE;
   5897                 MD_FALLTHROUGH();
   5898 
   5899             case _T('-'):
   5900             case _T('+'):
   5901             case _T('*'):
   5902                 /* Remember offset in ctx->block_bytes so we can revisit the
   5903                  * block if we detect it is a loose list. */
   5904                 md_end_current_block(ctx);
   5905                 c->block_byte_off = ctx->n_block_bytes;
   5906 
   5907                 MD_CHECK(md_push_container_bytes(ctx,
   5908                                 (is_ordered_list ? MD_BLOCK_OL : MD_BLOCK_UL),
   5909                                 c->start, c->ch, MD_BLOCK_CONTAINER_OPENER));
   5910                 MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_LI,
   5911                                 c->task_mark_off,
   5912                                 (c->is_task ? CH(c->task_mark_off) : 0),
   5913                                 MD_BLOCK_CONTAINER_OPENER));
   5914                 break;
   5915 
   5916             case _T('>'):
   5917                 MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_QUOTE, 0, 0, MD_BLOCK_CONTAINER_OPENER));
   5918                 break;
   5919 
   5920             default:
   5921                 MD_UNREACHABLE();
   5922                 break;
   5923         }
   5924     }
   5925 
   5926 abort:
   5927     return ret;
   5928 }
   5929 
   5930 static int
   5931 md_leave_child_containers(MD_CTX* ctx, int n_keep)
   5932 {
   5933     int ret = 0;
   5934 
   5935     while(ctx->n_containers > n_keep) {
   5936         MD_CONTAINER* c = &ctx->containers[ctx->n_containers-1];
   5937         int is_ordered_list = FALSE;
   5938 
   5939         switch(c->ch) {
   5940             case _T(')'):
   5941             case _T('.'):
   5942                 is_ordered_list = TRUE;
   5943                 MD_FALLTHROUGH();
   5944 
   5945             case _T('-'):
   5946             case _T('+'):
   5947             case _T('*'):
   5948                 MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_LI,
   5949                                 c->task_mark_off, (c->is_task ? CH(c->task_mark_off) : 0),
   5950                                 MD_BLOCK_CONTAINER_CLOSER));
   5951                 MD_CHECK(md_push_container_bytes(ctx,
   5952                                 (is_ordered_list ? MD_BLOCK_OL : MD_BLOCK_UL), 0,
   5953                                 c->ch, MD_BLOCK_CONTAINER_CLOSER));
   5954                 break;
   5955 
   5956             case _T('>'):
   5957                 MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_QUOTE, 0,
   5958                                 0, MD_BLOCK_CONTAINER_CLOSER));
   5959                 break;
   5960 
   5961             default:
   5962                 MD_UNREACHABLE();
   5963                 break;
   5964         }
   5965 
   5966         ctx->n_containers--;
   5967     }
   5968 
   5969 abort:
   5970     return ret;
   5971 }
   5972 
   5973 static int
   5974 md_is_container_mark(MD_CTX* ctx, unsigned indent, OFF beg, OFF* p_end, MD_CONTAINER* p_container)
   5975 {
   5976     OFF off = beg;
   5977     OFF max_end;
   5978 
   5979     if(off >= ctx->size  ||  indent >= ctx->code_indent_offset)
   5980         return FALSE;
   5981 
   5982     /* Check for block quote mark. */
   5983     if(CH(off) == _T('>')) {
   5984         off++;
   5985         p_container->ch = _T('>');
   5986         p_container->is_loose = FALSE;
   5987         p_container->is_task = FALSE;
   5988         p_container->mark_indent = indent;
   5989         p_container->contents_indent = indent + 1;
   5990         *p_end = off;
   5991         return TRUE;
   5992     }
   5993 
   5994     /* Check for list item bullet mark. */
   5995     if(ISANYOF(off, _T("-+*"))  &&  (off+1 >= ctx->size || ISBLANK(off+1) || ISNEWLINE(off+1))) {
   5996         p_container->ch = CH(off);
   5997         p_container->is_loose = FALSE;
   5998         p_container->is_task = FALSE;
   5999         p_container->mark_indent = indent;
   6000         p_container->contents_indent = indent + 1;
   6001         *p_end = off+1;
   6002         return TRUE;
   6003     }
   6004 
   6005     /* Check for ordered list item marks. */
   6006     max_end = off + 9;
   6007     if(max_end > ctx->size)
   6008         max_end = ctx->size;
   6009     p_container->start = 0;
   6010     while(off < max_end  &&  ISDIGIT(off)) {
   6011         p_container->start = p_container->start * 10 + CH(off) - _T('0');
   6012         off++;
   6013     }
   6014     if(off > beg  &&
   6015        off < ctx->size  &&
   6016        (CH(off) == _T('.') || CH(off) == _T(')'))  &&
   6017        (off+1 >= ctx->size || ISBLANK(off+1) || ISNEWLINE(off+1)))
   6018     {
   6019         p_container->ch = CH(off);
   6020         p_container->is_loose = FALSE;
   6021         p_container->is_task = FALSE;
   6022         p_container->mark_indent = indent;
   6023         p_container->contents_indent = indent + off - beg + 1;
   6024         *p_end = off+1;
   6025         return TRUE;
   6026     }
   6027 
   6028     return FALSE;
   6029 }
   6030 
   6031 static unsigned
   6032 md_line_indentation(MD_CTX* ctx, unsigned total_indent, OFF beg, OFF* p_end)
   6033 {
   6034     OFF off = beg;
   6035     unsigned indent = total_indent;
   6036 
   6037     while(off < ctx->size  &&  ISBLANK(off)) {
   6038         if(CH(off) == _T('\t'))
   6039             indent = (indent + 4) & ~3;
   6040         else
   6041             indent++;
   6042         off++;
   6043     }
   6044 
   6045     *p_end = off;
   6046     return indent - total_indent;
   6047 }
   6048 
/* Shared read-only line analysis of type MD_LINE_BLANK. It serves as the
 * pivot line whenever there is no real line to compare against: at the start
 * of the document, after a blank line or a self-contained single-line block,
 * and when a new list item or container is entered. */
static const MD_LINE_ANALYSIS md_dummy_blank_line = { MD_LINE_BLANK, 0, 0, 0, 0 };
   6050 
   6051 /* Analyze type of the line and find some its properties. This serves as a
   6052  * main input for determining type and boundaries of a block. */
   6053 static int
   6054 md_analyze_line(MD_CTX* ctx, OFF beg, OFF* p_end,
   6055                 const MD_LINE_ANALYSIS* pivot_line, MD_LINE_ANALYSIS* line)
   6056 {
   6057     unsigned total_indent = 0;
   6058     int n_parents = 0;
   6059     int n_brothers = 0;
   6060     int n_children = 0;
   6061     MD_CONTAINER container = { 0 };
   6062     int prev_line_has_list_loosening_effect = ctx->last_line_has_list_loosening_effect;
   6063     OFF off = beg;
   6064     OFF hr_killer = 0;
   6065     int ret = 0;
   6066 
   6067     line->indent = md_line_indentation(ctx, total_indent, off, &off);
   6068     total_indent += line->indent;
   6069     line->beg = off;
   6070 
   6071     /* Given the indentation and block quote marks '>', determine how many of
   6072      * the current containers are our parents. */
   6073     while(n_parents < ctx->n_containers) {
   6074         MD_CONTAINER* c = &ctx->containers[n_parents];
   6075 
   6076         if(c->ch == _T('>')  &&  line->indent < ctx->code_indent_offset  &&
   6077             off < ctx->size  &&  CH(off) == _T('>'))
   6078         {
   6079             /* Block quote mark. */
   6080             off++;
   6081             total_indent++;
   6082             line->indent = md_line_indentation(ctx, total_indent, off, &off);
   6083             total_indent += line->indent;
   6084 
   6085             /* The optional 1st space after '>' is part of the block quote mark. */
   6086             if(line->indent > 0)
   6087                 line->indent--;
   6088 
   6089             line->beg = off;
   6090 
   6091         } else if(c->ch != _T('>')  &&  line->indent >= c->contents_indent) {
   6092             /* List. */
   6093             line->indent -= c->contents_indent;
   6094         } else {
   6095             break;
   6096         }
   6097 
   6098         n_parents++;
   6099     }
   6100 
   6101     if(off >= ctx->size  ||  ISNEWLINE(off)) {
   6102         /* Blank line does not need any real indentation to be nested inside
   6103          * a list. */
   6104         if(n_brothers + n_children == 0) {
   6105             while(n_parents < ctx->n_containers  &&  ctx->containers[n_parents].ch != _T('>'))
   6106                 n_parents++;
   6107         }
   6108     }
   6109 
   6110     while(TRUE) {
   6111         /* Check whether we are fenced code continuation. */
   6112         if(pivot_line->type == MD_LINE_FENCEDCODE) {
   6113             line->beg = off;
   6114 
   6115             /* We are another MD_LINE_FENCEDCODE unless we are closing fence
   6116              * which we transform into MD_LINE_BLANK. */
   6117             if(line->indent < ctx->code_indent_offset) {
   6118                 if(md_is_closing_code_fence(ctx, CH(pivot_line->beg), off, &off)) {
   6119                     line->type = MD_LINE_BLANK;
   6120                     ctx->last_line_has_list_loosening_effect = FALSE;
   6121                     break;
   6122                 }
   6123             }
   6124 
   6125             /* Change indentation accordingly to the initial code fence. */
   6126             if(n_parents == ctx->n_containers) {
   6127                 if(line->indent > pivot_line->indent)
   6128                     line->indent -= pivot_line->indent;
   6129                 else
   6130                     line->indent = 0;
   6131 
   6132                 line->type = MD_LINE_FENCEDCODE;
   6133                 break;
   6134             }
   6135         }
   6136 
   6137         /* Check whether we are HTML block continuation. */
   6138         if(pivot_line->type == MD_LINE_HTML  &&  ctx->html_block_type > 0) {
   6139             if(n_parents < ctx->n_containers) {
   6140                 /* HTML block is implicitly ended if the enclosing container
   6141                  * block ends. */
   6142                 ctx->html_block_type = 0;
   6143             } else {
   6144                 int html_block_type;
   6145 
   6146                 html_block_type = md_is_html_block_end_condition(ctx, off, &off);
   6147                 if(html_block_type > 0) {
   6148                     MD_ASSERT(html_block_type == ctx->html_block_type);
   6149 
   6150                     /* Make sure this is the last line of the block. */
   6151                     ctx->html_block_type = 0;
   6152 
   6153                     /* Some end conditions serve as blank lines at the same time. */
   6154                     if(html_block_type == 6 || html_block_type == 7) {
   6155                         line->type = MD_LINE_BLANK;
   6156                         line->indent = 0;
   6157                         break;
   6158                     }
   6159                 }
   6160 
   6161                 line->type = MD_LINE_HTML;
   6162                 n_parents = ctx->n_containers;
   6163                 break;
   6164             }
   6165         }
   6166 
   6167         /* Check for blank line. */
   6168         if(off >= ctx->size  ||  ISNEWLINE(off)) {
   6169             if(pivot_line->type == MD_LINE_INDENTEDCODE  &&  n_parents == ctx->n_containers) {
   6170                 line->type = MD_LINE_INDENTEDCODE;
   6171                 if(line->indent > ctx->code_indent_offset)
   6172                     line->indent -= ctx->code_indent_offset;
   6173                 else
   6174                     line->indent = 0;
   6175                 ctx->last_line_has_list_loosening_effect = FALSE;
   6176             } else {
   6177                 line->type = MD_LINE_BLANK;
   6178                 ctx->last_line_has_list_loosening_effect = (n_parents > 0  &&
   6179                         n_brothers + n_children == 0  &&
   6180                         ctx->containers[n_parents-1].ch != _T('>'));
   6181 
   6182     #if 1
   6183                 /* See https://github.com/mity/md4c/issues/6
   6184                  *
   6185                  * This ugly checking tests we are in (yet empty) list item but
   6186                  * not its very first line (i.e. not the line with the list
   6187                  * item mark).
   6188                  *
   6189                  * If we are such a blank line, then any following non-blank
   6190                  * line which would be part of the list item actually has to
   6191                  * end the list because according to the specification, "a list
   6192                  * item can begin with at most one blank line."
   6193                  */
   6194                 if(n_parents > 0  &&  ctx->containers[n_parents-1].ch != _T('>')  &&
   6195                    n_brothers + n_children == 0  &&  ctx->current_block == NULL  &&
   6196                    ctx->n_block_bytes > (int) sizeof(MD_BLOCK))
   6197                 {
   6198                     MD_BLOCK* top_block = (MD_BLOCK*) ((char*)ctx->block_bytes + ctx->n_block_bytes - sizeof(MD_BLOCK));
   6199                     if(top_block->type == MD_BLOCK_LI)
   6200                         ctx->last_list_item_starts_with_two_blank_lines = TRUE;
   6201                 }
   6202     #endif
   6203             }
   6204             break;
   6205         } else {
   6206     #if 1
   6207             /* This is the 2nd half of the hack. If the flag is set (i.e. there
   6208              * was a 2nd blank line at the beginning of the list item) and if
   6209              * we would otherwise still belong to the list item, we enforce
   6210              * the end of the list. */
   6211             ctx->last_line_has_list_loosening_effect = FALSE;
   6212             if(ctx->last_list_item_starts_with_two_blank_lines) {
   6213                 if(n_parents > 0  &&  ctx->containers[n_parents-1].ch != _T('>')  &&
   6214                    n_brothers + n_children == 0  &&  ctx->current_block == NULL  &&
   6215                    ctx->n_block_bytes > (int) sizeof(MD_BLOCK))
   6216                 {
   6217                     MD_BLOCK* top_block = (MD_BLOCK*) ((char*)ctx->block_bytes + ctx->n_block_bytes - sizeof(MD_BLOCK));
   6218                     if(top_block->type == MD_BLOCK_LI)
   6219                         n_parents--;
   6220                 }
   6221 
   6222                 ctx->last_list_item_starts_with_two_blank_lines = FALSE;
   6223             }
   6224     #endif
   6225         }
   6226 
   6227         /* Check whether we are Setext underline. */
   6228         if(line->indent < ctx->code_indent_offset  &&  pivot_line->type == MD_LINE_TEXT
   6229             &&  off < ctx->size  &&  ISANYOF2(off, _T('='), _T('-'))
   6230             &&  (n_parents == ctx->n_containers))
   6231         {
   6232             unsigned level;
   6233 
   6234             if(md_is_setext_underline(ctx, off, &off, &level)) {
   6235                 line->type = MD_LINE_SETEXTUNDERLINE;
   6236                 line->data = level;
   6237                 break;
   6238             }
   6239         }
   6240 
   6241         /* Check for thematic break line. */
   6242         if(line->indent < ctx->code_indent_offset
   6243             &&  off < ctx->size  &&  off >= hr_killer
   6244             &&  ISANYOF(off, _T("-_*")))
   6245         {
   6246             if(md_is_hr_line(ctx, off, &off, &hr_killer)) {
   6247                 line->type = MD_LINE_HR;
   6248                 break;
   6249             }
   6250         }
   6251 
   6252         /* Check for "brother" container. I.e. whether we are another list item
   6253          * in already started list. */
   6254         if(n_parents < ctx->n_containers  &&  n_brothers + n_children == 0) {
   6255             OFF tmp;
   6256 
   6257             if(md_is_container_mark(ctx, line->indent, off, &tmp, &container)  &&
   6258                md_is_container_compatible(&ctx->containers[n_parents], &container))
   6259             {
   6260                 pivot_line = &md_dummy_blank_line;
   6261 
   6262                 off = tmp;
   6263 
   6264                 total_indent += container.contents_indent - container.mark_indent;
   6265                 line->indent = md_line_indentation(ctx, total_indent, off, &off);
   6266                 total_indent += line->indent;
   6267                 line->beg = off;
   6268 
   6269                 /* Some of the following whitespace actually still belongs to the mark. */
   6270                 if(off >= ctx->size || ISNEWLINE(off)) {
   6271                     container.contents_indent++;
   6272                 } else if(line->indent <= ctx->code_indent_offset) {
   6273                     container.contents_indent += line->indent;
   6274                     line->indent = 0;
   6275                 } else {
   6276                     container.contents_indent += 1;
   6277                     line->indent--;
   6278                 }
   6279 
   6280                 ctx->containers[n_parents].mark_indent = container.mark_indent;
   6281                 ctx->containers[n_parents].contents_indent = container.contents_indent;
   6282 
   6283                 n_brothers++;
   6284                 continue;
   6285             }
   6286         }
   6287 
   6288         /* Check for indented code.
   6289          * Note indented code block cannot interrupt a paragraph. */
   6290         if(line->indent >= ctx->code_indent_offset  &&
   6291             (pivot_line->type == MD_LINE_BLANK || pivot_line->type == MD_LINE_INDENTEDCODE))
   6292         {
   6293             line->type = MD_LINE_INDENTEDCODE;
   6294             MD_ASSERT(line->indent >= ctx->code_indent_offset);
   6295             line->indent -= ctx->code_indent_offset;
   6296             line->data = 0;
   6297             break;
   6298         }
   6299 
   6300         /* Check for start of a new container block. */
   6301         if(line->indent < ctx->code_indent_offset  &&
   6302            md_is_container_mark(ctx, line->indent, off, &off, &container))
   6303         {
   6304             if(pivot_line->type == MD_LINE_TEXT  &&  n_parents == ctx->n_containers  &&
   6305                         (off >= ctx->size || ISNEWLINE(off))  &&  container.ch != _T('>'))
   6306             {
   6307                 /* Noop. List mark followed by a blank line cannot interrupt a paragraph. */
   6308             } else if(pivot_line->type == MD_LINE_TEXT  &&  n_parents == ctx->n_containers  &&
   6309                         ISANYOF2_(container.ch, _T('.'), _T(')'))  &&  container.start != 1)
   6310             {
   6311                 /* Noop. Ordered list cannot interrupt a paragraph unless the start index is 1. */
   6312             } else {
   6313                 total_indent += container.contents_indent - container.mark_indent;
   6314                 line->indent = md_line_indentation(ctx, total_indent, off, &off);
   6315                 total_indent += line->indent;
   6316 
   6317                 line->beg = off;
   6318                 line->data = container.ch;
   6319 
   6320                 /* Some of the following whitespace actually still belongs to the mark. */
   6321                 if(off >= ctx->size || ISNEWLINE(off)) {
   6322                     container.contents_indent++;
   6323                 } else if(line->indent <= ctx->code_indent_offset) {
   6324                     container.contents_indent += line->indent;
   6325                     line->indent = 0;
   6326                 } else {
   6327                     container.contents_indent += 1;
   6328                     line->indent--;
   6329                 }
   6330 
   6331                 if(n_brothers + n_children == 0)
   6332                     pivot_line = &md_dummy_blank_line;
   6333 
   6334                 if(n_children == 0)
   6335                     MD_CHECK(md_leave_child_containers(ctx, n_parents + n_brothers));
   6336 
   6337                 n_children++;
   6338                 MD_CHECK(md_push_container(ctx, &container));
   6339                 continue;
   6340             }
   6341         }
   6342 
   6343         /* Check whether we are table continuation. */
   6344         if(pivot_line->type == MD_LINE_TABLE  &&  n_parents == ctx->n_containers) {
   6345             line->type = MD_LINE_TABLE;
   6346             break;
   6347         }
   6348 
   6349         /* Check for ATX header. */
   6350         if(line->indent < ctx->code_indent_offset  &&
   6351                 off < ctx->size  &&  CH(off) == _T('#'))
   6352         {
   6353             unsigned level;
   6354 
   6355             if(md_is_atxheader_line(ctx, off, &line->beg, &off, &level)) {
   6356                 line->type = MD_LINE_ATXHEADER;
   6357                 line->data = level;
   6358                 break;
   6359             }
   6360         }
   6361 
   6362         /* Check whether we are starting code fence. */
   6363         if(off < ctx->size  &&  ISANYOF2(off, _T('`'), _T('~'))) {
   6364             if(md_is_opening_code_fence(ctx, off, &off)) {
   6365                 line->type = MD_LINE_FENCEDCODE;
   6366                 line->data = 1;
   6367                 break;
   6368             }
   6369         }
   6370 
   6371         /* Check for start of raw HTML block. */
   6372         if(off < ctx->size  &&  CH(off) == _T('<')
   6373             &&  !(ctx->parser.flags & MD_FLAG_NOHTMLBLOCKS))
   6374         {
   6375             ctx->html_block_type = md_is_html_block_start_condition(ctx, off);
   6376 
   6377             /* HTML block type 7 cannot interrupt paragraph. */
   6378             if(ctx->html_block_type == 7  &&  pivot_line->type == MD_LINE_TEXT)
   6379                 ctx->html_block_type = 0;
   6380 
   6381             if(ctx->html_block_type > 0) {
   6382                 /* The line itself also may immediately close the block. */
   6383                 if(md_is_html_block_end_condition(ctx, off, &off) == ctx->html_block_type) {
   6384                     /* Make sure this is the last line of the block. */
   6385                     ctx->html_block_type = 0;
   6386                 }
   6387 
   6388                 line->type = MD_LINE_HTML;
   6389                 break;
   6390             }
   6391         }
   6392 
   6393         /* Check for table underline. */
   6394         if((ctx->parser.flags & MD_FLAG_TABLES)  &&  pivot_line->type == MD_LINE_TEXT
   6395             &&  off < ctx->size  &&  ISANYOF3(off, _T('|'), _T('-'), _T(':'))
   6396             &&  n_parents == ctx->n_containers)
   6397         {
   6398             unsigned col_count;
   6399 
   6400             if(ctx->current_block != NULL  &&  ctx->current_block->n_lines == 1  &&
   6401                 md_is_table_underline(ctx, off, &off, &col_count))
   6402             {
   6403                 line->data = col_count;
   6404                 line->type = MD_LINE_TABLEUNDERLINE;
   6405                 break;
   6406             }
   6407         }
   6408 
   6409         /* By default, we are normal text line. */
   6410         line->type = MD_LINE_TEXT;
   6411         if(pivot_line->type == MD_LINE_TEXT  &&  n_brothers + n_children == 0) {
   6412             /* Lazy continuation. */
   6413             n_parents = ctx->n_containers;
   6414         }
   6415 
   6416         /* Check for task mark. */
   6417         if((ctx->parser.flags & MD_FLAG_TASKLISTS)  &&  n_brothers + n_children > 0  &&
   6418            ISANYOF_(ctx->containers[ctx->n_containers-1].ch, _T("-+*.)")))
   6419         {
   6420             OFF tmp = off;
   6421 
   6422             while(tmp < ctx->size  &&  tmp < off + 3  &&  ISBLANK(tmp))
   6423                 tmp++;
   6424             if(tmp + 2 < ctx->size  &&  CH(tmp) == _T('[')  &&
   6425                ISANYOF(tmp+1, _T("xX "))  &&  CH(tmp+2) == _T(']')  &&
   6426                (tmp + 3 == ctx->size  ||  ISBLANK(tmp+3)  ||  ISNEWLINE(tmp+3)))
   6427             {
   6428                 MD_CONTAINER* task_container = (n_children > 0 ? &ctx->containers[ctx->n_containers-1] : &container);
   6429                 task_container->is_task = TRUE;
   6430                 task_container->task_mark_off = tmp + 1;
   6431                 off = tmp + 3;
   6432                 while(off < ctx->size && ISWHITESPACE(off))
   6433                     off++;
   6434                 if (off == ctx->size) break;
   6435                 line->beg = off;
   6436             }
   6437         }
   6438 
   6439         break;
   6440     }
   6441 
   6442     /* Scan for end of the line.
   6443      *
   6444      * Note this is quite a bottleneck of the parsing as we here iterate almost
   6445      * over compete document.
   6446      */
   6447 #if defined __linux__ && !defined MD4C_USE_UTF16
   6448     /* Recent glibc versions have superbly optimized strcspn(), even using
   6449      * vectorization if available. */
   6450     if(ctx->doc_ends_with_newline  &&  off < ctx->size) {
   6451         while(TRUE) {
   6452             off += (OFF) strcspn(STR(off), "\r\n");
   6453 
   6454             /* strcspn() can stop on zero terminator; but that can appear
   6455              * anywhere in the Markfown input... */
   6456             if(CH(off) == _T('\0'))
   6457                 off++;
   6458             else
   6459                 break;
   6460         }
   6461     } else
   6462 #endif
   6463     {
   6464         /* Optimization: Use some loop unrolling. */
   6465         while(off + 3 < ctx->size  &&  !ISNEWLINE(off+0)  &&  !ISNEWLINE(off+1)
   6466                                    &&  !ISNEWLINE(off+2)  &&  !ISNEWLINE(off+3))
   6467             off += 4;
   6468         while(off < ctx->size  &&  !ISNEWLINE(off))
   6469             off++;
   6470     }
   6471 
   6472     /* Set end of the line. */
   6473     line->end = off;
   6474 
   6475     /* But for ATX header, we should exclude the optional trailing mark. */
   6476     if(line->type == MD_LINE_ATXHEADER) {
   6477         OFF tmp = line->end;
   6478         while(tmp > line->beg && CH(tmp-1) == _T(' '))
   6479             tmp--;
   6480         while(tmp > line->beg && CH(tmp-1) == _T('#'))
   6481             tmp--;
   6482         if(tmp == line->beg || CH(tmp-1) == _T(' ') || (ctx->parser.flags & MD_FLAG_PERMISSIVEATXHEADERS))
   6483             line->end = tmp;
   6484     }
   6485 
   6486     /* Trim trailing spaces. */
   6487     if(line->type != MD_LINE_INDENTEDCODE  &&  line->type != MD_LINE_FENCEDCODE) {
   6488         while(line->end > line->beg && CH(line->end-1) == _T(' '))
   6489             line->end--;
   6490     }
   6491 
   6492     /* Eat also the new line. */
   6493     if(off < ctx->size && CH(off) == _T('\r'))
   6494         off++;
   6495     if(off < ctx->size && CH(off) == _T('\n'))
   6496         off++;
   6497 
   6498     *p_end = off;
   6499 
   6500     /* If we belong to a list after seeing a blank line, the list is loose. */
   6501     if(prev_line_has_list_loosening_effect  &&  line->type != MD_LINE_BLANK  &&  n_parents + n_brothers > 0) {
   6502         MD_CONTAINER* c = &ctx->containers[n_parents + n_brothers - 1];
   6503         if(c->ch != _T('>')) {
   6504             MD_BLOCK* block = (MD_BLOCK*) (((char*)ctx->block_bytes) + c->block_byte_off);
   6505             block->flags |= MD_BLOCK_LOOSE_LIST;
   6506         }
   6507     }
   6508 
   6509     /* Leave any containers we are not part of anymore. */
   6510     if(n_children == 0  &&  n_parents + n_brothers < ctx->n_containers)
   6511         MD_CHECK(md_leave_child_containers(ctx, n_parents + n_brothers));
   6512 
   6513     /* Enter any container we found a mark for. */
   6514     if(n_brothers > 0) {
   6515         MD_ASSERT(n_brothers == 1);
   6516         MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_LI,
   6517                     ctx->containers[n_parents].task_mark_off,
   6518                     (ctx->containers[n_parents].is_task ? CH(ctx->containers[n_parents].task_mark_off) : 0),
   6519                     MD_BLOCK_CONTAINER_CLOSER));
   6520         MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_LI,
   6521                     container.task_mark_off,
   6522                     (container.is_task ? CH(container.task_mark_off) : 0),
   6523                     MD_BLOCK_CONTAINER_OPENER));
   6524         ctx->containers[n_parents].is_task = container.is_task;
   6525         ctx->containers[n_parents].task_mark_off = container.task_mark_off;
   6526     }
   6527 
   6528     if(n_children > 0)
   6529         MD_CHECK(md_enter_child_containers(ctx, n_children));
   6530 
   6531 abort:
   6532     return ret;
   6533 }
   6534 
   6535 static int
   6536 md_process_line(MD_CTX* ctx, const MD_LINE_ANALYSIS** p_pivot_line, MD_LINE_ANALYSIS* line)
   6537 {
   6538     const MD_LINE_ANALYSIS* pivot_line = *p_pivot_line;
   6539     int ret = 0;
   6540 
   6541     /* Blank line ends current leaf block. */
   6542     if(line->type == MD_LINE_BLANK) {
   6543         MD_CHECK(md_end_current_block(ctx));
   6544         *p_pivot_line = &md_dummy_blank_line;
   6545         return 0;
   6546     }
   6547 
   6548     /* Some line types form block on their own. */
   6549     if(line->type == MD_LINE_HR || line->type == MD_LINE_ATXHEADER) {
   6550         MD_CHECK(md_end_current_block(ctx));
   6551 
   6552         /* Add our single-line block. */
   6553         MD_CHECK(md_start_new_block(ctx, line));
   6554         MD_CHECK(md_add_line_into_current_block(ctx, line));
   6555         MD_CHECK(md_end_current_block(ctx));
   6556         *p_pivot_line = &md_dummy_blank_line;
   6557         return 0;
   6558     }
   6559 
   6560     /* MD_LINE_SETEXTUNDERLINE changes meaning of the current block and ends it. */
   6561     if(line->type == MD_LINE_SETEXTUNDERLINE) {
   6562         MD_ASSERT(ctx->current_block != NULL);
   6563         ctx->current_block->type = MD_BLOCK_H;
   6564         ctx->current_block->data = line->data;
   6565         ctx->current_block->flags |= MD_BLOCK_SETEXT_HEADER;
   6566         MD_CHECK(md_add_line_into_current_block(ctx, line));
   6567         MD_CHECK(md_end_current_block(ctx));
   6568         if(ctx->current_block == NULL) {
   6569             *p_pivot_line = &md_dummy_blank_line;
   6570         } else {
   6571             /* This happens if we have consumed all the body as link ref. defs.
   6572              * and downgraded the underline into start of a new paragraph block. */
   6573             line->type = MD_LINE_TEXT;
   6574             *p_pivot_line = line;
   6575         }
   6576         return 0;
   6577     }
   6578 
   6579     /* MD_LINE_TABLEUNDERLINE changes meaning of the current block. */
   6580     if(line->type == MD_LINE_TABLEUNDERLINE) {
   6581         MD_ASSERT(ctx->current_block != NULL);
   6582         MD_ASSERT(ctx->current_block->n_lines == 1);
   6583         ctx->current_block->type = MD_BLOCK_TABLE;
   6584         ctx->current_block->data = line->data;
   6585         MD_ASSERT(pivot_line != &md_dummy_blank_line);
   6586         ((MD_LINE_ANALYSIS*)pivot_line)->type = MD_LINE_TABLE;
   6587         MD_CHECK(md_add_line_into_current_block(ctx, line));
   6588         return 0;
   6589     }
   6590 
   6591     /* The current block also ends if the line has different type. */
   6592     if(line->type != pivot_line->type)
   6593         MD_CHECK(md_end_current_block(ctx));
   6594 
   6595     /* The current line may start a new block. */
   6596     if(ctx->current_block == NULL) {
   6597         MD_CHECK(md_start_new_block(ctx, line));
   6598         *p_pivot_line = line;
   6599     }
   6600 
   6601     /* In all other cases the line is just a continuation of the current block. */
   6602     MD_CHECK(md_add_line_into_current_block(ctx, line));
   6603 
   6604 abort:
   6605     return ret;
   6606 }
   6607 
   6608 static int
   6609 md_process_doc(MD_CTX *ctx)
   6610 {
   6611     const MD_LINE_ANALYSIS* pivot_line = &md_dummy_blank_line;
   6612     MD_LINE_ANALYSIS line_buf[2];
   6613     MD_LINE_ANALYSIS* line = &line_buf[0];
   6614     OFF off = 0;
   6615     int ret = 0;
   6616 
   6617     MD_ENTER_BLOCK(MD_BLOCK_DOC, NULL);
   6618 
   6619     while(off < ctx->size) {
   6620         if(line == pivot_line)
   6621             line = (line == &line_buf[0] ? &line_buf[1] : &line_buf[0]);
   6622 
   6623         MD_CHECK(md_analyze_line(ctx, off, &off, pivot_line, line));
   6624         MD_CHECK(md_process_line(ctx, &pivot_line, line));
   6625     }
   6626 
   6627     md_end_current_block(ctx);
   6628 
   6629     MD_CHECK(md_build_ref_def_hashtable(ctx));
   6630 
   6631     /* Process all blocks. */
   6632     MD_CHECK(md_leave_child_containers(ctx, 0));
   6633     MD_CHECK(md_process_all_blocks(ctx));
   6634 
   6635     MD_LEAVE_BLOCK(MD_BLOCK_DOC, NULL);
   6636 
   6637 abort:
   6638 
   6639 #if 0
   6640     /* Output some memory consumption statistics. */
   6641     {
   6642         char buffer[256];
   6643         sprintf(buffer, "Alloced %u bytes for block buffer.",
   6644                     (unsigned)(ctx->alloc_block_bytes));
   6645         MD_LOG(buffer);
   6646 
   6647         sprintf(buffer, "Alloced %u bytes for containers buffer.",
   6648                     (unsigned)(ctx->alloc_containers * sizeof(MD_CONTAINER)));
   6649         MD_LOG(buffer);
   6650 
   6651         sprintf(buffer, "Alloced %u bytes for marks buffer.",
   6652                     (unsigned)(ctx->alloc_marks * sizeof(MD_MARK)));
   6653         MD_LOG(buffer);
   6654 
   6655         sprintf(buffer, "Alloced %u bytes for aux. buffer.",
   6656                     (unsigned)(ctx->alloc_buffer * sizeof(MD_CHAR)));
   6657         MD_LOG(buffer);
   6658     }
   6659 #endif
   6660 
   6661     return ret;
   6662 }
   6663 
   6664 
   6665 /********************
   6666  ***  Public API  ***
   6667  ********************/
   6668 
   6669 int
   6670 md_parse(const MD_CHAR* text, MD_SIZE size, const MD_PARSER* parser, void* userdata)
   6671 {
   6672     MD_CTX ctx = {.text = text,
   6673                   .size = size,
   6674                   .userdata = userdata,
   6675                   .code_indent_offset = (ctx.parser.flags & MD_FLAG_NOINDENTEDCODEBLOCKS) ? (OFF)(-1) : 4,
   6676                   .doc_ends_with_newline = (size > 0  &&  ISNEWLINE_(text[size-1]))};
   6677     int i;
   6678     int ret;
   6679 
   6680     if(parser->abi_version != 0) {
   6681         if(parser->debug_log != NULL)
   6682             parser->debug_log("Unsupported abi_version.", userdata);
   6683         return -1;
   6684     }
   6685 
   6686     /* Setup context structure. */
   6687     memcpy(&ctx.parser, parser, sizeof(MD_PARSER));
   6688     md_build_mark_char_map(&ctx);
   6689 
   6690     /* Reset all unresolved opener mark chains. */
   6691     for(i = 0; i < (int) SIZEOF_ARRAY(ctx.mark_chains); i++) {
   6692         ctx.mark_chains[i].head = -1;
   6693         ctx.mark_chains[i].tail = -1;
   6694     }
   6695     ctx.unresolved_link_head = -1;
   6696     ctx.unresolved_link_tail = -1;
   6697 
   6698     /* All the work. */
   6699     ret = md_process_doc(&ctx);
   6700 
   6701     /* Clean-up. */
   6702     md_free_ref_defs(&ctx);
   6703     md_free_ref_def_hashtable(&ctx);
   6704     free(ctx.buffer);
   6705     free(ctx.marks);
   6706     free(ctx.block_bytes);
   6707     free(ctx.containers);
   6708 
   6709     return ret;
   6710 }