md4c

C Markdown parser. Fast. SAX-like interface. Compliant with the CommonMark specification.
git clone https://noulin.net/git/md4c.git
Log | Files | Refs | README | LICENSE

md2html.c (11657B)


      1 /*
      2  * MD4C: Markdown parser for C
      3  * (http://github.com/mity/md4c)
      4  *
      5  * Copyright (c) 2016-2017 Martin Mitas
      6  *
      7  * Permission is hereby granted, free of charge, to any person obtaining a
      8  * copy of this software and associated documentation files (the "Software"),
      9  * to deal in the Software without restriction, including without limitation
     10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
     11  * and/or sell copies of the Software, and to permit persons to whom the
     12  * Software is furnished to do so, subject to the following conditions:
     13  *
     14  * The above copyright notice and this permission notice shall be included in
     15  * all copies or substantial portions of the Software.
     16  *
     17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
     18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
     20  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     22  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     23  * IN THE SOFTWARE.
     24  */
     25 
     26 #include <stdio.h>
     27 #include <stdlib.h>
     28 #include <string.h>
     29 #include <time.h>
     30 
     31 #include "render_html.h"
     32 #include "cmdline.h"
     33 
     34 
     35 
/* Global options.
 *
 * These are filled in by cmdline_callback() while the command line is being
 * processed and are read afterwards by process_file().
 */
/* Flags handed to md_render_html() as its parser flags argument. */
static unsigned parser_flags = 0;
/* Flags handed to md_render_html() as its renderer flags argument. */
static unsigned renderer_flags = MD_RENDER_FLAG_DEBUG;
/* Non-zero when the output shall be wrapped into <html>/<head>/<body>. */
static int want_fullhtml = 0;
/* Non-zero when the time spent in md_render_html() shall be reported. */
static int want_stat = 0;
     41 
     42 
     43 /*********************************
     44  ***  Simple grow-able buffer  ***
     45  *********************************/
     46 
     47 /* We render to a memory buffer instead of directly outputting the rendered
     48  * documents, as this allows using this utility for evaluating performance
     49  * of MD4C (--stat option). This allows us to measure just time of the parser,
     50  * without the I/O.
     51  */
     52 
/* Grow-able byte buffer used for both the input document and the rendered
 * output. */
struct membuffer {
    char* data;     /* Heap storage obtained via malloc()/realloc(). */
    MD_SIZE asize;  /* Number of bytes allocated for data. */
    MD_SIZE size;   /* Number of bytes of data currently in use. */
};
     58 
     59 static void
     60 membuf_init(struct membuffer* buf, MD_SIZE new_asize)
     61 {
     62     buf->size = 0;
     63     buf->asize = new_asize;
     64     buf->data = malloc(buf->asize);
     65     if(buf->data == NULL) {
     66         fprintf(stderr, "membuf_init: malloc() failed.");
     67         exit(1);
     68     }
     69 }
     70 
     71 static void
     72 membuf_fini(struct membuffer* buf)
     73 {
     74     if(buf->data)
     75         free(buf->data);
     76 }
     77 
     78 static void
     79 membuf_grow(struct membuffer* buf, MD_SIZE new_asize)
     80 {
     81     buf->data = realloc(buf->data, new_asize);
     82     if(buf->data == NULL) {
     83         fprintf(stderr, "membuf_grow: realloc() failed.");
     84         exit(1);
     85     }
     86     buf->asize = new_asize;
     87 }
     88 
     89 static void
     90 membuf_append(struct membuffer* buf, const char* data, MD_SIZE size)
     91 {
     92     if(buf->asize < buf->size + size)
     93         membuf_grow(buf, (buf->size + size) * 2);
     94     memcpy(buf->data + buf->size, data, size);
     95     buf->size += size;
     96 }
     97 
     98 
     99 /**********************
    100  ***  Main program  ***
    101  **********************/
    102 
    103 static void
    104 process_output(const MD_CHAR* text, MD_SIZE size, void* userdata)
    105 {
    106     membuf_append((struct membuffer*) userdata, text, size);
    107 }
    108 
    109 static int
    110 process_file(FILE* in, FILE* out)
    111 {
    112     MD_SIZE n;
    113     struct membuffer buf_in = {0};
    114     struct membuffer buf_out = {0};
    115     int ret = -1;
    116     clock_t t0, t1;
    117 
    118     membuf_init(&buf_in, 32 * 1024);
    119 
    120     /* Read the input file into a buffer. */
    121     while(1) {
    122         if(buf_in.size >= buf_in.asize)
    123             membuf_grow(&buf_in, 2 * buf_in.asize);
    124 
    125         n = fread(buf_in.data + buf_in.size, 1, buf_in.asize - buf_in.size, in);
    126         if(n == 0)
    127             break;
    128         buf_in.size += n;
    129     }
    130 
    131     /* Input size is good estimation of output size. Add some more reserve to
    132      * deal with the HTML header/footer and tags. */
    133     membuf_init(&buf_out, buf_in.size + buf_in.size/8 + 64);
    134 
    135     /* Parse the document. This shall call our callbacks provided via the
    136      * md_renderer_t structure. */
    137     t0 = clock();
    138 
    139     ret = md_render_html(buf_in.data, buf_in.size, process_output,
    140                 (void*) &buf_out, parser_flags, renderer_flags);
    141 
    142     t1 = clock();
    143     if(ret != 0) {
    144         fprintf(stderr, "Parsing failed.\n");
    145         goto out;
    146     }
    147 
    148     /* Write down the document in the HTML format. */
    149     if(want_fullhtml) {
    150         fprintf(out, "<html>\n");
    151         fprintf(out, "<head>\n");
    152         fprintf(out, "<title></title>\n");
    153         fprintf(out, "<meta name=\"generator\" content=\"md2html\">\n");
    154         fprintf(out, "</head>\n");
    155         fprintf(out, "<body>\n");
    156     }
    157 
    158     fwrite(buf_out.data, 1, buf_out.size, out);
    159 
    160     if(want_fullhtml) {
    161         fprintf(out, "</body>\n");
    162         fprintf(out, "</html>\n");
    163     }
    164 
    165     if(want_stat) {
    166         if(t0 != (clock_t)-1  &&  t1 != (clock_t)-1) {
    167             double elapsed = (double)(t1 - t0) / CLOCKS_PER_SEC;
    168             if (elapsed < 1)
    169                 fprintf(stderr, "Time spent on parsing: %7.2f ms.\n", elapsed*1e3);
    170             else
    171                 fprintf(stderr, "Time spent on parsing: %6.3f s.\n", elapsed);
    172         }
    173     }
    174 
    175     /* Success if we have reached here. */
    176     ret = 0;
    177 
    178 out:
    179     membuf_fini(&buf_in);
    180     membuf_fini(&buf_out);
    181 
    182     return ret;
    183 }
    184 
    185 
/* Values used in the last column of the cmdline_options table below. */
#define OPTION_ARG_NONE         0
#define OPTION_ARG_REQUIRED     1
#define OPTION_ARG_OPTIONAL     2

/* Table of recognized command line options, passed to readoptions() in
 * main().
 *
 * NOTE(review): the columns look like { long name, short name (0 if none),
 * id delivered to cmdline_callback(), argument kind } -- the third column
 * matches the case labels in cmdline_callback(); confirm the exact layout
 * against the definition of `option` in cmdline.h.
 *
 * The table is terminated by an all-zero entry.
 */
static const option cmdline_options[] = {
    { "output",                     'o', 'o', OPTION_ARG_REQUIRED },
    { "full-html",                  'f', 'f', OPTION_ARG_NONE },
    { "stat",                       's', 's', OPTION_ARG_NONE },
    { "help",                       'h', 'h', OPTION_ARG_NONE },
    { "version",                    'v', 'v', OPTION_ARG_NONE },
    { "commonmark",                  0,  'c', OPTION_ARG_NONE },
    { "github",                      0,  'g', OPTION_ARG_NONE },
    { "fverbatim-entities",          0,  'E', OPTION_ARG_NONE },
    { "fpermissive-atx-headers",     0,  'A', OPTION_ARG_NONE },
    { "fpermissive-url-autolinks",   0,  'U', OPTION_ARG_NONE },
    { "fpermissive-www-autolinks",   0,  '.', OPTION_ARG_NONE },
    { "fpermissive-email-autolinks", 0,  '@', OPTION_ARG_NONE },
    { "fpermissive-autolinks",       0,  'V', OPTION_ARG_NONE },
    { "fno-indented-code",           0,  'I', OPTION_ARG_NONE },
    { "fno-html-blocks",             0,  'F', OPTION_ARG_NONE },
    { "fno-html-spans",              0,  'G', OPTION_ARG_NONE },
    { "fno-html",                    0,  'H', OPTION_ARG_NONE },
    { "fcollapse-whitespace",        0,  'W', OPTION_ARG_NONE },
    { "ftables",                     0,  'T', OPTION_ARG_NONE },
    { "fstrikethrough",              0,  'S', OPTION_ARG_NONE },
    { 0 }
};
    213 
    214 static void
    215 usage(void)
    216 {
    217     printf(
    218         "Usage: md2html [OPTION]... [FILE]\n"
    219         "Convert input FILE (or standard input) in Markdown format to HTML.\n"
    220         "\n"
    221         "General options:\n"
    222         "  -o  --output=FILE    Output file (default is standard output)\n"
    223         "  -f, --full-html      Generate full HTML document, including header\n"
    224         "  -s, --stat           Measure time of input parsing\n"
    225         "  -h, --help           Display this help and exit\n"
    226         "  -v, --version        Display version and exit\n"
    227         "\n"
    228         "Markdown dialect options:\n"
    229         "(note these are equivalent to some combinations of flags below)\n"
    230         "      --commonmark     CommonMark (this is default)\n"
    231         "      --github         Github Flavored Markdown\n"
    232         "\n"
    233         "Markdown extension options:\n"
    234         "      --fcollapse-whitespace\n"
    235         "                       Collapse non-trivial whitespace\n"
    236         "      --fverbatim-entities\n"
    237         "                       Do not translate entities\n"
    238         "      --fpermissive-atx-headers\n"
    239         "                       Allow ATX headers without delimiting space\n"
    240         "      --fpermissive-url-autolinks\n"
    241         "                       Allow URL autolinks without '<', '>'\n"
    242         "      --fpermissive-www-autolinks\n"
    243         "                       Allow WWW autolinks without any scheme (e.g. 'www.example.com')\n"
    244         "      --fpermissive-email-autolinks  \n"
    245         "                       Allow e-mail autolinks without '<', '>' and 'mailto:'\n"
    246         "      --fpermissive-autolinks\n"
    247         "                       Same as --fpermissive-url-autolinks --fpermissive-www-autolinks\n"
    248         "                       --fpermissive-email-autolinks\n"
    249         "      --fno-indented-code\n"
    250         "                       Disable indented code blocks\n"
    251         "      --fno-html-blocks\n"
    252         "                       Disable raw HTML blocks\n"
    253         "      --fno-html-spans\n"
    254         "                       Disable raw HTML spans\n"
    255         "      --fno-html       Same as --fno-html-blocks --fno-html-spans\n"
    256         "      --ftables        Enable tables\n"
    257         "      --fstrikethrough Enable strikethrough spans\n"
    258     );
    259 }
    260 
    261 static void
    262 version(void)
    263 {
    264     printf("%d.%d.%d\n", MD_VERSION_MAJOR, MD_VERSION_MINOR, MD_VERSION_RELEASE);
    265 }
    266 
/* Paths chosen on the command line; set by cmdline_callback().
 * NULL means the option was not given, in which case main() uses the
 * standard stream. main() also treats the path "-" as the standard stream. */
static const char* input_path = NULL;
static const char* output_path = NULL;
    269 
    270 static int
    271 cmdline_callback(int opt, char const* value, void* data)
    272 {
    273     switch(opt) {
    274         case 0:
    275             if(input_path) {
    276                 fprintf(stderr, "Too many arguments. Only one input file can be specified.\n");
    277                 fprintf(stderr, "Use --help for more info.\n");
    278                 exit(1);
    279             }
    280             input_path = value;
    281             break;
    282 
    283         case 'o':   output_path = value; break;
    284         case 'f':   want_fullhtml = 1; break;
    285         case 's':   want_stat = 1; break;
    286         case 'h':   usage(); exit(0); break;
    287         case 'v':   version(); exit(0); break;
    288 
    289         case 'c':   parser_flags = MD_DIALECT_COMMONMARK; break;
    290         case 'g':   parser_flags = MD_DIALECT_GITHUB; break;
    291 
    292         case 'E':   renderer_flags |= MD_RENDER_FLAG_VERBATIM_ENTITIES; break;
    293         case 'A':   parser_flags |= MD_FLAG_PERMISSIVEATXHEADERS; break;
    294         case 'I':   parser_flags |= MD_FLAG_NOINDENTEDCODEBLOCKS; break;
    295         case 'F':   parser_flags |= MD_FLAG_NOHTMLBLOCKS; break;
    296         case 'G':   parser_flags |= MD_FLAG_NOHTMLSPANS; break;
    297         case 'H':   parser_flags |= MD_FLAG_NOHTML; break;
    298         case 'W':   parser_flags |= MD_FLAG_COLLAPSEWHITESPACE; break;
    299         case 'U':   parser_flags |= MD_FLAG_PERMISSIVEURLAUTOLINKS; break;
    300         case '.':   parser_flags |= MD_FLAG_PERMISSIVEWWWAUTOLINKS; break;
    301         case '@':   parser_flags |= MD_FLAG_PERMISSIVEEMAILAUTOLINKS; break;
    302         case 'V':   parser_flags |= MD_FLAG_PERMISSIVEAUTOLINKS; break;
    303         case 'T':   parser_flags |= MD_FLAG_TABLES; break;
    304         case 'S':   parser_flags |= MD_FLAG_STRIKETHROUGH; break;
    305 
    306         default:
    307             fprintf(stderr, "Illegal option: %s\n", value);
    308             fprintf(stderr, "Use --help for more info.\n");
    309             exit(1);
    310             break;
    311     }
    312 
    313     return 0;
    314 }
    315 
    316 int
    317 main(int argc, char** argv)
    318 {
    319     FILE* in = stdin;
    320     FILE* out = stdout;
    321     int ret = 0;
    322 
    323     if(readoptions(cmdline_options, argc, argv, cmdline_callback, NULL) < 0) {
    324         usage();
    325         exit(1);
    326     }
    327 
    328     if(input_path != NULL && strcmp(input_path, "-") != 0) {
    329         in = fopen(input_path, "rb");
    330         if(in == NULL) {
    331             fprintf(stderr, "Cannot open %s.\n", input_path);
    332             exit(1);
    333         }
    334     }
    335     if(output_path != NULL && strcmp(output_path, "-") != 0) {
    336         out = fopen(output_path, "wt");
    337         if(out == NULL) {
    338             fprintf(stderr, "Cannot open %s.\n", input_path);
    339             exit(1);
    340         }
    341     }
    342 
    343     ret = process_file(in, out);
    344     if(in != stdin)
    345         fclose(in);
    346     if(out != stdout)
    347         fclose(out);
    348 
    349     return ret;
    350 }