commit efed58af8eeb417e9251e3fc8ad260c4ce246614
Author: Martin Mitas <mity@morous.org>
Date: Mon, 3 Oct 2016 20:17:15 +0200
Initial commit.
Diffstat:
| A | CMakeLists.txt | | | 32 | ++++++++++++++++++++++++++++++++ |
| A | LICENSE.md | | | 25 | +++++++++++++++++++++++++ |
| A | README.md | | | 155 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | md2html/CMakeLists.txt | | | 5 | +++++ |
| A | md2html/cmdline.c | | | 296 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | md2html/cmdline.h | | | 86 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | md2html/md2html.c | | | 367 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | md4c/CMakeLists.txt | | | 2 | ++ |
| A | md4c/md4c.c | | | 406 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | md4c/md4c.h | | | 136 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
10 files changed, 1510 insertions(+), 0 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -0,0 +1,32 @@
+
+cmake_minimum_required(VERSION 2.8)
+project(MD4C C)
+
+
+# Configurations offered to the user.
+set(CMAKE_CONFIGURATION_TYPES Debug Release RelWithDebInfo MinSizeRel)
+
+# When no build type was given explicitly, fall back to the CMAKE_BUILD_TYPE
+# environment variable (evaluated at configure time only).
+if("${CMAKE_BUILD_TYPE}" STREQUAL "")
+    set(CMAKE_BUILD_TYPE "$ENV{CMAKE_BUILD_TYPE}")
+endif()
+
+
+if(CMAKE_COMPILER_IS_GNUCC)
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall")
+
+    # By default, CMake uses -O3 for Release builds. Lets stick with safer -O2:
+    string(REGEX REPLACE "(^| )-O[0-9a-z]+" "" CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE}")
+    set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -O2")
+elseif(MSVC)
+    # Disable warnings about the so-called unsecured functions:
+    add_definitions(/D_CRT_SECURE_NO_WARNINGS)
+
+    # Specify proper C runtime library (static CRT). Every configuration
+    # extends its OWN flag variable: deriving RelWithDebInfo/MinSizeRel from
+    # the Release flags would throw away their own optimization and debug
+    # settings and would add /MT twice.
+    set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /MTd")
+    set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /MT")
+    set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} /MT")
+    set(CMAKE_C_FLAGS_MINSIZEREL "${CMAKE_C_FLAGS_MINSIZEREL} /MT")
+endif()
+
+
+# Place the built executables directly into the top-level build directory.
+set(EXECUTABLE_OUTPUT_PATH "${PROJECT_BINARY_DIR}")
+add_subdirectory(md4c)
+add_subdirectory(md2html)
diff --git a/LICENSE.md b/LICENSE.md
@@ -0,0 +1,25 @@
+
+MD4C is licensed under the MIT License.
+
+> Copyright (c) 2016: Martin Mitáš and other contributors:
+>
+> https://github.com/mity/md4c/contributors
+>
+> Permission is hereby granted, free of charge, to any person obtaining
+> a copy of this software and associated documentation files (the
+> "Software"), to deal in the Software without restriction, including
+> without limitation the rights to use, copy, modify, merge, publish,
+> distribute, sublicense, and/or sell copies of the Software, and to
+> permit persons to whom the Software is furnished to do so, subject to
+> the following conditions:
+>
+> The above copyright notice and this permission notice shall be
+> included in all copies or substantial portions of the Software.
+>
+> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+> EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+> MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+> NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+> LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+> OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+> WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/README.md b/README.md
@@ -0,0 +1,155 @@
+
+# MD4C Readme
+
+Home: http://github.com/mity/md4c
+
+
+**Warning:** This project is very young (read "immature") and work in progress.
+Most important features are not yet implemented. See the current status below.
+And there may be bugs.
+
+
+## What is Markdown
+
+In short, Markdown is the markup language this `README.md` file is written in.
+
+The following resources can explain more if you are unfamiliar with it:
+* [Wikipedia article](http://en.wikipedia.org/wiki/Markdown)
+* [CommonMark site](http://commonmark.org)
+
+
+## What is MD4C
+
+MD4C stands for "MarkDown for C" and, unsurprisingly, it is a C Markdown parser
+implementation.
+
+Main features:
+* **Compactness:** MD4C is implemented in one source file and one header file.
+* **Flexibility:** Flags allow tuning the desired dialect of the Markdown
+ parser.
+* **Encoding agnosticism:** As much as possible, MD4C by design does not care
+ about input text encoding, relying only on the Markdown control characters
+ being ASCII compatible. (The actual text data are propagated back to the
+ caller in the same encoding unchanged.)
+* **UTF-16LE support:** On Windows, MD4C may be built to consume (and produce)
+ wide strings (`WCHAR*` instead of `char*`).
+* **Easily embeddable:** MD4C depends only on a few functions of the C standard
+ library.
+* **Portability:** MD4C builds and works on Windows and Linux, and it should
+ be fairly trivial to build it also on other systems.
+* **Permissive license:** MD4C is available under the MIT license.
+
+
+## Using MD4C
+
+The parser is implemented in a single C source file `md4c.c` and its
+accompanying header `md4c.h`.
+
+The main provided function is `md_parse()`. It takes a text in Markdown syntax
+as an input and a renderer structure which holds pointers to a few callback
+functions. As `md_parse()` eats the input, it calls appropriate callbacks
+allowing application to convert it into another format or render it onto
+the screen.
+
+Refer to the header file for more details; the API is mostly self-explanatory
+and there are some explanatory comments.
+
+An example implementation of a simple renderer is available in the `md2html`
+directory which implements a conversion utility from Markdown to HTML.
+
+
+## Current status ##
+
+### CommonMark Specification ###
+
+The goal is to be compliant with the latest version of the
+[CommonMark specification](http://spec.commonmark.org/).
+
+The list below corresponds to chapters of the specification version 0.26 and
+more or less forms our to do list.
+
+- **Preliminaries:**
+ - [ ] 2.1 Characters and lines
+ - [ ] 2.2 Tabs
+ - [ ] 2.3 Insecure characters
+
+- **Blocks and Inlines:**
+ - [ ] 3.1 Precedence
+ - [ ] 3.2 Container blocks and leaf blocks
+
+- **Leaf Blocks:**
+ - [ ] 4.1 Thematic breaks
+ - [ ] 4.2 ATX headings
+ - [ ] 4.3 Setext headings
+ - [ ] 4.4 Indented code blocks
+ - [ ] 4.5 Fenced code blocks
+ - [ ] 4.6 HTML blocks
+ - [ ] 4.7 Link reference definitions
+ - [x] 4.8 Paragraphs
+ - [x] 4.9 Blank lines
+
+- **Container Blocks:**
+ - [ ] 5.1 Block quotes
+ - [ ] 5.2 List items
+ - [ ] 5.3 Lists
+
+- **Inlines:**
+ - [ ] 6.1 Backslash escapes
+ - [ ] 6.2 Entity and numeric character references
+ - [ ] 6.3 Code spans
+ - [ ] 6.4 Emphasis and strong emphasis
+ - [ ] 6.5 Links
+ - [ ] 6.6 Images
+ - [ ] 6.7 Autolinks
+ - [ ] 6.8 Raw HTML
+ - [ ] 6.9 Hard line breaks
+ - [ ] 6.10 Soft line breaks
+ - [x] 6.11 Textual content
+
+
+### Considered Extensions ###
+
+Aside from CommonMark features, various Markdown implementations out there support
+various extensions and/or some deviations from the CommonMark specification
+which may be found desired or useful in some situations.
+
+Therefore some extensions or deviations from the CommonMark specification may
+be considered and implemented. However, such extensions and deviations from the
+standard shall be enabled only if explicitly enabled by the application.
+
+Default behavior shall stick to the CommonMark specification.
+
+The list below is an incomplete list of extensions I see as worthy of
+consideration.
+
+- **Block Extensions:**
+ - [ ] Tables
+ - [ ] Header anchors: `## Chapter {#anchor}`
+ (allowing fragment links pointing to it, e.g. `[link text](#anchor)`)
+
+- **Inline Extensions:**
+ - [ ] Underline: `__foo bar__`
+ - [ ] Strikethrough: `~~foo bar~~`
+ - [ ] Highlight: `==foo bar==`
+ - [ ] Quote: `"foo bar"`
+ - [ ] Superscript: `a^2^ + b^2^ = c^2^`
+ - [ ] Subscript: `matrix A~i,j~`
+
+- **Miscellaneous:**
+ - [ ] Permissive ATX headers: `###Header` (without space)
+ - [ ] Permissive autolinks: `http://google.com` (without `<`...`>`)
+ - [ ] Disabling indented code blocks
+ - [ ] Disabling raw HTML blocks/spans
+
+
+## License
+
+MD4C is covered by the MIT license; see the file `LICENSE.md`.
+
+
+## Reporting Bugs
+
+If you encounter any bug, please be so kind as to report it. Unheard bugs cannot
+get fixed. You can submit bug reports here:
+
+* http://github.com/mity/md4c/issues
diff --git a/md2html/CMakeLists.txt b/md2html/CMakeLists.txt
@@ -0,0 +1,5 @@
+
+# Let the md2html sources find md4c.h, which lives in the md4c directory.
+include_directories("${PROJECT_SOURCE_DIR}/md4c")
+
+# md2html: command-line Markdown-to-HTML converter; links the md4c library.
+add_executable(md2html cmdline.c cmdline.h md2html.c)
+target_link_libraries(md2html md4c)
diff --git a/md2html/cmdline.c b/md2html/cmdline.c
@@ -0,0 +1,296 @@
+/* cmdline.c: a reentrant version of getopt(). Written 2006 by Brian
+ * Raiter. This code is in the public domain.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include "cmdline.h"
+
+#define docallback(opt, val) \
+ do { if ((r = callback(opt, val, data)) != 0) return r; } while (0)
+
+/* Parse the given cmdline arguments.
+ *
+ * list is an array of option structs terminated by an entry whose optval
+ * is zero. argv[1] .. argv[argc - 1] are examined in order and callback is
+ * invoked for every option and every non-option argument found:
+ *
+ * - non-option argument: callback(0, argument, data)
+ * - recognized option: callback(optval, parameter or NULL, data)
+ * - unknown option: callback('?', offending text, data)
+ * - required parameter missing: callback(':', offending text, data)
+ * - parameter given to a long option taking none:
+ * callback('=', argument, data)
+ *
+ * Returns -1 if list or callback is NULL. Otherwise returns the first
+ * nonzero value returned by callback (parsing stops at that point), or
+ * zero when all arguments have been processed.
+ */
+int readoptions(option const* list, int argc, char **argv,
+ int (*callback)(int, char const*, void*), void *data)
+{
+ /* Scratch "-x" string used to report a single short option; the
+ * second character is overwritten with the option character.
+ */
+ char argstring[] = "--";
+ option const *opt;
+ char const *val;
+ char const *p;
+ int stop = 0; /* nonzero once a bare "--" has been seen */
+ int argi, len, r; /* r is assigned by the docallback() macro */
+
+ if (!list || !callback)
+ return -1;
+
+ for (argi = 1 ; argi < argc ; ++argi)
+ {
+ /* First, check for "--", which forces all remaining arguments
+ * to be treated as non-options.
+ */
+ if (!stop && argv[argi][0] == '-' && argv[argi][1] == '-'
+ && argv[argi][2] == '\0') {
+ stop = 1;
+ continue;
+ }
+
+ /* Arguments that do not begin with '-' (or are only "-") are
+ * not options.
+ */
+ if (stop || argv[argi][0] != '-' || argv[argi][1] == '\0') {
+ docallback(0, argv[argi]);
+ continue;
+ }
+
+ if (argv[argi][1] == '-')
+ {
+ /* Arguments that begin with a double-dash are long
+ * options.
+ */
+ p = argv[argi] + 2;
+ val = strchr(p, '=');
+ /* len is the length of the option name only; when an '='
+ * is present, val is advanced past it to the parameter.
+ */
+ if (val)
+ len = val++ - p;
+ else
+ len = strlen(p);
+
+ /* Is it on the list of valid options? If so, does it
+ * expect a parameter?
+ */
+ for (opt = list ; opt->optval ; ++opt)
+ if (opt->name && !strncmp(p, opt->name, len)
+ && !opt->name[len])
+ break;
+ if (!opt->optval) {
+ docallback('?', argv[argi]);
+ } else if (!val && opt->arg == 1) {
+ docallback(':', argv[argi]);
+ } else if (val && opt->arg == 0) {
+ docallback('=', argv[argi]);
+ } else {
+ docallback(opt->optval, val);
+ }
+ }
+ else
+ {
+ /* Arguments that begin with a single dash contain one or
+ * more short options. Each character in the argument is
+ * examined in turn, unless a parameter consumes the rest
+ * of the argument (or possibly even the following
+ * argument).
+ */
+ for (p = argv[argi] + 1 ; *p ; ++p) {
+ for (opt = list ; opt->optval ; ++opt)
+ if (opt->chname == *p)
+ break;
+ if (!opt->optval) {
+ argstring[1] = *p;
+ docallback('?', argstring);
+ continue;
+ } else if (opt->arg == 0) {
+ docallback(opt->optval, NULL);
+ continue;
+ } else if (p[1]) {
+ /* The rest of this argument is the parameter. */
+ docallback(opt->optval, p + 1);
+ break;
+ } else if (argi + 1 < argc && strcmp(argv[argi + 1], "--")) {
+ /* The following argument (anything but "--") is
+ * consumed as the parameter.
+ */
+ ++argi;
+ docallback(opt->optval, argv[argi]);
+ break;
+ } else if (opt->arg == 2) {
+ /* Optional parameter, and none is available. */
+ docallback(opt->optval, NULL);
+ continue;
+ } else {
+ argstring[1] = *p;
+ docallback(':', argstring);
+ break;
+ }
+ }
+ }
+ }
+ return 0;
+}
+
+/* Verify that str points to an ASCII zero or one (optionally surrounded
+ * by whitespace) and return the value present, or -1 if str's contents
+ * are anything else (including an empty or all-whitespace string).
+ *
+ * Characters are converted to unsigned char before being handed to
+ * isspace(): passing a negative plain char value is undefined behavior.
+ */
+static int readboolvalue(char const *str)
+{
+    char d;
+
+    while (isspace((unsigned char)*str))
+        ++str;
+    if (!*str)
+        return -1;
+    d = *str++;
+    while (isspace((unsigned char)*str))
+        ++str;
+    /* Anything left after the single digit makes the value invalid. */
+    if (*str)
+        return -1;
+    if (d == '0')
+        return 0;
+    if (d == '1')
+        return 1;
+    return -1;
+}
+
+/* Parse a configuration file.
+ *
+ * Every line read from fp is expected to look like "NAME", "NAME VALUE" or
+ * "NAME=VALUE", where NAME is the long name of an entry in list (list is
+ * terminated by an entry whose optval is zero). Lines that are empty or
+ * whose first non-blank character is '#' are skipped.
+ *
+ * callback is invoked as follows:
+ * - unknown name: callback('?', name, data)
+ * - required value missing: callback(':', name, data)
+ * - option taking no parameter, value present: the value must be 0 or 1;
+ *   1 yields callback(optval, NULL, data), 0 yields no call at all, and
+ *   anything else yields callback('=', name, data)
+ * - otherwise: callback(optval, value, data)
+ *
+ * Returns the first nonzero value returned by callback, -1 if a read error
+ * occurred on fp, or zero otherwise.
+ *
+ * Characters are converted to unsigned char before being handed to
+ * isspace(): passing a negative plain char value is undefined behavior.
+ */
+int readcfgfile(option const* list, FILE *fp,
+                int (*callback)(int, char const*, void*), void *data)
+{
+    char buf[1024];
+    option const *opt;
+    char *name, *val, *p;
+    int len, f, r;      /* r is assigned by the docallback() macro */
+
+    while (fgets(buf, sizeof buf, fp) != NULL)
+    {
+        /* Strip off the trailing newline and any leading whitespace.
+         * If the line begins with a hash sign, skip it entirely.
+         */
+        len = strlen(buf);
+        if (len && buf[len - 1] == '\n')
+            buf[--len] = '\0';
+        for (p = buf ; isspace((unsigned char)*p) ; ++p) ;
+        if (!*p || *p == '#')
+            continue;
+
+        /* Find the end of the option's name and the beginning of the
+         * parameter, if any.
+         */
+        for (name = p ; *p && *p != '=' && !isspace((unsigned char)*p) ; ++p) ;
+        len = p - name;
+        for ( ; *p == '=' || isspace((unsigned char)*p) ; ++p) ;
+        val = p;
+
+        /* Is it on the list of valid options? Does it take a
+         * full parameter, or just an optional boolean?
+         */
+        for (opt = list ; opt->optval ; ++opt)
+            if (opt->name && !strncmp(name, opt->name, len)
+                          && !opt->name[len])
+                break;
+        if (!opt->optval) {
+            docallback('?', name);
+        } else if (!*val && opt->arg == 1) {
+            docallback(':', name);
+        } else if (*val && opt->arg == 0) {
+            f = readboolvalue(val);
+            if (f < 0)
+                docallback('=', name);
+            else if (f == 1)
+                docallback(opt->optval, NULL);
+        } else {
+            docallback(opt->optval, val);
+        }
+    }
+    return ferror(fp) ? -1 : 0;
+}
+
+/* Turn a string containing a cmdline into an argc-argv pair.
+ *
+ * cmdline is split on whitespace. Text inside single quotes is taken
+ * literally; text inside double quotes likewise, except that a backslash
+ * escapes the following character. The quotes and escaping backslashes
+ * themselves are not copied.
+ *
+ * On success 1 is returned, *argcp receives the argument count (including
+ * the unused slot 0) and, if argvp is not NULL, *argvp receives a single
+ * malloc()ed block holding the pointer array followed by the strings:
+ * argv[0] is NULL, the arguments start at argv[1], and the array is
+ * NULL-terminated. The caller releases everything with one free(*argvp).
+ * Returns 0 if cmdline is NULL or if the allocation fails.
+ *
+ * Characters are converted to unsigned char before being handed to
+ * isspace(): passing a negative plain char value is undefined behavior.
+ */
+int makecmdline(char const *cmdline, int *argcp, char ***argvp)
+{
+    char **argv;
+    int argc;
+    char const *s;
+    int n, quoted;
+
+    if (!cmdline)
+        return 0;
+
+    /* Calculate argc by counting the number of "clumps" of non-spaces.
+     */
+    for (s = cmdline ; isspace((unsigned char)*s) ; ++s) ;
+    if (!*s) {
+        /* Nothing but whitespace: only the unused argv[0] slot exists. */
+        *argcp = 1;
+        if (argvp) {
+            *argvp = malloc(2 * sizeof(char*));
+            if (!*argvp)
+                return 0;
+            (*argvp)[0] = NULL;
+            (*argvp)[1] = NULL;
+        }
+        return 1;
+    }
+    for (argc = 2, quoted = 0 ; *s ; ++s) {
+        if (quoted == '"') {
+            if (*s == '"')
+                quoted = 0;
+            else if (*s == '\\' && s[1])
+                ++s;
+        } else if (quoted == '\'') {
+            if (*s == '\'')
+                quoted = 0;
+        } else {
+            if (isspace((unsigned char)*s)) {
+                for ( ; isspace((unsigned char)s[1]) ; ++s) ;
+                if (!s[1])
+                    break;
+                ++argc;
+            } else if (*s == '"' || *s == '\'') {
+                quoted = *s;
+            }
+        }
+    }
+
+    *argcp = argc;
+    if (!argvp)
+        return 1;
+
+    /* Allocate space for all the arguments and their pointers.
+     */
+    argv = malloc((argc + 1) * sizeof(char*) + strlen(cmdline) + 1);
+    *argvp = argv;
+    if (!argv)
+        return 0;
+    argv[0] = NULL;
+    argv[1] = (char*)(argv + argc + 1);
+
+    /* Copy the string into the allocated memory immediately after the
+     * argv array. Where spaces immediately follows a nonspace,
+     * replace it with a \0. Where a nonspace immediately follows
+     * spaces, store a pointer to it. (Except, of course, when the
+     * space-nonspace transitions occur within quotes.)
+     */
+    for (s = cmdline ; isspace((unsigned char)*s) ; ++s) ;
+    for (argc = 1, n = 0, quoted = 0 ; *s ; ++s) {
+        if (quoted == '"') {
+            if (*s == '"') {
+                quoted = 0;
+            } else {
+                if (*s == '\\' && s[1])
+                    ++s;
+                argv[argc][n++] = *s;
+            }
+        } else if (quoted == '\'') {
+            if (*s == '\'')
+                quoted = 0;
+            else
+                argv[argc][n++] = *s;
+        } else {
+            if (isspace((unsigned char)*s)) {
+                argv[argc][n] = '\0';
+                for ( ; isspace((unsigned char)s[1]) ; ++s) ;
+                if (!s[1])
+                    break;
+                argv[argc + 1] = argv[argc] + n + 1;
+                ++argc;
+                n = 0;
+            } else {
+                if (*s == '"' || *s == '\'')
+                    quoted = *s;
+                else
+                    argv[argc][n++] = *s;
+            }
+        }
+    }
+
+    /* Terminate the last argument. The loop above only writes a '\0' when
+     * it meets whitespace, so input that does not end in whitespace would
+     * otherwise leave the final string unterminated. The extra byte is
+     * covered by the "+ 1" in the allocation; when the loop ended on
+     * trailing whitespace this merely rewrites the same terminator.
+     */
+    argv[argc][n] = '\0';
+    argv[argc + 1] = NULL;
+    return 1;
+}
diff --git a/md2html/cmdline.h b/md2html/cmdline.h
@@ -0,0 +1,86 @@
+/* cmdline.h: a reentrant version of getopt(). Written 2006 by Brian
+ * Raiter. This code is in the public domain.
+ */
+
+#ifndef _cmdline_h_
+#define _cmdline_h_
+
+/* The information specifying a single cmdline option. Options are passed
+ * to the parsing functions as an array of these structs; the array ends at
+ * the first entry whose optval is zero.
+ */
+typedef struct option {
+ char const *name; /* the option's long name, or "" if none */
+ char chname; /* a single-char name, or zero if none */
+ int optval; /* a unique nonzero value representing this option; it is what the callback receives as opt */
+ int arg; /* 0 = no arg, 1 = arg req'd, 2 = optional */
+} option;
+
+/* Parse the given cmdline arguments. list is an array of option
+ * structs, each entry specifying a valid option. The last struct in
+ * the array must have optval set to zero. argc and argv give the
+ * cmdline to parse. callback is the function to call for each option
+ * and non-option found on the cmdline. data is a pointer that is
+ * passed to each invocation of callback. The return value of callback
+ * should be zero to continue processing the cmdline, or any other
+ * value to abort. The return value of readoptions() is the value
+ * returned from the last callback, or zero if no arguments were
+ * found, or -1 if an error occurred.
+ *
+ * When readoptions() encounters a regular cmdline argument (i.e. a
+ * non-option argument), callback() is invoked with opt equal to zero
+ * and val pointing to the argument. When an option is found,
+ * callback() is invoked with opt equal to the optval field in the
+ * option struct corresponding to that option, and val points to the
+ * option's parameter, or is NULL if the option does not take a
+ * parameter. If readoptions() finds an option that does not appear in
+ * the list of valid options, callback() is invoked with opt equal to
+ * '?'. If readoptions() encounters an option that is missing its
+ * required parameter, callback() is invoked with opt equal to ':'. If
+ * readoptions() finds a parameter on a long option that does not
+ * admit a parameter, callback() is invoked with opt equal to '='. In
+ * each of these cases, val will point to the erroneous option
+ * argument.
+ */
+extern int readoptions(option const* list, int argc, char **argv,
+ int (*callback)(int opt, char const *val, void *data),
+ void *data);
+
+/* Parse the given file. list is an array of option structs, in the
+ * same form as taken by readoptions(). fp is a pointer to an open
+ * text file. callback is the function to call for each line found in
+ * the configuration file. data is a pointer that is passed to each
+ * invocation of callback. The return value of readcfgfile() is the
+ * value returned from the last callback, or zero if no arguments were
+ * found, or -1 if an error occurred while reading the file.
+ *
+ * The function will ignore lines that contain only whitespace, or
+ * lines that begin with a hash sign. All other lines should be of the
+ * form "OPTION=VALUE", where OPTION is one of the long options in
+ * list. Whitespace around the equal sign is permitted. An option that
+ * takes no arguments can either have a VALUE of 0 or 1, or omit the
+ * "=VALUE" entirely. (A VALUE of 0 will behave the same as if the
+ * line was not present.)
+ */
+extern int readcfgfile(option const* list, FILE *fp,
+ int (*callback)(int opt, char const *val, void *data),
+ void *data);
+
+
+/* Create an argc-argv pair from a string containing a command line.
+ * cmdline is the string to be parsed. argcp points to the variable to
+ * receive the argc value, and argvp points to the variable to receive
+ * the argv value. argvp can be NULL if the caller just wants to get
+ * argc. Zero is returned on failure. This function allocates memory
+ * on behalf of the caller. The memory is allocated as a single block,
+ * so it is sufficient to simply free() the pointer returned through
+ * argvp. Note that argv[0] will always be initialized to NULL; the
+ * first argument will be stored in argv[1]. The string is parsed by
+ * separating arguments on whitespace boundaries. Space within
+ * substrings enclosed in single-quotes is ignored. A substring
+ * enclosed in double-quotes is treated the same, except that the
+ * backslash is recognized as an escape character within such a
+ * substring. Enclosing quotes and escaping backslashes are not copied
+ * into the argv values.
+ */
+extern int makecmdline(char const *cmdline, int *argcp, char ***argvp);
+
+#endif
diff --git a/md2html/md2html.c b/md2html/md2html.c
@@ -0,0 +1,367 @@
+/*
+ * MD4C: Markdown parser for C
+ * (http://github.com/mity/md4c)
+ *
+ * Copyright (c) 2016 Martin Mitas
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include "md4c.h"
+#include "cmdline.h"
+
+
+/********************************
+ *** Simple growable buffer ***
+ ********************************/
+
+/* We render to a memory buffer instead of directly outputting the rendered
+ * documents, as this allows using this utility for evaluating performance
+ * of MD4C (--stat option). This allows us to measure just time of the parser,
+ * without the I/O.
+ */
+
+/* Growable byte buffer: `size` bytes of `data` are in use out of `asize`
+ * bytes allocated. */
+struct membuffer {
+ char* data; /* heap storage obtained with malloc()/realloc() */
+ MD_SIZE asize; /* number of bytes allocated for data */
+ MD_SIZE size; /* number of bytes of data currently used */
+};
+
+/* Initialize buf as an empty buffer with new_asize bytes of storage.
+ * Terminates the program with exit status 1 if the allocation fails. */
+static void
+membuf_init(struct membuffer* buf, MD_SIZE new_asize)
+{
+    buf->size = 0;
+    buf->asize = new_asize;
+    buf->data = malloc(buf->asize);
+    if(buf->data == NULL) {
+        /* The message ends with a newline so that it does not run into
+         * whatever is printed to the terminal next. */
+        fprintf(stderr, "membuf_init: malloc() failed.\n");
+        exit(1);
+    }
+}
+
+/* Release the storage owned by buf. Safe to call on a zero-initialized
+ * buffer: free() ignores a NULL pointer. */
+static void
+membuf_fini(struct membuffer* buf)
+{
+    free(buf->data);
+}
+
+/* Resize the storage of buf to new_asize bytes, preserving its contents.
+ * Terminates the program with exit status 1 if the allocation fails. */
+static void
+membuf_grow(struct membuffer* buf, MD_SIZE new_asize)
+{
+    char* new_data = realloc(buf->data, new_asize);
+
+    if(new_data == NULL) {
+        /* The message ends with a newline so that it does not run into
+         * whatever is printed to the terminal next. */
+        fprintf(stderr, "membuf_grow: realloc() failed.\n");
+        exit(1);
+    }
+    buf->data = new_data;
+    buf->asize = new_asize;
+}
+
+/* Append size bytes starting at data to the end of buf, enlarging the
+ * storage first when the bytes would not fit. */
+static void
+membuf_append(struct membuffer* buf, const char* data, MD_SIZE size)
+{
+    MD_SIZE needed = buf->size + size;
+
+    /* Grow to twice the required size so that repeated small appends do
+     * not reallocate every time. */
+    if(needed > buf->asize)
+        membuf_grow(buf, needed * 2);
+
+    memcpy(buf->data + buf->size, data, size);
+    buf->size = needed;
+}
+
+#define MEMBUF_APPEND_LITERAL(buf, literal) membuf_append((buf), (literal), strlen(literal))
+
+/* True for the characters which must not appear verbatim in HTML text. */
+#define HTML_NEED_ESCAPE(ch) ((ch) == '&' || (ch) == '<' || (ch) == '>' || (ch) == '"')
+
+/* Append size bytes starting at data to buf, replacing the characters
+ * '&', '<', '>' and '"' with the corresponding HTML entities. All other
+ * bytes are copied unchanged. */
+static void
+membuf_append_escaped(struct membuffer* buf, const char* data, MD_SIZE size)
+{
+    MD_OFFSET beg = 0;      /* start of the pending run of plain characters */
+    MD_OFFSET off = 0;      /* current scanning position */
+
+    /* Some characters need to be escaped in normal HTML text. */
+
+    while(1) {
+        /* Copy the longest run needing no escaping in one go. */
+        while(off < size && !HTML_NEED_ESCAPE(data[off]))
+            off++;
+        if(off > beg)
+            membuf_append(buf, data + beg, off - beg);
+
+        if(off >= size)
+            break;
+
+        /* data[off] is one of the special characters: emit its entity. */
+        switch(data[off]) {
+            case '&':   MEMBUF_APPEND_LITERAL(buf, "&amp;"); break;
+            case '<':   MEMBUF_APPEND_LITERAL(buf, "&lt;"); break;
+            case '>':   MEMBUF_APPEND_LITERAL(buf, "&gt;"); break;
+            case '"':   MEMBUF_APPEND_LITERAL(buf, "&quot;"); break;
+        }
+        off++;
+        beg = off;
+    }
+}
+
+/**************************************
+ *** HTML renderer implementation ***
+ **************************************/
+
+/* Called when a block starts; userdata is the output membuffer. Emits the
+ * opening tag of a paragraph; the document block itself produces no
+ * output. Always returns 0. */
+static int
+enter_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata)
+{
+    struct membuffer* out = (struct membuffer*) userdata;
+
+    if(type == MD_BLOCK_P)
+        MEMBUF_APPEND_LITERAL(out, "<p>");
+    /* MD_BLOCK_DOC and anything else: nothing to emit. */
+
+    return 0;
+}
+
+/* Called when a block ends; userdata is the output membuffer. Emits the
+ * closing tag of a paragraph followed by a newline; the document block
+ * itself produces no output. Always returns 0. */
+static int
+leave_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata)
+{
+    struct membuffer* out = (struct membuffer*) userdata;
+
+    if(type == MD_BLOCK_P)
+        MEMBUF_APPEND_LITERAL(out, "</p>\n");
+    /* MD_BLOCK_DOC and anything else: nothing to emit. */
+
+    return 0;
+}
+
+/* Span-start callback. Currently a stub: it emits nothing and always
+ * returns 0. */
+static int
+enter_span_callback(MD_SPANTYPE type, void* detail, void* userdata)
+{
+ return 0;
+}
+
+/* Span-end callback. Currently a stub: it emits nothing and always
+ * returns 0. */
+static int
+leave_span_callback(MD_SPANTYPE type, void* detail, void* userdata)
+{
+ return 0;
+}
+
+/* Called for a piece of text; userdata is the output membuffer. Every text
+ * type is handled the same way: the text is appended with the HTML special
+ * characters escaped. Always returns 0. */
+static int
+text_callback(MD_TEXTTYPE type, const MD_CHAR* text, MD_SIZE size, void* userdata)
+{
+    struct membuffer* out = (struct membuffer*) userdata;
+
+    (void) type;    /* no text type needs special treatment yet */
+    membuf_append_escaped(out, text, size);
+
+    return 0;
+}
+
+/* Diagnostic callback handed to the parser through the renderer structure:
+ * prints msg to stderr prefixed with "Error:". userdata is unused. */
+static void
+debug_log_callback(const char* msg, void* userdata)
+{
+ fprintf(stderr, "Error:%s\n", msg);
+}
+
+
+/**********************
+ *** Main program ***
+ **********************/
+
+/* Convert the Markdown document read from `in` to HTML written to `out`.
+ *
+ * flags is stored in the renderer structure handed to md_parse(). If
+ * fullhtml is nonzero, the output is wrapped in a minimal HTML document
+ * skeleton. If print_stat is nonzero, the time spent inside md_parse() is
+ * reported on stderr.
+ *
+ * Returns 0 on success and a nonzero value if reading the input or parsing
+ * fails; nothing is written to `out` in that case.
+ */
+static int
+process_file(FILE* in, FILE* out, unsigned flags, int fullhtml, int print_stat)
+{
+    MD_RENDERER renderer = {
+        enter_block_callback,
+        leave_block_callback,
+        enter_span_callback,
+        leave_span_callback,
+        text_callback,
+        debug_log_callback,
+        flags
+    };
+
+    MD_SIZE n;
+    struct membuffer buf_in = {0};
+    struct membuffer buf_out = {0};
+    int ret = -1;
+    clock_t t0, t1;
+
+    membuf_init(&buf_in, 32 * 1024);
+
+    /* Read the input file into a buffer. */
+    while(1) {
+        if(buf_in.size >= buf_in.asize)
+            membuf_grow(&buf_in, 2 * buf_in.asize);
+
+        n = fread(buf_in.data + buf_in.size, 1, buf_in.asize - buf_in.size, in);
+        if(n == 0)
+            break;
+        buf_in.size += n;
+    }
+
+    /* fread() returns 0 both at the end of the file and on a read error;
+     * do not silently convert a truncated document in the latter case. */
+    if(ferror(in)) {
+        fprintf(stderr, "Reading the input failed.\n");
+        goto out;
+    }
+
+    /* Input size is good estimation of output size. Add some more reserve to
+     * deal with the HTML header/footer and tags. */
+    membuf_init(&buf_out, buf_in.size + buf_in.size/8 + 64);
+
+    /* Parse the document. This shall call our callbacks provided via the
+     * MD_RENDERER structure. */
+    t0 = clock();
+    ret = md_parse(buf_in.data, buf_in.size, &renderer, (void*) &buf_out);
+    t1 = clock();
+    if(ret != 0) {
+        fprintf(stderr, "Parsing failed.\n");
+        goto out;
+    }
+
+    /* Write down the document in the HTML format. */
+    if(fullhtml) {
+        fprintf(out, "<html>\n");
+        fprintf(out, "<head>\n");
+        fprintf(out, "<title></title>\n");
+        fprintf(out, "<meta name=\"generator\" content=\"md2html\">\n");
+        fprintf(out, "</head>\n");
+        fprintf(out, "<body>\n");
+    }
+
+    fwrite(buf_out.data, 1, buf_out.size, out);
+
+    if(fullhtml) {
+        fprintf(out, "</body>\n");
+        fprintf(out, "</html>\n");
+    }
+
+    if(print_stat) {
+        /* clock() returns (clock_t)-1 when the processor time is not
+         * available; print nothing in that case. */
+        if(t0 != (clock_t)-1 && t1 != (clock_t)-1) {
+            double elapsed = (double)(t1 - t0) / CLOCKS_PER_SEC;
+            if (elapsed < 1)
+                fprintf(stderr, "Time spent on parsing: %7.2f ms.\n", elapsed*1e3);
+            else
+                fprintf(stderr, "Time spent on parsing: %6.3f s.\n", elapsed);
+        }
+    }
+
+    /* Success if we have reached here. */
+    ret = 0;
+
+out:
+    /* buf_out may still be zero-initialized here; membuf_fini() copes. */
+    membuf_fini(&buf_in);
+    membuf_fini(&buf_out);
+
+    return ret;
+}
+
+
+/* Symbolic names for the values of the `arg` member of struct option. */
+#define OPTION_ARG_NONE 0
+#define OPTION_ARG_REQUIRED 1
+#define OPTION_ARG_OPTIONAL 2
+
+/* Options understood by md2html: { long name, short name, value passed to
+ * cmdline_callback(), argument kind }. The all-zero entry ends the list. */
+static const option cmdline_options[] = {
+ { "output", 'o', 'o', OPTION_ARG_REQUIRED },
+ { "full-html", 'f', 'f', OPTION_ARG_NONE },
+ { "stat", 's', 's', OPTION_ARG_NONE },
+ { "help", 'h', 'h', OPTION_ARG_NONE },
+ { 0 }
+};
+
+/* Print the command line help to standard output. */
+static void
+usage(void)
+{
+    printf(
+        "Usage: md2html [OPTION]... [FILE]\n"
+        "Convert input FILE (or standard input) in Markdown format to HTML.\n"
+        "\n"
+        "General options:\n"
+        /* Short and long forms are separated by a comma on every line. */
+        " -o, --output=FILE output file (default is standard output)\n"
+        " -f, --full-html generate full HTML document, including header\n"
+        " -s, --stat measure time of input parsing\n"
+        " -h, --help display this help and exit\n"
+    );
+}
+
+/* Settings collected from the command line by cmdline_callback(). */
+static const char* input_path = NULL;
+static const char* output_path = NULL;
+static int want_fullhtml = 0;
+static int want_stat = 0;
+
+/* Callback for readoptions(): records one option or non-option argument
+ * in the variables above. opt is 0 for a non-option argument (the input
+ * file) or the option's value; any other value, including the error codes
+ * reported by readoptions(), is treated as an illegal option and
+ * terminates the program. Returns 0 so that parsing continues. */
+static int
+cmdline_callback(int opt, char const* value, void* data)
+{
+    if(opt == 0) {
+        /* Non-option argument: the input file; only one is accepted. */
+        if(input_path) {
+            fprintf(stderr, "Too many arguments. Only one input file can be specified.\n");
+            fprintf(stderr, "Use --help for more info.\n");
+            exit(1);
+        }
+        input_path = value;
+    } else if(opt == 'o') {
+        output_path = value;
+    } else if(opt == 'f') {
+        want_fullhtml = 1;
+    } else if(opt == 's') {
+        want_stat = 1;
+    } else if(opt == 'h') {
+        usage();
+        exit(0);
+    } else {
+        fprintf(stderr, "Illegal option: %s\n", value);
+        fprintf(stderr, "Use --help for more info.\n");
+        exit(1);
+    }
+
+    return 0;
+}
+
+/* Program entry point: parse the command line, open the input and output
+ * streams (standard input/output by default, or when the path is "-"),
+ * run the conversion and return its result as the exit status. */
+int
+main(int argc, char** argv)
+{
+    FILE* in = stdin;
+    FILE* out = stdout;
+    int ret = 0;
+
+    if(readoptions(cmdline_options, argc, argv, cmdline_callback, NULL) < 0) {
+        usage();
+        exit(1);
+    }
+
+    if(input_path != NULL && strcmp(input_path, "-") != 0) {
+        in = fopen(input_path, "rb");
+        if(in == NULL) {
+            fprintf(stderr, "Cannot open %s.\n", input_path);
+            exit(1);
+        }
+    }
+    if(output_path != NULL && strcmp(output_path, "-") != 0) {
+        out = fopen(output_path, "wt");
+        if(out == NULL) {
+            /* Report the file which actually failed to open. input_path
+             * would be misleading here and may even be NULL when the
+             * input comes from standard input. */
+            fprintf(stderr, "Cannot open %s.\n", output_path);
+            exit(1);
+        }
+    }
+
+    ret = process_file(in, out, 0, want_fullhtml, want_stat);
+    if(in != stdin)
+        fclose(in);
+    if(out != stdout)
+        fclose(out);
+
+    return ret;
+}
diff --git a/md4c/CMakeLists.txt b/md4c/CMakeLists.txt
@@ -0,0 +1,2 @@
+
+# The parser itself, built as a static library which md2html links against.
+add_library(md4c STATIC md4c.c md4c.h)
diff --git a/md4c/md4c.c b/md4c/md4c.c
@@ -0,0 +1,406 @@
+/*
+ * MD4C: Markdown parser for C
+ * (http://github.com/mity/md4c)
+ *
+ * Copyright (c) 2016 Martin Mitas
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "md4c.h"
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+
+/*****************************
+ *** Miscellaneous Stuff ***
+ *****************************/
+
+/* Magic to support UTF16-LE (i.e. what is called Unicode among Windows
+ * developers) input/output on Windows.
+ *
+ * _T(x) makes a character or string literal match MD_CHAR: it becomes a wide
+ * literal when md4c.h has switched MD_CHAR to WCHAR (md4c.h then defines
+ * MD4C_USE_WIN_UNICODE), and it is left as it is otherwise. */
+#ifdef _T
+    #undef _T
+#endif
+#if defined _WIN32 && defined MD4C_USE_WIN_UNICODE
+    #define _T(x)           L##x
+#else
+    #define _T(x)           x
+#endif
+
+/* Misc. macros. */
+
+/* Count of elements in the array 'a'. The argument has to be a real array;
+ * for a pointer the result is meaningless. */
+#define SIZEOF_ARRAY(a) (sizeof(a) / sizeof(a[0]))
+
+
+/************************
+ *** Internal Types ***
+ ************************/
+
+/* Short aliases for the public types declared in md4c.h. These are
+ * omnipresent so let's save some typing. */
+typedef MD_CHAR CHAR;
+typedef MD_SIZE SZ;
+typedef MD_OFFSET OFF;
+
+/* Context propagated through all the parsing. It is filled in md_parse()
+ * and passed by pointer to every internal function. */
+typedef struct MD_CTX_tag MD_CTX;
+struct MD_CTX_tag {
+ /* Immutables (parameters of md_parse()). */
+ /* The document being parsed. */
+ const CHAR* text;
+ /* Size of the document, in characters. */
+ SZ size;
+ /* Copy of the caller's renderer (callbacks and flags). */
+ MD_RENDERER r;
+ /* Opaque pointer handed back to every callback. */
+ void* userdata;
+};
+
+/* Type of a single line of the document as determined by md_analyze_line().
+ *
+ * (The typedef is merged with the definition because ISO C does not allow
+ * referring to an enum type before it is defined.) */
+typedef enum MD_LINETYPE_tag {
+    /* Line with nothing but blanks before its end (or the document end). */
+    MD_LINE_BLANK,
+    /* Any other line; treated as ordinary text. */
+    MD_LINE_TEXT
+} MD_LINETYPE;
+
+/* Description of one line of the document, produced by md_analyze_line(). */
+typedef struct MD_LINE_tag MD_LINE;
+struct MD_LINE_tag {
+ /* Type of the line. */
+ MD_LINETYPE type;
+ /* Offset of the first character after the leading blanks. */
+ OFF beg;
+ /* Offset just past the last character of the line contents, i.e. offset
+ * of the new line character(s) or of the document end. */
+ OFF end;
+};
+
+
+/*******************
+ *** Debugging ***
+ *******************/
+
+/* Format a diagnostic message printf-style and pass it to the debug_log()
+ * callback of the renderer. Does nothing when the caller has not provided
+ * that callback. Messages longer than the internal 256-byte buffer get
+ * truncated. */
+static void
+md_log(MD_CTX* ctx, const char* fmt, ...)
+{
+    if(ctx->r.debug_log != NULL) {
+        char msg[256];
+        va_list ap;
+
+        va_start(ap, fmt);
+        vsnprintf(msg, sizeof(msg), fmt, ap);
+        va_end(ap);
+
+        /* Make sure the message is terminated even if it was truncated. */
+        msg[sizeof(msg) - 1] = '\0';
+
+        ctx->r.debug_log(msg, ctx->userdata);
+    }
+}
+
+/* MD_ASSERT(cond) checks an internal invariant.
+ *
+ * With DEBUG defined, a failed assertion is reported through md_log(), the
+ * local variable 'ret' is set to -2 and the control jumps to the local label
+ * 'abort'. Hence it may only be used in functions which provide 'ctx', 'ret'
+ * and 'abort'. The condition text is passed as a "%s" argument rather than
+ * pasted into the format string so that a '%' in it cannot break the format.
+ *
+ * Without DEBUG, the condition is only given to the compiler as a hint
+ * (where the compiler is known to support it) and no check is made.
+ */
+#ifdef DEBUG
+    #define MD_ASSERT(cond)                                             \
+            do {                                                        \
+                if(!(cond)) {                                           \
+                    md_log(ctx, "%s:%d: Assertion '%s' failed.",        \
+                            __FILE__, (int)__LINE__, #cond);            \
+                    ret = -2;                                           \
+                    goto abort;                                         \
+                }                                                       \
+            } while(0)
+#else
+    #ifdef __GNUC__
+        #define MD_ASSERT(cond)     do { (void) __builtin_expect((cond) != 0, !0); } while(0)
+    #elif defined _MSC_VER  &&  _MSC_VER > 120
+        #define MD_ASSERT(cond)     do { __assume(cond); } while(0)
+    #else
+        #define MD_ASSERT(cond)     do {} while(0)
+    #endif
+#endif
+
+/* Marks a code path which should never be executed. */
+#define MD_UNREACHABLE()        MD_ASSERT(1 == 0)
+
+
+/*****************
+ *** Helpers ***
+ *****************/
+
+/* Character accessors. Both expect a variable 'ctx' (pointer to MD_CTX) to
+ * be in scope: CH(off) is the character at the offset 'off' of the document,
+ * STR(off) is pointer to it. No bounds checking is done. */
+#define CH(off) (ctx->text[(off)])
+#define STR(off) (ctx->text + (off))
+
+/* Character classification (of a character value).
+ * Note we assume ASCII compatibility of code points < 128 here.
+ *
+ * MD_CHAR may be a signed type, so ISASCII_() and ISCNTRL_() compare the
+ * value as unsigned; otherwise all the negative values (e.g. bytes of UTF-8
+ * multi-byte sequences) would be classified as ASCII control characters. */
+#define ISASCII_(ch)        ((unsigned)(ch) <= 127)
+#define ISBLANK_(ch)        ((ch) == _T(' ') || (ch) == _T('\t'))
+#define ISNEWLINE_(ch)      ((ch) == _T('\r') || (ch) == _T('\n'))
+#define ISWHITESPACE_(ch)   (ISBLANK_(ch) || (ch) == _T('\v') || (ch) == _T('\f'))
+#define ISCNTRL_(ch)        ((unsigned)(ch) <= 31 || (unsigned)(ch) == 127)
+#define ISPUNCT_(ch)        ((33 <= (ch) && (ch) <= 47) || (58 <= (ch) && (ch) <= 64) || (91 <= (ch) && (ch) <= 96) || (123 <= (ch) && (ch) <= 126))
+#define ISUPPER_(ch)        (_T('A') <= (ch) && (ch) <= _T('Z'))
+#define ISLOWER_(ch)        (_T('a') <= (ch) && (ch) <= _T('z'))
+#define ISALPHA_(ch)        (ISUPPER_(ch) || ISLOWER_(ch))
+#define ISDIGIT_(ch)        (_T('0') <= (ch) && (ch) <= _T('9'))
+/* Hexadecimal digit: 0-9, a-f or A-F (both bounds inclusive). */
+#define ISXDIGIT_(ch)       (ISDIGIT_(ch) || (_T('a') <= (ch) && (ch) <= _T('f')) || (_T('A') <= (ch) && (ch) <= _T('F')))
+#define ISALNUM_(ch)        (ISALPHA_(ch) || ISDIGIT_(ch))
+
+/* The same classification applied to the character at the given offset of
+ * the document. These go through CH() and hence expect 'ctx' in scope. */
+#define ISASCII(off) ISASCII_(CH(off))
+#define ISBLANK(off) ISBLANK_(CH(off))
+#define ISNEWLINE(off) ISNEWLINE_(CH(off))
+#define ISWHITESPACE(off) ISWHITESPACE_(CH(off))
+#define ISCNTRL(off) ISCNTRL_(CH(off))
+#define ISPUNCT(off) ISPUNCT_(CH(off))
+#define ISUPPER(off) ISUPPER_(CH(off))
+#define ISLOWER(off) ISLOWER_(CH(off))
+#define ISALPHA(off) ISALPHA_(CH(off))
+#define ISDIGIT(off) ISDIGIT_(CH(off))
+#define ISXDIGIT(off) ISXDIGIT_(CH(off))
+#define ISALNUM(off) ISALNUM_(CH(off))
+
+
+/* Wrappers calling the renderer callbacks.
+ *
+ * Each of them stores the return value of the callback into the local
+ * variable 'ret' and, if it is non-zero, logs a message via md_log() and
+ * jumps to the local label 'abort'. Therefore they can be used only in
+ * functions which provide 'ctx', 'ret' and 'abort'. */
+
+/* Calls the enter_block() callback. */
+#define MD_ENTER_BLOCK(type, arg) \
+ do { \
+ ret = ctx->r.enter_block((type), (arg), ctx->userdata); \
+ if(ret != 0) { \
+ md_log(ctx, "Aborted from enter_block() callback."); \
+ goto abort; \
+ } \
+ } while(0)
+
+/* Calls the leave_block() callback. */
+#define MD_LEAVE_BLOCK(type, arg) \
+ do { \
+ ret = ctx->r.leave_block((type), (arg), ctx->userdata); \
+ if(ret != 0) { \
+ md_log(ctx, "Aborted from leave_block() callback."); \
+ goto abort; \
+ } \
+ } while(0)
+
+/* Calls the enter_span() callback. */
+#define MD_ENTER_SPAN(type, arg) \
+ do { \
+ ret = ctx->r.enter_span((type), (arg), ctx->userdata); \
+ if(ret != 0) { \
+ md_log(ctx, "Aborted from enter_span() callback."); \
+ goto abort; \
+ } \
+ } while(0)
+
+/* Calls the leave_span() callback. */
+#define MD_LEAVE_SPAN(type, arg) \
+ do { \
+ ret = ctx->r.leave_span((type), (arg), ctx->userdata); \
+ if(ret != 0) { \
+ md_log(ctx, "Aborted from leave_span() callback."); \
+ goto abort; \
+ } \
+ } while(0)
+
+/* Calls the text() callback for 'size' characters starting at 'str', unless
+ * the size is zero (then nothing is called at all).
+ *
+ * The return value of the callback is stored into the local variable 'ret';
+ * if it is non-zero, a message is logged and the control jumps to the local
+ * label 'abort'. Note 'size' is evaluated twice. */
+#define MD_TEXT(type, str, size)                                            \
+        do {                                                                \
+            if((size) > 0) {                                                \
+                ret = ctx->r.text((type), (str), (size), ctx->userdata);    \
+                if(ret != 0) {                                              \
+                    md_log(ctx, "Aborted from text() callback.");           \
+                    goto abort;                                             \
+                }                                                           \
+            }                                                               \
+        } while(0)
+
+
+/******************************************
+ *** Processing Single Block Contents ***
+ ******************************************/
+
+/* Output contents of a normal block: every line is passed to the text()
+ * callback as MD_TEXT_NORMAL, each followed by a single "\n".
+ *
+ * Returns zero on success, or the non-zero value returned by the callback
+ * which has aborted the processing. */
+static int
+md_process_normal_block(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
+{
+    int idx = 0;
+    int ret = 0;
+
+    while(idx < n_lines) {
+        const MD_LINE* cur = &lines[idx];
+
+        MD_TEXT(MD_TEXT_NORMAL, STR(cur->beg), cur->end - cur->beg);
+        MD_TEXT(MD_TEXT_NORMAL, _T("\n"), 1);
+        idx++;
+    }
+
+abort:
+    return ret;
+}
+
+
+/***************************************
+ *** Breaking Document into Blocks ***
+ ***************************************/
+
+/* Analyze the line starting at the offset 'beg': determine its type and the
+ * boundaries of its contents and store them into 'line'. The offset where
+ * the next line starts is stored into '*p_end'. ('pivot_line' is not used
+ * yet.)
+ *
+ * This serves as a main input for determining type and boundaries of a
+ * block. */
+static void
+md_analyze_line(MD_CTX* ctx, OFF beg, OFF* p_end, const MD_LINE* pivot_line, MD_LINE* line)
+{
+    OFF pos = beg;
+
+    /* Skip the indentation; the line contents start right after it. */
+    for(; pos < ctx->size && ISBLANK(pos); pos++)
+        ;
+    line->beg = pos;
+
+    /* The line is blank if nothing but the line end (or the document end)
+     * follows; anything else makes it a normal text line. */
+    line->type = (pos >= ctx->size || ISNEWLINE(pos)) ? MD_LINE_BLANK : MD_LINE_TEXT;
+
+    /* Find the end of the line contents. */
+    for(; pos < ctx->size && !ISNEWLINE(pos); pos++)
+        ;
+    line->end = pos;
+
+    /* Step over the line terminator: "\r", "\n" or "\r\n". */
+    if(pos < ctx->size && CH(pos) == _T('\r'))
+        pos++;
+    if(pos < ctx->size && CH(pos) == _T('\n'))
+        pos++;
+
+    *p_end = pos;
+}
+
+/* Determine type of the block (from type of its 1st line), call the
+ * enter_block() callback, then the function processing contents of the
+ * block, and finally the leave_block() callback.
+ *
+ * An empty sequence of lines and a block starting with a blank line produce
+ * no output. Returns zero on success or the non-zero value which has aborted
+ * the processing.
+ */
+static int
+md_process_block(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
+{
+    MD_BLOCKTYPE block_type;
+    int ret = 0;
+
+    /* Nothing to do ('ret' is still zero here). */
+    if(n_lines == 0)
+        goto abort;
+
+    /* The first line decides what kind of block this is. */
+    switch(lines[0].type) {
+        case MD_LINE_BLANK:
+            goto abort;
+
+        case MD_LINE_TEXT:
+            block_type = MD_BLOCK_P;
+            break;
+    }
+
+    /* Process the block according to its type. */
+    MD_ENTER_BLOCK(block_type, NULL);
+    ret = md_process_normal_block(ctx, lines, n_lines);
+    if(ret == 0)
+        MD_LEAVE_BLOCK(block_type, NULL);
+
+abort:
+    return ret;
+}
+
+/* Go through the document, analyze each line, on the fly identify block
+ * boundaries and call md_process_block() for sequence of MD_LINE composing
+ * the block. The whole output is wrapped into MD_BLOCK_DOC.
+ *
+ * Returns zero on success, -1 if memory allocation fails, or the non-zero
+ * value which has aborted the processing of some block.
+ */
+static int
+md_process_doc(MD_CTX *ctx)
+{
+    static const MD_LINE dummy_line = { MD_LINE_BLANK, 0, 0 };
+    const MD_LINE* pivot_line = &dummy_line;
+    MD_LINE* line;
+    MD_LINE* lines = NULL;
+    int alloc_lines = 0;
+    int n_lines = 0;
+    OFF off = 0;
+    int ret = 0;
+
+    MD_ENTER_BLOCK(MD_BLOCK_DOC, NULL);
+
+    while(off < ctx->size) {
+        /* Make sure there is a room for the line to be analyzed. */
+        if(n_lines >= alloc_lines) {
+            MD_LINE* new_lines;
+            /* Once any block has been started, the pivot lives in lines[0].
+             * Remember that before realloc() possibly invalidates the
+             * pointer. */
+            int pivot_in_lines = (pivot_line != &dummy_line);
+
+            alloc_lines = (alloc_lines == 0 ? 32 : alloc_lines * 2);
+            new_lines = (MD_LINE*) realloc(lines, alloc_lines * sizeof(MD_LINE));
+            if(new_lines == NULL) {
+                md_log(ctx, "realloc() failed.");
+                ret = -1;
+                goto abort;
+            }
+
+            lines = new_lines;
+
+            /* realloc() may have moved the array; without re-anchoring the
+             * pivot we would read freed memory below. */
+            if(pivot_in_lines)
+                pivot_line = &lines[0];
+        }
+
+        md_analyze_line(ctx, off, &off, pivot_line, &lines[n_lines]);
+        line = &lines[n_lines];
+
+        /* The same block continues as long as lines are of the same type. */
+        if(line->type == pivot_line->type) {
+            /* Do not grow the 'lines' because of blank lines. Semantically
+             * one blank line is equivalent to many. */
+            if(line->type != MD_LINE_BLANK)
+                n_lines++;
+
+            continue;
+        }
+
+        /* Otherwise the old block is complete and we have to process it. */
+        ret = md_process_block(ctx, lines, n_lines);
+        if(ret != 0)
+            goto abort;
+
+        /* Keep the current line as the new pivot. */
+        if(line != &lines[0])
+            memcpy(&lines[0], line, sizeof(MD_LINE));
+        pivot_line = &lines[0];
+        n_lines = 1;
+    }
+
+    /* Process also the last block. */
+    if(pivot_line->type != MD_LINE_BLANK) {
+        ret = md_process_block(ctx, lines, n_lines);
+        if(ret != 0)
+            goto abort;
+    }
+
+    MD_LEAVE_BLOCK(MD_BLOCK_DOC, NULL);
+
+abort:
+    free(lines);
+    return ret;
+}
+
+
+/********************
+ *** Public API ***
+ ********************/
+
+/* Parse the document 'text' of 'size' characters, reporting its structure
+ * and contents through the callbacks of 'renderer'. 'userdata' is passed to
+ * the callbacks untouched. Returns what md_process_doc() returns. */
+int
+md_parse(const MD_CHAR* text, MD_SIZE size, const MD_RENDERER* renderer, void* userdata)
+{
+    MD_CTX ctx;
+
+    /* Start from a zeroed context and fill in the parameters; the renderer
+     * is copied so the context carries its own instance. */
+    memset(&ctx, 0, sizeof(ctx));
+    ctx.userdata = userdata;
+    ctx.r = *renderer;
+    ctx.size = size;
+    ctx.text = text;
+
+    /* Do all the hard work. */
+    return md_process_doc(&ctx);
+}
diff --git a/md4c/md4c.h b/md4c/md4c.h
@@ -0,0 +1,136 @@
+/*
+ * MD4C: Markdown parser for C
+ * (http://github.com/mity/md4c)
+ *
+ * Copyright (c) 2016 Martin Mitas
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef MD4C_MARKDOWN_H
+#define MD4C_MARKDOWN_H
+
+#ifdef __cplusplus
+ extern "C" {
+#endif
+
+
+/* Magic to support UTF16-LE (i.e. what is called Unicode among Windows
+ * developers) input/output on Windows.
+ *
+ * On most platforms, we handle char strings and do not care about encoding
+ * as long as the controlling Markdown syntax is actually ASCII-friendly.
+ * The actual text is provided into callbacks as it is.
+ *
+ * On Windows, when UNICODE is defined, we by default switch to WCHAR and
+ * define MD4C_USE_WIN_UNICODE to announce it.
+ * This behavior may be disabled by predefining MD4C_DISABLE_WIN_UNICODE.
+ */
+#if defined _WIN32 && defined UNICODE && !defined MD4C_DISABLE_WIN_UNICODE
+ #include <windows.h>
+
+ #define MD4C_USE_WIN_UNICODE
+ typedef WCHAR MD_CHAR;
+#else
+ typedef char MD_CHAR;
+#endif
+
+/* Size of a text, counted in MD_CHAR units (see md_parse()). */
+typedef unsigned MD_SIZE;
+/* Offset into a text, counted in MD_CHAR units. */
+typedef unsigned MD_OFFSET;
+
+
+/* Block represents a part of document hierarchy structure like a paragraph
+ * or list item.
+ *
+ * (The typedef is merged with the definition: referring to an enum type
+ * before it is defined is not valid in ISO C and it is an error in C++.) */
+typedef enum MD_BLOCKTYPE_tag {
+    /* <body>...</body> */
+    MD_BLOCK_DOC = 0,
+
+    /* <p>...</p> */
+    MD_BLOCK_P
+} MD_BLOCKTYPE;
+
+
+/* Span represents an in-line piece of a document which should be rendered with
+ * the same font, color and other attributes. A sequence of spans forms a block
+ * like paragraph or list item.
+ *
+ * (The typedef is merged with the definition: referring to an enum type
+ * before it is defined is not valid in ISO C and it is an error in C++.) */
+typedef enum MD_SPANTYPE_tag {
+    MD_SPAN_DUMMY = 0       /* not yet used... */
+} MD_SPANTYPE;
+
+
+/* Text is the actual textual contents of span.
+ *
+ * (The typedef is merged with the definition: referring to an enum type
+ * before it is defined is not valid in ISO C and it is an error in C++.) */
+typedef enum MD_TEXTTYPE_tag {
+    /* Normal text. */
+    MD_TEXT_NORMAL = 0
+} MD_TEXTTYPE;
+
+
+/* Caller-provided callbacks.
+ *
+ * For some block/span types, more detailed information is provided in a
+ * type-specific structure pointed by the argument 'detail'.
+ *
+ * The last argument of all callbacks, 'userdata', is just propagated from
+ * md_parse() and is available for use by the caller.
+ *
+ * Callbacks may abort further parsing of the document by returning non-zero.
+ */
+typedef struct MD_RENDERER_tag MD_RENDERER;
+struct MD_RENDERER_tag {
+ /* Called when a block of the given type starts and ends, respectively. */
+ int (*enter_block)(MD_BLOCKTYPE /*type*/, void* /*detail*/, void* /*userdata*/);
+ int (*leave_block)(MD_BLOCKTYPE /*type*/, void* /*detail*/, void* /*userdata*/);
+
+ /* Called when a span of the given type starts and ends, respectively. */
+ int (*enter_span)(MD_SPANTYPE /*type*/, void* /*detail*/, void* /*userdata*/);
+ int (*leave_span)(MD_SPANTYPE /*type*/, void* /*detail*/, void* /*userdata*/);
+
+ /* Called with a piece of text of the given size. The text is NOT
+ * zero-terminated; never read more than 'size' characters of it. */
+ int (*text)(MD_TEXTTYPE /*type*/, const MD_CHAR* /*text*/, MD_SIZE /*size*/, void* /*userdata*/);
+
+ /* If not NULL and something goes wrong, this function gets called.
+ * This is intended for debugging and problem diagnosis for developers;
+ * it is not intended to provide any errors suitable for displaying to an
+ * end user.
+ */
+ void (*debug_log)(const char* /*msg*/, void* /*userdata*/);
+
+ /* Dialect options. */
+ unsigned flags;
+};
+
+
+/* Parse the Markdown document stored in the string 'text' of size 'size'.
+ * The renderer provides callbacks to be called during the parsing so the
+ * caller can render the document on the screen or convert the Markdown
+ * to another format. The pointer 'userdata' is passed to all the callbacks.
+ *
+ * Zero is returned on success. If a runtime error occurs (e.g. a memory
+ * allocation fails), -1 is returned. If an internal error occurs (i.e. an
+ * internal assertion fails, implying there is a bug in MD4C), then -2 is
+ * returned. If the processing is aborted due to any callback returning
+ * non-zero, md_parse() returns the return value of the callback.
+ */
+int md_parse(const MD_CHAR* text, MD_SIZE size, const MD_RENDERER* renderer, void* userdata);
+
+
+#ifdef __cplusplus
+ } /* extern "C" { */
+#endif
+
+#endif /* MD4C_MARKDOWN_H */