md

cat markdown files with syntax highlighting
git clone https://noulin.net/git/md.git
Log | Files | Refs | README | LICENSE

commit 78d585ded125e3841b6643546ebf4b6d41328972
parent 82132b41ac7d21bad90deb1a6d36e789e5e451ba
Author: Remy Noulin <loader2x@gmail.com>
Date:   Mon, 26 Dec 2022 21:12:47 +0100

add support for github flavored markdown and terminal effects

fix recursive blocks and spans
fix headers
fix lists and sublists
change color of block code to magenta
improve blockquotes (use | instead of spaces)
add html entities
add tables with libfort
change horizontal rule to '-'
add support for colors (RGB and 0 to 15): [color fg 9 bg 3](#9#3)
add underline '_'
add crossed '~'
add faint '-'
add inverse '%'
add conceal/hidden '!'
add blink '^'
add anchors with ids: destination [|id] and link syntax [to anchor id](|id)

README.md                    |   34 +
entities.h                   | 2236 ++++++++++++
fort.c                       | 7767 ++++++++++++++++++++++++++++++++++++++++++
fort.h                       | 1057 ++++++
genEntities.c                |   26 +
md.c                         |  739 +++-
package.yml                  |    2 +-
shpPackages/md4c/md4c.c      | 6706 ++++++++++++++++++++++++++++++++++++
shpPackages/md4c/md4c.h      |  417 +++
shpPackages/md4c/package.yml |   18 +
10 files changed, 18850 insertions(+), 152 deletions(-)

Diffstat:
M README.md | 34 ++++++++++++++++++++++++++++++++++
A entities.h | 2236 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A fort.c | 7767 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A fort.h | 1057 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A genEntities.c | 26 ++++++++++++++++++++++++++
M md.c | 739 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------
M package.yml | 2 +-
A shpPackages/md4c/md4c.c | 6706 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A shpPackages/md4c/md4c.h | 417 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A shpPackages/md4c/package.yml | 18 ++++++++++++++++++
10 files changed, 18850 insertions(+), 152 deletions(-)

diff --git a/README.md b/README.md @@ -9,3 +9,37 @@ spm install # run ./md.c README.md ``` + +# Supported syntax + +- GFM github flavored markdown +- \_underline\_ _underline_ +- \-faint\- -faint- +- \%inverse\% %inverse% +- \!conceal/hidden\! !conceal/hidden! +- \~crossed\~ ~crossed~ +- \^blink\^ ^blink^ +- colors + +[fgx800034](#x800034) + +[fg9](#9) [fg15](#15) - standard 16 terminal **colors** + +background color + +[fgx12AB34 bgxffffff](#x12AB34#xffffff) + +[fgxffAB34 bg1](#xffAB34#1) + +[bgxffffff](##xffffff) < default fg color + +[fgd bg1](##1) [fg9 bg2](#9#2) - standard 16 terminal colors + +- anchors: destination \[|id\] and link \[to id\](|id) +- html entities: \&xUnicodeHex; or \&name; +- tables + +| foo | bar | +| --- | --- | +| baz | bim | +| bot | bel | diff --git a/entities.h b/entities.h @@ -0,0 +1,2236 @@ +// spec https://html.spec.whatwg.org/entities.json +char *entitiesString = +"{" +" '&AElig': 'Æ'," +" '&AElig;': 'Æ'," +" '&AMP': '&'," +" '&AMP;': '&'," +" '&Aacute': 'Á'," +" '&Aacute;': 'Á'," +" '&Abreve;': 'Ă'," +" '&Acirc': 'Â'," +" '&Acirc;': 'Â'," +" '&Acy;': 'А'," +" '&Afr;': '𝔄'," +" '&Agrave': 'À'," +" '&Agrave;': 'À'," +" '&Alpha;': 'Α'," +" '&Amacr;': 'Ā'," +" '&And;': '⩓'," +" '&Aogon;': 'Ą'," +" '&Aopf;': '𝔸'," +" '&ApplyFunction;': '⁡'," +" '&Aring': 'Å'," +" '&Aring;': 'Å'," +" '&Ascr;': '𝒜'," +" '&Assign;': '≔'," +" '&Atilde': 'Ã'," +" '&Atilde;': 'Ã'," +" '&Auml': 'Ä'," +" '&Auml;': 'Ä'," +" '&Backslash;': '∖'," +" '&Barv;': '⫧'," +" '&Barwed;': '⌆'," +" '&Bcy;': 'Б'," +" '&Because;': '∵'," +" '&Bernoullis;': 'ℬ'," +" '&Beta;': 'Β'," +" '&Bfr;': '𝔅'," +" '&Bopf;': '𝔹'," +" '&Breve;': '˘'," +" '&Bscr;': 'ℬ'," +" '&Bumpeq;': '≎'," +" '&CHcy;': 'Ч'," +" '&COPY': '©'," +" '&COPY;': '©'," +" '&Cacute;': 'Ć'," +" '&Cap;': '⋒'," +" '&CapitalDifferentialD;': 'ⅅ'," +" '&Cayleys;': 'ℭ'," +" '&Ccaron;': 'Č'," +" '&Ccedil': 'Ç'," +" '&Ccedil;': 'Ç'," +" '&Ccirc;': 'Ĉ'," +" '&Cconint;': '∰'," +" '&Cdot;': 'Ċ'," +" '&Cedilla;': '¸'," +" 
'&CenterDot;': '·'," +" '&Cfr;': 'ℭ'," +" '&Chi;': 'Χ'," +" '&CircleDot;': '⊙'," +" '&CircleMinus;': '⊖'," +" '&CirclePlus;': '⊕'," +" '&CircleTimes;': '⊗'," +" '&ClockwiseContourIntegral;': '∲'," +" '&CloseCurlyDoubleQuote;': '”'," +" '&CloseCurlyQuote;': '’'," +" '&Colon;': '∷'," +" '&Colone;': '⩴'," +" '&Congruent;': '≡'," +" '&Conint;': '∯'," +" '&ContourIntegral;': '∮'," +" '&Copf;': 'ℂ'," +" '&Coproduct;': '∐'," +" '&CounterClockwiseContourIntegral;': '∳'," +" '&Cross;': '⨯'," +" '&Cscr;': '𝒞'," +" '&Cup;': '⋓'," +" '&CupCap;': '≍'," +" '&DD;': 'ⅅ'," +" '&DDotrahd;': '⤑'," +" '&DJcy;': 'Ђ'," +" '&DScy;': 'Ѕ'," +" '&DZcy;': 'Џ'," +" '&Dagger;': '‡'," +" '&Darr;': '↡'," +" '&Dashv;': '⫤'," +" '&Dcaron;': 'Ď'," +" '&Dcy;': 'Д'," +" '&Del;': '∇'," +" '&Delta;': 'Δ'," +" '&Dfr;': '𝔇'," +" '&DiacriticalAcute;': '´'," +" '&DiacriticalDot;': '˙'," +" '&DiacriticalDoubleAcute;': '˝'," +" '&DiacriticalGrave;': '`'," +" '&DiacriticalTilde;': '˜'," +" '&Diamond;': '⋄'," +" '&DifferentialD;': 'ⅆ'," +" '&Dopf;': '𝔻'," +" '&Dot;': '¨'," +" '&DotDot;': '⃜'," +" '&DotEqual;': '≐'," +" '&DoubleContourIntegral;': '∯'," +" '&DoubleDot;': '¨'," +" '&DoubleDownArrow;': '⇓'," +" '&DoubleLeftArrow;': '⇐'," +" '&DoubleLeftRightArrow;': '⇔'," +" '&DoubleLeftTee;': '⫤'," +" '&DoubleLongLeftArrow;': '⟸'," +" '&DoubleLongLeftRightArrow;': '⟺'," +" '&DoubleLongRightArrow;': '⟹'," +" '&DoubleRightArrow;': '⇒'," +" '&DoubleRightTee;': '⊨'," +" '&DoubleUpArrow;': '⇑'," +" '&DoubleUpDownArrow;': '⇕'," +" '&DoubleVerticalBar;': '∥'," +" '&DownArrow;': '↓'," +" '&DownArrowBar;': '⤓'," +" '&DownArrowUpArrow;': '⇵'," +" '&DownBreve;': '̑'," +" '&DownLeftRightVector;': '⥐'," +" '&DownLeftTeeVector;': '⥞'," +" '&DownLeftVector;': '↽'," +" '&DownLeftVectorBar;': '⥖'," +" '&DownRightTeeVector;': '⥟'," +" '&DownRightVector;': '⇁'," +" '&DownRightVectorBar;': '⥗'," +" '&DownTee;': '⊤'," +" '&DownTeeArrow;': '↧'," +" '&Downarrow;': '⇓'," +" '&Dscr;': '𝒟'," +" '&Dstrok;': 'Đ'," +" '&ENG;': 'Ŋ'," +" 
'&ETH': 'Ð'," +" '&ETH;': 'Ð'," +" '&Eacute': 'É'," +" '&Eacute;': 'É'," +" '&Ecaron;': 'Ě'," +" '&Ecirc': 'Ê'," +" '&Ecirc;': 'Ê'," +" '&Ecy;': 'Э'," +" '&Edot;': 'Ė'," +" '&Efr;': '𝔈'," +" '&Egrave': 'È'," +" '&Egrave;': 'È'," +" '&Element;': '∈'," +" '&Emacr;': 'Ē'," +" '&EmptySmallSquare;': '◻'," +" '&EmptyVerySmallSquare;': '▫'," +" '&Eogon;': 'Ę'," +" '&Eopf;': '𝔼'," +" '&Epsilon;': 'Ε'," +" '&Equal;': '⩵'," +" '&EqualTilde;': '≂'," +" '&Equilibrium;': '⇌'," +" '&Escr;': 'ℰ'," +" '&Esim;': '⩳'," +" '&Eta;': 'Η'," +" '&Euml': 'Ë'," +" '&Euml;': 'Ë'," +" '&Exists;': '∃'," +" '&ExponentialE;': 'ⅇ'," +" '&Fcy;': 'Ф'," +" '&Ffr;': '𝔉'," +" '&FilledSmallSquare;': '◼'," +" '&FilledVerySmallSquare;': '▪'," +" '&Fopf;': '𝔽'," +" '&ForAll;': '∀'," +" '&Fouriertrf;': 'ℱ'," +" '&Fscr;': 'ℱ'," +" '&GJcy;': 'Ѓ'," +" '&GT': '>'," +" '&GT;': '>'," +" '&Gamma;': 'Γ'," +" '&Gammad;': 'Ϝ'," +" '&Gbreve;': 'Ğ'," +" '&Gcedil;': 'Ģ'," +" '&Gcirc;': 'Ĝ'," +" '&Gcy;': 'Г'," +" '&Gdot;': 'Ġ'," +" '&Gfr;': '𝔊'," +" '&Gg;': '⋙'," +" '&Gopf;': '𝔾'," +" '&GreaterEqual;': '≥'," +" '&GreaterEqualLess;': '⋛'," +" '&GreaterFullEqual;': '≧'," +" '&GreaterGreater;': '⪢'," +" '&GreaterLess;': '≷'," +" '&GreaterSlantEqual;': '⩾'," +" '&GreaterTilde;': '≳'," +" '&Gscr;': '𝒢'," +" '&Gt;': '≫'," +" '&HARDcy;': 'Ъ'," +" '&Hacek;': 'ˇ'," +" '&Hat;': '^'," +" '&Hcirc;': 'Ĥ'," +" '&Hfr;': 'ℌ'," +" '&HilbertSpace;': 'ℋ'," +" '&Hopf;': 'ℍ'," +" '&HorizontalLine;': '─'," +" '&Hscr;': 'ℋ'," +" '&Hstrok;': 'Ħ'," +" '&HumpDownHump;': '≎'," +" '&HumpEqual;': '≏'," +" '&IEcy;': 'Е'," +" '&IJlig;': 'IJ'," +" '&IOcy;': 'Ё'," +" '&Iacute': 'Í'," +" '&Iacute;': 'Í'," +" '&Icirc': 'Î'," +" '&Icirc;': 'Î'," +" '&Icy;': 'И'," +" '&Idot;': 'İ'," +" '&Ifr;': 'ℑ'," +" '&Igrave': 'Ì'," +" '&Igrave;': 'Ì'," +" '&Im;': 'ℑ'," +" '&Imacr;': 'Ī'," +" '&ImaginaryI;': 'ⅈ'," +" '&Implies;': '⇒'," +" '&Int;': '∬'," +" '&Integral;': '∫'," +" '&Intersection;': '⋂'," +" '&InvisibleComma;': '⁣'," +" '&InvisibleTimes;': '⁢'," +" 
'&Iogon;': 'Į'," +" '&Iopf;': '𝕀'," +" '&Iota;': 'Ι'," +" '&Iscr;': 'ℐ'," +" '&Itilde;': 'Ĩ'," +" '&Iukcy;': 'І'," +" '&Iuml': 'Ï'," +" '&Iuml;': 'Ï'," +" '&Jcirc;': 'Ĵ'," +" '&Jcy;': 'Й'," +" '&Jfr;': '𝔍'," +" '&Jopf;': '𝕁'," +" '&Jscr;': '𝒥'," +" '&Jsercy;': 'Ј'," +" '&Jukcy;': 'Є'," +" '&KHcy;': 'Х'," +" '&KJcy;': 'Ќ'," +" '&Kappa;': 'Κ'," +" '&Kcedil;': 'Ķ'," +" '&Kcy;': 'К'," +" '&Kfr;': '𝔎'," +" '&Kopf;': '𝕂'," +" '&Kscr;': '𝒦'," +" '&LJcy;': 'Љ'," +" '&LT': '<'," +" '&LT;': '<'," +" '&Lacute;': 'Ĺ'," +" '&Lambda;': 'Λ'," +" '&Lang;': '⟪'," +" '&Laplacetrf;': 'ℒ'," +" '&Larr;': '↞'," +" '&Lcaron;': 'Ľ'," +" '&Lcedil;': 'Ļ'," +" '&Lcy;': 'Л'," +" '&LeftAngleBracket;': '⟨'," +" '&LeftArrow;': '←'," +" '&LeftArrowBar;': '⇤'," +" '&LeftArrowRightArrow;': '⇆'," +" '&LeftCeiling;': '⌈'," +" '&LeftDoubleBracket;': '⟦'," +" '&LeftDownTeeVector;': '⥡'," +" '&LeftDownVector;': '⇃'," +" '&LeftDownVectorBar;': '⥙'," +" '&LeftFloor;': '⌊'," +" '&LeftRightArrow;': '↔'," +" '&LeftRightVector;': '⥎'," +" '&LeftTee;': '⊣'," +" '&LeftTeeArrow;': '↤'," +" '&LeftTeeVector;': '⥚'," +" '&LeftTriangle;': '⊲'," +" '&LeftTriangleBar;': '⧏'," +" '&LeftTriangleEqual;': '⊴'," +" '&LeftUpDownVector;': '⥑'," +" '&LeftUpTeeVector;': '⥠'," +" '&LeftUpVector;': '↿'," +" '&LeftUpVectorBar;': '⥘'," +" '&LeftVector;': '↼'," +" '&LeftVectorBar;': '⥒'," +" '&Leftarrow;': '⇐'," +" '&Leftrightarrow;': '⇔'," +" '&LessEqualGreater;': '⋚'," +" '&LessFullEqual;': '≦'," +" '&LessGreater;': '≶'," +" '&LessLess;': '⪡'," +" '&LessSlantEqual;': '⩽'," +" '&LessTilde;': '≲'," +" '&Lfr;': '𝔏'," +" '&Ll;': '⋘'," +" '&Lleftarrow;': '⇚'," +" '&Lmidot;': 'Ŀ'," +" '&LongLeftArrow;': '⟵'," +" '&LongLeftRightArrow;': '⟷'," +" '&LongRightArrow;': '⟶'," +" '&Longleftarrow;': '⟸'," +" '&Longleftrightarrow;': '⟺'," +" '&Longrightarrow;': '⟹'," +" '&Lopf;': '𝕃'," +" '&LowerLeftArrow;': '↙'," +" '&LowerRightArrow;': '↘'," +" '&Lscr;': 'ℒ'," +" '&Lsh;': '↰'," +" '&Lstrok;': 'Ł'," +" '&Lt;': '≪'," +" '&Map;': '⤅'," +" 
'&Mcy;': 'М'," +" '&MediumSpace;': ' '," +" '&Mellintrf;': 'ℳ'," +" '&Mfr;': '𝔐'," +" '&MinusPlus;': '∓'," +" '&Mopf;': '𝕄'," +" '&Mscr;': 'ℳ'," +" '&Mu;': 'Μ'," +" '&NJcy;': 'Њ'," +" '&Nacute;': 'Ń'," +" '&Ncaron;': 'Ň'," +" '&Ncedil;': 'Ņ'," +" '&Ncy;': 'Н'," +" '&NegativeMediumSpace;': '​'," +" '&NegativeThickSpace;': '​'," +" '&NegativeThinSpace;': '​'," +" '&NegativeVeryThinSpace;': '​'," +" '&NestedGreaterGreater;': '≫'," +" '&NestedLessLess;': '≪'," +" '&NewLine;': '" +"'," +" '&Nfr;': '𝔑'," +" '&NoBreak;': '⁠'," +" '&NonBreakingSpace;': ' '," +" '&Nopf;': 'ℕ'," +" '&Not;': '⫬'," +" '&NotCongruent;': '≢'," +" '&NotCupCap;': '≭'," +" '&NotDoubleVerticalBar;': '∦'," +" '&NotElement;': '∉'," +" '&NotEqual;': '≠'," +" '&NotEqualTilde;': '≂'," +" '&NotExists;': '∄'," +" '&NotGreater;': '≯'," +" '&NotGreaterEqual;': '≱'," +" '&NotGreaterFullEqual;': '≧'," +" '&NotGreaterGreater;': '≫'," +" '&NotGreaterLess;': '≹'," +" '&NotGreaterSlantEqual;': '⩾'," +" '&NotGreaterTilde;': '≵'," +" '&NotHumpDownHump;': '≎'," +" '&NotHumpEqual;': '≏'," +" '&NotLeftTriangle;': '⋪'," +" '&NotLeftTriangleBar;': '⧏'," +" '&NotLeftTriangleEqual;': '⋬'," +" '&NotLess;': '≮'," +" '&NotLessEqual;': '≰'," +" '&NotLessGreater;': '≸'," +" '&NotLessLess;': '≪'," +" '&NotLessSlantEqual;': '⩽'," +" '&NotLessTilde;': '≴'," +" '&NotNestedGreaterGreater;': '⪢'," +" '&NotNestedLessLess;': '⪡'," +" '&NotPrecedes;': '⊀'," +" '&NotPrecedesEqual;': '⪯'," +" '&NotPrecedesSlantEqual;': '⋠'," +" '&NotReverseElement;': '∌'," +" '&NotRightTriangle;': '⋫'," +" '&NotRightTriangleBar;': '⧐'," +" '&NotRightTriangleEqual;': '⋭'," +" '&NotSquareSubset;': '⊏'," +" '&NotSquareSubsetEqual;': '⋢'," +" '&NotSquareSuperset;': '⊐'," +" '&NotSquareSupersetEqual;': '⋣'," +" '&NotSubset;': '⊂'," +" '&NotSubsetEqual;': '⊈'," +" '&NotSucceeds;': '⊁'," +" '&NotSucceedsEqual;': '⪰'," +" '&NotSucceedsSlantEqual;': '⋡'," +" '&NotSucceedsTilde;': '≿'," +" '&NotSuperset;': '⊃'," +" '&NotSupersetEqual;': '⊉'," +" '&NotTilde;': '≁'," 
+" '&NotTildeEqual;': '≄'," +" '&NotTildeFullEqual;': '≇'," +" '&NotTildeTilde;': '≉'," +" '&NotVerticalBar;': '∤'," +" '&Nscr;': '𝒩'," +" '&Ntilde': 'Ñ'," +" '&Ntilde;': 'Ñ'," +" '&Nu;': 'Ν'," +" '&OElig;': 'Œ'," +" '&Oacute': 'Ó'," +" '&Oacute;': 'Ó'," +" '&Ocirc': 'Ô'," +" '&Ocirc;': 'Ô'," +" '&Ocy;': 'О'," +" '&Odblac;': 'Ő'," +" '&Ofr;': '𝔒'," +" '&Ograve': 'Ò'," +" '&Ograve;': 'Ò'," +" '&Omacr;': 'Ō'," +" '&Omega;': 'Ω'," +" '&Omicron;': 'Ο'," +" '&Oopf;': '𝕆'," +" '&OpenCurlyDoubleQuote;': '“'," +" '&OpenCurlyQuote;': '‘'," +" '&Or;': '⩔'," +" '&Oscr;': '𝒪'," +" '&Oslash': 'Ø'," +" '&Oslash;': 'Ø'," +" '&Otilde': 'Õ'," +" '&Otilde;': 'Õ'," +" '&Otimes;': '⨷'," +" '&Ouml': 'Ö'," +" '&Ouml;': 'Ö'," +" '&OverBar;': '‾'," +" '&OverBrace;': '⏞'," +" '&OverBracket;': '⎴'," +" '&OverParenthesis;': '⏜'," +" '&PartialD;': '∂'," +" '&Pcy;': 'П'," +" '&Pfr;': '𝔓'," +" '&Phi;': 'Φ'," +" '&Pi;': 'Π'," +" '&PlusMinus;': '±'," +" '&Poincareplane;': 'ℌ'," +" '&Popf;': 'ℙ'," +" '&Pr;': '⪻'," +" '&Precedes;': '≺'," +" '&PrecedesEqual;': '⪯'," +" '&PrecedesSlantEqual;': '≼'," +" '&PrecedesTilde;': '≾'," +" '&Prime;': '″'," +" '&Product;': '∏'," +" '&Proportion;': '∷'," +" '&Proportional;': '∝'," +" '&Pscr;': '𝒫'," +" '&Psi;': 'Ψ'," +" '&QUOT': '\\''," +" '&QUOT;': '\\''," +" '&Qfr;': '𝔔'," +" '&Qopf;': 'ℚ'," +" '&Qscr;': '𝒬'," +" '&RBarr;': '⤐'," +" '&REG': '®'," +" '&REG;': '®'," +" '&Racute;': 'Ŕ'," +" '&Rang;': '⟫'," +" '&Rarr;': '↠'," +" '&Rarrtl;': '⤖'," +" '&Rcaron;': 'Ř'," +" '&Rcedil;': 'Ŗ'," +" '&Rcy;': 'Р'," +" '&Re;': 'ℜ'," +" '&ReverseElement;': '∋'," +" '&ReverseEquilibrium;': '⇋'," +" '&ReverseUpEquilibrium;': '⥯'," +" '&Rfr;': 'ℜ'," +" '&Rho;': 'Ρ'," +" '&RightAngleBracket;': '⟩'," +" '&RightArrow;': '→'," +" '&RightArrowBar;': '⇥'," +" '&RightArrowLeftArrow;': '⇄'," +" '&RightCeiling;': '⌉'," +" '&RightDoubleBracket;': '⟧'," +" '&RightDownTeeVector;': '⥝'," +" '&RightDownVector;': '⇂'," +" '&RightDownVectorBar;': '⥕'," +" '&RightFloor;': '⌋'," +" '&RightTee;': 
'⊢'," +" '&RightTeeArrow;': '↦'," +" '&RightTeeVector;': '⥛'," +" '&RightTriangle;': '⊳'," +" '&RightTriangleBar;': '⧐'," +" '&RightTriangleEqual;': '⊵'," +" '&RightUpDownVector;': '⥏'," +" '&RightUpTeeVector;': '⥜'," +" '&RightUpVector;': '↾'," +" '&RightUpVectorBar;': '⥔'," +" '&RightVector;': '⇀'," +" '&RightVectorBar;': '⥓'," +" '&Rightarrow;': '⇒'," +" '&Ropf;': 'ℝ'," +" '&RoundImplies;': '⥰'," +" '&Rrightarrow;': '⇛'," +" '&Rscr;': 'ℛ'," +" '&Rsh;': '↱'," +" '&RuleDelayed;': '⧴'," +" '&SHCHcy;': 'Щ'," +" '&SHcy;': 'Ш'," +" '&SOFTcy;': 'Ь'," +" '&Sacute;': 'Ś'," +" '&Sc;': '⪼'," +" '&Scaron;': 'Š'," +" '&Scedil;': 'Ş'," +" '&Scirc;': 'Ŝ'," +" '&Scy;': 'С'," +" '&Sfr;': '𝔖'," +" '&ShortDownArrow;': '↓'," +" '&ShortLeftArrow;': '←'," +" '&ShortRightArrow;': '→'," +" '&ShortUpArrow;': '↑'," +" '&Sigma;': 'Σ'," +" '&SmallCircle;': '∘'," +" '&Sopf;': '𝕊'," +" '&Sqrt;': '√'," +" '&Square;': '□'," +" '&SquareIntersection;': '⊓'," +" '&SquareSubset;': '⊏'," +" '&SquareSubsetEqual;': '⊑'," +" '&SquareSuperset;': '⊐'," +" '&SquareSupersetEqual;': '⊒'," +" '&SquareUnion;': '⊔'," +" '&Sscr;': '𝒮'," +" '&Star;': '⋆'," +" '&Sub;': '⋐'," +" '&Subset;': '⋐'," +" '&SubsetEqual;': '⊆'," +" '&Succeeds;': '≻'," +" '&SucceedsEqual;': '⪰'," +" '&SucceedsSlantEqual;': '≽'," +" '&SucceedsTilde;': '≿'," +" '&SuchThat;': '∋'," +" '&Sum;': '∑'," +" '&Sup;': '⋑'," +" '&Superset;': '⊃'," +" '&SupersetEqual;': '⊇'," +" '&Supset;': '⋑'," +" '&THORN': 'Þ'," +" '&THORN;': 'Þ'," +" '&TRADE;': '™'," +" '&TSHcy;': 'Ћ'," +" '&TScy;': 'Ц'," +" '&Tab;': ' '," +" '&Tau;': 'Τ'," +" '&Tcaron;': 'Ť'," +" '&Tcedil;': 'Ţ'," +" '&Tcy;': 'Т'," +" '&Tfr;': '𝔗'," +" '&Therefore;': '∴'," +" '&Theta;': 'Θ'," +" '&ThickSpace;': ' '," +" '&ThinSpace;': ' '," +" '&Tilde;': '∼'," +" '&TildeEqual;': '≃'," +" '&TildeFullEqual;': '≅'," +" '&TildeTilde;': '≈'," +" '&Topf;': '𝕋'," +" '&TripleDot;': '⃛'," +" '&Tscr;': '𝒯'," +" '&Tstrok;': 'Ŧ'," +" '&Uacute': 'Ú'," +" '&Uacute;': 'Ú'," +" '&Uarr;': '↟'," +" '&Uarrocir;': 
'⥉'," +" '&Ubrcy;': 'Ў'," +" '&Ubreve;': 'Ŭ'," +" '&Ucirc': 'Û'," +" '&Ucirc;': 'Û'," +" '&Ucy;': 'У'," +" '&Udblac;': 'Ű'," +" '&Ufr;': '𝔘'," +" '&Ugrave': 'Ù'," +" '&Ugrave;': 'Ù'," +" '&Umacr;': 'Ū'," +" '&UnderBar;': '_'," +" '&UnderBrace;': '⏟'," +" '&UnderBracket;': '⎵'," +" '&UnderParenthesis;': '⏝'," +" '&Union;': '⋃'," +" '&UnionPlus;': '⊎'," +" '&Uogon;': 'Ų'," +" '&Uopf;': '𝕌'," +" '&UpArrow;': '↑'," +" '&UpArrowBar;': '⤒'," +" '&UpArrowDownArrow;': '⇅'," +" '&UpDownArrow;': '↕'," +" '&UpEquilibrium;': '⥮'," +" '&UpTee;': '⊥'," +" '&UpTeeArrow;': '↥'," +" '&Uparrow;': '⇑'," +" '&Updownarrow;': '⇕'," +" '&UpperLeftArrow;': '↖'," +" '&UpperRightArrow;': '↗'," +" '&Upsi;': 'ϒ'," +" '&Upsilon;': 'Υ'," +" '&Uring;': 'Ů'," +" '&Uscr;': '𝒰'," +" '&Utilde;': 'Ũ'," +" '&Uuml': 'Ü'," +" '&Uuml;': 'Ü'," +" '&VDash;': '⊫'," +" '&Vbar;': '⫫'," +" '&Vcy;': 'В'," +" '&Vdash;': '⊩'," +" '&Vdashl;': '⫦'," +" '&Vee;': '⋁'," +" '&Verbar;': '‖'," +" '&Vert;': '‖'," +" '&VerticalBar;': '∣'," +" '&VerticalLine;': '|'," +" '&VerticalSeparator;': '❘'," +" '&VerticalTilde;': '≀'," +" '&VeryThinSpace;': ' '," +" '&Vfr;': '𝔙'," +" '&Vopf;': '𝕍'," +" '&Vscr;': '𝒱'," +" '&Vvdash;': '⊪'," +" '&Wcirc;': 'Ŵ'," +" '&Wedge;': '⋀'," +" '&Wfr;': '𝔚'," +" '&Wopf;': '𝕎'," +" '&Wscr;': '𝒲'," +" '&Xfr;': '𝔛'," +" '&Xi;': 'Ξ'," +" '&Xopf;': '𝕏'," +" '&Xscr;': '𝒳'," +" '&YAcy;': 'Я'," +" '&YIcy;': 'Ї'," +" '&YUcy;': 'Ю'," +" '&Yacute': 'Ý'," +" '&Yacute;': 'Ý'," +" '&Ycirc;': 'Ŷ'," +" '&Ycy;': 'Ы'," +" '&Yfr;': '𝔜'," +" '&Yopf;': '𝕐'," +" '&Yscr;': '𝒴'," +" '&Yuml;': 'Ÿ'," +" '&ZHcy;': 'Ж'," +" '&Zacute;': 'Ź'," +" '&Zcaron;': 'Ž'," +" '&Zcy;': 'З'," +" '&Zdot;': 'Ż'," +" '&ZeroWidthSpace;': '​'," +" '&Zeta;': 'Ζ'," +" '&Zfr;': 'ℨ'," +" '&Zopf;': 'ℤ'," +" '&Zscr;': '𝒵'," +" '&aacute': 'á'," +" '&aacute;': 'á'," +" '&abreve;': 'ă'," +" '&ac;': '∾'," +" '&acE;': '∾'," +" '&acd;': '∿'," +" '&acirc': 'â'," +" '&acirc;': 'â'," +" '&acute': '´'," +" '&acute;': '´'," +" '&acy;': 'а'," +" '&aelig': 
'æ'," +" '&aelig;': 'æ'," +" '&af;': '⁡'," +" '&afr;': '𝔞'," +" '&agrave': 'à'," +" '&agrave;': 'à'," +" '&alefsym;': 'ℵ'," +" '&aleph;': 'ℵ'," +" '&alpha;': 'α'," +" '&amacr;': 'ā'," +" '&amalg;': '⨿'," +" '&amp': '&'," +" '&amp;': '&'," +" '&and;': '∧'," +" '&andand;': '⩕'," +" '&andd;': '⩜'," +" '&andslope;': '⩘'," +" '&andv;': '⩚'," +" '&ang;': '∠'," +" '&ange;': '⦤'," +" '&angle;': '∠'," +" '&angmsd;': '∡'," +" '&angmsdaa;': '⦨'," +" '&angmsdab;': '⦩'," +" '&angmsdac;': '⦪'," +" '&angmsdad;': '⦫'," +" '&angmsdae;': '⦬'," +" '&angmsdaf;': '⦭'," +" '&angmsdag;': '⦮'," +" '&angmsdah;': '⦯'," +" '&angrt;': '∟'," +" '&angrtvb;': '⊾'," +" '&angrtvbd;': '⦝'," +" '&angsph;': '∢'," +" '&angst;': 'Å'," +" '&angzarr;': '⍼'," +" '&aogon;': 'ą'," +" '&aopf;': '𝕒'," +" '&ap;': '≈'," +" '&apE;': '⩰'," +" '&apacir;': '⩯'," +" '&ape;': '≊'," +" '&apid;': '≋'," +" '&apos;': '\\''," +" '&approx;': '≈'," +" '&approxeq;': '≊'," +" '&aring': 'å'," +" '&aring;': 'å'," +" '&ascr;': '𝒶'," +" '&ast;': '*'," +" '&asymp;': '≈'," +" '&asympeq;': '≍'," +" '&atilde': 'ã'," +" '&atilde;': 'ã'," +" '&auml': 'ä'," +" '&auml;': 'ä'," +" '&awconint;': '∳'," +" '&awint;': '⨑'," +" '&bNot;': '⫭'," +" '&backcong;': '≌'," +" '&backepsilon;': '϶'," +" '&backprime;': '‵'," +" '&backsim;': '∽'," +" '&backsimeq;': '⋍'," +" '&barvee;': '⊽'," +" '&barwed;': '⌅'," +" '&barwedge;': '⌅'," +" '&bbrk;': '⎵'," +" '&bbrktbrk;': '⎶'," +" '&bcong;': '≌'," +" '&bcy;': 'б'," +" '&bdquo;': '„'," +" '&becaus;': '∵'," +" '&because;': '∵'," +" '&bemptyv;': '⦰'," +" '&bepsi;': '϶'," +" '&bernou;': 'ℬ'," +" '&beta;': 'β'," +" '&beth;': 'ℶ'," +" '&between;': '≬'," +" '&bfr;': '𝔟'," +" '&bigcap;': '⋂'," +" '&bigcirc;': '◯'," +" '&bigcup;': '⋃'," +" '&bigodot;': '⨀'," +" '&bigoplus;': '⨁'," +" '&bigotimes;': '⨂'," +" '&bigsqcup;': '⨆'," +" '&bigstar;': '★'," +" '&bigtriangledown;': '▽'," +" '&bigtriangleup;': '△'," +" '&biguplus;': '⨄'," +" '&bigvee;': '⋁'," +" '&bigwedge;': '⋀'," +" '&bkarow;': '⤍'," +" '&blacklozenge;': 
'⧫'," +" '&blacksquare;': '▪'," +" '&blacktriangle;': '▴'," +" '&blacktriangledown;': '▾'," +" '&blacktriangleleft;': '◂'," +" '&blacktriangleright;': '▸'," +" '&blank;': '␣'," +" '&blk12;': '▒'," +" '&blk14;': '░'," +" '&blk34;': '▓'," +" '&block;': '█'," +" '&bne;': '='," +" '&bnequiv;': '≡'," +" '&bnot;': '⌐'," +" '&bopf;': '𝕓'," +" '&bot;': '⊥'," +" '&bottom;': '⊥'," +" '&bowtie;': '⋈'," +" '&boxDL;': '╗'," +" '&boxDR;': '╔'," +" '&boxDl;': '╖'," +" '&boxDr;': '╓'," +" '&boxH;': '═'," +" '&boxHD;': '╦'," +" '&boxHU;': '╩'," +" '&boxHd;': '╤'," +" '&boxHu;': '╧'," +" '&boxUL;': '╝'," +" '&boxUR;': '╚'," +" '&boxUl;': '╜'," +" '&boxUr;': '╙'," +" '&boxV;': '║'," +" '&boxVH;': '╬'," +" '&boxVL;': '╣'," +" '&boxVR;': '╠'," +" '&boxVh;': '╫'," +" '&boxVl;': '╢'," +" '&boxVr;': '╟'," +" '&boxbox;': '⧉'," +" '&boxdL;': '╕'," +" '&boxdR;': '╒'," +" '&boxdl;': '┐'," +" '&boxdr;': '┌'," +" '&boxh;': '─'," +" '&boxhD;': '╥'," +" '&boxhU;': '╨'," +" '&boxhd;': '┬'," +" '&boxhu;': '┴'," +" '&boxminus;': '⊟'," +" '&boxplus;': '⊞'," +" '&boxtimes;': '⊠'," +" '&boxuL;': '╛'," +" '&boxuR;': '╘'," +" '&boxul;': '┘'," +" '&boxur;': '└'," +" '&boxv;': '│'," +" '&boxvH;': '╪'," +" '&boxvL;': '╡'," +" '&boxvR;': '╞'," +" '&boxvh;': '┼'," +" '&boxvl;': '┤'," +" '&boxvr;': '├'," +" '&bprime;': '‵'," +" '&breve;': '˘'," +" '&brvbar': '¦'," +" '&brvbar;': '¦'," +" '&bscr;': '𝒷'," +" '&bsemi;': '⁏'," +" '&bsim;': '∽'," +" '&bsime;': '⋍'," +" '&bsol;': '\\\\'," +" '&bsolb;': '⧅'," +" '&bsolhsub;': '⟈'," +" '&bull;': '•'," +" '&bullet;': '•'," +" '&bump;': '≎'," +" '&bumpE;': '⪮'," +" '&bumpe;': '≏'," +" '&bumpeq;': '≏'," +" '&cacute;': 'ć'," +" '&cap;': '∩'," +" '&capand;': '⩄'," +" '&capbrcup;': '⩉'," +" '&capcap;': '⩋'," +" '&capcup;': '⩇'," +" '&capdot;': '⩀'," +" '&caps;': '∩'," +" '&caret;': '⁁'," +" '&caron;': 'ˇ'," +" '&ccaps;': '⩍'," +" '&ccaron;': 'č'," +" '&ccedil': 'ç'," +" '&ccedil;': 'ç'," +" '&ccirc;': 'ĉ'," +" '&ccups;': '⩌'," +" '&ccupssm;': '⩐'," +" '&cdot;': 'ċ'," +" 
'&cedil': '¸'," +" '&cedil;': '¸'," +" '&cemptyv;': '⦲'," +" '&cent': '¢'," +" '&cent;': '¢'," +" '&centerdot;': '·'," +" '&cfr;': '𝔠'," +" '&chcy;': 'ч'," +" '&check;': '✓'," +" '&checkmark;': '✓'," +" '&chi;': 'χ'," +" '&cir;': '○'," +" '&cirE;': '⧃'," +" '&circ;': 'ˆ'," +" '&circeq;': '≗'," +" '&circlearrowleft;': '↺'," +" '&circlearrowright;': '↻'," +" '&circledR;': '®'," +" '&circledS;': 'Ⓢ'," +" '&circledast;': '⊛'," +" '&circledcirc;': '⊚'," +" '&circleddash;': '⊝'," +" '&cire;': '≗'," +" '&cirfnint;': '⨐'," +" '&cirmid;': '⫯'," +" '&cirscir;': '⧂'," +" '&clubs;': '♣'," +" '&clubsuit;': '♣'," +" '&colon;': ':'," +" '&colone;': '≔'," +" '&coloneq;': '≔'," +" '&comma;': ','," +" '&commat;': '@'," +" '&comp;': '∁'," +" '&compfn;': '∘'," +" '&complement;': '∁'," +" '&complexes;': 'ℂ'," +" '&cong;': '≅'," +" '&congdot;': '⩭'," +" '&conint;': '∮'," +" '&copf;': '𝕔'," +" '&coprod;': '∐'," +" '&copy': '©'," +" '&copy;': '©'," +" '&copysr;': '℗'," +" '&crarr;': '↵'," +" '&cross;': '✗'," +" '&cscr;': '𝒸'," +" '&csub;': '⫏'," +" '&csube;': '⫑'," +" '&csup;': '⫐'," +" '&csupe;': '⫒'," +" '&ctdot;': '⋯'," +" '&cudarrl;': '⤸'," +" '&cudarrr;': '⤵'," +" '&cuepr;': '⋞'," +" '&cuesc;': '⋟'," +" '&cularr;': '↶'," +" '&cularrp;': '⤽'," +" '&cup;': '∪'," +" '&cupbrcap;': '⩈'," +" '&cupcap;': '⩆'," +" '&cupcup;': '⩊'," +" '&cupdot;': '⊍'," +" '&cupor;': '⩅'," +" '&cups;': '∪'," +" '&curarr;': '↷'," +" '&curarrm;': '⤼'," +" '&curlyeqprec;': '⋞'," +" '&curlyeqsucc;': '⋟'," +" '&curlyvee;': '⋎'," +" '&curlywedge;': '⋏'," +" '&curren': '¤'," +" '&curren;': '¤'," +" '&curvearrowleft;': '↶'," +" '&curvearrowright;': '↷'," +" '&cuvee;': '⋎'," +" '&cuwed;': '⋏'," +" '&cwconint;': '∲'," +" '&cwint;': '∱'," +" '&cylcty;': '⌭'," +" '&dArr;': '⇓'," +" '&dHar;': '⥥'," +" '&dagger;': '†'," +" '&daleth;': 'ℸ'," +" '&darr;': '↓'," +" '&dash;': '‐'," +" '&dashv;': '⊣'," +" '&dbkarow;': '⤏'," +" '&dblac;': '˝'," +" '&dcaron;': 'ď'," +" '&dcy;': 'д'," +" '&dd;': 'ⅆ'," +" '&ddagger;': '‡'," +" 
'&ddarr;': '⇊'," +" '&ddotseq;': '⩷'," +" '&deg': '°'," +" '&deg;': '°'," +" '&delta;': 'δ'," +" '&demptyv;': '⦱'," +" '&dfisht;': '⥿'," +" '&dfr;': '𝔡'," +" '&dharl;': '⇃'," +" '&dharr;': '⇂'," +" '&diam;': '⋄'," +" '&diamond;': '⋄'," +" '&diamondsuit;': '♦'," +" '&diams;': '♦'," +" '&die;': '¨'," +" '&digamma;': 'ϝ'," +" '&disin;': '⋲'," +" '&div;': '÷'," +" '&divide': '÷'," +" '&divide;': '÷'," +" '&divideontimes;': '⋇'," +" '&divonx;': '⋇'," +" '&djcy;': 'ђ'," +" '&dlcorn;': '⌞'," +" '&dlcrop;': '⌍'," +" '&dollar;': '$'," +" '&dopf;': '𝕕'," +" '&dot;': '˙'," +" '&doteq;': '≐'," +" '&doteqdot;': '≑'," +" '&dotminus;': '∸'," +" '&dotplus;': '∔'," +" '&dotsquare;': '⊡'," +" '&doublebarwedge;': '⌆'," +" '&downarrow;': '↓'," +" '&downdownarrows;': '⇊'," +" '&downharpoonleft;': '⇃'," +" '&downharpoonright;': '⇂'," +" '&drbkarow;': '⤐'," +" '&drcorn;': '⌟'," +" '&drcrop;': '⌌'," +" '&dscr;': '𝒹'," +" '&dscy;': 'ѕ'," +" '&dsol;': '⧶'," +" '&dstrok;': 'đ'," +" '&dtdot;': '⋱'," +" '&dtri;': '▿'," +" '&dtrif;': '▾'," +" '&duarr;': '⇵'," +" '&duhar;': '⥯'," +" '&dwangle;': '⦦'," +" '&dzcy;': 'џ'," +" '&dzigrarr;': '⟿'," +" '&eDDot;': '⩷'," +" '&eDot;': '≑'," +" '&eacute': 'é'," +" '&eacute;': 'é'," +" '&easter;': '⩮'," +" '&ecaron;': 'ě'," +" '&ecir;': '≖'," +" '&ecirc': 'ê'," +" '&ecirc;': 'ê'," +" '&ecolon;': '≕'," +" '&ecy;': 'э'," +" '&edot;': 'ė'," +" '&ee;': 'ⅇ'," +" '&efDot;': '≒'," +" '&efr;': '𝔢'," +" '&eg;': '⪚'," +" '&egrave': 'è'," +" '&egrave;': 'è'," +" '&egs;': '⪖'," +" '&egsdot;': '⪘'," +" '&el;': '⪙'," +" '&elinters;': '⏧'," +" '&ell;': 'ℓ'," +" '&els;': '⪕'," +" '&elsdot;': '⪗'," +" '&emacr;': 'ē'," +" '&empty;': '∅'," +" '&emptyset;': '∅'," +" '&emptyv;': '∅'," +" '&emsp13;': ' '," +" '&emsp14;': ' '," +" '&emsp;': ' '," +" '&eng;': 'ŋ'," +" '&ensp;': ' '," +" '&eogon;': 'ę'," +" '&eopf;': '𝕖'," +" '&epar;': '⋕'," +" '&eparsl;': '⧣'," +" '&eplus;': '⩱'," +" '&epsi;': 'ε'," +" '&epsilon;': 'ε'," +" '&epsiv;': 'ϵ'," +" '&eqcirc;': '≖'," +" '&eqcolon;': 
'≕'," +" '&eqsim;': '≂'," +" '&eqslantgtr;': '⪖'," +" '&eqslantless;': '⪕'," +" '&equals;': '='," +" '&equest;': '≟'," +" '&equiv;': '≡'," +" '&equivDD;': '⩸'," +" '&eqvparsl;': '⧥'," +" '&erDot;': '≓'," +" '&erarr;': '⥱'," +" '&escr;': 'ℯ'," +" '&esdot;': '≐'," +" '&esim;': '≂'," +" '&eta;': 'η'," +" '&eth': 'ð'," +" '&eth;': 'ð'," +" '&euml': 'ë'," +" '&euml;': 'ë'," +" '&euro;': '€'," +" '&excl;': '!'," +" '&exist;': '∃'," +" '&expectation;': 'ℰ'," +" '&exponentiale;': 'ⅇ'," +" '&fallingdotseq;': '≒'," +" '&fcy;': 'ф'," +" '&female;': '♀'," +" '&ffilig;': 'ffi'," +" '&fflig;': 'ff'," +" '&ffllig;': 'ffl'," +" '&ffr;': '𝔣'," +" '&filig;': 'fi'," +" '&fjlig;': 'f'," +" '&flat;': '♭'," +" '&fllig;': 'fl'," +" '&fltns;': '▱'," +" '&fnof;': 'ƒ'," +" '&fopf;': '𝕗'," +" '&forall;': '∀'," +" '&fork;': '⋔'," +" '&forkv;': '⫙'," +" '&fpartint;': '⨍'," +" '&frac12': '½'," +" '&frac12;': '½'," +" '&frac13;': '⅓'," +" '&frac14': '¼'," +" '&frac14;': '¼'," +" '&frac15;': '⅕'," +" '&frac16;': '⅙'," +" '&frac18;': '⅛'," +" '&frac23;': '⅔'," +" '&frac25;': '⅖'," +" '&frac34': '¾'," +" '&frac34;': '¾'," +" '&frac35;': '⅗'," +" '&frac38;': '⅜'," +" '&frac45;': '⅘'," +" '&frac56;': '⅚'," +" '&frac58;': '⅝'," +" '&frac78;': '⅞'," +" '&frasl;': '⁄'," +" '&frown;': '⌢'," +" '&fscr;': '𝒻'," +" '&gE;': '≧'," +" '&gEl;': '⪌'," +" '&gacute;': 'ǵ'," +" '&gamma;': 'γ'," +" '&gammad;': 'ϝ'," +" '&gap;': '⪆'," +" '&gbreve;': 'ğ'," +" '&gcirc;': 'ĝ'," +" '&gcy;': 'г'," +" '&gdot;': 'ġ'," +" '&ge;': '≥'," +" '&gel;': '⋛'," +" '&geq;': '≥'," +" '&geqq;': '≧'," +" '&geqslant;': '⩾'," +" '&ges;': '⩾'," +" '&gescc;': '⪩'," +" '&gesdot;': '⪀'," +" '&gesdoto;': '⪂'," +" '&gesdotol;': '⪄'," +" '&gesl;': '⋛'," +" '&gesles;': '⪔'," +" '&gfr;': '𝔤'," +" '&gg;': '≫'," +" '&ggg;': '⋙'," +" '&gimel;': 'ℷ'," +" '&gjcy;': 'ѓ'," +" '&gl;': '≷'," +" '&glE;': '⪒'," +" '&gla;': '⪥'," +" '&glj;': '⪤'," +" '&gnE;': '≩'," +" '&gnap;': '⪊'," +" '&gnapprox;': '⪊'," +" '&gne;': '⪈'," +" '&gneq;': '⪈'," +" '&gneqq;': 
'≩'," +" '&gnsim;': '⋧'," +" '&gopf;': '𝕘'," +" '&grave;': '`'," +" '&gscr;': 'ℊ'," +" '&gsim;': '≳'," +" '&gsime;': '⪎'," +" '&gsiml;': '⪐'," +" '&gt': '>'," +" '&gt;': '>'," +" '&gtcc;': '⪧'," +" '&gtcir;': '⩺'," +" '&gtdot;': '⋗'," +" '&gtlPar;': '⦕'," +" '&gtquest;': '⩼'," +" '&gtrapprox;': '⪆'," +" '&gtrarr;': '⥸'," +" '&gtrdot;': '⋗'," +" '&gtreqless;': '⋛'," +" '&gtreqqless;': '⪌'," +" '&gtrless;': '≷'," +" '&gtrsim;': '≳'," +" '&gvertneqq;': '≩'," +" '&gvnE;': '≩'," +" '&hArr;': '⇔'," +" '&hairsp;': ' '," +" '&half;': '½'," +" '&hamilt;': 'ℋ'," +" '&hardcy;': 'ъ'," +" '&harr;': '↔'," +" '&harrcir;': '⥈'," +" '&harrw;': '↭'," +" '&hbar;': 'ℏ'," +" '&hcirc;': 'ĥ'," +" '&hearts;': '♥'," +" '&heartsuit;': '♥'," +" '&hellip;': '…'," +" '&hercon;': '⊹'," +" '&hfr;': '𝔥'," +" '&hksearow;': '⤥'," +" '&hkswarow;': '⤦'," +" '&hoarr;': '⇿'," +" '&homtht;': '∻'," +" '&hookleftarrow;': '↩'," +" '&hookrightarrow;': '↪'," +" '&hopf;': '𝕙'," +" '&horbar;': '―'," +" '&hscr;': '𝒽'," +" '&hslash;': 'ℏ'," +" '&hstrok;': 'ħ'," +" '&hybull;': '⁃'," +" '&hyphen;': '‐'," +" '&iacute': 'í'," +" '&iacute;': 'í'," +" '&ic;': '⁣'," +" '&icirc': 'î'," +" '&icirc;': 'î'," +" '&icy;': 'и'," +" '&iecy;': 'е'," +" '&iexcl': '¡'," +" '&iexcl;': '¡'," +" '&iff;': '⇔'," +" '&ifr;': '𝔦'," +" '&igrave': 'ì'," +" '&igrave;': 'ì'," +" '&ii;': 'ⅈ'," +" '&iiiint;': '⨌'," +" '&iiint;': '∭'," +" '&iinfin;': '⧜'," +" '&iiota;': '℩'," +" '&ijlig;': 'ij'," +" '&imacr;': 'ī'," +" '&image;': 'ℑ'," +" '&imagline;': 'ℐ'," +" '&imagpart;': 'ℑ'," +" '&imath;': 'ı'," +" '&imof;': '⊷'," +" '&imped;': 'Ƶ'," +" '&in;': '∈'," +" '&incare;': '℅'," +" '&infin;': '∞'," +" '&infintie;': '⧝'," +" '&inodot;': 'ı'," +" '&int;': '∫'," +" '&intcal;': '⊺'," +" '&integers;': 'ℤ'," +" '&intercal;': '⊺'," +" '&intlarhk;': '⨗'," +" '&intprod;': '⨼'," +" '&iocy;': 'ё'," +" '&iogon;': 'į'," +" '&iopf;': '𝕚'," +" '&iota;': 'ι'," +" '&iprod;': '⨼'," +" '&iquest': '¿'," +" '&iquest;': '¿'," +" '&iscr;': '𝒾'," +" '&isin;': '∈'," +" 
'&isinE;': '⋹'," +" '&isindot;': '⋵'," +" '&isins;': '⋴'," +" '&isinsv;': '⋳'," +" '&isinv;': '∈'," +" '&it;': '⁢'," +" '&itilde;': 'ĩ'," +" '&iukcy;': 'і'," +" '&iuml': 'ï'," +" '&iuml;': 'ï'," +" '&jcirc;': 'ĵ'," +" '&jcy;': 'й'," +" '&jfr;': '𝔧'," +" '&jmath;': 'ȷ'," +" '&jopf;': '𝕛'," +" '&jscr;': '𝒿'," +" '&jsercy;': 'ј'," +" '&jukcy;': 'є'," +" '&kappa;': 'κ'," +" '&kappav;': 'ϰ'," +" '&kcedil;': 'ķ'," +" '&kcy;': 'к'," +" '&kfr;': '𝔨'," +" '&kgreen;': 'ĸ'," +" '&khcy;': 'х'," +" '&kjcy;': 'ќ'," +" '&kopf;': '𝕜'," +" '&kscr;': '𝓀'," +" '&lAarr;': '⇚'," +" '&lArr;': '⇐'," +" '&lAtail;': '⤛'," +" '&lBarr;': '⤎'," +" '&lE;': '≦'," +" '&lEg;': '⪋'," +" '&lHar;': '⥢'," +" '&lacute;': 'ĺ'," +" '&laemptyv;': '⦴'," +" '&lagran;': 'ℒ'," +" '&lambda;': 'λ'," +" '&lang;': '⟨'," +" '&langd;': '⦑'," +" '&langle;': '⟨'," +" '&lap;': '⪅'," +" '&laquo': '«'," +" '&laquo;': '«'," +" '&larr;': '←'," +" '&larrb;': '⇤'," +" '&larrbfs;': '⤟'," +" '&larrfs;': '⤝'," +" '&larrhk;': '↩'," +" '&larrlp;': '↫'," +" '&larrpl;': '⤹'," +" '&larrsim;': '⥳'," +" '&larrtl;': '↢'," +" '&lat;': '⪫'," +" '&latail;': '⤙'," +" '&late;': '⪭'," +" '&lates;': '⪭'," +" '&lbarr;': '⤌'," +" '&lbbrk;': '❲'," +" '&lbrace;': '{'," +" '&lbrack;': '['," +" '&lbrke;': '⦋'," +" '&lbrksld;': '⦏'," +" '&lbrkslu;': '⦍'," +" '&lcaron;': 'ľ'," +" '&lcedil;': 'ļ'," +" '&lceil;': '⌈'," +" '&lcub;': '{'," +" '&lcy;': 'л'," +" '&ldca;': '⤶'," +" '&ldquo;': '“'," +" '&ldquor;': '„'," +" '&ldrdhar;': '⥧'," +" '&ldrushar;': '⥋'," +" '&ldsh;': '↲'," +" '&le;': '≤'," +" '&leftarrow;': '←'," +" '&leftarrowtail;': '↢'," +" '&leftharpoondown;': '↽'," +" '&leftharpoonup;': '↼'," +" '&leftleftarrows;': '⇇'," +" '&leftrightarrow;': '↔'," +" '&leftrightarrows;': '⇆'," +" '&leftrightharpoons;': '⇋'," +" '&leftrightsquigarrow;': '↭'," +" '&leftthreetimes;': '⋋'," +" '&leg;': '⋚'," +" '&leq;': '≤'," +" '&leqq;': '≦'," +" '&leqslant;': '⩽'," +" '&les;': '⩽'," +" '&lescc;': '⪨'," +" '&lesdot;': '⩿'," +" '&lesdoto;': '⪁'," +" 
'&lesdotor;': '⪃'," +" '&lesg;': '⋚'," +" '&lesges;': '⪓'," +" '&lessapprox;': '⪅'," +" '&lessdot;': '⋖'," +" '&lesseqgtr;': '⋚'," +" '&lesseqqgtr;': '⪋'," +" '&lessgtr;': '≶'," +" '&lesssim;': '≲'," +" '&lfisht;': '⥼'," +" '&lfloor;': '⌊'," +" '&lfr;': '𝔩'," +" '&lg;': '≶'," +" '&lgE;': '⪑'," +" '&lhard;': '↽'," +" '&lharu;': '↼'," +" '&lharul;': '⥪'," +" '&lhblk;': '▄'," +" '&ljcy;': 'љ'," +" '&ll;': '≪'," +" '&llarr;': '⇇'," +" '&llcorner;': '⌞'," +" '&llhard;': '⥫'," +" '&lltri;': '◺'," +" '&lmidot;': 'ŀ'," +" '&lmoust;': '⎰'," +" '&lmoustache;': '⎰'," +" '&lnE;': '≨'," +" '&lnap;': '⪉'," +" '&lnapprox;': '⪉'," +" '&lne;': '⪇'," +" '&lneq;': '⪇'," +" '&lneqq;': '≨'," +" '&lnsim;': '⋦'," +" '&loang;': '⟬'," +" '&loarr;': '⇽'," +" '&lobrk;': '⟦'," +" '&longleftarrow;': '⟵'," +" '&longleftrightarrow;': '⟷'," +" '&longmapsto;': '⟼'," +" '&longrightarrow;': '⟶'," +" '&looparrowleft;': '↫'," +" '&looparrowright;': '↬'," +" '&lopar;': '⦅'," +" '&lopf;': '𝕝'," +" '&loplus;': '⨭'," +" '&lotimes;': '⨴'," +" '&lowast;': '∗'," +" '&lowbar;': '_'," +" '&loz;': '◊'," +" '&lozenge;': '◊'," +" '&lozf;': '⧫'," +" '&lpar;': '('," +" '&lparlt;': '⦓'," +" '&lrarr;': '⇆'," +" '&lrcorner;': '⌟'," +" '&lrhar;': '⇋'," +" '&lrhard;': '⥭'," +" '&lrm;': '‎'," +" '&lrtri;': '⊿'," +" '&lsaquo;': '‹'," +" '&lscr;': '𝓁'," +" '&lsh;': '↰'," +" '&lsim;': '≲'," +" '&lsime;': '⪍'," +" '&lsimg;': '⪏'," +" '&lsqb;': '['," +" '&lsquo;': '‘'," +" '&lsquor;': '‚'," +" '&lstrok;': 'ł'," +" '&lt': '<'," +" '&lt;': '<'," +" '&ltcc;': '⪦'," +" '&ltcir;': '⩹'," +" '&ltdot;': '⋖'," +" '&lthree;': '⋋'," +" '&ltimes;': '⋉'," +" '&ltlarr;': '⥶'," +" '&ltquest;': '⩻'," +" '&ltrPar;': '⦖'," +" '&ltri;': '◃'," +" '&ltrie;': '⊴'," +" '&ltrif;': '◂'," +" '&lurdshar;': '⥊'," +" '&luruhar;': '⥦'," +" '&lvertneqq;': '≨'," +" '&lvnE;': '≨'," +" '&mDDot;': '∺'," +" '&macr': '¯'," +" '&macr;': '¯'," +" '&male;': '♂'," +" '&malt;': '✠'," +" '&maltese;': '✠'," +" '&map;': '↦'," +" '&mapsto;': '↦'," +" '&mapstodown;': 
'↧'," +" '&mapstoleft;': '↤'," +" '&mapstoup;': '↥'," +" '&marker;': '▮'," +" '&mcomma;': '⨩'," +" '&mcy;': 'м'," +" '&mdash;': '—'," +" '&measuredangle;': '∡'," +" '&mfr;': '𝔪'," +" '&mho;': '℧'," +" '&micro': 'µ'," +" '&micro;': 'µ'," +" '&mid;': '∣'," +" '&midast;': '*'," +" '&midcir;': '⫰'," +" '&middot': '·'," +" '&middot;': '·'," +" '&minus;': '−'," +" '&minusb;': '⊟'," +" '&minusd;': '∸'," +" '&minusdu;': '⨪'," +" '&mlcp;': '⫛'," +" '&mldr;': '…'," +" '&mnplus;': '∓'," +" '&models;': '⊧'," +" '&mopf;': '𝕞'," +" '&mp;': '∓'," +" '&mscr;': '𝓂'," +" '&mstpos;': '∾'," +" '&mu;': 'μ'," +" '&multimap;': '⊸'," +" '&mumap;': '⊸'," +" '&nGg;': '⋙'," +" '&nGt;': '≫'," +" '&nGtv;': '≫'," +" '&nLeftarrow;': '⇍'," +" '&nLeftrightarrow;': '⇎'," +" '&nLl;': '⋘'," +" '&nLt;': '≪'," +" '&nLtv;': '≪'," +" '&nRightarrow;': '⇏'," +" '&nVDash;': '⊯'," +" '&nVdash;': '⊮'," +" '&nabla;': '∇'," +" '&nacute;': 'ń'," +" '&nang;': '∠'," +" '&nap;': '≉'," +" '&napE;': '⩰'," +" '&napid;': '≋'," +" '&napos;': 'ʼn'," +" '&napprox;': '≉'," +" '&natur;': '♮'," +" '&natural;': '♮'," +" '&naturals;': 'ℕ'," +" '&nbsp': ' '," +" '&nbsp;': ' '," +" '&nbump;': '≎'," +" '&nbumpe;': '≏'," +" '&ncap;': '⩃'," +" '&ncaron;': 'ň'," +" '&ncedil;': 'ņ'," +" '&ncong;': '≇'," +" '&ncongdot;': '⩭'," +" '&ncup;': '⩂'," +" '&ncy;': 'н'," +" '&ndash;': '–'," +" '&ne;': '≠'," +" '&neArr;': '⇗'," +" '&nearhk;': '⤤'," +" '&nearr;': '↗'," +" '&nearrow;': '↗'," +" '&nedot;': '≐'," +" '&nequiv;': '≢'," +" '&nesear;': '⤨'," +" '&nesim;': '≂'," +" '&nexist;': '∄'," +" '&nexists;': '∄'," +" '&nfr;': '𝔫'," +" '&ngE;': '≧'," +" '&nge;': '≱'," +" '&ngeq;': '≱'," +" '&ngeqq;': '≧'," +" '&ngeqslant;': '⩾'," +" '&nges;': '⩾'," +" '&ngsim;': '≵'," +" '&ngt;': '≯'," +" '&ngtr;': '≯'," +" '&nhArr;': '⇎'," +" '&nharr;': '↮'," +" '&nhpar;': '⫲'," +" '&ni;': '∋'," +" '&nis;': '⋼'," +" '&nisd;': '⋺'," +" '&niv;': '∋'," +" '&njcy;': 'њ'," +" '&nlArr;': '⇍'," +" '&nlE;': '≦'," +" '&nlarr;': '↚'," +" '&nldr;': '‥'," +" '&nle;': '≰'," 
+" '&nleftarrow;': '↚'," +" '&nleftrightarrow;': '↮'," +" '&nleq;': '≰'," +" '&nleqq;': '≦'," +" '&nleqslant;': '⩽'," +" '&nles;': '⩽'," +" '&nless;': '≮'," +" '&nlsim;': '≴'," +" '&nlt;': '≮'," +" '&nltri;': '⋪'," +" '&nltrie;': '⋬'," +" '&nmid;': '∤'," +" '&nopf;': '𝕟'," +" '&not': '¬'," +" '&not;': '¬'," +" '&notin;': '∉'," +" '&notinE;': '⋹'," +" '&notindot;': '⋵'," +" '&notinva;': '∉'," +" '&notinvb;': '⋷'," +" '&notinvc;': '⋶'," +" '&notni;': '∌'," +" '&notniva;': '∌'," +" '&notnivb;': '⋾'," +" '&notnivc;': '⋽'," +" '&npar;': '∦'," +" '&nparallel;': '∦'," +" '&nparsl;': '⫽'," +" '&npart;': '∂'," +" '&npolint;': '⨔'," +" '&npr;': '⊀'," +" '&nprcue;': '⋠'," +" '&npre;': '⪯'," +" '&nprec;': '⊀'," +" '&npreceq;': '⪯'," +" '&nrArr;': '⇏'," +" '&nrarr;': '↛'," +" '&nrarrc;': '⤳'," +" '&nrarrw;': '↝'," +" '&nrightarrow;': '↛'," +" '&nrtri;': '⋫'," +" '&nrtrie;': '⋭'," +" '&nsc;': '⊁'," +" '&nsccue;': '⋡'," +" '&nsce;': '⪰'," +" '&nscr;': '𝓃'," +" '&nshortmid;': '∤'," +" '&nshortparallel;': '∦'," +" '&nsim;': '≁'," +" '&nsime;': '≄'," +" '&nsimeq;': '≄'," +" '&nsmid;': '∤'," +" '&nspar;': '∦'," +" '&nsqsube;': '⋢'," +" '&nsqsupe;': '⋣'," +" '&nsub;': '⊄'," +" '&nsubE;': '⫅'," +" '&nsube;': '⊈'," +" '&nsubset;': '⊂'," +" '&nsubseteq;': '⊈'," +" '&nsubseteqq;': '⫅'," +" '&nsucc;': '⊁'," +" '&nsucceq;': '⪰'," +" '&nsup;': '⊅'," +" '&nsupE;': '⫆'," +" '&nsupe;': '⊉'," +" '&nsupset;': '⊃'," +" '&nsupseteq;': '⊉'," +" '&nsupseteqq;': '⫆'," +" '&ntgl;': '≹'," +" '&ntilde': 'ñ'," +" '&ntilde;': 'ñ'," +" '&ntlg;': '≸'," +" '&ntriangleleft;': '⋪'," +" '&ntrianglelefteq;': '⋬'," +" '&ntriangleright;': '⋫'," +" '&ntrianglerighteq;': '⋭'," +" '&nu;': 'ν'," +" '&num;': '#'," +" '&numero;': '№'," +" '&numsp;': ' '," +" '&nvDash;': '⊭'," +" '&nvHarr;': '⤄'," +" '&nvap;': '≍'," +" '&nvdash;': '⊬'," +" '&nvge;': '≥'," +" '&nvgt;': '>'," +" '&nvinfin;': '⧞'," +" '&nvlArr;': '⤂'," +" '&nvle;': '≤'," +" '&nvlt;': '<'," +" '&nvltrie;': '⊴'," +" '&nvrArr;': '⤃'," +" '&nvrtrie;': '⊵'," +" 
'&nvsim;': '∼'," +" '&nwArr;': '⇖'," +" '&nwarhk;': '⤣'," +" '&nwarr;': '↖'," +" '&nwarrow;': '↖'," +" '&nwnear;': '⤧'," +" '&oS;': 'Ⓢ'," +" '&oacute': 'ó'," +" '&oacute;': 'ó'," +" '&oast;': '⊛'," +" '&ocir;': '⊚'," +" '&ocirc': 'ô'," +" '&ocirc;': 'ô'," +" '&ocy;': 'о'," +" '&odash;': '⊝'," +" '&odblac;': 'ő'," +" '&odiv;': '⨸'," +" '&odot;': '⊙'," +" '&odsold;': '⦼'," +" '&oelig;': 'œ'," +" '&ofcir;': '⦿'," +" '&ofr;': '𝔬'," +" '&ogon;': '˛'," +" '&ograve': 'ò'," +" '&ograve;': 'ò'," +" '&ogt;': '⧁'," +" '&ohbar;': '⦵'," +" '&ohm;': 'Ω'," +" '&oint;': '∮'," +" '&olarr;': '↺'," +" '&olcir;': '⦾'," +" '&olcross;': '⦻'," +" '&oline;': '‾'," +" '&olt;': '⧀'," +" '&omacr;': 'ō'," +" '&omega;': 'ω'," +" '&omicron;': 'ο'," +" '&omid;': '⦶'," +" '&ominus;': '⊖'," +" '&oopf;': '𝕠'," +" '&opar;': '⦷'," +" '&operp;': '⦹'," +" '&oplus;': '⊕'," +" '&or;': '∨'," +" '&orarr;': '↻'," +" '&ord;': '⩝'," +" '&order;': 'ℴ'," +" '&orderof;': 'ℴ'," +" '&ordf': 'ª'," +" '&ordf;': 'ª'," +" '&ordm': 'º'," +" '&ordm;': 'º'," +" '&origof;': '⊶'," +" '&oror;': '⩖'," +" '&orslope;': '⩗'," +" '&orv;': '⩛'," +" '&oscr;': 'ℴ'," +" '&oslash': 'ø'," +" '&oslash;': 'ø'," +" '&osol;': '⊘'," +" '&otilde': 'õ'," +" '&otilde;': 'õ'," +" '&otimes;': '⊗'," +" '&otimesas;': '⨶'," +" '&ouml': 'ö'," +" '&ouml;': 'ö'," +" '&ovbar;': '⌽'," +" '&par;': '∥'," +" '&para': '¶'," +" '&para;': '¶'," +" '&parallel;': '∥'," +" '&parsim;': '⫳'," +" '&parsl;': '⫽'," +" '&part;': '∂'," +" '&pcy;': 'п'," +" '&percnt;': '%'," +" '&period;': '.'," +" '&permil;': '‰'," +" '&perp;': '⊥'," +" '&pertenk;': '‱'," +" '&pfr;': '𝔭'," +" '&phi;': 'φ'," +" '&phiv;': 'ϕ'," +" '&phmmat;': 'ℳ'," +" '&phone;': '☎'," +" '&pi;': 'π'," +" '&pitchfork;': '⋔'," +" '&piv;': 'ϖ'," +" '&planck;': 'ℏ'," +" '&planckh;': 'ℎ'," +" '&plankv;': 'ℏ'," +" '&plus;': '+'," +" '&plusacir;': '⨣'," +" '&plusb;': '⊞'," +" '&pluscir;': '⨢'," +" '&plusdo;': '∔'," +" '&plusdu;': '⨥'," +" '&pluse;': '⩲'," +" '&plusmn': '±'," +" '&plusmn;': '±'," +" 
'&plussim;': '⨦'," +" '&plustwo;': '⨧'," +" '&pm;': '±'," +" '&pointint;': '⨕'," +" '&popf;': '𝕡'," +" '&pound': '£'," +" '&pound;': '£'," +" '&pr;': '≺'," +" '&prE;': '⪳'," +" '&prap;': '⪷'," +" '&prcue;': '≼'," +" '&pre;': '⪯'," +" '&prec;': '≺'," +" '&precapprox;': '⪷'," +" '&preccurlyeq;': '≼'," +" '&preceq;': '⪯'," +" '&precnapprox;': '⪹'," +" '&precneqq;': '⪵'," +" '&precnsim;': '⋨'," +" '&precsim;': '≾'," +" '&prime;': '′'," +" '&primes;': 'ℙ'," +" '&prnE;': '⪵'," +" '&prnap;': '⪹'," +" '&prnsim;': '⋨'," +" '&prod;': '∏'," +" '&profalar;': '⌮'," +" '&profline;': '⌒'," +" '&profsurf;': '⌓'," +" '&prop;': '∝'," +" '&propto;': '∝'," +" '&prsim;': '≾'," +" '&prurel;': '⊰'," +" '&pscr;': '𝓅'," +" '&psi;': 'ψ'," +" '&puncsp;': ' '," +" '&qfr;': '𝔮'," +" '&qint;': '⨌'," +" '&qopf;': '𝕢'," +" '&qprime;': '⁗'," +" '&qscr;': '𝓆'," +" '&quaternions;': 'ℍ'," +" '&quatint;': '⨖'," +" '&quest;': '?'," +" '&questeq;': '≟'," +" '&quot': '\\''," +" '&quot;': '\\''," +" '&rAarr;': '⇛'," +" '&rArr;': '⇒'," +" '&rAtail;': '⤜'," +" '&rBarr;': '⤏'," +" '&rHar;': '⥤'," +" '&race;': '∽'," +" '&racute;': 'ŕ'," +" '&radic;': '√'," +" '&raemptyv;': '⦳'," +" '&rang;': '⟩'," +" '&rangd;': '⦒'," +" '&range;': '⦥'," +" '&rangle;': '⟩'," +" '&raquo': '»'," +" '&raquo;': '»'," +" '&rarr;': '→'," +" '&rarrap;': '⥵'," +" '&rarrb;': '⇥'," +" '&rarrbfs;': '⤠'," +" '&rarrc;': '⤳'," +" '&rarrfs;': '⤞'," +" '&rarrhk;': '↪'," +" '&rarrlp;': '↬'," +" '&rarrpl;': '⥅'," +" '&rarrsim;': '⥴'," +" '&rarrtl;': '↣'," +" '&rarrw;': '↝'," +" '&ratail;': '⤚'," +" '&ratio;': '∶'," +" '&rationals;': 'ℚ'," +" '&rbarr;': '⤍'," +" '&rbbrk;': '❳'," +" '&rbrace;': '}'," +" '&rbrack;': ']'," +" '&rbrke;': '⦌'," +" '&rbrksld;': '⦎'," +" '&rbrkslu;': '⦐'," +" '&rcaron;': 'ř'," +" '&rcedil;': 'ŗ'," +" '&rceil;': '⌉'," +" '&rcub;': '}'," +" '&rcy;': 'р'," +" '&rdca;': '⤷'," +" '&rdldhar;': '⥩'," +" '&rdquo;': '”'," +" '&rdquor;': '”'," +" '&rdsh;': '↳'," +" '&real;': 'ℜ'," +" '&realine;': 'ℛ'," +" '&realpart;': 'ℜ'," +" 
'&reals;': 'ℝ'," +" '&rect;': '▭'," +" '&reg': '®'," +" '&reg;': '®'," +" '&rfisht;': '⥽'," +" '&rfloor;': '⌋'," +" '&rfr;': '𝔯'," +" '&rhard;': '⇁'," +" '&rharu;': '⇀'," +" '&rharul;': '⥬'," +" '&rho;': 'ρ'," +" '&rhov;': 'ϱ'," +" '&rightarrow;': '→'," +" '&rightarrowtail;': '↣'," +" '&rightharpoondown;': '⇁'," +" '&rightharpoonup;': '⇀'," +" '&rightleftarrows;': '⇄'," +" '&rightleftharpoons;': '⇌'," +" '&rightrightarrows;': '⇉'," +" '&rightsquigarrow;': '↝'," +" '&rightthreetimes;': '⋌'," +" '&ring;': '˚'," +" '&risingdotseq;': '≓'," +" '&rlarr;': '⇄'," +" '&rlhar;': '⇌'," +" '&rlm;': '‏'," +" '&rmoust;': '⎱'," +" '&rmoustache;': '⎱'," +" '&rnmid;': '⫮'," +" '&roang;': '⟭'," +" '&roarr;': '⇾'," +" '&robrk;': '⟧'," +" '&ropar;': '⦆'," +" '&ropf;': '𝕣'," +" '&roplus;': '⨮'," +" '&rotimes;': '⨵'," +" '&rpar;': ')'," +" '&rpargt;': '⦔'," +" '&rppolint;': '⨒'," +" '&rrarr;': '⇉'," +" '&rsaquo;': '›'," +" '&rscr;': '𝓇'," +" '&rsh;': '↱'," +" '&rsqb;': ']'," +" '&rsquo;': '’'," +" '&rsquor;': '’'," +" '&rthree;': '⋌'," +" '&rtimes;': '⋊'," +" '&rtri;': '▹'," +" '&rtrie;': '⊵'," +" '&rtrif;': '▸'," +" '&rtriltri;': '⧎'," +" '&ruluhar;': '⥨'," +" '&rx;': '℞'," +" '&sacute;': 'ś'," +" '&sbquo;': '‚'," +" '&sc;': '≻'," +" '&scE;': '⪴'," +" '&scap;': '⪸'," +" '&scaron;': 'š'," +" '&sccue;': '≽'," +" '&sce;': '⪰'," +" '&scedil;': 'ş'," +" '&scirc;': 'ŝ'," +" '&scnE;': '⪶'," +" '&scnap;': '⪺'," +" '&scnsim;': '⋩'," +" '&scpolint;': '⨓'," +" '&scsim;': '≿'," +" '&scy;': 'с'," +" '&sdot;': '⋅'," +" '&sdotb;': '⊡'," +" '&sdote;': '⩦'," +" '&seArr;': '⇘'," +" '&searhk;': '⤥'," +" '&searr;': '↘'," +" '&searrow;': '↘'," +" '&sect': '§'," +" '&sect;': '§'," +" '&semi;': ';'," +" '&seswar;': '⤩'," +" '&setminus;': '∖'," +" '&setmn;': '∖'," +" '&sext;': '✶'," +" '&sfr;': '𝔰'," +" '&sfrown;': '⌢'," +" '&sharp;': '♯'," +" '&shchcy;': 'щ'," +" '&shcy;': 'ш'," +" '&shortmid;': '∣'," +" '&shortparallel;': '∥'," +" '&shy': '­'," +" '&shy;': '­'," +" '&sigma;': 'σ'," +" '&sigmaf;': 'ς'," +" 
'&sigmav;': 'ς'," +" '&sim;': '∼'," +" '&simdot;': '⩪'," +" '&sime;': '≃'," +" '&simeq;': '≃'," +" '&simg;': '⪞'," +" '&simgE;': '⪠'," +" '&siml;': '⪝'," +" '&simlE;': '⪟'," +" '&simne;': '≆'," +" '&simplus;': '⨤'," +" '&simrarr;': '⥲'," +" '&slarr;': '←'," +" '&smallsetminus;': '∖'," +" '&smashp;': '⨳'," +" '&smeparsl;': '⧤'," +" '&smid;': '∣'," +" '&smile;': '⌣'," +" '&smt;': '⪪'," +" '&smte;': '⪬'," +" '&smtes;': '⪬'," +" '&softcy;': 'ь'," +" '&sol;': '/'," +" '&solb;': '⧄'," +" '&solbar;': '⌿'," +" '&sopf;': '𝕤'," +" '&spades;': '♠'," +" '&spadesuit;': '♠'," +" '&spar;': '∥'," +" '&sqcap;': '⊓'," +" '&sqcaps;': '⊓'," +" '&sqcup;': '⊔'," +" '&sqcups;': '⊔'," +" '&sqsub;': '⊏'," +" '&sqsube;': '⊑'," +" '&sqsubset;': '⊏'," +" '&sqsubseteq;': '⊑'," +" '&sqsup;': '⊐'," +" '&sqsupe;': '⊒'," +" '&sqsupset;': '⊐'," +" '&sqsupseteq;': '⊒'," +" '&squ;': '□'," +" '&square;': '□'," +" '&squarf;': '▪'," +" '&squf;': '▪'," +" '&srarr;': '→'," +" '&sscr;': '𝓈'," +" '&ssetmn;': '∖'," +" '&ssmile;': '⌣'," +" '&sstarf;': '⋆'," +" '&star;': '☆'," +" '&starf;': '★'," +" '&straightepsilon;': 'ϵ'," +" '&straightphi;': 'ϕ'," +" '&strns;': '¯'," +" '&sub;': '⊂'," +" '&subE;': '⫅'," +" '&subdot;': '⪽'," +" '&sube;': '⊆'," +" '&subedot;': '⫃'," +" '&submult;': '⫁'," +" '&subnE;': '⫋'," +" '&subne;': '⊊'," +" '&subplus;': '⪿'," +" '&subrarr;': '⥹'," +" '&subset;': '⊂'," +" '&subseteq;': '⊆'," +" '&subseteqq;': '⫅'," +" '&subsetneq;': '⊊'," +" '&subsetneqq;': '⫋'," +" '&subsim;': '⫇'," +" '&subsub;': '⫕'," +" '&subsup;': '⫓'," +" '&succ;': '≻'," +" '&succapprox;': '⪸'," +" '&succcurlyeq;': '≽'," +" '&succeq;': '⪰'," +" '&succnapprox;': '⪺'," +" '&succneqq;': '⪶'," +" '&succnsim;': '⋩'," +" '&succsim;': '≿'," +" '&sum;': '∑'," +" '&sung;': '♪'," +" '&sup1': '¹'," +" '&sup1;': '¹'," +" '&sup2': '²'," +" '&sup2;': '²'," +" '&sup3': '³'," +" '&sup3;': '³'," +" '&sup;': '⊃'," +" '&supE;': '⫆'," +" '&supdot;': '⪾'," +" '&supdsub;': '⫘'," +" '&supe;': '⊇'," +" '&supedot;': '⫄'," +" '&suphsol;': 
'⟉'," +" '&suphsub;': '⫗'," +" '&suplarr;': '⥻'," +" '&supmult;': '⫂'," +" '&supnE;': '⫌'," +" '&supne;': '⊋'," +" '&supplus;': '⫀'," +" '&supset;': '⊃'," +" '&supseteq;': '⊇'," +" '&supseteqq;': '⫆'," +" '&supsetneq;': '⊋'," +" '&supsetneqq;': '⫌'," +" '&supsim;': '⫈'," +" '&supsub;': '⫔'," +" '&supsup;': '⫖'," +" '&swArr;': '⇙'," +" '&swarhk;': '⤦'," +" '&swarr;': '↙'," +" '&swarrow;': '↙'," +" '&swnwar;': '⤪'," +" '&szlig': 'ß'," +" '&szlig;': 'ß'," +" '&target;': '⌖'," +" '&tau;': 'τ'," +" '&tbrk;': '⎴'," +" '&tcaron;': 'ť'," +" '&tcedil;': 'ţ'," +" '&tcy;': 'т'," +" '&tdot;': '⃛'," +" '&telrec;': '⌕'," +" '&tfr;': '𝔱'," +" '&there4;': '∴'," +" '&therefore;': '∴'," +" '&theta;': 'θ'," +" '&thetasym;': 'ϑ'," +" '&thetav;': 'ϑ'," +" '&thickapprox;': '≈'," +" '&thicksim;': '∼'," +" '&thinsp;': ' '," +" '&thkap;': '≈'," +" '&thksim;': '∼'," +" '&thorn': 'þ'," +" '&thorn;': 'þ'," +" '&tilde;': '˜'," +" '&times': '×'," +" '&times;': '×'," +" '&timesb;': '⊠'," +" '&timesbar;': '⨱'," +" '&timesd;': '⨰'," +" '&tint;': '∭'," +" '&toea;': '⤨'," +" '&top;': '⊤'," +" '&topbot;': '⌶'," +" '&topcir;': '⫱'," +" '&topf;': '𝕥'," +" '&topfork;': '⫚'," +" '&tosa;': '⤩'," +" '&tprime;': '‴'," +" '&trade;': '™'," +" '&triangle;': '▵'," +" '&triangledown;': '▿'," +" '&triangleleft;': '◃'," +" '&trianglelefteq;': '⊴'," +" '&triangleq;': '≜'," +" '&triangleright;': '▹'," +" '&trianglerighteq;': '⊵'," +" '&tridot;': '◬'," +" '&trie;': '≜'," +" '&triminus;': '⨺'," +" '&triplus;': '⨹'," +" '&trisb;': '⧍'," +" '&tritime;': '⨻'," +" '&trpezium;': '⏢'," +" '&tscr;': '𝓉'," +" '&tscy;': 'ц'," +" '&tshcy;': 'ћ'," +" '&tstrok;': 'ŧ'," +" '&twixt;': '≬'," +" '&twoheadleftarrow;': '↞'," +" '&twoheadrightarrow;': '↠'," +" '&uArr;': '⇑'," +" '&uHar;': '⥣'," +" '&uacute': 'ú'," +" '&uacute;': 'ú'," +" '&uarr;': '↑'," +" '&ubrcy;': 'ў'," +" '&ubreve;': 'ŭ'," +" '&ucirc': 'û'," +" '&ucirc;': 'û'," +" '&ucy;': 'у'," +" '&udarr;': '⇅'," +" '&udblac;': 'ű'," +" '&udhar;': '⥮'," +" '&ufisht;': '⥾'," +" 
'&ufr;': '𝔲'," +" '&ugrave': 'ù'," +" '&ugrave;': 'ù'," +" '&uharl;': '↿'," +" '&uharr;': '↾'," +" '&uhblk;': '▀'," +" '&ulcorn;': '⌜'," +" '&ulcorner;': '⌜'," +" '&ulcrop;': '⌏'," +" '&ultri;': '◸'," +" '&umacr;': 'ū'," +" '&uml': '¨'," +" '&uml;': '¨'," +" '&uogon;': 'ų'," +" '&uopf;': '𝕦'," +" '&uparrow;': '↑'," +" '&updownarrow;': '↕'," +" '&upharpoonleft;': '↿'," +" '&upharpoonright;': '↾'," +" '&uplus;': '⊎'," +" '&upsi;': 'υ'," +" '&upsih;': 'ϒ'," +" '&upsilon;': 'υ'," +" '&upuparrows;': '⇈'," +" '&urcorn;': '⌝'," +" '&urcorner;': '⌝'," +" '&urcrop;': '⌎'," +" '&uring;': 'ů'," +" '&urtri;': '◹'," +" '&uscr;': '𝓊'," +" '&utdot;': '⋰'," +" '&utilde;': 'ũ'," +" '&utri;': '▵'," +" '&utrif;': '▴'," +" '&uuarr;': '⇈'," +" '&uuml': 'ü'," +" '&uuml;': 'ü'," +" '&uwangle;': '⦧'," +" '&vArr;': '⇕'," +" '&vBar;': '⫨'," +" '&vBarv;': '⫩'," +" '&vDash;': '⊨'," +" '&vangrt;': '⦜'," +" '&varepsilon;': 'ϵ'," +" '&varkappa;': 'ϰ'," +" '&varnothing;': '∅'," +" '&varphi;': 'ϕ'," +" '&varpi;': 'ϖ'," +" '&varpropto;': '∝'," +" '&varr;': '↕'," +" '&varrho;': 'ϱ'," +" '&varsigma;': 'ς'," +" '&varsubsetneq;': '⊊'," +" '&varsubsetneqq;': '⫋'," +" '&varsupsetneq;': '⊋'," +" '&varsupsetneqq;': '⫌'," +" '&vartheta;': 'ϑ'," +" '&vartriangleleft;': '⊲'," +" '&vartriangleright;': '⊳'," +" '&vcy;': 'в'," +" '&vdash;': '⊢'," +" '&vee;': '∨'," +" '&veebar;': '⊻'," +" '&veeeq;': '≚'," +" '&vellip;': '⋮'," +" '&verbar;': '|'," +" '&vert;': '|'," +" '&vfr;': '𝔳'," +" '&vltri;': '⊲'," +" '&vnsub;': '⊂'," +" '&vnsup;': '⊃'," +" '&vopf;': '𝕧'," +" '&vprop;': '∝'," +" '&vrtri;': '⊳'," +" '&vscr;': '𝓋'," +" '&vsubnE;': '⫋'," +" '&vsubne;': '⊊'," +" '&vsupnE;': '⫌'," +" '&vsupne;': '⊋'," +" '&vzigzag;': '⦚'," +" '&wcirc;': 'ŵ'," +" '&wedbar;': '⩟'," +" '&wedge;': '∧'," +" '&wedgeq;': '≙'," +" '&weierp;': '℘'," +" '&wfr;': '𝔴'," +" '&wopf;': '𝕨'," +" '&wp;': '℘'," +" '&wr;': '≀'," +" '&wreath;': '≀'," +" '&wscr;': '𝓌'," +" '&xcap;': '⋂'," +" '&xcirc;': '◯'," +" '&xcup;': '⋃'," +" '&xdtri;': '▽'," +" 
'&xfr;': '𝔵'," +" '&xhArr;': '⟺'," +" '&xharr;': '⟷'," +" '&xi;': 'ξ'," +" '&xlArr;': '⟸'," +" '&xlarr;': '⟵'," +" '&xmap;': '⟼'," +" '&xnis;': '⋻'," +" '&xodot;': '⨀'," +" '&xopf;': '𝕩'," +" '&xoplus;': '⨁'," +" '&xotime;': '⨂'," +" '&xrArr;': '⟹'," +" '&xrarr;': '⟶'," +" '&xscr;': '𝓍'," +" '&xsqcup;': '⨆'," +" '&xuplus;': '⨄'," +" '&xutri;': '△'," +" '&xvee;': '⋁'," +" '&xwedge;': '⋀'," +" '&yacute': 'ý'," +" '&yacute;': 'ý'," +" '&yacy;': 'я'," +" '&ycirc;': 'ŷ'," +" '&ycy;': 'ы'," +" '&yen': '¥'," +" '&yen;': '¥'," +" '&yfr;': '𝔶'," +" '&yicy;': 'ї'," +" '&yopf;': '𝕪'," +" '&yscr;': '𝓎'," +" '&yucy;': 'ю'," +" '&yuml': 'ÿ'," +" '&yuml;': 'ÿ'," +" '&zacute;': 'ź'," +" '&zcaron;': 'ž'," +" '&zcy;': 'з'," +" '&zdot;': 'ż'," +" '&zeetrf;': 'ℨ'," +" '&zeta;': 'ζ'," +" '&zfr;': '𝔷'," +" '&zhcy;': 'ж'," +" '&zigrarr;': '⇝'," +" '&zopf;': '𝕫'," +" '&zscr;': '𝓏'," +" '&zwj;': '‍'," +" '&zwnj;': '‌'" +"}"; diff --git a/fort.c b/fort.c @@ -0,0 +1,7767 @@ +/* +libfort + +MIT License + +Copyright (c) 2017 - 2020 Seleznev Anton + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/ + +/* The file was GENERATED by an amalgamation script.*/ +/* DO NOT EDIT BY HAND!!! */ + + +#define FT_AMALGAMED_SOURCE /* Macros to make internal libfort functions static */ + + +/******************************************************** + Begin of file "fort_utils.h" + ********************************************************/ + +#ifndef FORT_IMPL_H +#define FORT_IMPL_H + +#if defined(_MSC_VER) +#define _CRT_SECURE_NO_WARNINGS /* To disable warnings for unsafe functions */ +#endif + +#include <stddef.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> +#include <stdio.h> +#include <stdbool.h> +#include "fort.h" + +/* Define FT_INTERNAL to make internal libfort functions static + * in the resulting amalgamated source file. + */ +#ifdef FT_AMALGAMED_SOURCE +#define FT_INTERNAL static +#else +#define FT_INTERNAL +#endif /* FT_AMALGAMED_SOURCE */ + + +#define FORT_DEFAULT_COL_SEPARATOR '|' +extern char g_col_separator; + +#define FORT_COL_SEPARATOR_LENGTH 1 + +#define FORT_UNUSED __attribute__((unused)) + +#define F_MALLOC fort_malloc +#define F_FREE fort_free +#define F_CALLOC fort_calloc +#define F_REALLOC fort_realloc +#define F_STRDUP fort_strdup +#define F_WCSDUP fort_wcsdup +/* @todo: replace with custom impl !!!*/ +#define F_UTF8DUP utf8dup + +#define F_CREATE(type) ((type *)F_CALLOC(sizeof(type), 1)) + +#define MAX(a,b) ((a) > (b) ? (a) : (b)) +#define MIN(a,b) ((a) < (b) ? 
(a) : (b)) + +#define FT_NEWLINE "\n" +#define FT_SPACE " " + +/***************************************************************************** + * DEFAULT_SIZES + * ***************************************************************************/ +#define DEFAULT_STR_BUF_SIZE 1024 +#define DEFAULT_VECTOR_CAPACITY 10 + +/***************************************************************************** + * DATA TYPES + * ***************************************************************************/ + +enum f_get_policy { + CREATE_ON_NULL, + DONT_CREATE_ON_NULL +}; + +enum f_bool { + F_FALSE = 0, + F_TRUE = 1 +}; + +enum f_cell_type { + COMMON_CELL, + GROUP_MASTER_CELL, + GROUP_SLAVE_CELL +}; + +enum f_geometry_type { + VISIBLE_GEOMETRY, + INTERN_REPR_GEOMETRY +}; + +enum f_string_type { + CHAR_BUF, +#ifdef FT_HAVE_WCHAR + W_CHAR_BUF, +#endif /* FT_HAVE_WCHAR */ +#ifdef FT_HAVE_UTF8 + UTF8_BUF, +#endif /* FT_HAVE_UTF8 */ +}; + +struct f_string_view { + union { + const char *cstr; +#ifdef FT_HAVE_WCHAR + const wchar_t *wstr; +#endif +#ifdef FT_HAVE_UTF8 + const void *u8str; +#endif + const void *data; + } u; + enum f_string_type type; +}; +typedef struct f_string_view f_string_view_t; + + +#define FT_STR_2_CAT_(arg1, arg2) \ + arg1##arg2 +#define FT_STR_2_CAT(arg1, arg2) \ + FT_STR_2_CAT_(arg1, arg2) + +#define UNIQUE_NAME_(prefix) \ + FT_STR_2_CAT(prefix,__COUNTER__) +#define UNIQUE_NAME(prefix) \ + UNIQUE_NAME_(prefix) + +typedef int f_status; + + + + +struct f_table_properties; +struct f_row; +struct f_vector; +struct f_cell; +struct f_string_buffer; +struct f_separator { + int enabled; +}; + +typedef struct f_table_properties f_table_properties_t; +typedef struct f_vector f_vector_t; +typedef struct f_cell f_cell_t; +typedef struct f_string_buffer f_string_buffer_t; +typedef struct f_row f_row_t; +typedef struct f_separator f_separator_t; + +struct f_context { + f_table_properties_t *table_properties; + size_t row; + size_t column; +}; +typedef struct f_context f_context_t; + 
+struct f_conv_context { + union { + char *buf; +#ifdef FT_HAVE_WCHAR + wchar_t *wbuf; +#endif +#ifdef FT_HAVE_UTF8 + const void *u8str; +#endif + } u; + size_t raw_avail; + struct f_context *cntx; + enum f_string_type b_type; +}; +typedef struct f_conv_context f_conv_context_t; + + +/***************************************************************************** + * LIBFORT helpers + *****************************************************************************/ +/* Allocator hooks: function pointers that the F_MALLOC / F_FREE / F_CALLOC / F_REALLOC macros expand to. */ +extern void *(*fort_malloc)(size_t size); +extern void (*fort_free)(void *ptr); +extern void *(*fort_calloc)(size_t nmemb, size_t size); +extern void *(*fort_realloc)(void *ptr, size_t size); + +FT_INTERNAL +void set_memory_funcs(void *(*f_malloc)(size_t size), void (*f_free)(void *ptr)); + +FT_INTERNAL +char *fort_strdup(const char *str); + + + +FT_INTERNAL +size_t number_of_columns_in_format_string(const f_string_view_t *fmt); + +FT_INTERNAL +size_t number_of_columns_in_format_buffer(const f_string_buffer_t *fmt); + +#if defined(FT_HAVE_WCHAR) +FT_INTERNAL +wchar_t *fort_wcsdup(const wchar_t *str); +#endif + + + +FT_INTERNAL +int print_n_strings(f_conv_context_t *cntx, size_t n, const char *str); + + +FT_INTERNAL +int ft_nprint(f_conv_context_t *cntx, const char *str, size_t strlen); +#ifdef FT_HAVE_WCHAR +FT_INTERNAL +int ft_nwprint(f_conv_context_t *cntx, const wchar_t *str, size_t strlen); +#endif /* FT_HAVE_WCHAR */ +#ifdef FT_HAVE_UTF8 +FT_INTERNAL +int ft_nu8print(f_conv_context_t *cntx, const void *beg, const void *end); +#endif /* FT_HAVE_UTF8 */ + +/* PRINT_DEBUG_INFO expands to nothing; the commented-out variant below would report the failing function, file and line on stderr. */ +/*#define PRINT_DEBUG_INFO fprintf(stderr, "error in %s(%s:%d)\n", __FUNCTION__, __FILE__, __LINE__);*/ +#define PRINT_DEBUG_INFO +/* FT_CHECK: store `statement` in the caller's `tmp`; if it is negative, jump to the caller's `clear` label. Both names must exist in the enclosing function. */ +#define FT_CHECK(statement) \ + do { \ + tmp = statement; \ + if (tmp < 0) {\ + PRINT_DEBUG_INFO \ + goto clear; \ + } \ + } while(0) +/* CHCK_RSLT_ADD_TO_WRITTEN: same check as FT_CHECK; a non-negative result is additionally added to the caller's `written` counter. */ +#define CHCK_RSLT_ADD_TO_WRITTEN(statement) \ + do { \ + tmp = statement; \ + if (tmp < 0) {\ + PRINT_DEBUG_INFO \ + goto clear; \ + } \ + written += (size_t)tmp; \ + } while(0) + 
+#define CHCK_RSLT_ADD_TO_INVISIBLE_WRITTEN(statement) \ + do { \ + tmp = statement; \ + if (tmp < 0) {\ + PRINT_DEBUG_INFO \ + goto clear; \ + } \ + invisible_written += (size_t)tmp; \ + } while(0) +/* CHCK_RSLT_ADD_TO_INVISIBLE_WRITTEN (above): stores `statement` in the caller's `tmp`, jumps to `clear` if it is negative, otherwise adds it to the caller's `invisible_written`. */ +/* CHECK_NOT_NEGATIVE: jump to the caller's `fort_fail` label when x is negative. */ +#define CHECK_NOT_NEGATIVE(x) \ + do { if ((x) < 0) goto fort_fail; } while (0) + +#endif /* FORT_IMPL_H */ + +/******************************************************** + End of file "fort_utils.h" + ********************************************************/ + + +/******************************************************** + Begin of file "vector.h" + ********************************************************/ + +#ifndef VECTOR_H +#define VECTOR_H + +/* #include "fort_utils.h" */ /* Commented by amalgamation script */ + +/* (size_t)-1, i.e. the largest size_t value; going by its name it marks an invalid index. */ +#define INVALID_VEC_INDEX ((size_t) -1) + +FT_INTERNAL +f_vector_t *create_vector(size_t item_size, size_t capacity); + +FT_INTERNAL +void destroy_vector(f_vector_t *); + +FT_INTERNAL +size_t vector_size(const f_vector_t *); + +FT_INTERNAL +size_t vector_capacity(const f_vector_t *); + +FT_INTERNAL +int vector_push(f_vector_t *, const void *item); + +FT_INTERNAL +int vector_insert(f_vector_t *, const void *item, size_t pos); + +FT_INTERNAL +f_vector_t *vector_split(f_vector_t *, size_t pos); + +FT_INTERNAL +const void *vector_at_c(const f_vector_t *vector, size_t index); + +FT_INTERNAL +void *vector_at(f_vector_t *, size_t index); + +FT_INTERNAL +f_status vector_swap(f_vector_t *cur_vec, f_vector_t *mv_vec, size_t pos); + +FT_INTERNAL +void vector_clear(f_vector_t *); + +FT_INTERNAL +int vector_erase(f_vector_t *, size_t index); + +#ifdef FT_TEST_BUILD +f_vector_t *copy_vector(f_vector_t *); +size_t vector_index_of(const f_vector_t *, const void *item); +#endif +/* VECTOR_AT: typed element access; casts the pointer returned by vector_at() to data_type * and dereferences it without any check. */ +#define VECTOR_AT(vector, pos, data_type) \ + *(data_type *)vector_at((vector), (pos)) +/* VECTOR_AT_C: same as VECTOR_AT, but goes through vector_at_c(), which takes a const vector. */ +#define VECTOR_AT_C(vector, pos, const_data_type) \ + *(const_data_type *)vector_at_c((vector), (pos)) + +#endif /* VECTOR_H */ + +/******************************************************** + End of file 
"vector.h" + ********************************************************/ + + +/******************************************************** + Begin of file "wcwidth.h" + ********************************************************/ + +#ifndef WCWIDTH_H +#define WCWIDTH_H + +/* #include "fort_utils.h" */ /* Commented by amalgamation script */ + +#ifdef FT_HAVE_WCHAR +#include <wchar.h> + +FT_INTERNAL +int mk_wcswidth(const wchar_t *pwcs, size_t n); + +#endif /* FT_HAVE_WCHAR */ + +#endif /* WCWIDTH_H */ + +/******************************************************** + End of file "wcwidth.h" + ********************************************************/ + + +/******************************************************** + Begin of file "utf8.h" + ********************************************************/ + +// The latest version of this library is available on GitHub; +// https://github.com/sheredom/utf8.h + +// This is free and unencumbered software released into the public domain. +// +// Anyone is free to copy, modify, publish, use, compile, sell, or +// distribute this software, either in source code form or as a compiled +// binary, for any purpose, commercial or non-commercial, and by any +// means. +// +// In jurisdictions that recognize copyright laws, the author or authors +// of this software dedicate any and all copyright interest in the +// software to the public domain. We make this dedication for the benefit +// of the public at large and to the detriment of our heirs and +// successors. We intend this dedication to be an overt act of +// relinquishment in perpetuity of all present and future rights to this +// software under copyright law. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+// IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. +// +// For more information, please refer to <http://unlicense.org/> + +#ifndef SHEREDOM_UTF8_H_INCLUDED +#define SHEREDOM_UTF8_H_INCLUDED + +#if defined(_MSC_VER) +#pragma warning(push) + +// disable 'bytes padding added after construct' warning +#pragma warning(disable : 4820) +#endif + +#include <stddef.h> +#include <stdlib.h> + +#if defined(_MSC_VER) +#pragma warning(pop) +#endif +// utf8_int32_t: 32-bit signed integer type used for codepoints; __int32 on MSVC, int32_t from <stdint.h> elsewhere. +#if defined(_MSC_VER) +typedef __int32 utf8_int32_t; +#else +#include <stdint.h> +typedef int32_t utf8_int32_t; +#endif + +#if defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wold-style-cast" +#pragma clang diagnostic ignored "-Wcast-qual" +#endif + +#ifdef __cplusplus +extern "C" { +#endif +// Attribute shims: GCC/Clang attributes where available, __restrict/__inline on MSVC, otherwise empty (utf8_weak falls back to plain inline). +#if defined(__clang__) || defined(__GNUC__) +#define utf8_nonnull __attribute__((nonnull)) +#define utf8_pure __attribute__((pure)) +#define utf8_restrict __restrict__ +#define utf8_weak __attribute__((weak)) +#elif defined(_MSC_VER) +#define utf8_nonnull +#define utf8_pure +#define utf8_restrict __restrict +#define utf8_weak __inline +#else +#define utf8_nonnull +#define utf8_pure +#define utf8_restrict +#define utf8_weak inline +#endif + +#ifdef __cplusplus +#define utf8_null NULL +#else +#define utf8_null 0 +#endif + +// Return less than 0, 0, greater than 0 if src1 < src2, src1 == src2, src1 > +// src2 respectively, case insensitive. +utf8_nonnull utf8_pure utf8_weak int utf8casecmp(const void *src1, + const void *src2); + +// Append the utf8 string src onto the utf8 string dst. +utf8_nonnull utf8_weak void *utf8cat(void *utf8_restrict dst, + const void *utf8_restrict src); + +// Find the first match of the utf8 codepoint chr in the utf8 string src. 
+utf8_nonnull utf8_pure utf8_weak void *utf8chr(const void *src, + utf8_int32_t chr); + +// Return less than 0, 0, greater than 0 if src1 < src2, +// src1 == src2, src1 > src2 respectively. +utf8_nonnull utf8_pure utf8_weak int utf8cmp(const void *src1, + const void *src2); + +// Copy the utf8 string src onto the memory allocated in dst. +utf8_nonnull utf8_weak void *utf8cpy(void *utf8_restrict dst, + const void *utf8_restrict src); + +// Number of utf8 codepoints in the utf8 string src that consists entirely +// of utf8 codepoints not from the utf8 string reject. +utf8_nonnull utf8_pure utf8_weak size_t utf8cspn(const void *src, + const void *reject); + +// Duplicate the utf8 string src by getting its size, malloc'ing a new buffer, +// copying over the data, and returning that. Or 0 if malloc failed. +utf8_nonnull utf8_weak void *utf8dup(const void *src); + +// Number of utf8 codepoints in the utf8 string str, +// excluding the null terminating byte. +utf8_nonnull utf8_pure utf8_weak size_t utf8len(const void *str); + +// Visible width of the utf8 string str. +utf8_nonnull utf8_pure utf8_weak size_t utf8width(const void *str); + +// Visible width of codepoint. +utf8_nonnull utf8_pure utf8_weak int utf8cwidth(utf8_int32_t c); + +// Return less than 0, 0, greater than 0 if src1 < src2, src1 == src2, src1 > +// src2 respectively, case insensitive. Checking at most n bytes of each utf8 +// string. +utf8_nonnull utf8_pure utf8_weak int utf8ncasecmp(const void *src1, + const void *src2, size_t n); + +// Append the utf8 string src onto the utf8 string dst, +// writing at most n+1 bytes. Can produce an invalid utf8 +// string if n falls partway through a utf8 codepoint. +utf8_nonnull utf8_weak void *utf8ncat(void *utf8_restrict dst, + const void *utf8_restrict src, size_t n); + +// Return less than 0, 0, greater than 0 if src1 < src2, +// src1 == src2, src1 > src2 respectively. Checking at most n +// bytes of each utf8 string. 
+utf8_nonnull utf8_pure utf8_weak int utf8ncmp(const void *src1, + const void *src2, size_t n); + +// Copy the utf8 string src onto the memory allocated in dst. +// Copies at most n bytes. If there is no terminating null byte in +// the first n bytes of src, the string placed into dst will not be +// null-terminated. If the size (in bytes) of src is less than n, +// extra null terminating bytes are appended to dst such that a +// total of n bytes are written. Can produce an invalid utf8 +// string if n falls partway through a utf8 codepoint. +utf8_nonnull utf8_weak void *utf8ncpy(void *utf8_restrict dst, + const void *utf8_restrict src, size_t n); + +// Similar to utf8dup, except that at most n bytes of src are copied. If src is +// longer than n, only n bytes are copied and a null byte is added. +// +// Returns a new string if successful, 0 otherwise. +utf8_nonnull utf8_weak void *utf8ndup(const void *src, size_t n); + +// Locates the first occurrence in the utf8 string str of any byte in the +// utf8 string accept, or 0 if no match was found. +utf8_nonnull utf8_pure utf8_weak void *utf8pbrk(const void *str, + const void *accept); + +// Find the last match of the utf8 codepoint chr in the utf8 string src. +utf8_nonnull utf8_pure utf8_weak void *utf8rchr(const void *src, int chr); + +// Number of bytes in the utf8 string str, +// including the null terminating byte. +utf8_nonnull utf8_pure utf8_weak size_t utf8size(const void *str); + +// Number of utf8 codepoints in the utf8 string src that consists entirely +// of utf8 codepoints from the utf8 string accept. +utf8_nonnull utf8_pure utf8_weak size_t utf8spn(const void *src, + const void *accept); + +// The position of the utf8 string needle in the utf8 string haystack. +utf8_nonnull utf8_pure utf8_weak void *utf8str(const void *haystack, + const void *needle); + +// The position of the utf8 string needle in the utf8 string haystack, case +// insensitive. 
+utf8_nonnull utf8_pure utf8_weak void *utf8casestr(const void *haystack, + const void *needle); + +// Return 0 on success, or the position of the invalid +// utf8 codepoint on failure. +utf8_nonnull utf8_pure utf8_weak void *utf8valid(const void *str); + +// Sets out_codepoint to the next utf8 codepoint in str, and returns the address +// of the utf8 codepoint after the current one in str. +utf8_nonnull utf8_weak void * +utf8codepoint(const void *utf8_restrict str, + utf8_int32_t *utf8_restrict out_codepoint); + +// Returns the size of the given codepoint in bytes. +utf8_weak size_t utf8codepointsize(utf8_int32_t chr); + +// Write a codepoint to the given string, and return the address to the next +// place after the written codepoint. Pass the number of bytes left in the buffer as +// n. If there is not enough space for the codepoint, this function returns +// null. +utf8_nonnull utf8_weak void *utf8catcodepoint(void *utf8_restrict str, + utf8_int32_t chr, size_t n); + +// Returns 1 if the given character is lowercase, or 0 if it is not. +utf8_weak int utf8islower(utf8_int32_t chr); + +// Returns 1 if the given character is uppercase, or 0 if it is not. +utf8_weak int utf8isupper(utf8_int32_t chr); + +// Transform the given string into all lowercase codepoints. +utf8_nonnull utf8_weak void utf8lwr(void *utf8_restrict str); + +// Transform the given string into all uppercase codepoints. +utf8_nonnull utf8_weak void utf8upr(void *utf8_restrict str); + +// Make a codepoint lower case if possible. +utf8_weak utf8_int32_t utf8lwrcodepoint(utf8_int32_t cp); + +// Make a codepoint upper case if possible.
+utf8_weak utf8_int32_t utf8uprcodepoint(utf8_int32_t cp); + +#undef utf8_weak +#undef utf8_pure +#undef utf8_nonnull + +int utf8casecmp(const void *src1, const void *src2) +{ + utf8_int32_t src1_cp, src2_cp, src1_orig_cp, src2_orig_cp; + + for (;;) { + src1 = utf8codepoint(src1, &src1_cp); + src2 = utf8codepoint(src2, &src2_cp); + + // Take a copy of src1 & src2 + src1_orig_cp = src1_cp; + src2_orig_cp = src2_cp; + + // Lower the srcs if required + src1_cp = utf8lwrcodepoint(src1_cp); + src2_cp = utf8lwrcodepoint(src2_cp); + + // Check if the lowered codepoints match + if ((0 == src1_orig_cp) && (0 == src2_orig_cp)) { + return 0; + } else if (src1_cp == src2_cp) { + continue; + } + + // If they don't match, then we return which of the original's are less + if (src1_orig_cp < src2_orig_cp) { + return -1; + } else if (src1_orig_cp > src2_orig_cp) { + return 1; + } + } +} + +void *utf8cat(void *utf8_restrict dst, const void *utf8_restrict src) +{ + char *d = (char *)dst; + const char *s = (const char *)src; + + // find the null terminating byte in dst + while ('\0' != *d) { + d++; + } + + // overwriting the null terminating byte in dst, append src byte-by-byte + while ('\0' != *s) { + *d++ = *s++; + } + + // write out a new null terminating byte into dst + *d = '\0'; + + return dst; +} + +void *utf8chr(const void *src, utf8_int32_t chr) +{ + char c[5] = {'\0', '\0', '\0', '\0', '\0'}; + + if (0 == chr) { + // being asked to return position of null terminating byte, so + // just run s to the end, and return! 
+ const char *s = (const char *)src; + while ('\0' != *s) { + s++; + } + return (void *)s; + } else if (0 == ((utf8_int32_t)0xffffff80 & chr)) { + // 1-byte/7-bit ascii + // (0b0xxxxxxx) + c[0] = (char)chr; + } else if (0 == ((utf8_int32_t)0xfffff800 & chr)) { + // 2-byte/11-bit utf8 code point + // (0b110xxxxx 0b10xxxxxx) + c[0] = 0xc0 | (char)(chr >> 6); + c[1] = 0x80 | (char)(chr & 0x3f); + } else if (0 == ((utf8_int32_t)0xffff0000 & chr)) { + // 3-byte/16-bit utf8 code point + // (0b1110xxxx 0b10xxxxxx 0b10xxxxxx) + c[0] = 0xe0 | (char)(chr >> 12); + c[1] = 0x80 | (char)((chr >> 6) & 0x3f); + c[2] = 0x80 | (char)(chr & 0x3f); + } else { // if (0 == ((int)0xffe00000 & chr)) { + // 4-byte/21-bit utf8 code point + // (0b11110xxx 0b10xxxxxx 0b10xxxxxx 0b10xxxxxx) + c[0] = 0xf0 | (char)(chr >> 18); + c[1] = 0x80 | (char)((chr >> 12) & 0x3f); + c[2] = 0x80 | (char)((chr >> 6) & 0x3f); + c[3] = 0x80 | (char)(chr & 0x3f); + } + + // we've made c into a 2 utf8 codepoint string, one for the chr we are + // seeking, another for the null terminating byte. 
Now use utf8str to + // search + return utf8str(src, c); +} + +int utf8cmp(const void *src1, const void *src2) +{ + const unsigned char *s1 = (const unsigned char *)src1; + const unsigned char *s2 = (const unsigned char *)src2; + + while (('\0' != *s1) || ('\0' != *s2)) { + if (*s1 < *s2) { + return -1; + } else if (*s1 > *s2) { + return 1; + } + + s1++; + s2++; + } + + // both utf8 strings matched + return 0; +} + +int utf8coll(const void *src1, const void *src2); + +void *utf8cpy(void *utf8_restrict dst, const void *utf8_restrict src) +{ + char *d = (char *)dst; + const char *s = (const char *)src; + + // overwriting anything previously in dst, write byte-by-byte + // from src + while ('\0' != *s) { + *d++ = *s++; + } + + // append null terminating byte + *d = '\0'; + + return dst; +} + +size_t utf8cspn(const void *src, const void *reject) +{ + const char *s = (const char *)src; + size_t chars = 0; + + while ('\0' != *s) { + const char *r = (const char *)reject; + size_t offset = 0; + + while ('\0' != *r) { + // checking that if *r is the start of a utf8 codepoint + // (it is not 0b10xxxxxx) and we have successfully matched + // a previous character (0 < offset) - we found a match + if ((0x80 != (0xc0 & *r)) && (0 < offset)) { + return chars; + } else { + if (*r == s[offset]) { + // part of a utf8 codepoint matched, so move our checking + // onwards to the next byte + offset++; + r++; + } else { + // r could be in the middle of an unmatching utf8 code point, + // so we need to march it on to the next character beginning, + + do { + r++; + } while (0x80 == (0xc0 & *r)); + + // reset offset too as we found a mismatch + offset = 0; + } + } + } + + // the current utf8 codepoint in src did not match reject, but src + // could have been partway through a utf8 codepoint, so we need to + // march it onto the next utf8 codepoint starting byte + do { + s++; + } while ((0x80 == (0xc0 & *s))); + chars++; + } + + return chars; +} + +size_t utf8size(const void *str); + +void 
*utf8dup(const void *src) +{ + const char *s = (const char *)src; + char *n = utf8_null; + + // figure out how many bytes (including the terminator) we need to copy first + size_t bytes = utf8size(src); + + n = (char *)malloc(bytes); + + if (utf8_null == n) { + // out of memory so we bail + return utf8_null; + } else { + bytes = 0; + + // copy src byte-by-byte into our new utf8 string + while ('\0' != s[bytes]) { + n[bytes] = s[bytes]; + bytes++; + } + + // append null terminating byte + n[bytes] = '\0'; + return n; + } +} + +void *utf8fry(const void *str); + +size_t utf8len(const void *str) +{ + const unsigned char *s = (const unsigned char *)str; + size_t length = 0; + + while ('\0' != *s) { + if (0xf0 == (0xf8 & *s)) { + // 4-byte utf8 code point (began with 0b11110xxx) + s += 4; + } else if (0xe0 == (0xf0 & *s)) { + // 3-byte utf8 code point (began with 0b1110xxxx) + s += 3; + } else if (0xc0 == (0xe0 & *s)) { + // 2-byte utf8 code point (began with 0b110xxxxx) + s += 2; + } else { // if (0x00 == (0x80 & *s)) { + // 1-byte ascii (began with 0b0xxxxxxx) + s += 1; + } + + // no matter the bytes we marched s forward by, it was + // only 1 utf8 codepoint + length++; + } + + return length; +} + +// See +// https://unicode.org/Public/UNIDATA/EastAsianWidth.txt +// http://www.unicode.org/reports/tr11/tr11-33.html +int utf8cwidth(utf8_int32_t c) +{ + // TODO: add non printable characters check + if (c == 0) + return 0; + + if (c < 0x1100) + return 1; + + // Fullwidth + if ((0x3000 == c) || + (0xFF01 <= c && c <= 0xFF60) || + (0xFFE0 <= c && c <= 0xFFE6)) { + return 2; + } + + // Wide + if ((0x1100 <= c && c <= 0x115F) || + (0x11A3 <= c && c <= 0x11A7) || + (0x11FA <= c && c <= 0x11FF) || + (0x2329 <= c && c <= 0x232A) || + (0x2E80 <= c && c <= 0x2E99) || + (0x2E9B <= c && c <= 0x2EF3) || + (0x2F00 <= c && c <= 0x2FD5) || + (0x2FF0 <= c && c <= 0x2FFB) || + (0x3001 <= c && c <= 0x303E) || + (0x3041 <= c && c <= 0x3096) || + (0x3099 <= c && c <= 0x30FF) || + (0x3105 <= c 
&& c <= 0x312D) || + (0x3131 <= c && c <= 0x318E) || + (0x3190 <= c && c <= 0x31BA) || + (0x31C0 <= c && c <= 0x31E3) || + (0x31F0 <= c && c <= 0x321E) || + (0x3220 <= c && c <= 0x3247) || + (0x3250 <= c && c <= 0x32FE) || + (0x3300 <= c && c <= 0x4DBF) || + (0x4E00 <= c && c <= 0xA48C) || + (0xA490 <= c && c <= 0xA4C6) || + (0xA960 <= c && c <= 0xA97C) || + (0xAC00 <= c && c <= 0xD7A3) || + (0xD7B0 <= c && c <= 0xD7C6) || + (0xD7CB <= c && c <= 0xD7FB) || + (0xF900 <= c && c <= 0xFAFF) || + (0xFE10 <= c && c <= 0xFE19) || + (0xFE30 <= c && c <= 0xFE52) || + (0xFE54 <= c && c <= 0xFE66) || + (0xFE68 <= c && c <= 0xFE6B) || + (0x1B000 <= c && c <= 0x1B001) || + (0x1F200 <= c && c <= 0x1F202) || + (0x1F210 <= c && c <= 0x1F23A) || + (0x1F240 <= c && c <= 0x1F248) || + (0x1F250 <= c && c <= 0x1F251) || + (0x20000 <= c && c <= 0x2F73F) || + (0x2B740 <= c && c <= 0x2FFFD) || + (0x30000 <= c && c <= 0x3FFFD)) { + return 2; + } + + return 1; +} + +size_t utf8width(const void *str) +{ + size_t length = 0; + utf8_int32_t c = 0; + + str = utf8codepoint(str, &c); + while (c != 0) { + length += utf8cwidth(c); + str = utf8codepoint(str, &c); + } + return length; +} + +int utf8ncasecmp(const void *src1, const void *src2, size_t n) +{ + utf8_int32_t src1_cp, src2_cp, src1_orig_cp, src2_orig_cp; + + do { + const unsigned char *const s1 = (const unsigned char *)src1; + const unsigned char *const s2 = (const unsigned char *)src2; + + // first check that we have enough bytes left in n to contain an entire + // codepoint + if (0 == n) { + return 0; + } + + if ((1 == n) && ((0xc0 == (0xe0 & *s1)) || (0xc0 == (0xe0 & *s2)))) { + const utf8_int32_t c1 = (0xe0 & *s1); + const utf8_int32_t c2 = (0xe0 & *s2); + + if (c1 < c2) { + return -1; + } else if (c1 > c2) { + return 1; + } else { + return 0; + } + } + + if ((2 >= n) && ((0xe0 == (0xf0 & *s1)) || (0xe0 == (0xf0 & *s2)))) { + const utf8_int32_t c1 = (0xf0 & *s1); + const utf8_int32_t c2 = (0xf0 & *s2); + + if (c1 < c2) { + return -1; + 
} else if (c1 > c2) { + return 1; + } else { + return 0; + } + } + + if ((3 >= n) && ((0xf0 == (0xf8 & *s1)) || (0xf0 == (0xf8 & *s2)))) { + const utf8_int32_t c1 = (0xf8 & *s1); + const utf8_int32_t c2 = (0xf8 & *s2); + + if (c1 < c2) { + return -1; + } else if (c1 > c2) { + return 1; + } else { + return 0; + } + } + + src1 = utf8codepoint(src1, &src1_cp); + src2 = utf8codepoint(src2, &src2_cp); + n -= utf8codepointsize(src1_cp); + + // Take a copy of src1 & src2 + src1_orig_cp = src1_cp; + src2_orig_cp = src2_cp; + + // Lower srcs if required + src1_cp = utf8lwrcodepoint(src1_cp); + src2_cp = utf8lwrcodepoint(src2_cp); + + // Check if the lowered codepoints match + if ((0 == src1_orig_cp) && (0 == src2_orig_cp)) { + return 0; + } else if (src1_cp == src2_cp) { + continue; + } + + // If they don't match, then we return which of the original's are less + if (src1_orig_cp < src2_orig_cp) { + return -1; + } else if (src1_orig_cp > src2_orig_cp) { + return 1; + } + } while (0 < n); + + // both utf8 strings matched + return 0; +} + +void *utf8ncat(void *utf8_restrict dst, const void *utf8_restrict src, + size_t n) +{ + char *d = (char *)dst; + const char *s = (const char *)src; + + // find the null terminating byte in dst + while ('\0' != *d) { + d++; + } + + // overwriting the null terminating byte in dst, append src byte-by-byte + // stopping if we run out of space + do { + *d++ = *s++; + } while (('\0' != *s) && (0 != --n)); + + // write out a new null terminating byte into dst + *d = '\0'; + + return dst; +} + +int utf8ncmp(const void *src1, const void *src2, size_t n) +{ + const unsigned char *s1 = (const unsigned char *)src1; + const unsigned char *s2 = (const unsigned char *)src2; + + while ((0 != n--) && (('\0' != *s1) || ('\0' != *s2))) { + if (*s1 < *s2) { + return -1; + } else if (*s1 > *s2) { + return 1; + } + + s1++; + s2++; + } + + // both utf8 strings matched + return 0; +} + +void *utf8ncpy(void *utf8_restrict dst, const void *utf8_restrict src, + 
size_t n) +{ + char *d = (char *)dst; + const char *s = (const char *)src; + size_t index; + + // overwriting anything previously in dst, write byte-by-byte + // from src + for (index = 0; index < n; index++) { + d[index] = s[index]; + if ('\0' == s[index]) { + break; + } + } + + // append null terminating byte + for (; index < n; index++) { + d[index] = 0; + } + + return dst; +} + +void *utf8ndup(const void *src, size_t n) +{ + const char *s = (const char *)src; + char *c = utf8_null; + size_t bytes = 0; + + // Find the end of the string or stop when n is reached + while ('\0' != s[bytes] && bytes < n) { + bytes++; + } + + // In case bytes is actually less than n, we need to set it + // to be used later in the copy byte by byte. + n = bytes; + + c = (char *)malloc(bytes + 1); + if (utf8_null == c) { + // out of memory so we bail + return utf8_null; + } + + bytes = 0; + + // copy src byte-by-byte into our new utf8 string + while ('\0' != s[bytes] && bytes < n) { + c[bytes] = s[bytes]; + bytes++; + } + + // append null terminating byte + c[bytes] = '\0'; + return c; +} + +void *utf8rchr(const void *src, int chr) +{ + const char *s = (const char *)src; + const char *match = utf8_null; + char c[5] = {'\0', '\0', '\0', '\0', '\0'}; + + if (0 == chr) { + // being asked to return position of null terminating byte, so + // just run s to the end, and return! 
+ while ('\0' != *s) { + s++; + } + return (void *)s; + } else if (0 == ((int)0xffffff80 & chr)) { + // 1-byte/7-bit ascii + // (0b0xxxxxxx) + c[0] = (char)chr; + } else if (0 == ((int)0xfffff800 & chr)) { + // 2-byte/11-bit utf8 code point + // (0b110xxxxx 0b10xxxxxx) + c[0] = 0xc0 | (char)(chr >> 6); + c[1] = 0x80 | (char)(chr & 0x3f); + } else if (0 == ((int)0xffff0000 & chr)) { + // 3-byte/16-bit utf8 code point + // (0b1110xxxx 0b10xxxxxx 0b10xxxxxx) + c[0] = 0xe0 | (char)(chr >> 12); + c[1] = 0x80 | (char)((chr >> 6) & 0x3f); + c[2] = 0x80 | (char)(chr & 0x3f); + } else { // if (0 == ((int)0xffe00000 & chr)) { + // 4-byte/21-bit utf8 code point + // (0b11110xxx 0b10xxxxxx 0b10xxxxxx 0b10xxxxxx) + c[0] = 0xf0 | (char)(chr >> 18); + c[1] = 0x80 | (char)((chr >> 12) & 0x3f); + c[2] = 0x80 | (char)((chr >> 6) & 0x3f); + c[3] = 0x80 | (char)(chr & 0x3f); + } + + // we've created a 2 utf8 codepoint string in c that is + // the utf8 character asked for by chr, and a null + // terminating byte + + while ('\0' != *s) { + size_t offset = 0; + + while (s[offset] == c[offset]) { + offset++; + } + + if ('\0' == c[offset]) { + // we found a matching utf8 code point + match = s; + s += offset; + } else { + s += offset; + + // need to march s along to next utf8 codepoint start + // (the next byte that doesn't match 0b10xxxxxx) + if ('\0' != *s) { + do { + s++; + } while (0x80 == (0xc0 & *s)); + } + } + } + + // return the last match we found (or 0 if no match was found) + return (void *)match; +} + +void *utf8pbrk(const void *str, const void *accept) +{ + const char *s = (const char *)str; + + while ('\0' != *s) { + const char *a = (const char *)accept; + size_t offset = 0; + + while ('\0' != *a) { + // checking that if *a is the start of a utf8 codepoint + // (it is not 0b10xxxxxx) and we have successfully matched + // a previous character (0 < offset) - we found a match + if ((0x80 != (0xc0 & *a)) && (0 < offset)) { + return (void *)s; + } else { + if (*a == s[offset]) { + 
// part of a utf8 codepoint matched, so move our checking + // onwards to the next byte + offset++; + a++; + } else { + // r could be in the middle of an unmatching utf8 code point, + // so we need to march it on to the next character beginning, + + do { + a++; + } while (0x80 == (0xc0 & *a)); + + // reset offset too as we found a mismatch + offset = 0; + } + } + } + + // we found a match on the last utf8 codepoint + if (0 < offset) { + return (void *)s; + } + + // the current utf8 codepoint in src did not match accept, but src + // could have been partway through a utf8 codepoint, so we need to + // march it onto the next utf8 codepoint starting byte + do { + s++; + } while ((0x80 == (0xc0 & *s))); + } + + return utf8_null; +} + +size_t utf8size(const void *str) +{ + const char *s = (const char *)str; + size_t size = 0; + while ('\0' != s[size]) { + size++; + } + + // we are including the null terminating byte in the size calculation + size++; + return size; +} + +size_t utf8spn(const void *src, const void *accept) +{ + const char *s = (const char *)src; + size_t chars = 0; + + while ('\0' != *s) { + const char *a = (const char *)accept; + size_t offset = 0; + + while ('\0' != *a) { + // checking that if *r is the start of a utf8 codepoint + // (it is not 0b10xxxxxx) and we have successfully matched + // a previous character (0 < offset) - we found a match + if ((0x80 != (0xc0 & *a)) && (0 < offset)) { + // found a match, so increment the number of utf8 codepoints + // that have matched and stop checking whether any other utf8 + // codepoints in a match + chars++; + s += offset; + break; + } else { + if (*a == s[offset]) { + offset++; + a++; + } else { + // a could be in the middle of an unmatching utf8 codepoint, + // so we need to march it on to the next character beginning, + do { + a++; + } while (0x80 == (0xc0 & *a)); + + // reset offset too as we found a mismatch + offset = 0; + } + } + } + + // if a got to its terminating null byte, then we didn't find a 
match. + // Return the current number of matched utf8 codepoints + if ('\0' == *a) { + return chars; + } + } + + return chars; +} + +void *utf8str(const void *haystack, const void *needle) +{ + const char *h = (const char *)haystack; + utf8_int32_t throwaway_codepoint; + + // if needle has no utf8 codepoints before the null terminating + // byte then return haystack + if ('\0' == *((const char *)needle)) { + return (void *)haystack; + } + + while ('\0' != *h) { + const char *maybeMatch = h; + const char *n = (const char *)needle; + + while (*h == *n && (*h != '\0' && *n != '\0')) { + n++; + h++; + } + + if ('\0' == *n) { + // we found the whole utf8 string for needle in haystack at + // maybeMatch, so return it + return (void *)maybeMatch; + } else { + // h could be in the middle of an unmatching utf8 codepoint, + // so we need to march it on to the next character beginning + // starting from the current character + h = (const char *)utf8codepoint(maybeMatch, &throwaway_codepoint); + } + } + + // no match + return utf8_null; +} + +void *utf8casestr(const void *haystack, const void *needle) +{ + const void *h = haystack; + + // if needle has no utf8 codepoints before the null terminating + // byte then return haystack + if ('\0' == *((const char *)needle)) { + return (void *)haystack; + } + + for (;;) { + const void *maybeMatch = h; + const void *n = needle; + utf8_int32_t h_cp, n_cp; + + // Get the next code point and track it + const void *nextH = h = utf8codepoint(h, &h_cp); + n = utf8codepoint(n, &n_cp); + + while ((0 != h_cp) && (0 != n_cp)) { + h_cp = utf8lwrcodepoint(h_cp); + n_cp = utf8lwrcodepoint(n_cp); + + // if we find a mismatch, bail out! 
+ if (h_cp != n_cp) { + break; + } + + h = utf8codepoint(h, &h_cp); + n = utf8codepoint(n, &n_cp); + } + + if (0 == n_cp) { + // we found the whole utf8 string for needle in haystack at + // maybeMatch, so return it + return (void *)maybeMatch; + } + + if (0 == h_cp) { + // no match + return utf8_null; + } + + // Roll back to the next code point in the haystack to test + h = nextH; + } +} + +void *utf8valid(const void *str) +{ + const char *s = (const char *)str; + + while ('\0' != *s) { + if (0xf0 == (0xf8 & *s)) { + // ensure each of the 3 following bytes in this 4-byte + // utf8 codepoint began with 0b10xxxxxx + if ((0x80 != (0xc0 & s[1])) || (0x80 != (0xc0 & s[2])) || + (0x80 != (0xc0 & s[3]))) { + return (void *)s; + } + + // ensure that our utf8 codepoint ended after 4 bytes + if (0x80 == (0xc0 & s[4])) { + return (void *)s; + } + + // ensure that the top 5 bits of this 4-byte utf8 + // codepoint were not 0, as then we could have used + // one of the smaller encodings + if ((0 == (0x07 & s[0])) && (0 == (0x30 & s[1]))) { + return (void *)s; + } + + // 4-byte utf8 code point (began with 0b11110xxx) + s += 4; + } else if (0xe0 == (0xf0 & *s)) { + // ensure each of the 2 following bytes in this 3-byte + // utf8 codepoint began with 0b10xxxxxx + if ((0x80 != (0xc0 & s[1])) || (0x80 != (0xc0 & s[2]))) { + return (void *)s; + } + + // ensure that our utf8 codepoint ended after 3 bytes + if (0x80 == (0xc0 & s[3])) { + return (void *)s; + } + + // ensure that the top 5 bits of this 3-byte utf8 + // codepoint were not 0, as then we could have used + // one of the smaller encodings + if ((0 == (0x0f & s[0])) && (0 == (0x20 & s[1]))) { + return (void *)s; + } + + // 3-byte utf8 code point (began with 0b1110xxxx) + s += 3; + } else if (0xc0 == (0xe0 & *s)) { + // ensure the 1 following byte in this 2-byte + // utf8 codepoint began with 0b10xxxxxx + if (0x80 != (0xc0 & s[1])) { + return (void *)s; + } + + // ensure that our utf8 codepoint ended after 2 bytes + if (0x80 == 
(0xc0 & s[2])) { + return (void *)s; + } + + // ensure that the top 4 bits of this 2-byte utf8 + // codepoint were not 0, as then we could have used + // one of the smaller encodings + if (0 == (0x1e & s[0])) { + return (void *)s; + } + + // 2-byte utf8 code point (began with 0b110xxxxx) + s += 2; + } else if (0x00 == (0x80 & *s)) { + // 1-byte ascii (began with 0b0xxxxxxx) + s += 1; + } else { + // we have an invalid 0b1xxxxxxx utf8 code point entry + return (void *)s; + } + } + + return utf8_null; +} + +void *utf8codepoint(const void *utf8_restrict str, + utf8_int32_t *utf8_restrict out_codepoint) +{ + const char *s = (const char *)str; + + if (0xf0 == (0xf8 & s[0])) { + // 4 byte utf8 codepoint + *out_codepoint = ((0x07 & s[0]) << 18) | ((0x3f & s[1]) << 12) | + ((0x3f & s[2]) << 6) | (0x3f & s[3]); + s += 4; + } else if (0xe0 == (0xf0 & s[0])) { + // 3 byte utf8 codepoint + *out_codepoint = + ((0x0f & s[0]) << 12) | ((0x3f & s[1]) << 6) | (0x3f & s[2]); + s += 3; + } else if (0xc0 == (0xe0 & s[0])) { + // 2 byte utf8 codepoint + *out_codepoint = ((0x1f & s[0]) << 6) | (0x3f & s[1]); + s += 2; + } else { + // 1 byte utf8 codepoint otherwise + *out_codepoint = s[0]; + s += 1; + } + + return (void *)s; +} + +size_t utf8codepointsize(utf8_int32_t chr) +{ + if (0 == ((utf8_int32_t)0xffffff80 & chr)) { + return 1; + } else if (0 == ((utf8_int32_t)0xfffff800 & chr)) { + return 2; + } else if (0 == ((utf8_int32_t)0xffff0000 & chr)) { + return 3; + } else { // if (0 == ((int)0xffe00000 & chr)) { + return 4; + } +} + +void *utf8catcodepoint(void *utf8_restrict str, utf8_int32_t chr, size_t n) +{ + char *s = (char *)str; + + if (0 == ((utf8_int32_t)0xffffff80 & chr)) { + // 1-byte/7-bit ascii + // (0b0xxxxxxx) + if (n < 1) { + return utf8_null; + } + s[0] = (char)chr; + s += 1; + } else if (0 == ((utf8_int32_t)0xfffff800 & chr)) { + // 2-byte/11-bit utf8 code point + // (0b110xxxxx 0b10xxxxxx) + if (n < 2) { + return utf8_null; + } + s[0] = 0xc0 | (char)(chr >> 6); + s[1] 
= 0x80 | (char)(chr & 0x3f); + s += 2; + } else if (0 == ((utf8_int32_t)0xffff0000 & chr)) { + // 3-byte/16-bit utf8 code point + // (0b1110xxxx 0b10xxxxxx 0b10xxxxxx) + if (n < 3) { + return utf8_null; + } + s[0] = 0xe0 | (char)(chr >> 12); + s[1] = 0x80 | (char)((chr >> 6) & 0x3f); + s[2] = 0x80 | (char)(chr & 0x3f); + s += 3; + } else { // if (0 == ((int)0xffe00000 & chr)) { + // 4-byte/21-bit utf8 code point + // (0b11110xxx 0b10xxxxxx 0b10xxxxxx 0b10xxxxxx) + if (n < 4) { + return utf8_null; + } + s[0] = 0xf0 | (char)(chr >> 18); + s[1] = 0x80 | (char)((chr >> 12) & 0x3f); + s[2] = 0x80 | (char)((chr >> 6) & 0x3f); + s[3] = 0x80 | (char)(chr & 0x3f); + s += 4; + } + + return s; +} + +int utf8islower(utf8_int32_t chr) { return chr != utf8uprcodepoint(chr); } + +int utf8isupper(utf8_int32_t chr) { return chr != utf8lwrcodepoint(chr); } + +void utf8lwr(void *utf8_restrict str) +{ + void *p, *pn; + utf8_int32_t cp; + + p = (char *)str; + pn = utf8codepoint(p, &cp); + + while (cp != 0) { + const utf8_int32_t lwr_cp = utf8lwrcodepoint(cp); + const size_t size = utf8codepointsize(lwr_cp); + + if (lwr_cp != cp) { + utf8catcodepoint(p, lwr_cp, size); + } + + p = pn; + pn = utf8codepoint(p, &cp); + } +} + +void utf8upr(void *utf8_restrict str) +{ + void *p, *pn; + utf8_int32_t cp; + + p = (char *)str; + pn = utf8codepoint(p, &cp); + + while (cp != 0) { + const utf8_int32_t lwr_cp = utf8uprcodepoint(cp); + const size_t size = utf8codepointsize(lwr_cp); + + if (lwr_cp != cp) { + utf8catcodepoint(p, lwr_cp, size); + } + + p = pn; + pn = utf8codepoint(p, &cp); + } +} + +utf8_int32_t utf8lwrcodepoint(utf8_int32_t cp) +{ + if (((0x0041 <= cp) && (0x005a >= cp)) || + ((0x00c0 <= cp) && (0x00d6 >= cp)) || + ((0x00d8 <= cp) && (0x00de >= cp)) || + ((0x0391 <= cp) && (0x03a1 >= cp)) || + ((0x03a3 <= cp) && (0x03ab >= cp))) { + cp += 32; + } else if (((0x0100 <= cp) && (0x012f >= cp)) || + ((0x0132 <= cp) && (0x0137 >= cp)) || + ((0x014a <= cp) && (0x0177 >= cp)) || + ((0x0182 <= 
cp) && (0x0185 >= cp)) || + ((0x01a0 <= cp) && (0x01a5 >= cp)) || + ((0x01de <= cp) && (0x01ef >= cp)) || + ((0x01f8 <= cp) && (0x021f >= cp)) || + ((0x0222 <= cp) && (0x0233 >= cp)) || + ((0x0246 <= cp) && (0x024f >= cp)) || + ((0x03d8 <= cp) && (0x03ef >= cp))) { + cp |= 0x1; + } else if (((0x0139 <= cp) && (0x0148 >= cp)) || + ((0x0179 <= cp) && (0x017e >= cp)) || + ((0x01af <= cp) && (0x01b0 >= cp)) || + ((0x01b3 <= cp) && (0x01b6 >= cp)) || + ((0x01cd <= cp) && (0x01dc >= cp))) { + cp += 1; + cp &= ~0x1; + } else { + switch (cp) { + default: break; + case 0x0178: cp = 0x00ff; break; + case 0x0243: cp = 0x0180; break; + case 0x018e: cp = 0x01dd; break; + case 0x023d: cp = 0x019a; break; + case 0x0220: cp = 0x019e; break; + case 0x01b7: cp = 0x0292; break; + case 0x01c4: cp = 0x01c6; break; + case 0x01c7: cp = 0x01c9; break; + case 0x01ca: cp = 0x01cc; break; + case 0x01f1: cp = 0x01f3; break; + case 0x01f7: cp = 0x01bf; break; + case 0x0187: cp = 0x0188; break; + case 0x018b: cp = 0x018c; break; + case 0x0191: cp = 0x0192; break; + case 0x0198: cp = 0x0199; break; + case 0x01a7: cp = 0x01a8; break; + case 0x01ac: cp = 0x01ad; break; + case 0x01af: cp = 0x01b0; break; + case 0x01b8: cp = 0x01b9; break; + case 0x01bc: cp = 0x01bd; break; + case 0x01f4: cp = 0x01f5; break; + case 0x023b: cp = 0x023c; break; + case 0x0241: cp = 0x0242; break; + case 0x03fd: cp = 0x037b; break; + case 0x03fe: cp = 0x037c; break; + case 0x03ff: cp = 0x037d; break; + case 0x037f: cp = 0x03f3; break; + case 0x0386: cp = 0x03ac; break; + case 0x0388: cp = 0x03ad; break; + case 0x0389: cp = 0x03ae; break; + case 0x038a: cp = 0x03af; break; + case 0x038c: cp = 0x03cc; break; + case 0x038e: cp = 0x03cd; break; + case 0x038f: cp = 0x03ce; break; + case 0x0370: cp = 0x0371; break; + case 0x0372: cp = 0x0373; break; + case 0x0376: cp = 0x0377; break; + case 0x03f4: cp = 0x03d1; break; + case 0x03cf: cp = 0x03d7; break; + case 0x03f9: cp = 0x03f2; break; + case 0x03f7: cp = 0x03f8; break; + 
case 0x03fa: cp = 0x03fb; break; + }; + } + + return cp; +} + +utf8_int32_t utf8uprcodepoint(utf8_int32_t cp) +{ + if (((0x0061 <= cp) && (0x007a >= cp)) || + ((0x00e0 <= cp) && (0x00f6 >= cp)) || + ((0x00f8 <= cp) && (0x00fe >= cp)) || + ((0x03b1 <= cp) && (0x03c1 >= cp)) || + ((0x03c3 <= cp) && (0x03cb >= cp))) { + cp -= 32; + } else if (((0x0100 <= cp) && (0x012f >= cp)) || + ((0x0132 <= cp) && (0x0137 >= cp)) || + ((0x014a <= cp) && (0x0177 >= cp)) || + ((0x0182 <= cp) && (0x0185 >= cp)) || + ((0x01a0 <= cp) && (0x01a5 >= cp)) || + ((0x01de <= cp) && (0x01ef >= cp)) || + ((0x01f8 <= cp) && (0x021f >= cp)) || + ((0x0222 <= cp) && (0x0233 >= cp)) || + ((0x0246 <= cp) && (0x024f >= cp)) || + ((0x03d8 <= cp) && (0x03ef >= cp))) { + cp &= ~0x1; + } else if (((0x0139 <= cp) && (0x0148 >= cp)) || + ((0x0179 <= cp) && (0x017e >= cp)) || + ((0x01af <= cp) && (0x01b0 >= cp)) || + ((0x01b3 <= cp) && (0x01b6 >= cp)) || + ((0x01cd <= cp) && (0x01dc >= cp))) { + cp -= 1; + cp |= 0x1; + } else { + switch (cp) { + default: break; + case 0x00ff: cp = 0x0178; break; + case 0x0180: cp = 0x0243; break; + case 0x01dd: cp = 0x018e; break; + case 0x019a: cp = 0x023d; break; + case 0x019e: cp = 0x0220; break; + case 0x0292: cp = 0x01b7; break; + case 0x01c6: cp = 0x01c4; break; + case 0x01c9: cp = 0x01c7; break; + case 0x01cc: cp = 0x01ca; break; + case 0x01f3: cp = 0x01f1; break; + case 0x01bf: cp = 0x01f7; break; + case 0x0188: cp = 0x0187; break; + case 0x018c: cp = 0x018b; break; + case 0x0192: cp = 0x0191; break; + case 0x0199: cp = 0x0198; break; + case 0x01a8: cp = 0x01a7; break; + case 0x01ad: cp = 0x01ac; break; + case 0x01b0: cp = 0x01af; break; + case 0x01b9: cp = 0x01b8; break; + case 0x01bd: cp = 0x01bc; break; + case 0x01f5: cp = 0x01f4; break; + case 0x023c: cp = 0x023b; break; + case 0x0242: cp = 0x0241; break; + case 0x037b: cp = 0x03fd; break; + case 0x037c: cp = 0x03fe; break; + case 0x037d: cp = 0x03ff; break; + case 0x03f3: cp = 0x037f; break; + case 0x03ac: cp = 
0x0386; break; + case 0x03ad: cp = 0x0388; break; + case 0x03ae: cp = 0x0389; break; + case 0x03af: cp = 0x038a; break; + case 0x03cc: cp = 0x038c; break; + case 0x03cd: cp = 0x038e; break; + case 0x03ce: cp = 0x038f; break; + case 0x0371: cp = 0x0370; break; + case 0x0373: cp = 0x0372; break; + case 0x0377: cp = 0x0376; break; + case 0x03d1: cp = 0x03f4; break; + case 0x03d7: cp = 0x03cf; break; + case 0x03f2: cp = 0x03f9; break; + case 0x03f8: cp = 0x03f7; break; + case 0x03fb: cp = 0x03fa; break; + }; + } + + return cp; +} + +#undef utf8_restrict +#undef utf8_null + +#ifdef __cplusplus +} // extern "C" +#endif + +#if defined(__clang__) +#pragma clang diagnostic pop +#endif + +#endif // SHEREDOM_UTF8_H_INCLUDED + + +/******************************************************** + End of file "utf8.h" + ********************************************************/ + + +/******************************************************** + Begin of file "string_buffer.h" + ********************************************************/ + +#ifndef STRING_BUFFER_H +#define STRING_BUFFER_H + +/* #include "fort_utils.h" */ /* Commented by amalgamation script */ + + +/***************************************************************************** + * STRING BUFFER + * ***************************************************************************/ + +struct f_string_buffer { + union { + char *cstr; +#ifdef FT_HAVE_WCHAR + wchar_t *wstr; +#endif +#ifdef FT_HAVE_UTF8 + void *u8str; +#endif + void *data; + } str; + size_t data_sz; + enum f_string_type type; +}; + +FT_INTERNAL +f_string_buffer_t *create_string_buffer(size_t number_of_chars, enum f_string_type type); + +FT_INTERNAL +void destroy_string_buffer(f_string_buffer_t *buffer); + +FT_INTERNAL +f_string_buffer_t *copy_string_buffer(const f_string_buffer_t *buffer); + +FT_INTERNAL +f_status realloc_string_buffer_without_copy(f_string_buffer_t *buffer); + +FT_INTERNAL +f_status fill_buffer_from_string(f_string_buffer_t *buffer, const char *str); + 
+#ifdef FT_HAVE_WCHAR +FT_INTERNAL +f_status fill_buffer_from_wstring(f_string_buffer_t *buffer, const wchar_t *str); +#endif /* FT_HAVE_WCHAR */ + +#ifdef FT_HAVE_UTF8 +FT_INTERNAL +f_status fill_buffer_from_u8string(f_string_buffer_t *buffer, const void *str); +#endif /* FT_HAVE_UTF8 */ + +FT_INTERNAL +size_t buffer_text_visible_width(const f_string_buffer_t *buffer); + +FT_INTERNAL +size_t buffer_text_visible_height(const f_string_buffer_t *buffer); + +FT_INTERNAL +size_t string_buffer_cod_width_capacity(const f_string_buffer_t *buffer); + +FT_INTERNAL +size_t string_buffer_raw_capacity(const f_string_buffer_t *buffer); + +FT_INTERNAL +size_t string_buffer_width_capacity(const f_string_buffer_t *buffer); + +FT_INTERNAL +void *buffer_get_data(f_string_buffer_t *buffer); + +FT_INTERNAL +int buffer_check_align(f_string_buffer_t *buffer); + +FT_INTERNAL +int buffer_printf(f_string_buffer_t *buffer, size_t buffer_row, f_conv_context_t *cntx, size_t cod_width, + const char *content_style_tag, const char *reset_content_style_tag); + +#ifdef FT_HAVE_UTF8 +FT_INTERNAL +void buffer_set_u8strwid_func(int (*u8strwid)(const void *beg, const void *end, size_t *width)); +#endif /* FT_HAVE_UTF8 */ + + +#endif /* STRING_BUFFER_H */ + +/******************************************************** + End of file "string_buffer.h" + ********************************************************/ + + +/******************************************************** + Begin of file "properties.h" + ********************************************************/ + +#ifndef PROPERTIES_H +#define PROPERTIES_H + +/* #include "fort_utils.h" */ /* Commented by amalgamation script */ +#include <stdint.h> +#include <limits.h> + +#define PROP_IS_SET(ft_props, property) ((ft_props) & (property)) +#define PROP_SET(ft_props, property) ((ft_props) |=(property)) +#define PROP_UNSET(ft_props, property) ((ft_props) &= ~((uint32_t)(property))) + +#define TEXT_STYLE_TAG_MAX_SIZE (64 * 2) + +FT_INTERNAL +void 
get_style_tag_for_cell(const f_table_properties_t *props, + size_t row, size_t col, char *style_tag, size_t sz); + +FT_INTERNAL +void get_reset_style_tag_for_cell(const f_table_properties_t *props, + size_t row, size_t col, char *style_tag, size_t sz); + +FT_INTERNAL +void get_style_tag_for_content(const f_table_properties_t *props, + size_t row, size_t col, char *style_tag, size_t sz); + +FT_INTERNAL +void get_reset_style_tag_for_content(const f_table_properties_t *props, + size_t row, size_t col, char *style_tag, size_t sz); + + +struct f_cell_props { + size_t cell_row; + size_t cell_col; + uint32_t properties_flags; + + unsigned int col_min_width; + enum ft_text_alignment align; + unsigned int cell_padding_top; + unsigned int cell_padding_bottom; + unsigned int cell_padding_left; + unsigned int cell_padding_right; + unsigned int cell_empty_string_height; + enum ft_row_type row_type; + unsigned int content_fg_color_number; + unsigned int content_bg_color_number; + unsigned int cell_bg_color_number; + enum ft_text_style cell_text_style; + enum ft_text_style content_text_style; + bool rgb; +}; + +typedef struct f_cell_props f_cell_props_t; +typedef f_vector_t f_cell_prop_container_t; + +FT_INTERNAL +f_cell_prop_container_t *create_cell_prop_container(void); + +FT_INTERNAL +void destroy_cell_prop_container(f_cell_prop_container_t *cont); + +FT_INTERNAL +const f_cell_props_t *cget_cell_prop(const f_cell_prop_container_t *cont, size_t row, size_t col); + +FT_INTERNAL +f_cell_props_t *get_cell_prop_and_create_if_not_exists(f_cell_prop_container_t *cont, size_t row, size_t col); + +FT_INTERNAL +f_status set_cell_property(f_cell_prop_container_t *cont, size_t row, size_t col, uint32_t property, int value); + +FT_INTERNAL +int get_cell_property_hierarchically(const f_table_properties_t *properties, size_t row, size_t column, uint32_t property); + +FT_INTERNAL +f_status set_default_cell_property(uint32_t property, int value); + + +/* TABLE BORDER DESСRIPTION + * + * + * TL 
TT TT TT TV TT TT TT TT TT TT TT TR + * LL IV RR + * LL IV RR + * LH IH IH IH II IH IH IH TI IH IH IH RH + * LL IV IV RR + * LL IV IV RR + * LL LI IH IH IH RI RH + * LL IV IV RR + * LL IV IV RR + * LH IH IH IH BI IH IH IH II IH IH IH RH + * LL IV RR + * LL IV RR + * BL BB BB BB BV BB BB BB BV BB BB BB BR + */ + + +/* HORIZONTAL SEPARATOR DESCRIPTION + * + * + * TL TT TT TT TV TT TT TT TV TT TT TT TR <----- TOP_SEPARATOR + * LL IV IV RR + * LH IH IH IH II IH IH IH II IH IH IH RH <----- INSIDE_SEPARATOR + * LL IV IV RR + * BL BB BB BB BV BB BB BB BV BB BB BB BR <----- BOTTOM_SEPARATOR + */ + +enum f_hor_separator_pos { + TOP_SEPARATOR, + INSIDE_SEPARATOR, + BOTTOM_SEPARATOR +}; + +enum f_border_item_pos { + TL_bip = 0, + TT_bip = 1, + TV_bip = 2, + TR_bip = 3, + + LL_bip = 4, + IV_bip = 5, + RR_bip = 6, + + LH_bip = 7, + IH_bip = 8, + II_bip = 9, + RH_bip = 10, + + BL_bip = 11, + BB_bip = 12, + BV_bip = 13, + BR_bip = 14, + + LI_bip = 15, + TI_bip = 16, + RI_bip = 17, + BI_bip = 18, + + BORDER_ITEM_POS_SIZE +}; + + +enum f_separator_item_pos { + LH_sip = 0, + IH_sip = 1, + II_sip = 2, + RH_sip = 3, + + TI_sip = 4, + BI_sip = 5, + + SEPARATOR_ITEM_POS_SIZE +}; + + +struct fort_border_style { + const char *border_chars[BORDER_ITEM_POS_SIZE]; + const char *header_border_chars[BORDER_ITEM_POS_SIZE]; + const char *separator_chars[SEPARATOR_ITEM_POS_SIZE]; +}; +extern struct fort_border_style FORT_BASIC_STYLE; +extern struct fort_border_style FORT_BASIC2_STYLE; +extern struct fort_border_style FORT_SIMPLE_STYLE; +extern struct fort_border_style FORT_PLAIN_STYLE; +extern struct fort_border_style FORT_DOT_STYLE; +extern struct fort_border_style FORT_EMPTY_STYLE; +extern struct fort_border_style FORT_EMPTY2_STYLE; +extern struct fort_border_style FORT_SOLID_STYLE; +extern struct fort_border_style FORT_SOLID_ROUND_STYLE; +extern struct fort_border_style FORT_NICE_STYLE; +extern struct fort_border_style FORT_DOUBLE_STYLE; +extern struct fort_border_style FORT_DOUBLE2_STYLE; 
+extern struct fort_border_style FORT_BOLD_STYLE; +extern struct fort_border_style FORT_BOLD2_STYLE; +extern struct fort_border_style FORT_FRAME_STYLE; + + +struct fort_entire_table_properties { + unsigned int left_margin; + unsigned int top_margin; + unsigned int right_margin; + unsigned int bottom_margin; + enum ft_adding_strategy add_strategy; +}; +typedef struct fort_entire_table_properties fort_entire_table_properties_t; +extern fort_entire_table_properties_t g_entire_table_properties; + +FT_INTERNAL +f_status set_entire_table_property(f_table_properties_t *table_properties, uint32_t property, int value); + +FT_INTERNAL +f_status set_default_entire_table_property(uint32_t property, int value); + +struct f_table_properties { + struct fort_border_style border_style; + f_cell_prop_container_t *cell_properties; + fort_entire_table_properties_t entire_table_properties; +}; +extern f_table_properties_t g_table_properties; + +FT_INTERNAL +size_t max_border_elem_strlen(struct f_table_properties *); + +FT_INTERNAL +f_table_properties_t *create_table_properties(void); + +FT_INTERNAL +void destroy_table_properties(f_table_properties_t *properties); + +FT_INTERNAL +f_table_properties_t *copy_table_properties(const f_table_properties_t *property); + +#endif /* PROPERTIES_H */ + +/******************************************************** + End of file "properties.h" + ********************************************************/ + + +/******************************************************** + Begin of file "cell.h" + ********************************************************/ + +#ifndef CELL_H +#define CELL_H + +/* #include "fort_utils.h" */ /* Commented by amalgamation script */ + +FT_INTERNAL +f_cell_t *create_cell(void); + +FT_INTERNAL +void destroy_cell(f_cell_t *cell); + +FT_INTERNAL +f_cell_t *copy_cell(f_cell_t *cell); + +FT_INTERNAL +size_t cell_vis_width(const f_cell_t *cell, const f_context_t *context); + +FT_INTERNAL +size_t cell_invis_codes_width(const f_cell_t *cell, 
const f_context_t *context); + +FT_INTERNAL +size_t hint_height_cell(const f_cell_t *cell, const f_context_t *context); + +FT_INTERNAL +void set_cell_type(f_cell_t *cell, enum f_cell_type type); + +FT_INTERNAL +enum f_cell_type get_cell_type(const f_cell_t *cell); + +FT_INTERNAL +int cell_printf(f_cell_t *cell, size_t row, f_conv_context_t *cntx, size_t cod_width); + +FT_INTERNAL +f_status fill_cell_from_string(f_cell_t *cell, const char *str); + +#ifdef FT_HAVE_WCHAR +FT_INTERNAL +f_status fill_cell_from_wstring(f_cell_t *cell, const wchar_t *str); +#endif + +FT_INTERNAL +f_status fill_cell_from_buffer(f_cell_t *cell, const f_string_buffer_t *buf); + +FT_INTERNAL +f_string_buffer_t *cell_get_string_buffer(f_cell_t *cell); + +#endif /* CELL_H */ + +/******************************************************** + End of file "cell.h" + ********************************************************/ + + +/******************************************************** + Begin of file "row.h" + ********************************************************/ + +#ifndef ROW_H +#define ROW_H + +/* #include "fort_utils.h" */ /* Commented by amalgamation script */ +#include "fort.h" +#include <stdarg.h> +/* #include "properties.h" */ /* Commented by amalgamation script */ +#ifdef FT_HAVE_WCHAR +#include <wchar.h> +#endif + +FT_INTERNAL +f_row_t *create_row(void); + +FT_INTERNAL +void destroy_row(f_row_t *row); + +FT_INTERNAL +f_row_t *copy_row(f_row_t *row); + +FT_INTERNAL +f_row_t *split_row(f_row_t *row, size_t pos); + +// Delete range [left; right] of cells (both ends included) +FT_INTERNAL +int ft_row_erase_range(f_row_t *row, size_t left, size_t right); + +FT_INTERNAL +f_row_t *create_row_from_string(const char *str); + +FT_INTERNAL +f_row_t *create_row_from_fmt_string(const struct f_string_view *fmt, va_list *va_args); + +FT_INTERNAL +size_t columns_in_row(const f_row_t *row); + +FT_INTERNAL +f_cell_t *get_cell(f_row_t *row, size_t col); + +FT_INTERNAL +const f_cell_t *get_cell_c(const 
f_row_t *row, size_t col); + +FT_INTERNAL +f_cell_t *get_cell_and_create_if_not_exists(f_row_t *row, size_t col); + +FT_INTERNAL +f_cell_t *create_cell_in_position(f_row_t *row, size_t col); + +FT_INTERNAL +f_status swap_row(f_row_t *cur_row, f_row_t *ins_row, size_t pos); + +FT_INTERNAL +f_status insert_row(f_row_t *cur_row, f_row_t *ins_row, size_t pos); + +FT_INTERNAL +size_t group_cell_number(const f_row_t *row, size_t master_cell_col); + +FT_INTERNAL +int get_row_cell_types(const f_row_t *row, enum f_cell_type *types, size_t types_sz); + +FT_INTERNAL +f_status row_set_cell_span(f_row_t *row, size_t cell_column, size_t hor_span); + +FT_INTERNAL +int print_row_separator(f_conv_context_t *cntx, + const size_t *col_width_arr, size_t cols, + const f_row_t *upper_row, const f_row_t *lower_row, + enum f_hor_separator_pos separatorPos, const f_separator_t *sep); + +FT_INTERNAL +int snprintf_row(const f_row_t *row, f_conv_context_t *cntx, size_t *col_width_arr, size_t col_width_arr_sz, + size_t row_height); + +#ifdef FT_HAVE_WCHAR +FT_INTERNAL +f_row_t *create_row_from_wstring(const wchar_t *str); +#endif + + +#endif /* ROW_H */ + +/******************************************************** + End of file "row.h" + ********************************************************/ + + +/******************************************************** + Begin of file "table.h" + ********************************************************/ + +#ifndef TABLE_H +#define TABLE_H + +/* #include "fort_utils.h" */ /* Commented by amalgamation script */ + +struct ft_table { + f_vector_t *rows; + f_table_properties_t *properties; + f_string_buffer_t *conv_buffer; + size_t cur_row; + size_t cur_col; + f_vector_t *separators; +}; + +FT_INTERNAL +f_separator_t *create_separator(int enabled); + +FT_INTERNAL +void destroy_separator(f_separator_t *sep); + +FT_INTERNAL +f_separator_t *copy_separator(f_separator_t *sep); + +FT_INTERNAL +f_status get_table_sizes(const ft_table_t *table, size_t *rows, size_t 
*cols); + +FT_INTERNAL +f_row_t *get_row(ft_table_t *table, size_t row); + +FT_INTERNAL +const f_row_t *get_row_c(const ft_table_t *table, size_t row); + +FT_INTERNAL +f_row_t *get_row_and_create_if_not_exists(ft_table_t *table, size_t row); + +FT_INTERNAL +f_string_buffer_t *get_cur_str_buffer_and_create_if_not_exists(ft_table_t *table); + + +FT_INTERNAL +f_status table_rows_and_cols_geometry(const ft_table_t *table, + size_t **col_width_arr_p, size_t *col_width_arr_sz, + size_t **row_height_arr_p, size_t *row_height_arr_sz, + enum f_geometry_type geom); + +FT_INTERNAL +f_status table_geometry(const ft_table_t *table, size_t *height, size_t *width); + +/* + * Returns geometry in codepoints(characters) (include codepoints of invisible + * elements: e.g. styles tags). + */ +FT_INTERNAL +f_status table_internal_codepoints_geometry(const ft_table_t *table, size_t *height, size_t *width); + +#endif /* TABLE_H */ + +/******************************************************** + End of file "table.h" + ********************************************************/ + + +/******************************************************** + Begin of file "cell.c" + ********************************************************/ + +/* #include "cell.h" */ /* Commented by amalgamation script */ +/* #include "properties.h" */ /* Commented by amalgamation script */ +/* #include "string_buffer.h" */ /* Commented by amalgamation script */ +#include <assert.h> + +struct f_cell { + f_string_buffer_t *str_buffer; + enum f_cell_type cell_type; +}; + +FT_INTERNAL +f_cell_t *create_cell(void) +{ + f_cell_t *cell = (f_cell_t *)F_CALLOC(sizeof(f_cell_t), 1); + if (cell == NULL) + return NULL; + cell->str_buffer = create_string_buffer(DEFAULT_STR_BUF_SIZE, CHAR_BUF); + if (cell->str_buffer == NULL) { + F_FREE(cell); + return NULL; + } + cell->cell_type = COMMON_CELL; + return cell; +} + +FT_INTERNAL +void destroy_cell(f_cell_t *cell) +{ + if (cell == NULL) + return; + destroy_string_buffer(cell->str_buffer); + 
F_FREE(cell); +} + +FT_INTERNAL +f_cell_t *copy_cell(f_cell_t *cell) +{ + assert(cell); + + f_cell_t *result = create_cell(); + if (result == NULL) + return NULL; + destroy_string_buffer(result->str_buffer); + result->str_buffer = copy_string_buffer(cell->str_buffer); + if (result->str_buffer == NULL) { + destroy_cell(result); + return NULL; + } + result->cell_type = cell->cell_type; + return result; +} + +FT_INTERNAL +void set_cell_type(f_cell_t *cell, enum f_cell_type type) +{ + assert(cell); + cell->cell_type = type; +} + +FT_INTERNAL +enum f_cell_type get_cell_type(const f_cell_t *cell) +{ + assert(cell); + return cell->cell_type; +} + +FT_INTERNAL +size_t cell_vis_width(const f_cell_t *cell, const f_context_t *context) +{ + /* todo: + * At the moment min width includes paddings. Maybe it is better that min width weren't include + * paddings but be min width of the cell content without padding + */ + + assert(cell); + assert(context); + + f_table_properties_t *properties = context->table_properties; + size_t row = context->row; + size_t column = context->column; + + size_t padding_left = get_cell_property_hierarchically(properties, row, column, FT_CPROP_LEFT_PADDING); + size_t padding_right = get_cell_property_hierarchically(properties, row, column, FT_CPROP_RIGHT_PADDING); + size_t result = padding_left + padding_right; + if (cell->str_buffer && cell->str_buffer->str.data) { + result += buffer_text_visible_width(cell->str_buffer); + } + result = MAX(result, (size_t)get_cell_property_hierarchically(properties, row, column, FT_CPROP_MIN_WIDTH)); + return result; +} + +FT_INTERNAL +size_t cell_invis_codes_width(const f_cell_t *cell, const f_context_t *context) +{ + assert(cell); + assert(context); + + f_table_properties_t *properties = context->table_properties; + size_t row = context->row; + size_t column = context->column; + + size_t result = 0; + char cell_style_tag[TEXT_STYLE_TAG_MAX_SIZE]; + get_style_tag_for_cell(properties, row, column, cell_style_tag, 
TEXT_STYLE_TAG_MAX_SIZE); + result += strlen(cell_style_tag); + + char reset_cell_style_tag[TEXT_STYLE_TAG_MAX_SIZE]; + get_reset_style_tag_for_cell(properties, row, column, reset_cell_style_tag, TEXT_STYLE_TAG_MAX_SIZE); + result += strlen(reset_cell_style_tag); + + char content_style_tag[TEXT_STYLE_TAG_MAX_SIZE]; + get_style_tag_for_content(properties, row, column, content_style_tag, TEXT_STYLE_TAG_MAX_SIZE); + result += strlen(content_style_tag); + + char reset_content_style_tag[TEXT_STYLE_TAG_MAX_SIZE]; + get_reset_style_tag_for_content(properties, row, column, reset_content_style_tag, TEXT_STYLE_TAG_MAX_SIZE); + result += strlen(reset_content_style_tag); + return result; +} + +FT_INTERNAL +size_t hint_height_cell(const f_cell_t *cell, const f_context_t *context) +{ + assert(cell); + assert(context); + f_table_properties_t *properties = context->table_properties; + size_t row = context->row; + size_t column = context->column; + + size_t padding_top = get_cell_property_hierarchically(properties, row, column, FT_CPROP_TOP_PADDING); + size_t padding_bottom = get_cell_property_hierarchically(properties, row, column, FT_CPROP_BOTTOM_PADDING); + size_t empty_string_height = get_cell_property_hierarchically(properties, row, column, FT_CPROP_EMPTY_STR_HEIGHT); + + size_t result = padding_top + padding_bottom; + if (cell->str_buffer && cell->str_buffer->str.data) { + size_t text_height = buffer_text_visible_height(cell->str_buffer); + result += text_height == 0 ? 
empty_string_height : text_height; + } + return result; +} + + +FT_INTERNAL +int cell_printf(f_cell_t *cell, size_t row, f_conv_context_t *cntx, size_t vis_width) +{ + const f_context_t *context = cntx->cntx; + size_t buf_len = vis_width; + + if (cell == NULL || (vis_width < cell_vis_width(cell, context))) { + return -1; + } + + f_table_properties_t *properties = context->table_properties; + unsigned int padding_top = get_cell_property_hierarchically(properties, context->row, context->column, FT_CPROP_TOP_PADDING); + unsigned int padding_left = get_cell_property_hierarchically(properties, context->row, context->column, FT_CPROP_LEFT_PADDING); + unsigned int padding_right = get_cell_property_hierarchically(properties, context->row, context->column, FT_CPROP_RIGHT_PADDING); + + size_t written = 0; + size_t invisible_written = 0; + int tmp = 0; + + /* todo: Dirty hack with changing buf_len! need refactoring. */ + /* Also maybe it is better to move all struff with colors to buffers? */ + char cell_style_tag[TEXT_STYLE_TAG_MAX_SIZE]; + get_style_tag_for_cell(properties, context->row, context->column, cell_style_tag, TEXT_STYLE_TAG_MAX_SIZE); + buf_len += strlen(cell_style_tag); + + char reset_cell_style_tag[TEXT_STYLE_TAG_MAX_SIZE]; + get_reset_style_tag_for_cell(properties, context->row, context->column, reset_cell_style_tag, TEXT_STYLE_TAG_MAX_SIZE); + buf_len += strlen(reset_cell_style_tag); + + char content_style_tag[TEXT_STYLE_TAG_MAX_SIZE]; + get_style_tag_for_content(properties, context->row, context->column, content_style_tag, TEXT_STYLE_TAG_MAX_SIZE); + buf_len += strlen(content_style_tag); + + char reset_content_style_tag[TEXT_STYLE_TAG_MAX_SIZE]; + get_reset_style_tag_for_content(properties, context->row, context->column, reset_content_style_tag, TEXT_STYLE_TAG_MAX_SIZE); + buf_len += strlen(reset_content_style_tag); + + /* CELL_STYLE_T LEFT_PADDING CONTENT_STYLE_T CONTENT RESET_CONTENT_STYLE_T RIGHT_PADDING RESET_CELL_STYLE_T + * | | | | | | | | + * L1 R1 + 
* L2 R2 + * L3 R3 + */ + + size_t L2 = padding_left; + + size_t R2 = padding_right; + size_t R3 = strlen(reset_cell_style_tag); + +#define TOTAL_WRITTEN (written + invisible_written) +#define RIGHT (padding_right + extra_right) + +#define WRITE_CELL_STYLE_TAG CHCK_RSLT_ADD_TO_INVISIBLE_WRITTEN(print_n_strings(cntx, 1, cell_style_tag)) +#define WRITE_RESET_CELL_STYLE_TAG CHCK_RSLT_ADD_TO_INVISIBLE_WRITTEN(print_n_strings(cntx, 1, reset_cell_style_tag)) +#define WRITE_CONTENT_STYLE_TAG CHCK_RSLT_ADD_TO_INVISIBLE_WRITTEN(print_n_strings(cntx, 1, content_style_tag)) +#define WRITE_RESET_CONTENT_STYLE_TAG CHCK_RSLT_ADD_TO_INVISIBLE_WRITTEN(print_n_strings(cntx, 1, reset_content_style_tag)) + + if (row >= hint_height_cell(cell, context) + || row < padding_top + || row >= (padding_top + buffer_text_visible_height(cell->str_buffer))) { + WRITE_CELL_STYLE_TAG; + WRITE_CONTENT_STYLE_TAG; + WRITE_RESET_CONTENT_STYLE_TAG; + CHCK_RSLT_ADD_TO_WRITTEN(print_n_strings(cntx, buf_len - TOTAL_WRITTEN - R3, FT_SPACE)); + WRITE_RESET_CELL_STYLE_TAG; + + return (int)TOTAL_WRITTEN; + } + + WRITE_CELL_STYLE_TAG; + CHCK_RSLT_ADD_TO_WRITTEN(print_n_strings(cntx, L2, FT_SPACE)); + if (cell->str_buffer) { + CHCK_RSLT_ADD_TO_WRITTEN(buffer_printf(cell->str_buffer, row - padding_top, cntx, vis_width - L2 - R2, content_style_tag, reset_content_style_tag)); + } else { + WRITE_CONTENT_STYLE_TAG; + WRITE_RESET_CONTENT_STYLE_TAG; + CHCK_RSLT_ADD_TO_WRITTEN(print_n_strings(cntx, vis_width - L2 - R2, FT_SPACE)); + } + CHCK_RSLT_ADD_TO_WRITTEN(print_n_strings(cntx, R2, FT_SPACE)); + WRITE_RESET_CELL_STYLE_TAG; + + return (int)TOTAL_WRITTEN; + +clear: + return -1; +#undef WRITE_CELL_STYLE_TAG +#undef WRITE_RESET_CELL_STYLE_TAG +#undef WRITE_CONTENT_STYLE_TAG +#undef WRITE_RESET_CONTENT_STYLE_TAG +#undef TOTAL_WRITTEN +#undef RIGHT +} + +FT_INTERNAL +f_status fill_cell_from_string(f_cell_t *cell, const char *str) +{ + assert(str); + assert(cell); + + return fill_buffer_from_string(cell->str_buffer, str); 
+} + +#ifdef FT_HAVE_WCHAR +FT_INTERNAL +f_status fill_cell_from_wstring(f_cell_t *cell, const wchar_t *str) +{ + assert(str); + assert(cell); + + return fill_buffer_from_wstring(cell->str_buffer, str); +} +#endif + +#ifdef FT_HAVE_UTF8 +static +f_status fill_cell_from_u8string(f_cell_t *cell, const void *str) +{ + assert(str); + assert(cell); + return fill_buffer_from_u8string(cell->str_buffer, str); +} +#endif /* FT_HAVE_UTF8 */ + +FT_INTERNAL +f_string_buffer_t *cell_get_string_buffer(f_cell_t *cell) +{ + assert(cell); + assert(cell->str_buffer); + return cell->str_buffer; +} + +FT_INTERNAL +f_status fill_cell_from_buffer(f_cell_t *cell, const f_string_buffer_t *buffer) +{ + assert(cell); + assert(buffer); + switch (buffer->type) { + case CHAR_BUF: + return fill_cell_from_string(cell, buffer->str.cstr); +#ifdef FT_HAVE_WCHAR + case W_CHAR_BUF: + return fill_cell_from_wstring(cell, buffer->str.wstr); +#endif /* FT_HAVE_WCHAR */ +#ifdef FT_HAVE_UTF8 + case UTF8_BUF: + return fill_cell_from_u8string(cell, buffer->str.u8str); +#endif /* FT_HAVE_UTF8 */ + default: + assert(0); + return FT_GEN_ERROR; + } + +} + +/******************************************************** + End of file "cell.c" + ********************************************************/ + + +/******************************************************** + Begin of file "fort_impl.c" + ********************************************************/ + +/* +libfort + +MIT License + +Copyright (c) 2017 - 2018 Seleznev Anton + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall 
be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/ + +#include <stdlib.h> +#include <stdarg.h> +#include <stdio.h> +#include "fort.h" +#include <assert.h> +#include <string.h> +#include <wchar.h> + +/* #include "vector.h" */ /* Commented by amalgamation script */ +/* #include "fort_utils.h" */ /* Commented by amalgamation script */ +/* #include "string_buffer.h" */ /* Commented by amalgamation script */ +/* #include "table.h" */ /* Commented by amalgamation script */ +/* #include "row.h" */ /* Commented by amalgamation script */ +/* #include "properties.h" */ /* Commented by amalgamation script */ + + +ft_table_t *ft_create_table(void) +{ + ft_table_t *result = (ft_table_t *)F_CALLOC(1, sizeof(ft_table_t)); + if (result == NULL) + return NULL; + + result->rows = create_vector(sizeof(f_row_t *), DEFAULT_VECTOR_CAPACITY); + if (result->rows == NULL) { + F_FREE(result); + return NULL; + } + result->separators = create_vector(sizeof(f_separator_t *), DEFAULT_VECTOR_CAPACITY); + if (result->separators == NULL) { + destroy_vector(result->rows); + F_FREE(result); + return NULL; + } + + result->properties = create_table_properties(); + if (result->properties == NULL) { + destroy_vector(result->separators); + destroy_vector(result->rows); + F_FREE(result); + return NULL; + } + result->conv_buffer = NULL; + result->cur_row = 0; + result->cur_col = 0; + return result; +} + + +void ft_destroy_table(ft_table_t *table) +{ + size_t i = 0; + + if (table == NULL) + return; + + if (table->rows) { + 
size_t row_n = vector_size(table->rows); + for (i = 0; i < row_n; ++i) { + destroy_row(VECTOR_AT(table->rows, i, f_row_t *)); + } + destroy_vector(table->rows); + } + if (table->separators) { + size_t row_n = vector_size(table->separators); + for (i = 0; i < row_n; ++i) { + destroy_separator(VECTOR_AT(table->separators, i, f_separator_t *)); + } + destroy_vector(table->separators); + } + destroy_table_properties(table->properties); + destroy_string_buffer(table->conv_buffer); + F_FREE(table); +} + +ft_table_t *ft_copy_table(ft_table_t *table) +{ + if (table == NULL) + return NULL; + + ft_table_t *result = ft_create_table(); + if (result == NULL) + return NULL; + + size_t i = 0; + size_t rows_n = vector_size(table->rows); + for (i = 0; i < rows_n; ++i) { + f_row_t *row = VECTOR_AT(table->rows, i, f_row_t *); + f_row_t *new_row = copy_row(row); + if (new_row == NULL) { + ft_destroy_table(result); + return NULL; + } + vector_push(result->rows, &new_row); + } + + size_t sep_sz = vector_size(table->separators); + for (i = 0; i < sep_sz; ++i) { + f_separator_t *sep = VECTOR_AT(table->separators, i, f_separator_t *); + f_separator_t *new_sep = copy_separator(sep); + if (new_sep == NULL) { + ft_destroy_table(result); + return NULL; + } + vector_push(result->separators, &new_sep); + } + + /* note: by default new table has allocated default properties, so we + * have to destroy them first. + */ + if (result->properties) { + destroy_table_properties(result->properties); + } + result->properties = copy_table_properties(table->properties); + if (result->properties == NULL) { + ft_destroy_table(result); + return NULL; + } + + /* todo: copy conv_buffer ?? 
*/ + + result->cur_row = table->cur_row; + result->cur_col = table->cur_col; + return result; +} + +static int split_cur_row(ft_table_t *table, f_row_t **tail_of_cur_row) +{ + if (table->cur_row >= vector_size(table->rows)) { + tail_of_cur_row = NULL; + return 0; + } + + f_row_t *row = VECTOR_AT(table->rows, table->cur_row, f_row_t *); + if (table->cur_col >= columns_in_row(row)) { + tail_of_cur_row = NULL; + return 0; + } + + f_row_t *tail = split_row(row, table->cur_col); + if (!tail) { + tail_of_cur_row = NULL; + return FT_GEN_ERROR; + } + + *tail_of_cur_row = tail; + return 0; +} + +int ft_ln(ft_table_t *table) +{ + assert(table); + fort_entire_table_properties_t *table_props = &table->properties->entire_table_properties; + switch (table_props->add_strategy) { + case FT_STRATEGY_INSERT: { + f_row_t *new_row = NULL; + if (FT_IS_ERROR(split_cur_row(table, &new_row))) { + return FT_GEN_ERROR; + } + if (new_row) { + if (FT_IS_ERROR(vector_insert(table->rows, &new_row, table->cur_row + 1))) { + destroy_row(new_row); + return FT_GEN_ERROR; + } + } + break; + } + case FT_STRATEGY_REPLACE: + // do nothing + break; + default: + assert(0 && "Unexpected situation inside libfort"); + break; + } + table->cur_col = 0; + table->cur_row++; + return FT_SUCCESS; +} + +size_t ft_cur_row(const ft_table_t *table) +{ + assert(table); + return table->cur_row; +} + +size_t ft_cur_col(const ft_table_t *table) +{ + assert(table); + return table->cur_col; +} + +void ft_set_cur_cell(ft_table_t *table, size_t row, size_t col) +{ + assert(table); + table->cur_row = row; + table->cur_col = col; +} + +int ft_is_empty(const ft_table_t *table) +{ + assert(table); + return ft_row_count(table) == 0; +} + +size_t ft_row_count(const ft_table_t *table) +{ + assert(table && table->rows); + return vector_size(table->rows); +} + +size_t ft_col_count(const ft_table_t *table) +{ + assert(table && table->rows); + size_t i = 0; + size_t cols_n = 0; + size_t rows_n = vector_size(table->rows); + for (i = 0; 
i < rows_n; ++i) { + f_row_t *row = VECTOR_AT(table->rows, i, f_row_t *); + size_t ncols = columns_in_row(row); + cols_n = MAX(cols_n, ncols); + } + return cols_n; +} + +int ft_erase_range(ft_table_t *table, + size_t top_left_row, size_t top_left_col, + size_t bottom_right_row, size_t bottom_right_col) +{ + assert(table && table->rows); + int status = FT_SUCCESS; + + size_t rows_n = vector_size(table->rows); + + if (top_left_row == FT_CUR_ROW) + top_left_row = table->cur_row; + if (bottom_right_row == FT_CUR_ROW) + bottom_right_row = table->cur_row; + + if (top_left_col == FT_CUR_COLUMN) + top_left_col = table->cur_row; + if (bottom_right_col == FT_CUR_COLUMN) + bottom_right_col = table->cur_row; + + if (top_left_row > bottom_right_row || top_left_col > bottom_right_col) + return FT_EINVAL; + + f_row_t *row = NULL; + size_t i = top_left_row; + while (i < rows_n && i <= bottom_right_row) { + row = VECTOR_AT(table->rows, i, f_row_t *); + status = ft_row_erase_range(row, top_left_col, bottom_right_col); + if (FT_IS_ERROR(status)) + return status; + ++i; + } + + f_separator_t *separator = NULL; + + size_t n_iterations = MIN(rows_n - 1, bottom_right_row) - top_left_row + 1; + size_t j = 0; + i = top_left_row; + for (j = 0; j < n_iterations; ++j) { + row = VECTOR_AT(table->rows, i, f_row_t *); + if (columns_in_row(row)) { + ++i; + } else { + destroy_row(row); + status = vector_erase(table->rows, i); + if (FT_IS_ERROR(status)) + return status; + if (i < vector_size(table->separators)) { + separator = VECTOR_AT(table->separators, i, f_separator_t *); + destroy_separator(separator); + vector_erase(table->separators, i); + } + } + } + + return FT_SUCCESS; +} + + +static int ft_row_printf_impl_(ft_table_t *table, size_t row, const struct f_string_view *fmt, va_list *va) +{ + size_t i = 0; + size_t new_cols = 0; + + if (table == NULL) + return -1; + + f_row_t *new_row = create_row_from_fmt_string(fmt, va); + + if (new_row == NULL) { + return -1; + } + + f_row_t **cur_row_p = 
NULL; + size_t sz = vector_size(table->rows); + if (row >= sz) { + size_t push_n = row - sz + 1; + for (i = 0; i < push_n; ++i) { + f_row_t *padding_row = create_row(); + if (padding_row == NULL) + goto clear; + + if (FT_IS_ERROR(vector_push(table->rows, &padding_row))) { + destroy_row(padding_row); + goto clear; + } + } + } + /* todo: clearing pushed items in case of error ?? */ + + new_cols = columns_in_row(new_row); + cur_row_p = &VECTOR_AT(table->rows, row, f_row_t *); + + switch (table->properties->entire_table_properties.add_strategy) { + case FT_STRATEGY_INSERT: { + if (FT_IS_ERROR(insert_row(*cur_row_p, new_row, table->cur_col))) + goto clear; + break; + } + case FT_STRATEGY_REPLACE: { + if (FT_IS_ERROR(swap_row(*cur_row_p, new_row, table->cur_col))) + goto clear; + break; + } + default: + assert(0 && "Unexpected situation inside libfort"); + break; + } + + table->cur_col += new_cols; + destroy_row(new_row); + return (int)new_cols; + +clear: + destroy_row(new_row); + return -1; +} + +#if defined(FT_CLANG_COMPILER) || defined(FT_GCC_COMPILER) +#define FT_PRINTF ft_printf +#define FT_PRINTF_LN ft_printf_ln +#else +#define FT_PRINTF ft_printf_impl +#define FT_PRINTF_LN ft_printf_ln_impl +#endif + + + +int FT_PRINTF(ft_table_t *table, const char *fmt, ...) +{ + assert(table); + va_list va; + va_start(va, fmt); + + struct f_string_view fmt_str; + fmt_str.type = CHAR_BUF; + fmt_str.u.cstr = fmt; + int result = ft_row_printf_impl_(table, table->cur_row, &fmt_str, &va); + va_end(va); + return result; +} + +int FT_PRINTF_LN(ft_table_t *table, const char *fmt, ...) 
+{ + assert(table); + va_list va; + va_start(va, fmt); + + struct f_string_view fmt_str; + fmt_str.type = CHAR_BUF; + fmt_str.u.cstr = fmt; + int result = ft_row_printf_impl_(table, table->cur_row, &fmt_str, &va); + if (result >= 0) { + ft_ln(table); + } + va_end(va); + return result; +} + +#undef FT_PRINTF +#undef FT_PRINTF_LN + +#ifdef FT_HAVE_WCHAR +int ft_wprintf(ft_table_t *table, const wchar_t *fmt, ...) +{ + assert(table); + va_list va; + va_start(va, fmt); + + struct f_string_view fmt_str; + fmt_str.type = W_CHAR_BUF; + fmt_str.u.wstr = fmt; + int result = ft_row_printf_impl_(table, table->cur_row, &fmt_str, &va); + va_end(va); + return result; +} + +int ft_wprintf_ln(ft_table_t *table, const wchar_t *fmt, ...) +{ + assert(table); + va_list va; + va_start(va, fmt); + + struct f_string_view fmt_str; + fmt_str.type = W_CHAR_BUF; + fmt_str.u.wstr = fmt; + int result = ft_row_printf_impl_(table, table->cur_row, &fmt_str, &va); + if (result >= 0) { + ft_ln(table); + } + va_end(va); + return result; +} + +#endif + +void ft_set_default_printf_field_separator(char separator) +{ + g_col_separator = separator; +} + +static int ft_write_impl_(ft_table_t *table, const f_string_view_t *cell_content) +{ + assert(table); + f_string_buffer_t *buf = get_cur_str_buffer_and_create_if_not_exists(table); + if (buf == NULL) + return FT_GEN_ERROR; + + int status = FT_SUCCESS; + switch (cell_content->type) { + case CHAR_BUF: + status = fill_buffer_from_string(buf, cell_content->u.cstr); + break; +#ifdef FT_HAVE_WCHAR + case W_CHAR_BUF: + status = fill_buffer_from_wstring(buf, cell_content->u.wstr); + break; +#endif +#ifdef FT_HAVE_UTF8 + case UTF8_BUF: + status = fill_buffer_from_u8string(buf, cell_content->u.u8str); + break; +#endif + default: + status = FT_GEN_ERROR; + } + if (FT_IS_SUCCESS(status)) { + table->cur_col++; + } + return status; +} + +static int ft_write_impl(ft_table_t *table, const char *cell_content) +{ + f_string_view_t content; + content.type = CHAR_BUF; + 
content.u.cstr = cell_content; + return ft_write_impl_(table, &content); +} + +#ifdef FT_HAVE_UTF8 +static int ft_u8write_impl(ft_table_t *table, const void *cell_content) +{ + f_string_view_t content; + content.type = UTF8_BUF; + content.u.u8str = cell_content; + return ft_write_impl_(table, &content); +} +#endif /* FT_HAVE_UTF8 */ + +#ifdef FT_HAVE_WCHAR +static int ft_wwrite_impl(ft_table_t *table, const wchar_t *cell_content) +{ + f_string_view_t content; + content.type = W_CHAR_BUF; + content.u.wstr = cell_content; + return ft_write_impl_(table, &content); +} +#endif + + +int ft_nwrite(ft_table_t *table, size_t count, const char *cell_content, ...) +{ + size_t i = 0; + assert(table); + int status = ft_write_impl(table, cell_content); + if (FT_IS_ERROR(status)) + return status; + + va_list va; + va_start(va, cell_content); + --count; + for (i = 0; i < count; ++i) { + const char *cell = va_arg(va, const char *); + status = ft_write_impl(table, cell); + if (FT_IS_ERROR(status)) { + va_end(va); + return status; + } + } + va_end(va); + return status; +} + +int ft_nwrite_ln(ft_table_t *table, size_t count, const char *cell_content, ...) +{ + size_t i = 0; + assert(table); + int status = ft_write_impl(table, cell_content); + if (FT_IS_ERROR(status)) + return status; + + va_list va; + va_start(va, cell_content); + --count; + for (i = 0; i < count; ++i) { + const char *cell = va_arg(va, const char *); + status = ft_write_impl(table, cell); + if (FT_IS_ERROR(status)) { + va_end(va); + return status; + } + } + va_end(va); + + ft_ln(table); + return status; +} + + + + +#ifdef FT_HAVE_WCHAR + +int ft_nwwrite(ft_table_t *table, size_t n, const wchar_t *cell_content, ...) 
+{ + size_t i = 0; + assert(table); + int status = ft_wwrite_impl(table, cell_content); + if (FT_IS_ERROR(status)) + return status; + + va_list va; + va_start(va, cell_content); + --n; + for (i = 0; i < n; ++i) { + const wchar_t *cell = va_arg(va, const wchar_t *); + status = ft_wwrite_impl(table, cell); + if (FT_IS_ERROR(status)) { + va_end(va); + return status; + } + } + va_end(va); + return status; +} + +int ft_nwwrite_ln(ft_table_t *table, size_t n, const wchar_t *cell_content, ...) +{ + size_t i = 0; + assert(table); + int status = ft_wwrite_impl(table, cell_content); + if (FT_IS_ERROR(status)) + return status; + + va_list va; + va_start(va, cell_content); + --n; + for (i = 0; i < n; ++i) { + const wchar_t *cell = va_arg(va, const wchar_t *); + status = ft_wwrite_impl(table, cell); + if (FT_IS_ERROR(status)) { + va_end(va); + return status; + } + } + va_end(va); + + ft_ln(table); + return status; +} +#endif + + +int ft_row_write(ft_table_t *table, size_t cols, const char *cells[]) +{ + size_t i = 0; + assert(table); + for (i = 0; i < cols; ++i) { + int status = ft_write_impl(table, cells[i]); + if (FT_IS_ERROR(status)) { + /* todo: maybe current pos in case of error should be equal to the one before function call? */ + return status; + } + } + return FT_SUCCESS; +} + +int ft_row_write_ln(ft_table_t *table, size_t cols, const char *cells[]) +{ + assert(table); + int status = ft_row_write(table, cols, cells); + if (FT_IS_SUCCESS(status)) { + ft_ln(table); + } + return status; +} + +#ifdef FT_HAVE_WCHAR +int ft_row_wwrite(ft_table_t *table, size_t cols, const wchar_t *cells[]) +{ + size_t i = 0; + assert(table); + for (i = 0; i < cols; ++i) { + int status = ft_wwrite_impl(table, cells[i]); + if (FT_IS_ERROR(status)) { + /* todo: maybe current pos in case of error should be equal + * to the one before function call? 
+ */ + return status; + } + } + return FT_SUCCESS; +} + +int ft_row_wwrite_ln(ft_table_t *table, size_t cols, const wchar_t *cells[]) +{ + assert(table); + int status = ft_row_wwrite(table, cols, cells); + if (FT_IS_SUCCESS(status)) { + ft_ln(table); + } + return status; +} +#endif + + + +int ft_table_write(ft_table_t *table, size_t rows, size_t cols, const char *table_cells[]) +{ + size_t i = 0; + assert(table); + for (i = 0; i < rows; ++i) { + int status = ft_row_write(table, cols, (const char **)&table_cells[i * cols]); + if (FT_IS_ERROR(status)) { + /* todo: maybe current pos in case of error should be equal + * to the one before function call? + */ + return status; + } + if (i != rows - 1) + ft_ln(table); + } + return FT_SUCCESS; +} + +int ft_table_write_ln(ft_table_t *table, size_t rows, size_t cols, const char *table_cells[]) +{ + assert(table); + int status = ft_table_write(table, rows, cols, table_cells); + if (FT_IS_SUCCESS(status)) { + ft_ln(table); + } + return status; +} + + +#ifdef FT_HAVE_WCHAR +int ft_table_wwrite(ft_table_t *table, size_t rows, size_t cols, const wchar_t *table_cells[]) +{ + size_t i = 0; + assert(table); + for (i = 0; i < rows; ++i) { + int status = ft_row_wwrite(table, cols, (const wchar_t **)&table_cells[i * cols]); + if (FT_IS_ERROR(status)) { + /* todo: maybe current pos in case of error should be equal + * to the one before function call? 
+ */ + return status; + } + if (i != rows - 1) + ft_ln(table); + } + return FT_SUCCESS; +} + +int ft_table_wwrite_ln(ft_table_t *table, size_t rows, size_t cols, const wchar_t *table_cells[]) +{ + assert(table); + int status = ft_table_wwrite(table, rows, cols, table_cells); + if (FT_IS_SUCCESS(status)) { + ft_ln(table); + } + return status; +} +#endif + +static +const char *empty_str_arr[] = {"", (const char *)L"", ""}; + +static +const void *ft_to_string_impl(const ft_table_t *table, enum f_string_type b_type) +{ + assert(table); + + const char *result = NULL; + + /* Determine size of table string representation */ + size_t cod_height = 0; + size_t cod_width = 0; + int status = table_internal_codepoints_geometry(table, &cod_height, &cod_width); + if (FT_IS_ERROR(status)) { + return NULL; + } + size_t n_codepoints = cod_height * cod_width + 1; + + /* Allocate string buffer for string representation */ + if (table->conv_buffer == NULL) { + ((ft_table_t *)table)->conv_buffer = create_string_buffer(n_codepoints, b_type); + if (table->conv_buffer == NULL) + return NULL; + } + while (string_buffer_cod_width_capacity(table->conv_buffer) < n_codepoints) { + if (FT_IS_ERROR(realloc_string_buffer_without_copy(table->conv_buffer))) { + return NULL; + } + } + if (!buffer_check_align(table->conv_buffer)) + return NULL; + char *buffer = (char *)buffer_get_data(table->conv_buffer); + + size_t cols = 0; + size_t rows = 0; + size_t *col_vis_width_arr = NULL; + size_t *row_vis_height_arr = NULL; + status = table_rows_and_cols_geometry(table, &col_vis_width_arr, &cols, &row_vis_height_arr, &rows, VISIBLE_GEOMETRY); + if (FT_IS_ERROR(status)) + return NULL; + + if (rows == 0) { + F_FREE(col_vis_width_arr); + F_FREE(row_vis_height_arr); + return empty_str_arr[b_type]; + } + + int tmp = 0; + size_t i = 0; + f_context_t context; + context.table_properties = (table->properties ? 
table->properties : &g_table_properties); + f_row_t *prev_row = NULL; + f_row_t *cur_row = NULL; + f_separator_t *cur_sep = NULL; + size_t sep_size = vector_size(table->separators); + + f_conv_context_t cntx; + cntx.u.buf = buffer; + cntx.raw_avail = string_buffer_raw_capacity(table->conv_buffer); + cntx.cntx = &context; + cntx.b_type = b_type; + + /* Print top margin */ + for (i = 0; i < context.table_properties->entire_table_properties.top_margin; ++i) { + FT_CHECK(print_n_strings(&cntx, cod_width - 1/* minus new_line*/, FT_SPACE)); + FT_CHECK(print_n_strings(&cntx, 1, FT_NEWLINE)); + } + + for (i = 0; i < rows; ++i) { + cur_sep = (i < sep_size) ? VECTOR_AT(table->separators, i, f_separator_t *) : NULL; + cur_row = VECTOR_AT(table->rows, i, f_row_t *); + enum f_hor_separator_pos separatorPos = (i == 0) ? TOP_SEPARATOR : INSIDE_SEPARATOR; + context.row = i; + FT_CHECK(print_row_separator(&cntx, col_vis_width_arr, cols, prev_row, cur_row, separatorPos, cur_sep)); + FT_CHECK(snprintf_row(cur_row, &cntx, col_vis_width_arr, cols, row_vis_height_arr[i])); + prev_row = cur_row; + } + cur_row = NULL; + cur_sep = (i < sep_size) ? 
VECTOR_AT(table->separators, i, f_separator_t *) : NULL; + context.row = i; + FT_CHECK(print_row_separator(&cntx, col_vis_width_arr, cols, prev_row, cur_row, BOTTOM_SEPARATOR, cur_sep)); + + /* Print bottom margin */ + for (i = 0; i < context.table_properties->entire_table_properties.bottom_margin; ++i) { + FT_CHECK(print_n_strings(&cntx, cod_width - 1/* minus new_line*/, FT_SPACE)); + FT_CHECK(print_n_strings(&cntx, 1, FT_NEWLINE)); + } + + result = buffer; + +clear: + F_FREE(col_vis_width_arr); + F_FREE(row_vis_height_arr); + return result; +} + +const char *ft_to_string(const ft_table_t *table) +{ + return (const char *)ft_to_string_impl(table, CHAR_BUF); +} + +#ifdef FT_HAVE_WCHAR +const wchar_t *ft_to_wstring(const ft_table_t *table) +{ + return (const wchar_t *)ft_to_string_impl(table, W_CHAR_BUF); +} +#endif + + +int ft_add_separator(ft_table_t *table) +{ + assert(table); + assert(table->separators); + + while (vector_size(table->separators) <= table->cur_row) { + f_separator_t *sep_p = create_separator(F_FALSE); + if (sep_p == NULL) + return FT_MEMORY_ERROR; + int status = vector_push(table->separators, &sep_p); + if (FT_IS_ERROR(status)) + return status; + } + + f_separator_t **sep_p = &VECTOR_AT(table->separators, table->cur_row, f_separator_t *); + if (*sep_p == NULL) + *sep_p = create_separator(F_TRUE); + else + (*sep_p)->enabled = F_TRUE; + + if (*sep_p == NULL) + return FT_GEN_ERROR; + return FT_SUCCESS; +} + +static const struct fort_border_style *built_in_styles[] = { + &FORT_BASIC_STYLE, + &FORT_BASIC2_STYLE, + &FORT_SIMPLE_STYLE, + &FORT_PLAIN_STYLE, + &FORT_DOT_STYLE, + &FORT_EMPTY_STYLE, + &FORT_EMPTY2_STYLE, + &FORT_SOLID_STYLE, + &FORT_SOLID_ROUND_STYLE, + &FORT_NICE_STYLE, + &FORT_DOUBLE_STYLE, + &FORT_DOUBLE2_STYLE, + &FORT_BOLD_STYLE, + &FORT_BOLD2_STYLE, + &FORT_FRAME_STYLE, +}; +#define BUILT_IN_STYLES_SZ (sizeof(built_in_styles) / sizeof(built_in_styles[0])) + +/* todo: remove this stupid and dangerous code */ +static const struct 
ft_border_style built_in_external_styles[BUILT_IN_STYLES_SZ] = { + { + {"", "", "", "", "", ""}, + {"", "", "", "", "", ""}, + "" + } +}; + +const struct ft_border_style *const FT_BASIC_STYLE = &built_in_external_styles[0]; +const struct ft_border_style *const FT_BASIC2_STYLE = &built_in_external_styles[1]; +const struct ft_border_style *const FT_SIMPLE_STYLE = &built_in_external_styles[2]; +const struct ft_border_style *const FT_PLAIN_STYLE = &built_in_external_styles[3]; +const struct ft_border_style *const FT_DOT_STYLE = &built_in_external_styles[4]; +const struct ft_border_style *const FT_EMPTY_STYLE = &built_in_external_styles[5]; +const struct ft_border_style *const FT_EMPTY2_STYLE = &built_in_external_styles[6]; +const struct ft_border_style *const FT_SOLID_STYLE = &built_in_external_styles[7]; +const struct ft_border_style *const FT_SOLID_ROUND_STYLE = &built_in_external_styles[8]; +const struct ft_border_style *const FT_NICE_STYLE = &built_in_external_styles[9]; +const struct ft_border_style *const FT_DOUBLE_STYLE = &built_in_external_styles[10]; +const struct ft_border_style *const FT_DOUBLE2_STYLE = &built_in_external_styles[11]; +const struct ft_border_style *const FT_BOLD_STYLE = &built_in_external_styles[12]; +const struct ft_border_style *const FT_BOLD2_STYLE = &built_in_external_styles[13]; +const struct ft_border_style *const FT_FRAME_STYLE = &built_in_external_styles[14]; + +static void set_border_props_for_props(f_table_properties_t *properties, const struct ft_border_style *style) +{ + if (style >= built_in_external_styles && style < (built_in_external_styles + BUILT_IN_STYLES_SZ)) { + size_t pos = (size_t)(style - built_in_external_styles); + memcpy(&(properties->border_style), built_in_styles[pos], sizeof(struct fort_border_style)); + return; + } + + const struct ft_border_chars *border_chs = &(style->border_chs); + const struct ft_border_chars *header_border_chs = &(style->header_border_chs); + +#define BOR_CHARS 
properties->border_style.border_chars +#define H_BOR_CHARS properties->border_style.header_border_chars +#define SEP_CHARS properties->border_style.separator_chars + + BOR_CHARS[TT_bip] = border_chs->top_border_ch; + BOR_CHARS[IH_bip] = border_chs->separator_ch; + BOR_CHARS[BB_bip] = border_chs->bottom_border_ch; + BOR_CHARS[LL_bip] = BOR_CHARS[IV_bip] = BOR_CHARS[RR_bip] = border_chs->side_border_ch; + + BOR_CHARS[TL_bip] = BOR_CHARS[TV_bip] = BOR_CHARS[TR_bip] = border_chs->out_intersect_ch; + BOR_CHARS[LH_bip] = BOR_CHARS[RH_bip] = border_chs->out_intersect_ch; + BOR_CHARS[BL_bip] = BOR_CHARS[BV_bip] = BOR_CHARS[BR_bip] = border_chs->out_intersect_ch; + BOR_CHARS[II_bip] = border_chs->in_intersect_ch; + + BOR_CHARS[LI_bip] = BOR_CHARS[TI_bip] = BOR_CHARS[RI_bip] = BOR_CHARS[BI_bip] = border_chs->in_intersect_ch; + + if (strlen(border_chs->separator_ch) == 0 && strlen(border_chs->in_intersect_ch) == 0) { + BOR_CHARS[LH_bip] = BOR_CHARS[RH_bip] = "\0"; + } + + H_BOR_CHARS[TT_bip] = header_border_chs->top_border_ch; + H_BOR_CHARS[IH_bip] = header_border_chs->separator_ch; + H_BOR_CHARS[BB_bip] = header_border_chs->bottom_border_ch; + H_BOR_CHARS[LL_bip] = H_BOR_CHARS[IV_bip] = H_BOR_CHARS[RR_bip] = header_border_chs->side_border_ch; + + H_BOR_CHARS[TL_bip] = H_BOR_CHARS[TV_bip] = H_BOR_CHARS[TR_bip] = header_border_chs->out_intersect_ch; + H_BOR_CHARS[LH_bip] = H_BOR_CHARS[RH_bip] = header_border_chs->out_intersect_ch; + H_BOR_CHARS[BL_bip] = H_BOR_CHARS[BV_bip] = H_BOR_CHARS[BR_bip] = header_border_chs->out_intersect_ch; + H_BOR_CHARS[II_bip] = header_border_chs->in_intersect_ch; + + H_BOR_CHARS[LI_bip] = H_BOR_CHARS[TI_bip] = H_BOR_CHARS[RI_bip] = H_BOR_CHARS[BI_bip] = header_border_chs->in_intersect_ch; + + if (strlen(header_border_chs->separator_ch) == 0 && strlen(header_border_chs->in_intersect_ch) == 0) { + BOR_CHARS[LH_bip] = BOR_CHARS[RH_bip] = "\0"; + } + + SEP_CHARS[LH_sip] = SEP_CHARS[RH_sip] = SEP_CHARS[II_sip] = header_border_chs->out_intersect_ch; + 
SEP_CHARS[TI_sip] = SEP_CHARS[BI_sip] = header_border_chs->out_intersect_ch; + SEP_CHARS[IH_sip] = style->hor_separator_char; + + +#undef BOR_CHARS +#undef H_BOR_CHARS +#undef SEP_CHARS +} + + +int ft_set_default_border_style(const struct ft_border_style *style) +{ + set_border_props_for_props(&g_table_properties, style); + return FT_SUCCESS; +} + +int ft_set_border_style(ft_table_t *table, const struct ft_border_style *style) +{ + assert(table); + if (table->properties == NULL) { + table->properties = create_table_properties(); + if (table->properties == NULL) + return FT_MEMORY_ERROR; + } + set_border_props_for_props(table->properties, style); + return FT_SUCCESS; +} + + + +int ft_set_cell_prop(ft_table_t *table, size_t row, size_t col, uint32_t property, int value) +{ + assert(table); + + if (table->properties == NULL) { + table->properties = create_table_properties(); + if (table->properties == NULL) + return FT_MEMORY_ERROR; + } + if (table->properties->cell_properties == NULL) { + table->properties->cell_properties = create_cell_prop_container(); + if (table->properties->cell_properties == NULL) { + return FT_GEN_ERROR; + } + } + + if (row == FT_CUR_ROW) + row = table->cur_row; + if (col == FT_CUR_COLUMN) + col = table->cur_col; + + return set_cell_property(table->properties->cell_properties, row, col, property, value); +} + +int ft_set_default_cell_prop(uint32_t property, int value) +{ + return set_default_cell_property(property, value); +} + + +int ft_set_default_tbl_prop(uint32_t property, int value) +{ + return set_default_entire_table_property(property, value); +} + +int ft_set_tbl_prop(ft_table_t *table, uint32_t property, int value) +{ + assert(table); + + if (table->properties == NULL) { + table->properties = create_table_properties(); + if (table->properties == NULL) + return FT_MEMORY_ERROR; + } + return set_entire_table_property(table->properties, property, value); +} + +void ft_set_memory_funcs(void *(*f_malloc)(size_t size), void (*f_free)(void 
*ptr)) +{ + set_memory_funcs(f_malloc, f_free); +} + +const char *ft_strerror(int error_code) +{ + switch (error_code) { + case FT_MEMORY_ERROR: + return "Out of memory"; + case FT_GEN_ERROR: + return "General error"; + case FT_EINVAL: + return "Invalid argument"; + case FT_INTERN_ERROR: + return "Internal libfort error"; + default: + if (error_code < 0) + return "Unknown error code"; + else + return "Success"; + } +} + +int ft_set_cell_span(ft_table_t *table, size_t row, size_t col, size_t hor_span) +{ + assert(table); + if (hor_span < 2) + return FT_EINVAL; + + if (row == FT_CUR_ROW) + row = table->cur_row; + if (row == FT_CUR_COLUMN) + col = table->cur_col; + + f_row_t *row_p = get_row_and_create_if_not_exists(table, row); + if (row_p == NULL) + return FT_GEN_ERROR; + + return row_set_cell_span(row_p, col, hor_span); +} + +#ifdef FT_HAVE_UTF8 + +int ft_u8nwrite(ft_table_t *table, size_t n, const void *cell_content, ...) +{ + size_t i = 0; + assert(table); + int status = ft_u8write_impl(table, cell_content); + if (FT_IS_ERROR(status)) + return status; + + va_list va; + va_start(va, cell_content); + --n; + for (i = 0; i < n; ++i) { + const void *cell = va_arg(va, const void *); + status = ft_u8write_impl(table, cell); + if (FT_IS_ERROR(status)) { + va_end(va); + return status; + } + } + va_end(va); + + return status; +} + +int ft_u8nwrite_ln(ft_table_t *table, size_t n, const void *cell_content, ...) +{ + size_t i = 0; + assert(table); + int status = ft_u8write_impl(table, cell_content); + if (FT_IS_ERROR(status)) + return status; + + va_list va; + va_start(va, cell_content); + --n; + for (i = 0; i < n; ++i) { + const void *cell = va_arg(va, const void *); + status = ft_u8write_impl(table, cell); + if (FT_IS_ERROR(status)) { + va_end(va); + return status; + } + } + va_end(va); + + ft_ln(table); + return status; +} + +FT_PRINTF_ATTRIBUTE_FORMAT(2, 3) +int ft_u8printf(ft_table_t *table, const char *fmt, ...) 
+{ + assert(table); + va_list va; + va_start(va, fmt); + + struct f_string_view fmt_str; + fmt_str.type = UTF8_BUF; + fmt_str.u.cstr = fmt; + int result = ft_row_printf_impl_(table, table->cur_row, &fmt_str, &va); + va_end(va); + return result; +} + +FT_PRINTF_ATTRIBUTE_FORMAT(2, 3) +int ft_u8printf_ln(ft_table_t *table, const char *fmt, ...) +{ + assert(table); + va_list va; + va_start(va, fmt); + + struct f_string_view fmt_str; + fmt_str.type = UTF8_BUF; + fmt_str.u.cstr = fmt; + int result = ft_row_printf_impl_(table, table->cur_row, &fmt_str, &va); + if (result >= 0) { + ft_ln(table); + } + va_end(va); + return result; +} + +const void *ft_to_u8string(const ft_table_t *table) +{ + return (const void *)ft_to_string_impl(table, UTF8_BUF); +} + +void ft_set_u8strwid_func(int (*u8strwid)(const void *beg, const void *end, size_t *width)) +{ + buffer_set_u8strwid_func(u8strwid); +} + +#endif /* FT_HAVE_UTF8 */ + +/******************************************************** + End of file "fort_impl.c" + ********************************************************/ + + +/******************************************************** + Begin of file "fort_utils.c" + ********************************************************/ + +/* #include "fort_utils.h" */ /* Commented by amalgamation script */ +#ifdef FT_HAVE_WCHAR +#include <wchar.h> +#endif +#if defined(FT_HAVE_UTF8) +/* #include "utf8.h" */ /* Commented by amalgamation script */ +#endif +/* #include "string_buffer.h" */ /* Commented by amalgamation script */ + + +char g_col_separator = FORT_DEFAULT_COL_SEPARATOR; + +/***************************************************************************** + * LIBFORT helpers + *****************************************************************************/ + +#if defined(FT_GCC_COMPILER) || defined(FT_CLANG_COMPILER) +void *(*fort_malloc)(size_t size) = &malloc; +void (*fort_free)(void *ptr) = &free; +void *(*fort_calloc)(size_t nmemb, size_t size) = &calloc; +void *(*fort_realloc)(void *ptr, 
size_t size) = &realloc; +#else +static void *local_malloc(size_t size) +{ + return malloc(size); +} + +static void local_free(void *ptr) +{ + free(ptr); +} + +static void *local_calloc(size_t nmemb, size_t size) +{ + return calloc(nmemb, size); +} + +static void *local_realloc(void *ptr, size_t size) +{ + return realloc(ptr, size); +} + +void *(*fort_malloc)(size_t size) = &local_malloc; +void (*fort_free)(void *ptr) = &local_free; +void *(*fort_calloc)(size_t nmemb, size_t size) = &local_calloc; +void *(*fort_realloc)(void *ptr, size_t size) = &local_realloc; +#endif + +static void *custom_fort_calloc(size_t nmemb, size_t size) +{ + size_t total_size = nmemb * size; + void *result = F_MALLOC(total_size); + if (result != NULL) + memset(result, 0, total_size); + return result; +} + +static void *custom_fort_realloc(void *ptr, size_t size) +{ + if (ptr == NULL) + return F_MALLOC(size); + if (size == 0) { + F_FREE(ptr); + return NULL; + } + + void *new_chunk = F_MALLOC(size); + if (new_chunk == NULL) + return NULL; + + /* + * In theory we should copy MIN(size, size allocated for ptr) bytes, + * but this is rather dummy implementation so we don't care about it + */ + memcpy(new_chunk, ptr, size); + F_FREE(ptr); + return new_chunk; +} + + +FT_INTERNAL +void set_memory_funcs(void *(*f_malloc)(size_t size), void (*f_free)(void *ptr)) +{ + assert((f_malloc == NULL && f_free == NULL) /* Use std functions */ + || (f_malloc != NULL && f_free != NULL) /* Use custom functions */); + + if (f_malloc == NULL && f_free == NULL) { +#if defined(FT_GCC_COMPILER) || defined(FT_CLANG_COMPILER) + fort_malloc = &malloc; + fort_free = &free; + fort_calloc = &calloc; + fort_realloc = &realloc; +#else + fort_malloc = &local_malloc; + fort_free = &local_free; + fort_calloc = &local_calloc; + fort_realloc = &local_realloc; +#endif + } else { + fort_malloc = f_malloc; + fort_free = f_free; + fort_calloc = &custom_fort_calloc; + fort_realloc = &custom_fort_realloc; + } + +} + +FT_INTERNAL +char 
*fort_strdup(const char *str) +{ + if (str == NULL) + return NULL; + + size_t sz = strlen(str); + char *str_copy = (char *)F_MALLOC((sz + 1) * sizeof(char)); + if (str_copy == NULL) + return NULL; + + strcpy(str_copy, str); + return str_copy; +} + +#if defined(FT_HAVE_WCHAR) +FT_INTERNAL +wchar_t *fort_wcsdup(const wchar_t *str) +{ + if (str == NULL) + return NULL; + + size_t sz = wcslen(str); + wchar_t *str_copy = (wchar_t *)F_MALLOC((sz + 1) * sizeof(wchar_t)); + if (str_copy == NULL) + return NULL; + + wcscpy(str_copy, str); + return str_copy; +} +#endif + + +static +size_t columns_number_in_fmt_string(const char *fmt) +{ + size_t separator_counter = 0; + const char *pos = fmt; + while (1) { + pos = strchr(pos, g_col_separator); + if (pos == NULL) + break; + + separator_counter++; + ++pos; + } + return separator_counter + 1; +} + +#if defined(FT_HAVE_WCHAR) +static +size_t columns_number_in_fmt_wstring(const wchar_t *fmt) +{ + size_t separator_counter = 0; + const wchar_t *pos = fmt; + while (1) { + pos = wcschr(pos, g_col_separator); + if (pos == NULL) + break; + + separator_counter++; + ++pos; + } + return separator_counter + 1; +} +#endif + +#if defined(FT_HAVE_UTF8) +static +size_t columns_number_in_fmt_u8string(const void *fmt) +{ + size_t separator_counter = 0; + const char *pos = (const char *)fmt; + while (1) { + pos = (const char *)utf8chr(pos, g_col_separator); + if (pos == NULL) + break; + + separator_counter++; + ++pos; + } + return separator_counter + 1; +} +#endif + +FT_INTERNAL +size_t number_of_columns_in_format_string(const f_string_view_t *fmt) +{ + switch (fmt->type) { + case CHAR_BUF: + return columns_number_in_fmt_string(fmt->u.cstr); +#ifdef FT_HAVE_WCHAR + case W_CHAR_BUF: + return columns_number_in_fmt_wstring(fmt->u.wstr); +#endif /* FT_HAVE_WCHAR */ +#ifdef FT_HAVE_UTF8 + case UTF8_BUF: + return columns_number_in_fmt_u8string(fmt->u.u8str); +#endif /* FT_HAVE_UTF8 */ + default: + assert(0); + } + return 0; +} + +FT_INTERNAL +size_t 
number_of_columns_in_format_buffer(const f_string_buffer_t *fmt) +{ + switch (fmt->type) { + case CHAR_BUF: + return columns_number_in_fmt_string(fmt->str.cstr); +#ifdef FT_HAVE_WCHAR + case W_CHAR_BUF: + return columns_number_in_fmt_wstring(fmt->str.wstr); +#endif /* FT_HAVE_WCHAR */ +#ifdef FT_HAVE_UTF8 + case UTF8_BUF: + return columns_number_in_fmt_u8string(fmt->str.u8str); +#endif /* FT_HAVE_UTF8 */ + default: + assert(0); + } + return 0; +} + +static +int snprint_n_strings_impl(char *buf, size_t length, size_t n, const char *str) +{ + size_t str_len = strlen(str); + if (length <= n * str_len) + return -1; + + if (n == 0) + return 0; + + /* To ensure valid return value it is safely not print such big strings */ + if (n * str_len > INT_MAX) + return -1; + + if (str_len == 0) + return 0; + + int status = snprintf(buf, length, "%0*d", (int)(n * str_len), 0); + if (status < 0) + return status; + + size_t i = 0; + for (i = 0; i < n; ++i) { + const char *str_p = str; + while (*str_p) + *(buf++) = *(str_p++); + } + return (int)(n * str_len); +} + +static +int snprint_n_strings(f_conv_context_t *cntx, size_t n, const char *str) +{ + int w = snprint_n_strings_impl(cntx->u.buf, cntx->raw_avail, n, str); + if (w >= 0) { + cntx->u.buf += w; + cntx->raw_avail -= w; + } + return w; +} + +#if defined(FT_HAVE_WCHAR) +static +int wsnprint_n_string(wchar_t *buf, size_t length, size_t n, const char *str); +#endif + +#if defined(FT_HAVE_UTF8) +static +int u8nprint_n_strings(void *buf, size_t length, size_t n, const void *str); +#endif + + +FT_INTERNAL +int print_n_strings(f_conv_context_t *cntx, size_t n, const char *str) +{ + int cod_w; + int raw_written; + + switch (cntx->b_type) { + case CHAR_BUF: + raw_written = snprint_n_strings(cntx, n, str); + cod_w = raw_written; + return cod_w; +#ifdef FT_HAVE_WCHAR + case W_CHAR_BUF: + cod_w = wsnprint_n_string(cntx->u.wbuf, cntx->raw_avail, n, str); + if (cod_w < 0) + return cod_w; + raw_written = sizeof(wchar_t) * cod_w; + + 
cntx->u.buf += raw_written; + cntx->raw_avail -= raw_written; + return cod_w; +#endif /* FT_HAVE_WCHAR */ +#ifdef FT_HAVE_UTF8 + case UTF8_BUF: + /* Everying is very strange and differs with W_CHAR_BUF */ + raw_written = u8nprint_n_strings(cntx->u.buf, cntx->raw_avail, n, str); + if (raw_written < 0) { + fprintf(stderr, " raw_written = %d\n", raw_written); + return raw_written; + } + + cntx->u.buf += raw_written; + cntx->raw_avail -= raw_written; + return utf8len(str) * n; +#endif /* FT_HAVE_UTF8 */ + default: + assert(0); + return -1; + } +} + +FT_INTERNAL +int ft_nprint(f_conv_context_t *cntx, const char *str, size_t strlen) +{ + if (cntx->raw_avail + 1/* for 0 */ < strlen) + return -1; + + memcpy(cntx->u.buf, str, strlen); + cntx->u.buf += strlen; + cntx->raw_avail -= strlen; + *cntx->u.buf = '\0'; /* Do we need this ? */ + return strlen; +} + +#ifdef FT_HAVE_WCHAR +int ft_nwprint(f_conv_context_t *cntx, const wchar_t *str, size_t strlen) +{ + if (cntx->raw_avail + 1/* for 0 */ < strlen) + return -1; + + size_t raw_len = strlen * sizeof(wchar_t); + + memcpy(cntx->u.buf, str, raw_len); + cntx->u.buf += raw_len; + cntx->raw_avail -= raw_len; + + /* Do we need this ? */ + wchar_t end_of_string = L'\0'; + memcpy(cntx->u.buf, &end_of_string, sizeof(wchar_t)); + return strlen; +} +#endif /* FT_HAVE_WCHAR */ + +#ifdef FT_HAVE_UTF8 +FT_INTERNAL +int ft_nu8print(f_conv_context_t *cntx, const void *beg, const void *end) +{ + const char *bc = (const char *)beg; + const char *ec = (const char *)end; + size_t raw_len = ec - bc; + if (cntx->raw_avail + 1 < raw_len) + return -1; + + memcpy(cntx->u.buf, beg, raw_len); + cntx->u.buf += raw_len; + cntx->raw_avail -= raw_len; + *(cntx->u.buf) = '\0'; /* Do we need this ? */ + return raw_len; /* what return here ? 
*/ +} +#endif /* FT_HAVE_UTF8 */ + +#if defined(FT_HAVE_WCHAR) +#define WCS_SIZE 64 + +static +int wsnprint_n_string(wchar_t *buf, size_t length, size_t n, const char *str) +{ + size_t str_len = strlen(str); + + /* note: maybe it's, better to return -1 in case of multibyte character + * strings (not sure this case is done correctly). + */ + if (str_len > 1) { + const unsigned char *p = (const unsigned char *)str; + while (*p) { + if (*p <= 127) + p++; + else { + wchar_t wcs[WCS_SIZE]; + const char *ptr = str; + size_t wcs_len; + mbstate_t mbst; + memset(&mbst, 0, sizeof(mbst)); + wcs_len = mbsrtowcs(wcs, (const char **)&ptr, WCS_SIZE, &mbst); + /* for simplicity */ + if ((wcs_len == (size_t) - 1) || wcs_len > 1) { + return -1; + } else { + wcs[wcs_len] = L'\0'; + size_t k = n; + while (k) { + *buf = *wcs; + ++buf; + --k; + } + buf[n] = L'\0'; + return (int)n; + } + } + } + } + + if (length <= n * str_len) + return -1; + + if (n == 0) + return 0; + + /* To ensure valid return value it is safely not print such big strings */ + if (n * str_len > INT_MAX) + return -1; + + if (str_len == 0) + return 0; + + int status = swprintf(buf, length, L"%0*d", (int)(n * str_len), 0); + if (status < 0) + return status; + + size_t i = 0; + for (i = 0; i < n; ++i) { + const char *str_p = str; + while (*str_p) + *(buf++) = (wchar_t) * (str_p++); + } + return (int)(n * str_len); +} +#endif + + +#if defined(FT_HAVE_UTF8) +static +int u8nprint_n_strings(void *buf, size_t length, size_t n, const void *str) +{ + size_t str_size = utf8size(str) - 1; /* str_size - raw size in bytes, excluding \0 */ + if (length <= n * str_size) + return -1; + + if (n == 0) + return 0; + + /* To ensure valid return value it is safely not print such big strings */ + if (n * str_size > INT_MAX) + return -1; + + if (str_size == 0) + return 0; + + size_t i = n; + while (i) { + memcpy(buf, str, str_size); + buf = (char *)buf + str_size; + --i; + } + *(char *)buf = '\0'; + return (int)(n * str_size); +} +#endif + 
+/******************************************************** + End of file "fort_utils.c" + ********************************************************/ + + +/******************************************************** + Begin of file "properties.c" + ********************************************************/ + +/* #include "fort_utils.h" */ /* Commented by amalgamation script */ +#include <assert.h> +/* #include "properties.h" */ /* Commented by amalgamation script */ +/* #include "vector.h" */ /* Commented by amalgamation script */ + +#define FT_RESET_COLOR "\033[0m" + +static const char *fg_colors[] = { + "", + "\033[30m", + "\033[31m", + "\033[32m", + "\033[33m", + "\033[34m", + "\033[35m", + "\033[36m", + "\033[37m", + "\033[90m", + "\033[91m", + "\033[92m", + "\033[93m", + "\033[94m", + "\033[95m", + "\033[96m", + "\033[97m", +}; + +static const char *bg_colors[] = { + "", + "\033[40m", + "\033[41m", + "\033[42m", + "\033[43m", + "\033[44m", + "\033[45m", + "\033[46m", + "\033[47m", + "\033[100m", + "\033[101m", + "\033[102m", + "\033[103m", + "\033[104m", + "\033[105m", + "\033[106m", + "\033[107m", +}; + +static const char *text_styles[] = { + "", + "\033[1m", + "\033[2m", + "\033[3m", + "\033[4m", + "\033[5m", + "\033[7m", + "\033[8m", +}; + +#define UNIVERSAL_RESET_TAG "\033[0m" + +static const size_t n_fg_colors = sizeof(fg_colors) / sizeof(fg_colors[0]); +static const size_t n_bg_colors = sizeof(bg_colors) / sizeof(bg_colors[0]); +static const size_t n_styles = sizeof(text_styles) / sizeof(text_styles[0]); + +void get_style_tag_for_cell(const f_table_properties_t *props, + size_t row, size_t col, char *style_tag, size_t sz) +{ + (void)sz; + size_t i = 0; + + unsigned bg_color_number = get_cell_property_hierarchically(props, row, col, FT_CPROP_CELL_BG_COLOR); + unsigned text_style = get_cell_property_hierarchically(props, row, col, FT_CPROP_CELL_TEXT_STYLE); + + style_tag[0] = '\0'; + + if (text_style < (1U << n_styles)) { + for (i = 0; i < n_styles; ++i) { + if 
(text_style & (1 << i)) { + strcat(style_tag, text_styles[i]); + } + } + } else { + goto error; + } + + if (get_cell_property_hierarchically(props, row, col, FT_CPROP_CELL_BG_RGBCOLOR)) { + char b[20]; + + #define BGTERMRGB "\x1b[48;2;" + snprintf(b, sizeof(b), BGTERMRGB "%u;%u;%um", bg_color_number>>16, (bg_color_number&0xFF00)>>8, bg_color_number&0xFF); + strcat(style_tag, b); + } + else if (bg_color_number < n_bg_colors) { + strcat(style_tag, bg_colors[bg_color_number]); + } else { + goto error; + } + + return; + +error: + /* shouldn't be here */ + assert(0); + style_tag[0] = '\0'; + return; +} + +void get_reset_style_tag_for_cell(const f_table_properties_t *props, + size_t row, size_t col, char *reset_style_tag, size_t sz) +{ + (void)sz; + size_t i = 0; + + unsigned bg_color_number = get_cell_property_hierarchically(props, row, col, FT_CPROP_CELL_BG_COLOR); + unsigned text_style = get_cell_property_hierarchically(props, row, col, FT_CPROP_CELL_TEXT_STYLE); + + reset_style_tag[0] = '\0'; + + if (text_style < (1U << n_styles)) { + for (i = 0; i < n_styles; ++i) { + if (text_style & (1 << i)) { + if (i != 0) // FT_TSTYLE_DEFAULT + goto reset_style; + } + } + } else { + goto error; + } + +reset_style: + strcat(reset_style_tag, UNIVERSAL_RESET_TAG); + return; + +error: + /* shouldn't be here */ + assert(0); + reset_style_tag[0] = '\0'; + return; +} + + +void get_style_tag_for_content(const f_table_properties_t *props, + size_t row, size_t col, char *style_tag, size_t sz) +{ + (void)sz; + size_t i = 0; + + unsigned text_style = get_cell_property_hierarchically(props, row, col, FT_CPROP_CONT_TEXT_STYLE); + unsigned fg_color_number = get_cell_property_hierarchically(props, row, col, FT_CPROP_CONT_FG_COLOR); + unsigned bg_color_number = get_cell_property_hierarchically(props, row, col, FT_CPROP_CONT_BG_COLOR); + + style_tag[0] = '\0'; + + if (text_style < (1U << n_styles)) { + for (i = 0; i < n_styles; ++i) { + if (text_style & (1 << i)) { + strcat(style_tag, 
text_styles[i]); + } + } + } else { + goto error; + } + + if (fg_color_number < n_fg_colors) { + if (fg_color_number) + strcat(style_tag, fg_colors[fg_color_number]); + } else { + goto error; + } + + if (bg_color_number < n_bg_colors) { + strcat(style_tag, bg_colors[bg_color_number]); + } else { + goto error; + } + + return; + +error: + /* shouldn't be here */ + assert(0); + style_tag[0] = '\0'; + return; +} + +void get_reset_style_tag_for_content(const f_table_properties_t *props, + size_t row, size_t col, char *reset_style_tag, size_t sz) +{ + (void)sz; + size_t i = 0; + size_t len = 0; + + unsigned text_style = get_cell_property_hierarchically(props, row, col, FT_CPROP_CONT_TEXT_STYLE); + unsigned fg_color_number = get_cell_property_hierarchically(props, row, col, FT_CPROP_CONT_FG_COLOR); + unsigned bg_color_number = get_cell_property_hierarchically(props, row, col, FT_CPROP_CONT_BG_COLOR); + + reset_style_tag[0] = '\0'; + + if (text_style < (1U << n_styles)) { + for (i = 0; i < n_styles; ++i) { + if (text_style & (1 << i)) { + if (i != 0) // FT_TSTYLE_DEFAULT + goto reset_style; + } + } + } else { + goto error; + } + + if (fg_color_number < n_fg_colors) { + if (fg_color_number) + goto reset_style; + } else { + goto error; + } + + if (bg_color_number < n_bg_colors) { + if (bg_color_number) + goto reset_style; + } else { + goto error; + } + + return; + + +reset_style: + strcat(reset_style_tag, UNIVERSAL_RESET_TAG); + len = strlen(reset_style_tag); + get_style_tag_for_cell(props, row, col, reset_style_tag + len, sz - len); + return; + +error: + /* shouldn't be here */ + assert(0); + reset_style_tag[0] = '\0'; + return; +} + + +static struct f_cell_props g_default_cell_properties = { + FT_ANY_ROW, /* cell_row */ + FT_ANY_COLUMN, /* cell_col */ + + /* properties_flags */ + FT_CPROP_MIN_WIDTH | FT_CPROP_TEXT_ALIGN | FT_CPROP_TOP_PADDING + | FT_CPROP_BOTTOM_PADDING | FT_CPROP_LEFT_PADDING | FT_CPROP_RIGHT_PADDING + | FT_CPROP_EMPTY_STR_HEIGHT | FT_CPROP_CONT_FG_COLOR 
| FT_CPROP_CELL_BG_COLOR + | FT_CPROP_CONT_BG_COLOR | FT_CPROP_CELL_TEXT_STYLE | FT_CPROP_CONT_TEXT_STYLE, + + 0, /* col_min_width */ + FT_ALIGNED_LEFT, /* align */ + 0, /* cell_padding_top */ + 0, /* cell_padding_bottom */ + 1, /* cell_padding_left */ + 1, /* cell_padding_right */ + 1, /* cell_empty_string_height */ + + FT_ROW_COMMON, /* row_type */ + FT_COLOR_DEFAULT, /* content_fg_color_number */ + FT_COLOR_DEFAULT, /* content_bg_color_number */ + FT_COLOR_DEFAULT, /* cell_bg_color_number */ + FT_TSTYLE_DEFAULT, /* cell_text_style */ + FT_TSTYLE_DEFAULT, /* content_text_style */ + false, +}; + +static int get_prop_value_if_exists_otherwise_default(const struct f_cell_props *cell_opts, uint32_t property) +{ + if (cell_opts == NULL || !PROP_IS_SET(cell_opts->properties_flags, property)) { + cell_opts = &g_default_cell_properties; + } + + switch (property) { + case FT_CPROP_MIN_WIDTH: + return cell_opts->col_min_width; + case FT_CPROP_TEXT_ALIGN: + return cell_opts->align; + case FT_CPROP_TOP_PADDING: + return cell_opts->cell_padding_top; + case FT_CPROP_BOTTOM_PADDING: + return cell_opts->cell_padding_bottom; + case FT_CPROP_LEFT_PADDING: + return cell_opts->cell_padding_left; + case FT_CPROP_RIGHT_PADDING: + return cell_opts->cell_padding_right; + case FT_CPROP_EMPTY_STR_HEIGHT: + return cell_opts->cell_empty_string_height; + case FT_CPROP_ROW_TYPE: + return cell_opts->row_type; + case FT_CPROP_CONT_FG_COLOR: + return cell_opts->content_fg_color_number; + case FT_CPROP_CONT_BG_COLOR: + return cell_opts->content_bg_color_number; + case FT_CPROP_CELL_BG_COLOR: + return cell_opts->cell_bg_color_number; + case FT_CPROP_CELL_BG_RGBCOLOR: + return cell_opts->rgb; + case FT_CPROP_CELL_TEXT_STYLE: + return cell_opts->cell_text_style; + case FT_CPROP_CONT_TEXT_STYLE: + return cell_opts->content_text_style; + default: + /* todo: implement later */ + exit(333); + } +} + + +FT_INTERNAL +f_cell_prop_container_t *create_cell_prop_container(void) +{ + f_cell_prop_container_t 
*ret = create_vector(sizeof(f_cell_props_t), DEFAULT_VECTOR_CAPACITY); + return ret; +} + + +FT_INTERNAL +void destroy_cell_prop_container(f_cell_prop_container_t *cont) +{ + if (cont) + destroy_vector(cont); +} + + +FT_INTERNAL +const f_cell_props_t *cget_cell_prop(const f_cell_prop_container_t *cont, size_t row, size_t col) +{ + assert(cont); + size_t sz = vector_size(cont); + size_t i = 0; + for (i = 0; i < sz; ++i) { + const f_cell_props_t *opt = &VECTOR_AT_C(cont, i, const f_cell_props_t); + if (opt->cell_row == row && opt->cell_col == col) + return opt; + } + return NULL; +} + + +FT_INTERNAL +f_cell_props_t *get_cell_prop_and_create_if_not_exists(f_cell_prop_container_t *cont, size_t row, size_t col) +{ + assert(cont); + size_t sz = vector_size(cont); + size_t i = 0; + for (i = 0; i < sz; ++i) { + f_cell_props_t *opt = &VECTOR_AT(cont, i, f_cell_props_t); + if (opt->cell_row == row && opt->cell_col == col) + return opt; + } + + f_cell_props_t opt; + if (row == FT_ANY_ROW && col == FT_ANY_COLUMN) + memcpy(&opt, &g_default_cell_properties, sizeof(f_cell_props_t)); + else + memset(&opt, 0, sizeof(f_cell_props_t)); + + opt.cell_row = row; + opt.cell_col = col; + if (FT_IS_SUCCESS(vector_push(cont, &opt))) { + return &VECTOR_AT(cont, sz, f_cell_props_t); + } + + return NULL; +} + + +FT_INTERNAL +int get_cell_property_hierarchically(const f_table_properties_t *propertiess, size_t row, size_t column, uint32_t property) +{ + assert(propertiess); + size_t row_origin = row; + + const f_cell_props_t *opt = NULL; + if (propertiess->cell_properties != NULL) { + while (1) { + opt = cget_cell_prop(propertiess->cell_properties, row, column); + if (opt != NULL && PROP_IS_SET(opt->properties_flags, property)) + break; + + if (row != FT_ANY_ROW && column != FT_ANY_COLUMN) { + row = FT_ANY_ROW; + continue; + } else if (row == FT_ANY_ROW && column != FT_ANY_COLUMN) { + row = row_origin; + column = FT_ANY_COLUMN; + continue; + } else if (row != FT_ANY_ROW && column == 
FT_ANY_COLUMN) { + row = FT_ANY_ROW; + column = FT_ANY_COLUMN; + continue; + } + + opt = NULL; + break; + } + } + + return get_prop_value_if_exists_otherwise_default(opt, property); +} + + +static f_status set_cell_property_impl(f_cell_props_t *opt, uint32_t property, int value) +{ + assert(opt); + + PROP_SET(opt->properties_flags, property); + if (PROP_IS_SET(property, FT_CPROP_MIN_WIDTH)) { + CHECK_NOT_NEGATIVE(value); + opt->col_min_width = value; + } else if (PROP_IS_SET(property, FT_CPROP_TEXT_ALIGN)) { + opt->align = (enum ft_text_alignment)value; + } else if (PROP_IS_SET(property, FT_CPROP_TOP_PADDING)) { + CHECK_NOT_NEGATIVE(value); + opt->cell_padding_top = value; + } else if (PROP_IS_SET(property, FT_CPROP_BOTTOM_PADDING)) { + CHECK_NOT_NEGATIVE(value); + opt->cell_padding_bottom = value; + } else if (PROP_IS_SET(property, FT_CPROP_LEFT_PADDING)) { + CHECK_NOT_NEGATIVE(value); + opt->cell_padding_left = value; + } else if (PROP_IS_SET(property, FT_CPROP_RIGHT_PADDING)) { + CHECK_NOT_NEGATIVE(value); + opt->cell_padding_right = value; + } else if (PROP_IS_SET(property, FT_CPROP_EMPTY_STR_HEIGHT)) { + CHECK_NOT_NEGATIVE(value); + opt->cell_empty_string_height = value; + } else if (PROP_IS_SET(property, FT_CPROP_ROW_TYPE)) { + opt->row_type = (enum ft_row_type)value; + } else if (PROP_IS_SET(property, FT_CPROP_CONT_FG_COLOR)) { + opt->content_fg_color_number = value; + } else if (PROP_IS_SET(property, FT_CPROP_CONT_BG_COLOR)) { + opt->content_bg_color_number = value; + } else if (PROP_IS_SET(property, FT_CPROP_CELL_BG_COLOR)) { + opt->cell_bg_color_number = value; + } else if (PROP_IS_SET(property, FT_CPROP_CELL_BG_RGBCOLOR)) { + opt->cell_bg_color_number = value; + opt->rgb = true; + } else if (PROP_IS_SET(property, FT_CPROP_CELL_TEXT_STYLE)) { + enum ft_text_style v = (enum ft_text_style)value; + if (v == FT_TSTYLE_DEFAULT) { + opt->cell_text_style = FT_TSTYLE_DEFAULT; + } else { + opt->cell_text_style = (enum ft_text_style)(opt->cell_text_style | v); + } 
+ } else if (PROP_IS_SET(property, FT_CPROP_CONT_TEXT_STYLE)) { + enum ft_text_style v = (enum ft_text_style)value; + if (v == FT_TSTYLE_DEFAULT) { + opt->content_text_style = v; + } else { + opt->content_text_style = (enum ft_text_style)(opt->content_text_style | v); + } + } + + return FT_SUCCESS; + +fort_fail: + return FT_EINVAL; +} + + +FT_INTERNAL +f_status set_cell_property(f_cell_prop_container_t *cont, size_t row, size_t col, uint32_t property, int value) +{ + f_cell_props_t *opt = get_cell_prop_and_create_if_not_exists(cont, row, col); + if (opt == NULL) + return FT_GEN_ERROR; + + return set_cell_property_impl(opt, property, value); + /* + PROP_SET(opt->propertiess, property); + if (PROP_IS_SET(property, FT_CPROP_MIN_WIDTH)) { + opt->col_min_width = value; + } else if (PROP_IS_SET(property, FT_CPROP_TEXT_ALIGN)) { + opt->align = value; + } + + return FT_SUCCESS; + */ +} + + +FT_INTERNAL +f_status set_default_cell_property(uint32_t property, int value) +{ + return set_cell_property_impl(&g_default_cell_properties, property, value); +} + + +#define BASIC_STYLE { \ + /* border_chars */ \ + { \ + "+", "-", "+", "+", \ + "|", "|", "|", \ + "\0", "\0", "\0", "\0", \ + "+", "-", "+", "+", \ + "+", "+", "+", "+", \ + }, \ + /* header_border_chars */ \ + { \ + "+", "-", "+", "+", \ + "|", "|", "|", \ + "+", "-", "+", "+", \ + "+", "-", "+", "+", \ + "+", "+", "+", "+", \ + }, \ + /* separator_chars */ \ + { \ + "+", "-", "+", "+", \ + "+", "+", \ + }, \ +} + +#define BASIC2_STYLE { \ + /* border_chars */ \ + { \ + "+", "-", "+", "+", \ + "|", "|", "|", \ + "+", "-", "+", "+", \ + "+", "-", "+", "+", \ + "+", "+", "+", "+", \ + }, \ + /* header_border_chars */ \ + { \ + "+", "-", "+", "+", \ + "|", "|", "|", \ + "+", "-", "+", "+", \ + "+", "-", "+", "+", \ + "+", "+", "+", "+", \ + }, \ + /* separator_chars */ \ + { \ + "+", "-", "+", "+", \ + "+", "+", \ + }, \ +} + +#define SIMPLE_STYLE { \ + /* border_chars */ \ + { \ + "\0", "\0", "\0", "\0", \ + "\0", " ", 
"\0", \ + "\0", "\0", "\0", "\0", \ + "\0", "\0", "\0", "\0", \ + "\0", "\0", "\0", "\0", \ + }, \ + /* header_border_chars */ \ + { \ + "\0", "\0", "\0", "\0", \ + "\0", " ", "\0", \ + "\0", "─", " ", "\0", \ + "\0", " ", " ", "\0", \ + " ", "─", " ", "─", \ + }, \ + /* separator_chars */ \ + { \ + "\0", "─", " ", "\0", \ + " ", " ", \ + }, \ +} + +#define PLAIN_STYLE { \ + /* border_chars */ \ + { \ + "\0", "\0", "\0", "\0", \ + "\0", " ", "\0", \ + "\0", "\0", "\0", "\0", \ + "\0", "\0", "\0", "\0", \ + "\0", "\0", "\0", "\0", \ + }, \ + /* header_border_chars */ \ + { \ + "\0", "-", "-", "\0", \ + "\0", " ", "\0", \ + "\0", "-", "-", "\0", \ + "\0", "-", "-", "\0", \ + " ", "-", " ", "-", \ + }, \ + /* separator_chars */ \ + { \ + "\0", "-", "-", "\0", \ + "-", "-", \ + }, \ +} + +#define DOT_STYLE { \ + /* border_chars */ \ + { \ + ".", ".", ".", ".", \ + ":", ":", ":", \ + "\0", "\0", "\0", "\0", \ + ":", ".", ":", ":", \ + "+", ":", "+", ":", \ + }, \ + /* header_border_chars */ \ + { \ + ".", ".", ".", ".", \ + ":", ":", ":", \ + ":", ".", ":", ":", \ + ":", ".", ":", ":", \ + "+", ".", "+", ".", \ + }, \ + /* separator_chars */ \ + { \ + ":", ".", ":", ":", \ + ":", ":", \ + }, \ +} + +#define EMPTY_STYLE { \ + /* border_chars */ \ + { \ + "\0", "\0", "\0", "\0", \ + "\0", "\0", "\0", \ + "\0", "\0", "\0", "\0", \ + "\0", "\0", "\0", "\0", \ + "\0", "\0", "\0", "\0", \ + }, \ + /* header_border_chars */ \ + { \ + "\0", "\0", "\0", "\0", \ + "\0", "\0", "\0", \ + "\0", "\0", "\0", "\0", \ + "\0", "\0", "\0", "\0", \ + "\0", "\0", "\0", "\0", \ + }, \ + /* separator_chars */ \ + { \ + "\0", " ", "\0 ", "\0", \ + "\0", "\0", \ + }, \ +} + + +#define EMPTY2_STYLE { \ + /* border_chars */ \ + { \ + " ", " ", " ", " ", \ + " ", " ", " ", \ + "\0", "\0", "\0", "\0", \ + " ", " ", " ", " ", \ + " ", " ", " ", " ", \ + }, \ + /* header_border_chars */ \ + { \ + " ", " ", " ", " ", \ + " ", " ", " ", \ + "\0", "\0", "\0", "\0", \ + " ", " ", " ", " ", \ + " ", " ", 
" ", " ", \ + }, \ + /* separator_chars */ \ + { \ + " ", " ", " ", " ", \ + " ", " ", \ + }, \ +} + +#define SOLID_STYLE { \ + /* border_chars */ \ + { \ + "┌", "─", "┬", "┐", \ + "│", "│", "│", \ + "", "", "", "", \ + "└", "─", "┴", "┘", \ + "│", "─", "│", "─", \ + }, \ + /* header_border_chars */ \ + { \ + "┌", "─", "┬", "┐", \ + "│", "│", "│", \ + "├", "─", "┼", "┤", \ + "└", "─", "┴", "┘", \ + "┼", "┬", "┼", "┴", \ + }, \ + /* separator_chars */ \ + { \ + "├", "─", "┼", "┤", \ + "┬", "┴", \ + }, \ +} + +#define SOLID_ROUND_STYLE { \ + /* border_chars */ \ + { \ + "╭", "─", "┬", "╮", \ + "│", "│", "│", \ + "", "", "", "", \ + "╰", "─", "┴", "╯", \ + "│", "─", "│", "─", \ + }, \ + /* header_border_chars */ \ + { \ + "╭", "─", "┬", "╮", \ + "│", "│", "│", \ + "├", "─", "┼", "┤", \ + "╰", "─", "┴", "╯", \ + "┼", "┬", "┼", "┴", \ + }, \ + /* separator_chars */ \ + { \ + "├", "─", "┼", "┤", \ + "┬", "┴", \ + }, \ +} + +#define NICE_STYLE { \ + /* border_chars */ \ + { \ + "╔", "═", "╦", "╗", \ + "║", "║", "║", \ + "", "", "", "", \ + "╚", "═", "╩", "╝", \ + "┣", "┻", "┣", "┳", \ + }, \ + /* header_border_chars */ \ + { \ + "╔", "═", "╦", "╗", \ + "║", "║", "║", \ + "╠", "═", "╬", "╣", \ + "╚", "═", "╩", "╝", \ + "┣", "╦", "┣", "╩", \ + }, \ + /* separator_chars */ \ + { \ + "╟", "─", "╫", "╢", \ + "╥", "╨", \ + }, \ +} + +#define DOUBLE_STYLE { \ + /* border_chars */ \ + { \ + "╔", "═", "╦", "╗", \ + "║", "║", "║", \ + "", "", "", "", \ + "╚", "═", "╩", "╝", \ + "┣", "┻", "┣", "┳", \ + }, \ + /* header_border_chars */ \ + { \ + "╔", "═", "╦", "╗", \ + "║", "║", "║", \ + "╠", "═", "╬", "╣", \ + "╚", "═", "╩", "╝", \ + "┣", "╦", "┣", "╩", \ + }, \ + /* separator_chars */ \ + { \ + "╠", "═", "╬", "╣", \ + "╦", "╩", \ + }, \ +} + + + + +#define DOUBLE2_STYLE { \ + /* border_chars */ \ + { \ + "╔", "═", "╤", "╗", \ + "║", "│", "║", \ + "╟", "─", "┼", "╢", \ + "╚", "═", "╧", "╝", \ + "├", "┬", "┤", "┴", \ + }, \ + /* header_border_chars */ \ + { \ + "╔", "═", "╤", "╗", \ 
+ "║", "│", "║", \ + "╠", "═", "╪", "╣", \ + "╚", "═", "╧", "╝", \ + "├", "╤", "┤", "╧", \ + }, \ + /* separator_chars */ \ + { \ + "╠", "═", "╪", "╣", \ + "╤", "╧", \ + }, \ +} + + +#define BOLD_STYLE { \ + /* border_chars */ \ + { \ + "┏", "━", "┳", "┓", \ + "┃", "┃", "┃", \ + "", "", "", "", \ + "┗", "━", "┻", "┛", \ + "┣", "┻", "┣", "┳", \ + }, \ + /* header_border_chars */ \ + { \ + "┏", "━", "┳", "┓", \ + "┃", "┃", "┃", \ + "┣", "━", "╋", "┫", \ + "┗", "━", "┻", "┛", \ + "┣", "┳", "┣", "┻", \ + }, \ + /* separator_chars */ \ + { \ + "┣", "━", "╋", "┫", \ + "┳", "┻", \ + }, \ +} + +#define BOLD2_STYLE { \ + /* border_chars */ \ + { \ + "┏", "━", "┯", "┓", \ + "┃", "│", "┃", \ + "┠", "─", "┼", "┨", \ + "┗", "━", "┷", "┛", \ + "┣", "┬", "┣", "┴", \ + }, \ + /* header_border_chars */ \ + { \ + "┏", "━", "┯", "┓", \ + "┃", "│", "┃", \ + "┣", "━", "┿", "┫", \ + "┗", "━", "┷", "┛", \ + "┣", "┯", "┣", "┷", \ + }, \ + /* separator_chars */ \ + { \ + "┣", "━", "┿", "┫", \ + "┯", "┷", \ + }, \ +} + +#define FRAME_STYLE { \ + /* border_chars */ \ + { \ + "▛", "▀", "▀", "▜", \ + "▌", "┃", "▐", \ + "", "", "", "", \ + "▙", "▄", "▄", "▟", \ + "┣", "━", "┣", "━" \ + }, \ + /* header_border_chars */ \ + { \ + "▛", "▀", "▀", "▜", \ + "▌", "┃", "▐", \ + "▌", "━", "╋", "▐", \ + "▙", "▄", "▄", "▟", \ + "┣", "━", "┣", "━", \ + }, \ + /* separator_chars */ \ + { \ + "▌", "━", "╋", "▐", \ + "╋", "╋", \ + }, \ +} + + +struct fort_border_style FORT_BASIC_STYLE = BASIC_STYLE; +struct fort_border_style FORT_BASIC2_STYLE = BASIC2_STYLE; +struct fort_border_style FORT_SIMPLE_STYLE = SIMPLE_STYLE; +struct fort_border_style FORT_PLAIN_STYLE = PLAIN_STYLE; +struct fort_border_style FORT_DOT_STYLE = DOT_STYLE; +struct fort_border_style FORT_EMPTY_STYLE = EMPTY_STYLE; +struct fort_border_style FORT_EMPTY2_STYLE = EMPTY2_STYLE; +struct fort_border_style FORT_SOLID_STYLE = SOLID_STYLE; +struct fort_border_style FORT_SOLID_ROUND_STYLE = SOLID_ROUND_STYLE; +struct fort_border_style FORT_NICE_STYLE 
= NICE_STYLE; +struct fort_border_style FORT_DOUBLE_STYLE = DOUBLE_STYLE; +struct fort_border_style FORT_DOUBLE2_STYLE = DOUBLE2_STYLE; +struct fort_border_style FORT_BOLD_STYLE = BOLD_STYLE; +struct fort_border_style FORT_BOLD2_STYLE = BOLD2_STYLE; +struct fort_border_style FORT_FRAME_STYLE = FRAME_STYLE; + + + +fort_entire_table_properties_t g_entire_table_properties = { + 0, /* left_margin */ + 0, /* top_margin */ + 0, /* right_margin */ + 0, /* bottom_margin */ + FT_STRATEGY_REPLACE, /* add_strategy */ +}; + +static f_status set_entire_table_property_internal(fort_entire_table_properties_t *properties, uint32_t property, int value) +{ + assert(properties); + CHECK_NOT_NEGATIVE(value); + if (PROP_IS_SET(property, FT_TPROP_LEFT_MARGIN)) { + properties->left_margin = value; + } else if (PROP_IS_SET(property, FT_TPROP_TOP_MARGIN)) { + properties->top_margin = value; + } else if (PROP_IS_SET(property, FT_TPROP_RIGHT_MARGIN)) { + properties->right_margin = value; + } else if (PROP_IS_SET(property, FT_TPROP_BOTTOM_MARGIN)) { + properties->bottom_margin = value; + } else if (PROP_IS_SET(property, FT_TPROP_ADDING_STRATEGY)) { + properties->add_strategy = (enum ft_adding_strategy)value; + } else { + return FT_EINVAL; + } + return FT_SUCCESS; + +fort_fail: + return FT_EINVAL; +} + + +FT_INTERNAL +f_status set_entire_table_property(f_table_properties_t *table_properties, uint32_t property, int value) +{ + assert(table_properties); + return set_entire_table_property_internal(&table_properties->entire_table_properties, property, value); +} + + +FT_INTERNAL +f_status set_default_entire_table_property(uint32_t property, int value) +{ + return set_entire_table_property_internal(&g_entire_table_properties, property, value); +} + + +FT_INTERNAL +size_t max_border_elem_strlen(struct f_table_properties *properties) +{ + assert(properties); + size_t result = 1; + int i = 0; + for (i = 0; i < BORDER_ITEM_POS_SIZE; ++i) { + result = MAX(result, 
strlen(properties->border_style.border_chars[i])); + } + + for (i = 0; i < BORDER_ITEM_POS_SIZE; ++i) { + result = MAX(result, strlen(properties->border_style.header_border_chars[i])); + } + + for (i = 0; i < SEPARATOR_ITEM_POS_SIZE; ++i) { + result = MAX(result, strlen(properties->border_style.separator_chars[i])); + } + return result; +} + + +f_table_properties_t g_table_properties = { + /* border_style */ + BASIC_STYLE, + NULL, /* cell_properties */ + /* entire_table_properties */ + { + 0, /* left_margin */ + 0, /* top_margin */ + 0, /* right_margin */ + 0, /* bottom_margin */ + FT_STRATEGY_REPLACE, /* add_strategy */ + } +}; + + +FT_INTERNAL +f_table_properties_t *create_table_properties(void) +{ + f_table_properties_t *properties = (f_table_properties_t *)F_CALLOC(sizeof(f_table_properties_t), 1); + if (properties == NULL) { + return NULL; + } + memcpy(properties, &g_table_properties, sizeof(f_table_properties_t)); + properties->cell_properties = create_cell_prop_container(); + if (properties->cell_properties == NULL) { + destroy_table_properties(properties); + return NULL; + } + memcpy(&properties->entire_table_properties, &g_entire_table_properties, sizeof(fort_entire_table_properties_t)); + return properties; +} + +FT_INTERNAL +void destroy_table_properties(f_table_properties_t *properties) +{ + if (properties == NULL) + return; + + if (properties->cell_properties != NULL) { + destroy_cell_prop_container(properties->cell_properties); + } + F_FREE(properties); +} + +static +f_cell_prop_container_t *copy_cell_properties(f_cell_prop_container_t *cont) +{ + f_cell_prop_container_t *result = create_cell_prop_container(); + if (result == NULL) + return NULL; + + size_t i = 0; + size_t sz = vector_size(cont); + for (i = 0; i < sz; ++i) { + f_cell_props_t *opt = (f_cell_props_t *)vector_at(cont, i); + if (FT_IS_ERROR(vector_push(result, opt))) { + destroy_cell_prop_container(result); + return NULL; + } + } + return result; +} + +FT_INTERNAL +f_table_properties_t 
*copy_table_properties(const f_table_properties_t *properties) +{ + f_table_properties_t *new_opt = create_table_properties(); + if (new_opt == NULL) + return NULL; + + destroy_vector(new_opt->cell_properties); + new_opt->cell_properties = copy_cell_properties(properties->cell_properties); + if (new_opt->cell_properties == NULL) { + destroy_table_properties(new_opt); + return NULL; + } + + memcpy(&new_opt->border_style, &properties->border_style, sizeof(struct fort_border_style)); + memcpy(&new_opt->entire_table_properties, + &properties->entire_table_properties, sizeof(fort_entire_table_properties_t)); + + return new_opt; +} + +/******************************************************** + End of file "properties.c" + ********************************************************/ + + +/******************************************************** + Begin of file "row.c" + ********************************************************/ + +#include <assert.h> +#include <ctype.h> +/* #include "row.h" */ /* Commented by amalgamation script */ +/* #include "cell.h" */ /* Commented by amalgamation script */ +/* #include "string_buffer.h" */ /* Commented by amalgamation script */ +/* #include "vector.h" */ /* Commented by amalgamation script */ + + +struct f_row { + f_vector_t *cells; +}; + +static +f_row_t *create_row_impl(f_vector_t *cells) +{ + f_row_t *row = (f_row_t *)F_CALLOC(1, sizeof(f_row_t)); + if (row == NULL) + return NULL; + if (cells) { + row->cells = cells; + } else { + row->cells = create_vector(sizeof(f_cell_t *), DEFAULT_VECTOR_CAPACITY); + if (row->cells == NULL) { + F_FREE(row); + return NULL; + } + } + return row; +} + +FT_INTERNAL +f_row_t *create_row(void) +{ + return create_row_impl(NULL); +} + +static +void destroy_each_cell(f_vector_t *cells) +{ + size_t i = 0; + size_t cells_n = vector_size(cells); + for (i = 0; i < cells_n; ++i) { + f_cell_t *cell = VECTOR_AT(cells, i, f_cell_t *); + destroy_cell(cell); + } +} + +FT_INTERNAL +void destroy_row(f_row_t *row) +{ + 
if (row == NULL) + return; + + if (row->cells) { + destroy_each_cell(row->cells); + destroy_vector(row->cells); + } + + F_FREE(row); +} + +FT_INTERNAL +f_row_t *copy_row(f_row_t *row) +{ + assert(row); + f_row_t *result = create_row(); + if (result == NULL) + return NULL; + + size_t i = 0; + size_t cols_n = vector_size(row->cells); + for (i = 0; i < cols_n; ++i) { + f_cell_t *cell = VECTOR_AT(row->cells, i, f_cell_t *); + f_cell_t *new_cell = copy_cell(cell); + if (new_cell == NULL) { + destroy_row(result); + return NULL; + } + vector_push(result->cells, &new_cell); + } + + return result; +} + +FT_INTERNAL +f_row_t *split_row(f_row_t *row, size_t pos) +{ + assert(row); + + f_vector_t *cells = vector_split(row->cells, pos); + if (!cells) + return NULL; + f_row_t *tail = create_row_impl(cells); + if (!tail) { + destroy_each_cell(cells); + destroy_vector(cells); + } + return tail; +} + +FT_INTERNAL +int ft_row_erase_range(f_row_t *row, size_t left, size_t right) +{ + assert(row); + size_t cols_n = vector_size(row->cells); + if (cols_n == 0 || (right < left)) + return FT_SUCCESS; + + f_cell_t *cell = NULL; + size_t i = left; + while (i < cols_n && i <= right) { + cell = VECTOR_AT(row->cells, i, f_cell_t *); + destroy_cell(cell); + ++i; + } + size_t n_destroy = MIN(cols_n - 1, right) - left + 1; + while (n_destroy--) { + vector_erase(row->cells, left); + } + return FT_SUCCESS; +} + +FT_INTERNAL +size_t columns_in_row(const f_row_t *row) +{ + if (row == NULL || row->cells == NULL) + return 0; + + return vector_size(row->cells); +} + + +static +f_cell_t *get_cell_impl(f_row_t *row, size_t col, enum f_get_policy policy) +{ + if (row == NULL || row->cells == NULL) { + return NULL; + } + + switch (policy) { + case DONT_CREATE_ON_NULL: + if (col < columns_in_row(row)) { + return VECTOR_AT(row->cells, col, f_cell_t *); + } + return NULL; + case CREATE_ON_NULL: + while (col >= columns_in_row(row)) { + f_cell_t *new_cell = create_cell(); + if (new_cell == NULL) + return NULL; + 
if (FT_IS_ERROR(vector_push(row->cells, &new_cell))) { + destroy_cell(new_cell); + return NULL; + } + } + return VECTOR_AT(row->cells, col, f_cell_t *); + } + + assert(0 && "Shouldn't be here!"); + return NULL; +} + + +FT_INTERNAL +f_cell_t *get_cell(f_row_t *row, size_t col) +{ + return get_cell_impl(row, col, DONT_CREATE_ON_NULL); +} + + +FT_INTERNAL +const f_cell_t *get_cell_c(const f_row_t *row, size_t col) +{ + return get_cell((f_row_t *)row, col); +} + + +FT_INTERNAL +f_cell_t *get_cell_and_create_if_not_exists(f_row_t *row, size_t col) +{ + return get_cell_impl(row, col, CREATE_ON_NULL); +} + +FT_INTERNAL +f_cell_t *create_cell_in_position(f_row_t *row, size_t col) +{ + if (row == NULL || row->cells == NULL) { + return NULL; + } + + f_cell_t *new_cell = create_cell(); + if (new_cell == NULL) + return NULL; + if (FT_IS_ERROR(vector_insert(row->cells, &new_cell, col))) { + destroy_cell(new_cell); + return NULL; + } + return VECTOR_AT(row->cells, col, f_cell_t *); +} + + +FT_INTERNAL +f_status swap_row(f_row_t *cur_row, f_row_t *ins_row, size_t pos) +{ + assert(cur_row); + assert(ins_row); + size_t cur_sz = vector_size(cur_row->cells); + if (cur_sz == 0 && pos == 0) { + f_row_t tmp; + memcpy(&tmp, cur_row, sizeof(f_row_t)); + memcpy(cur_row, ins_row, sizeof(f_row_t)); + memcpy(ins_row, &tmp, sizeof(f_row_t)); + return FT_SUCCESS; + } + + // Append empty cells to `cur_row` if needed. + while (vector_size(cur_row->cells) < pos) { + create_cell_in_position(cur_row, vector_size(cur_row->cells)); + } + + return vector_swap(cur_row->cells, ins_row->cells, pos); +} + +/* Ownership of cells of `ins_row` is passed to `cur_row`. 
*/ +FT_INTERNAL +f_status insert_row(f_row_t *cur_row, f_row_t *ins_row, size_t pos) +{ + assert(cur_row); + assert(ins_row); + + while (vector_size(cur_row->cells) < pos) { + f_cell_t *new_cell = create_cell(); + if (!new_cell) + return FT_GEN_ERROR; + vector_push(cur_row->cells, &new_cell); + } + + size_t sz = vector_size(ins_row->cells); + size_t i = 0; + for (i = 0; i < sz; ++i) { + f_cell_t *cell = VECTOR_AT(ins_row->cells, i, f_cell_t *); + if (FT_IS_ERROR(vector_insert(cur_row->cells, &cell, pos + i))) { + /* clean up what we have inserted */ + while (i--) { + vector_erase(cur_row->cells, pos); + } + return FT_GEN_ERROR; + } + } + /* Clear cells so that it will be safe to destroy this row */ + vector_clear(ins_row->cells); + return FT_SUCCESS; +} + + +FT_INTERNAL +size_t group_cell_number(const f_row_t *row, size_t master_cell_col) +{ + assert(row); + const f_cell_t *master_cell = get_cell_c(row, master_cell_col); + if (master_cell == NULL) + return 0; + + if (get_cell_type(master_cell) != GROUP_MASTER_CELL) + return 1; + + size_t total_cols = vector_size(row->cells); + size_t slave_col = master_cell_col + 1; + while (slave_col < total_cols) { + const f_cell_t *cell = get_cell_c(row, slave_col); + if (cell && get_cell_type(cell) == GROUP_SLAVE_CELL) { + ++slave_col; + } else { + break; + } + } + return slave_col - master_cell_col; +} + + +FT_INTERNAL +int get_row_cell_types(const f_row_t *row, enum f_cell_type *types, size_t types_sz) +{ + assert(row); + assert(types); + size_t i = 0; + for (i = 0; i < types_sz; ++i) { + const f_cell_t *cell = get_cell_c(row, i); + if (cell) { + types[i] = get_cell_type(cell); + } else { + types[i] = COMMON_CELL; + } + } + return FT_SUCCESS; +} + + +FT_INTERNAL +f_status row_set_cell_span(f_row_t *row, size_t cell_column, size_t hor_span) +{ + assert(row); + + if (hor_span < 2) + return FT_EINVAL; + + f_cell_t *main_cell = get_cell_and_create_if_not_exists(row, cell_column); + if (main_cell == NULL) { + return FT_GEN_ERROR; 
+ } + set_cell_type(main_cell, GROUP_MASTER_CELL); + --hor_span; + ++cell_column; + + while (hor_span) { + f_cell_t *slave_cell = get_cell_and_create_if_not_exists(row, cell_column); + if (slave_cell == NULL) { + return FT_GEN_ERROR; + } + set_cell_type(slave_cell, GROUP_SLAVE_CELL); + --hor_span; + ++cell_column; + } + + return FT_SUCCESS; +} + +static +int print_row_separator_impl(f_conv_context_t *cntx, + const size_t *col_width_arr, size_t cols, + const f_row_t *upper_row, const f_row_t *lower_row, + enum f_hor_separator_pos separatorPos, + const f_separator_t *sep) +{ + assert(cntx); + + int status = FT_GEN_ERROR; + + const f_context_t *context = cntx->cntx; + + /* Get cell types + * + * Regions above top row and below bottom row areconsidered full of virtual + * GROUP_SLAVE_CELL cells + */ + enum f_cell_type *top_row_types = (enum f_cell_type *)F_MALLOC(sizeof(enum f_cell_type) * cols * 2); + if (top_row_types == NULL) { + return FT_MEMORY_ERROR; + } + enum f_cell_type *bottom_row_types = top_row_types + cols; + if (upper_row) { + get_row_cell_types(upper_row, top_row_types, cols); + } else { + size_t i = 0; + for (i = 0; i < cols; ++i) + top_row_types[i] = GROUP_SLAVE_CELL; + } + if (lower_row) { + get_row_cell_types(lower_row, bottom_row_types, cols); + } else { + size_t i = 0; + for (i = 0; i < cols; ++i) + bottom_row_types[i] = GROUP_SLAVE_CELL; + } + + + f_table_properties_t *properties = context->table_properties; + fort_entire_table_properties_t *entire_tprops = &properties->entire_table_properties; + + size_t written = 0; + int tmp = 0; + + enum ft_row_type lower_row_type = FT_ROW_COMMON; + if (lower_row != NULL) { + lower_row_type = (enum ft_row_type)get_cell_property_hierarchically(properties, context->row, FT_ANY_COLUMN, FT_CPROP_ROW_TYPE); + } + enum ft_row_type upper_row_type = FT_ROW_COMMON; + if (upper_row != NULL) { + upper_row_type = (enum ft_row_type)get_cell_property_hierarchically(properties, context->row - 1, FT_ANY_COLUMN, 
FT_CPROP_ROW_TYPE); + } + + /* Row separator anatomy + * + * | C11 | C12 C13 | C14 C15 | + * L I I I IV I I IT I I I IB I I II I I R + * | C21 | C22 | C23 C24 C25 | + */ + const char **L = NULL; + const char **I = NULL; + const char **IV = NULL; + const char **R = NULL; + const char **IT = NULL; + const char **IB = NULL; + const char **II = NULL; + + struct fort_border_style *border_style = &properties->border_style; + + typedef const char *(*border_chars_point_t)[BORDER_ITEM_POS_SIZE]; + const char *(*border_chars)[BORDER_ITEM_POS_SIZE] = NULL; + border_chars = (border_chars_point_t)&border_style->border_chars; + if (upper_row_type == FT_ROW_HEADER || lower_row_type == FT_ROW_HEADER) { + border_chars = (border_chars_point_t)&border_style->header_border_chars; + } + + if (sep && sep->enabled) { + L = &(border_style->separator_chars[LH_sip]); + I = &(border_style->separator_chars[IH_sip]); + IV = &(border_style->separator_chars[II_sip]); + R = &(border_style->separator_chars[RH_sip]); + + IT = &(border_style->separator_chars[TI_sip]); + IB = &(border_style->separator_chars[BI_sip]); + II = &(border_style->separator_chars[IH_sip]); + + if (lower_row == NULL) { + L = &(*border_chars)[BL_bip]; + R = &(*border_chars)[BR_bip]; + } else if (upper_row == NULL) { + L = &(*border_chars)[TL_bip]; + R = &(*border_chars)[TR_bip]; + } + } else { + switch (separatorPos) { + case TOP_SEPARATOR: + L = &(*border_chars)[TL_bip]; + I = &(*border_chars)[TT_bip]; + IV = &(*border_chars)[TV_bip]; + R = &(*border_chars)[TR_bip]; + + IT = &(*border_chars)[TV_bip]; + IB = &(*border_chars)[TV_bip]; + II = &(*border_chars)[TT_bip]; + break; + case INSIDE_SEPARATOR: + L = &(*border_chars)[LH_bip]; + I = &(*border_chars)[IH_bip]; + IV = &(*border_chars)[II_bip]; + R = &(*border_chars)[RH_bip]; + + IT = &(*border_chars)[TI_bip]; + IB = &(*border_chars)[BI_bip]; + II = &(*border_chars)[IH_bip]; + break; + case BOTTOM_SEPARATOR: + L = &(*border_chars)[BL_bip]; + I = &(*border_chars)[BB_bip]; + IV 
= &(*border_chars)[BV_bip]; + R = &(*border_chars)[BR_bip]; + + IT = &(*border_chars)[BV_bip]; + IB = &(*border_chars)[BV_bip]; + II = &(*border_chars)[BB_bip]; + break; + default: + break; + } + } + + size_t i = 0; + + /* If all chars are not printable, skip line separator */ + /* NOTE: argument of `isprint` should be explicitly converted to + * unsigned char according to + * https://en.cppreference.com/w/c/string/byte/isprint + */ + if ((strlen(*L) == 0 || (strlen(*L) == 1 && !isprint((unsigned char) **L))) + && (strlen(*I) == 0 || (strlen(*I) == 1 && !isprint((unsigned char) **I))) + && (strlen(*IV) == 0 || (strlen(*IV) == 1 && !isprint((unsigned char) **IV))) + && (strlen(*R) == 0 || (strlen(*R) == 1 && !isprint((unsigned char) **R)))) { + status = 0; + goto clear; + } + + /* Print left margin */ + CHCK_RSLT_ADD_TO_WRITTEN(print_n_strings(cntx, entire_tprops->left_margin, FT_SPACE)); + + for (i = 0; i < cols; ++i) { + if (i == 0) { + CHCK_RSLT_ADD_TO_WRITTEN(print_n_strings(cntx, 1, *L)); + } else { + if ((top_row_types[i] == COMMON_CELL || top_row_types[i] == GROUP_MASTER_CELL) + && (bottom_row_types[i] == COMMON_CELL || bottom_row_types[i] == GROUP_MASTER_CELL)) { + CHCK_RSLT_ADD_TO_WRITTEN(print_n_strings(cntx, 1, *IV)); + } else if (top_row_types[i] == GROUP_SLAVE_CELL && bottom_row_types[i] == GROUP_SLAVE_CELL) { + CHCK_RSLT_ADD_TO_WRITTEN(print_n_strings(cntx, 1, *II)); + } else if (top_row_types[i] == GROUP_SLAVE_CELL) { + CHCK_RSLT_ADD_TO_WRITTEN(print_n_strings(cntx, 1, *IT)); + } else { + CHCK_RSLT_ADD_TO_WRITTEN(print_n_strings(cntx, 1, *IB)); + } + } + CHCK_RSLT_ADD_TO_WRITTEN(print_n_strings(cntx, col_width_arr[i], *I)); + } + CHCK_RSLT_ADD_TO_WRITTEN(print_n_strings(cntx, 1, *R)); + + /* Print right margin */ + CHCK_RSLT_ADD_TO_WRITTEN(print_n_strings(cntx, entire_tprops->right_margin, FT_SPACE)); + + CHCK_RSLT_ADD_TO_WRITTEN(print_n_strings(cntx, 1, FT_NEWLINE)); + + status = (int)written; + +clear: + F_FREE(top_row_types); + return status; +} + 
+FT_INTERNAL +int print_row_separator(f_conv_context_t *cntx, + const size_t *col_width_arr, size_t cols, + const f_row_t *upper_row, const f_row_t *lower_row, + enum f_hor_separator_pos separatorPos, const f_separator_t *sep) +{ + return print_row_separator_impl(cntx, col_width_arr, cols, upper_row, lower_row, + separatorPos, sep); +} + +FT_INTERNAL +f_row_t *create_row_from_string(const char *str) +{ + typedef char char_type; + char_type *(*strdup_)(const char_type * str) = F_STRDUP; + const char_type zero_char = '\0'; + f_status(*fill_cell_from_string_)(f_cell_t *cell, const char *str) = fill_cell_from_string; + const char_type *const zero_string = ""; +#define STRCHR strchr + + char_type *pos = NULL; + char_type *base_pos = NULL; + size_t number_of_separators = 0; + + f_row_t *row = create_row(); + if (row == NULL) + return NULL; + + if (str == NULL) + return row; + + char_type *str_copy = strdup_(str); + if (str_copy == NULL) + goto clear; + + pos = str_copy; + base_pos = str_copy; + number_of_separators = 0; + while (*pos) { + pos = STRCHR(pos, g_col_separator); + if (pos != NULL) { + *(pos) = zero_char; + ++pos; + number_of_separators++; + } + + f_cell_t *cell = create_cell(); + if (cell == NULL) + goto clear; + + int status = fill_cell_from_string_(cell, base_pos); + if (FT_IS_ERROR(status)) { + destroy_cell(cell); + goto clear; + } + + status = vector_push(row->cells, &cell); + if (FT_IS_ERROR(status)) { + destroy_cell(cell); + goto clear; + } + + if (pos == NULL) + break; + base_pos = pos; + } + + /* special case if in format string last cell is empty */ + while (vector_size(row->cells) < (number_of_separators + 1)) { + f_cell_t *cell = create_cell(); + if (cell == NULL) + goto clear; + + int status = fill_cell_from_string_(cell, zero_string); + if (FT_IS_ERROR(status)) { + destroy_cell(cell); + goto clear; + } + + status = vector_push(row->cells, &cell); + if (FT_IS_ERROR(status)) { + destroy_cell(cell); + goto clear; + } + } + + F_FREE(str_copy); + 
return row; + +clear: + destroy_row(row); + F_FREE(str_copy); + return NULL; + +#undef STRCHR +} + + +#ifdef FT_HAVE_WCHAR +FT_INTERNAL +f_row_t *create_row_from_wstring(const wchar_t *str) +{ + typedef wchar_t char_type; + char_type *(*strdup_)(const char_type * str) = F_WCSDUP; + const char_type zero_char = L'\0'; + f_status(*fill_cell_from_string_)(f_cell_t *cell, const wchar_t *str) = fill_cell_from_wstring; + const char_type *const zero_string = L""; +#define STRCHR wcschr + + char_type *pos = NULL; + char_type *base_pos = NULL; + size_t number_of_separators = 0; + + f_row_t *row = create_row(); + if (row == NULL) + return NULL; + + if (str == NULL) + return row; + + char_type *str_copy = strdup_(str); + if (str_copy == NULL) + goto clear; + + pos = str_copy; + base_pos = str_copy; + number_of_separators = 0; + while (*pos) { + pos = STRCHR(pos, g_col_separator); + if (pos != NULL) { + *(pos) = zero_char; + ++pos; + number_of_separators++; + } + + f_cell_t *cell = create_cell(); + if (cell == NULL) + goto clear; + + int status = fill_cell_from_string_(cell, base_pos); + if (FT_IS_ERROR(status)) { + destroy_cell(cell); + goto clear; + } + + status = vector_push(row->cells, &cell); + if (FT_IS_ERROR(status)) { + destroy_cell(cell); + goto clear; + } + + if (pos == NULL) + break; + base_pos = pos; + } + + /* special case if in format string last cell is empty */ + while (vector_size(row->cells) < (number_of_separators + 1)) { + f_cell_t *cell = create_cell(); + if (cell == NULL) + goto clear; + + int status = fill_cell_from_string_(cell, zero_string); + if (FT_IS_ERROR(status)) { + destroy_cell(cell); + goto clear; + } + + status = vector_push(row->cells, &cell); + if (FT_IS_ERROR(status)) { + destroy_cell(cell); + goto clear; + } + } + + F_FREE(str_copy); + return row; + +clear: + destroy_row(row); + F_FREE(str_copy); + return NULL; +#undef STRCHR +} +#endif + +FT_INTERNAL +f_row_t *create_row_from_buffer(const f_string_buffer_t *buffer) +{ + switch 
(buffer->type) { + case CHAR_BUF: + return create_row_from_string(buffer->str.cstr); +#ifdef FT_HAVE_WCHAR + case W_CHAR_BUF: + return create_row_from_wstring(buffer->str.wstr); +#endif /* FT_HAVE_WCHAR */ +#ifdef FT_HAVE_UTF8 + case UTF8_BUF: + return create_row_from_string((const char *)buffer->str.u8str); +#endif /* FT_HAVE_UTF8 */ + default: + assert(0); + return NULL; + } +} + +static int +vsnprintf_buffer(f_string_buffer_t *buffer, const struct f_string_view *fmt, + va_list *va) +{ + /* Disable compiler diagnostic (format string is not a string literal) */ +#if defined(FT_CLANG_COMPILER) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wformat-nonliteral" +#endif +#if defined(FT_GCC_COMPILER) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wformat-nonliteral" +#endif + size_t width_capacity = string_buffer_width_capacity(buffer); + switch (buffer->type) { + case CHAR_BUF: + return vsnprintf(buffer->str.cstr, width_capacity, fmt->u.cstr, *va); +#ifdef FT_HAVE_WCHAR + case W_CHAR_BUF: + return vswprintf(buffer->str.wstr, width_capacity, fmt->u.wstr, *va); +#endif +#ifdef FT_HAVE_UTF8 + case UTF8_BUF: + return vsnprintf(buffer->str.cstr, width_capacity, fmt->u.cstr, *va); +#endif + default: + assert(0); + return 0; + } +#if defined(FT_CLANG_COMPILER) +#pragma clang diagnostic pop +#endif +#if defined(FT_GCC_COMPILER) +#pragma GCC diagnostic pop +#endif +} + +FT_INTERNAL +f_row_t *create_row_from_fmt_string(const struct f_string_view *fmt, va_list *va_args) +{ + f_string_buffer_t *buffer = create_string_buffer(DEFAULT_STR_BUF_SIZE, fmt->type); + if (buffer == NULL) + return NULL; + + size_t cols_origin = number_of_columns_in_format_string(fmt); + size_t cols = 0; + + while (1) { + va_list va; + va_copy(va, *va_args); + int virtual_sz = vsnprintf_buffer(buffer, fmt, &va); + va_end(va); + /* If error encountered */ + if (virtual_sz < 0) + goto clear; + + /* Successful write */ + if ((size_t)virtual_sz < 
string_buffer_width_capacity(buffer)) + break; + + /* Otherwise buffer was too small, so incr. buffer size ant try again. */ + if (!FT_IS_SUCCESS(realloc_string_buffer_without_copy(buffer))) + goto clear; + } + + cols = number_of_columns_in_format_buffer(buffer); + if (cols == cols_origin) { + f_row_t *row = create_row_from_buffer(buffer); + if (row == NULL) { + goto clear; + } + + destroy_string_buffer(buffer); + return row; + } + + if (cols_origin == 1) { + f_row_t *row = create_row(); + if (row == NULL) { + goto clear; + } + + f_cell_t *cell = get_cell_and_create_if_not_exists(row, 0); + if (cell == NULL) { + destroy_row(row); + goto clear; + } + + f_status result = fill_cell_from_buffer(cell, buffer); + if (FT_IS_ERROR(result)) { + destroy_row(row); + goto clear; + } + + destroy_string_buffer(buffer); + return row; + } + + /* + * todo: add processing of cols != cols_origin in a general way + * (when cols_origin != 1). + */ + +clear: + destroy_string_buffer(buffer); + return NULL; +} + + +FT_INTERNAL +int snprintf_row(const f_row_t *row, f_conv_context_t *cntx, size_t *col_width_arr, size_t col_width_arr_sz, + size_t row_height) +{ + const f_context_t *context = cntx->cntx; + assert(context); + + if (row == NULL) + return -1; + + size_t cols_in_row = columns_in_row(row); + if (cols_in_row > col_width_arr_sz) + return -1; + + /* Row separator anatomy + * + * L data IV data IV data R + */ + f_table_properties_t *properties = context->table_properties; + + typedef const char *(*border_chars_point_t)[BORDER_ITEM_POS_SIZE]; + enum ft_row_type row_type = (enum ft_row_type)get_cell_property_hierarchically(properties, context->row, FT_ANY_COLUMN, FT_CPROP_ROW_TYPE); + const char *(*bord_chars)[BORDER_ITEM_POS_SIZE] = (row_type == FT_ROW_HEADER) + ? 
(border_chars_point_t)(&properties->border_style.header_border_chars) + : (border_chars_point_t)(&properties->border_style.border_chars); + const char **L = &(*bord_chars)[LL_bip]; + const char **IV = &(*bord_chars)[IV_bip]; + const char **R = &(*bord_chars)[RR_bip]; + + + size_t written = 0; + int tmp = 0; + size_t i = 0; + fort_entire_table_properties_t *entire_tprops = &context->table_properties->entire_table_properties; + for (i = 0; i < row_height; ++i) { + /* Print left margin */ + CHCK_RSLT_ADD_TO_WRITTEN(print_n_strings(cntx, entire_tprops->left_margin, FT_SPACE)); + + /* Print left table boundary */ + CHCK_RSLT_ADD_TO_WRITTEN(print_n_strings(cntx, 1, *L)); + size_t j = 0; + while (j < col_width_arr_sz) { + if (j < cols_in_row) { + ((f_context_t *)context)->column = j; + f_cell_t *cell = VECTOR_AT(row->cells, j, f_cell_t *); + size_t cell_vis_width = 0; + + size_t group_slave_sz = group_cell_number(row, j); + cell_vis_width = col_width_arr[j]; + size_t slave_j = 0; + size_t master_j = j; + for (slave_j = master_j + 1; slave_j < (master_j + group_slave_sz); ++slave_j) { + cell_vis_width += col_width_arr[slave_j] + FORT_COL_SEPARATOR_LENGTH; + ++j; + } + + CHCK_RSLT_ADD_TO_WRITTEN(cell_printf(cell, i, cntx, cell_vis_width)); + } else { + /* Print empty cell */ + CHCK_RSLT_ADD_TO_WRITTEN(print_n_strings(cntx, col_width_arr[j], FT_SPACE)); + } + + /* Print boundary between cells */ + if (j < col_width_arr_sz - 1) + CHCK_RSLT_ADD_TO_WRITTEN(print_n_strings(cntx, 1, *IV)); + + ++j; + } + + /* Print right table boundary */ + CHCK_RSLT_ADD_TO_WRITTEN(print_n_strings(cntx, 1, *R)); + + /* Print right margin */ + CHCK_RSLT_ADD_TO_WRITTEN(print_n_strings(cntx, entire_tprops->right_margin, FT_SPACE)); + + /* Print new line character */ + CHCK_RSLT_ADD_TO_WRITTEN(print_n_strings(cntx, 1, FT_NEWLINE)); + } + return (int)written; + +clear: + return -1; +} + +/******************************************************** + End of file "row.c" + 
********************************************************/ + + +/******************************************************** + Begin of file "string_buffer.c" + ********************************************************/ + +/* #include "string_buffer.h" */ /* Commented by amalgamation script */ +/* #include "properties.h" */ /* Commented by amalgamation script */ +/* #include "wcwidth.h" */ /* Commented by amalgamation script */ +#include <assert.h> +#include <stddef.h> +#ifdef FT_HAVE_WCHAR +#include <wchar.h> +#endif +#if defined(FT_HAVE_UTF8) +/* #include "utf8.h" */ /* Commented by amalgamation script */ +#endif + +static ptrdiff_t str_iter_width(const char *beg, const char *end) +{ + assert(end >= beg); + return (end - beg); +} + + +#ifdef FT_HAVE_WCHAR +static ptrdiff_t wcs_iter_width(const wchar_t *beg, const wchar_t *end) +{ + assert(end >= beg); + return mk_wcswidth(beg, (size_t)(end - beg)); +} +#endif /* FT_HAVE_WCHAR */ + + +static size_t buf_str_len(const f_string_buffer_t *buf) +{ + assert(buf); + + switch (buf->type) { + case CHAR_BUF: + return strlen(buf->str.cstr); +#ifdef FT_HAVE_WCHAR + case W_CHAR_BUF: + return wcslen(buf->str.wstr); +#endif +#ifdef FT_HAVE_UTF8 + case UTF8_BUF: + return utf8len(buf->str.u8str); +#endif + } + + assert(0); + return 0; +} + + +FT_INTERNAL +size_t strchr_count(const char *str, char ch) +{ + if (str == NULL) + return 0; + + size_t count = 0; + str = strchr(str, ch); + while (str) { + count++; + str++; + str = strchr(str, ch); + } + return count; +} + +#ifdef FT_HAVE_WCHAR +FT_INTERNAL +size_t wstrchr_count(const wchar_t *str, wchar_t ch) +{ + if (str == NULL) + return 0; + + size_t count = 0; + str = wcschr(str, ch); + while (str) { + count++; + str++; + str = wcschr(str, ch); + } + return count; +} +#endif + + +#if defined(FT_HAVE_UTF8) +/* todo: do something with code below!!! 
*/ +FT_INTERNAL +void *ut8next(const void *str) +{ + utf8_int32_t out_codepoint; + return utf8codepoint(str, &out_codepoint); +} + +FT_INTERNAL +size_t utf8chr_count(const void *str, utf8_int32_t ch) +{ + if (str == NULL) + return 0; + + size_t count = 0; + str = utf8chr(str, ch); + while (str) { + count++; + str = ut8next(str); + str = utf8chr(str, ch); + } + return count; +} +#endif /* FT_HAVE_UTF8 */ + + +FT_INTERNAL +const char *str_n_substring_beg(const char *str, char ch_separator, size_t n) +{ + if (str == NULL) + return NULL; + + if (n == 0) + return str; + + str = strchr(str, ch_separator); + --n; + while (n > 0) { + if (str == NULL) + return NULL; + --n; + str++; + str = strchr(str, ch_separator); + } + return str ? (str + 1) : NULL; +} + + +#ifdef FT_HAVE_WCHAR +FT_INTERNAL +const wchar_t *wstr_n_substring_beg(const wchar_t *str, wchar_t ch_separator, size_t n) +{ + if (str == NULL) + return NULL; + + if (n == 0) + return str; + + str = wcschr(str, ch_separator); + --n; + while (n > 0) { + if (str == NULL) + return NULL; + --n; + str++; + str = wcschr(str, ch_separator); + } + return str ? (str + 1) : NULL; +} +#endif /* FT_HAVE_WCHAR */ + +#if defined(FT_HAVE_UTF8) +FT_INTERNAL +const void *utf8_n_substring_beg(const void *str, utf8_int32_t ch_separator, size_t n) +{ + if (str == NULL) + return NULL; + + if (n == 0) + return str; + + str = utf8chr(str, ch_separator); + --n; + while (n > 0) { + if (str == NULL) + return NULL; + --n; + str = ut8next(str); + str = utf8chr(str, ch_separator); + } + return str ? 
(ut8next(str)) : NULL; +} +#endif + + +FT_INTERNAL +void str_n_substring(const char *str, char ch_separator, size_t n, const char **begin, const char **end) +{ + const char *beg = str_n_substring_beg(str, ch_separator, n); + if (beg == NULL) { + *begin = NULL; + *end = NULL; + return; + } + + const char *en = strchr(beg, ch_separator); + if (en == NULL) { + en = str + strlen(str); + } + + *begin = beg; + *end = en; + return; +} + + +#ifdef FT_HAVE_WCHAR +FT_INTERNAL +void wstr_n_substring(const wchar_t *str, wchar_t ch_separator, size_t n, const wchar_t **begin, const wchar_t **end) +{ + const wchar_t *beg = wstr_n_substring_beg(str, ch_separator, n); + if (beg == NULL) { + *begin = NULL; + *end = NULL; + return; + } + + const wchar_t *en = wcschr(beg, ch_separator); + if (en == NULL) { + en = str + wcslen(str); + } + + *begin = beg; + *end = en; + return; +} +#endif /* FT_HAVE_WCHAR */ + +#if defined(FT_HAVE_UTF8) +FT_INTERNAL +void utf8_n_substring(const void *str, utf8_int32_t ch_separator, size_t n, const void **begin, const void **end) +{ + const char *beg = (const char *)utf8_n_substring_beg(str, ch_separator, n); + if (beg == NULL) { + *begin = NULL; + *end = NULL; + return; + } + + const char *en = (const char *)utf8chr(beg, ch_separator); + if (en == NULL) { + en = (const char *)str + strlen((const char *)str); + } + + *begin = beg; + *end = en; + return; +} +#endif /* FT_HAVE_UTF8 */ + + + +FT_INTERNAL +f_string_buffer_t *create_string_buffer(size_t n_chars, enum f_string_type type) +{ + size_t char_sz = 0; + switch (type) { + case CHAR_BUF: + char_sz = 1; + break; +#ifdef FT_HAVE_WCHAR + case W_CHAR_BUF: + char_sz = sizeof(wchar_t); + break; +#endif +#ifdef FT_HAVE_UTF8 + case UTF8_BUF: + char_sz = 4; + break; +#endif + } + + size_t sz = n_chars * char_sz; + f_string_buffer_t *result = (f_string_buffer_t *)F_MALLOC(sizeof(f_string_buffer_t)); + if (result == NULL) + return NULL; + result->str.data = F_MALLOC(sz); + if (result->str.data == NULL) { + 
F_FREE(result); + return NULL; + } + result->data_sz = sz; + result->type = type; + + if (sz) { + switch (type) { + case CHAR_BUF: + result->str.cstr[0] = '\0'; + break; +#ifdef FT_HAVE_WCHAR + case W_CHAR_BUF: + result->str.wstr[0] = L'\0'; + break; +#endif +#ifdef FT_HAVE_UTF8 + case UTF8_BUF: + result->str.cstr[0] = '\0'; + break; +#endif + } + } + + return result; +} + + +FT_INTERNAL +void destroy_string_buffer(f_string_buffer_t *buffer) +{ + if (buffer == NULL) + return; + F_FREE(buffer->str.data); + buffer->str.data = NULL; + F_FREE(buffer); +} + +FT_INTERNAL +f_string_buffer_t *copy_string_buffer(const f_string_buffer_t *buffer) +{ + assert(buffer); + f_string_buffer_t *result = create_string_buffer(buffer->data_sz, buffer->type); + if (result == NULL) + return NULL; + switch (buffer->type) { + case CHAR_BUF: + if (FT_IS_ERROR(fill_buffer_from_string(result, buffer->str.cstr))) { + destroy_string_buffer(result); + return NULL; + } + break; +#ifdef FT_HAVE_WCHAR + case W_CHAR_BUF: + if (FT_IS_ERROR(fill_buffer_from_wstring(result, buffer->str.wstr))) { + destroy_string_buffer(result); + return NULL; + } + break; +#endif /* FT_HAVE_WCHAR */ + default: + destroy_string_buffer(result); + return NULL; + } + return result; +} + +FT_INTERNAL +f_status realloc_string_buffer_without_copy(f_string_buffer_t *buffer) +{ + assert(buffer); + char *new_str = (char *)F_MALLOC(buffer->data_sz * 2); + if (new_str == NULL) { + return FT_MEMORY_ERROR; + } + F_FREE(buffer->str.data); + buffer->str.data = new_str; + buffer->data_sz *= 2; + return FT_SUCCESS; +} + + +FT_INTERNAL +f_status fill_buffer_from_string(f_string_buffer_t *buffer, const char *str) +{ + assert(buffer); + assert(str); + + char *copy = F_STRDUP(str); + if (copy == NULL) + return FT_MEMORY_ERROR; + + F_FREE(buffer->str.data); + buffer->str.cstr = copy; + buffer->type = CHAR_BUF; + + return FT_SUCCESS; +} + + +#ifdef FT_HAVE_WCHAR +FT_INTERNAL +f_status fill_buffer_from_wstring(f_string_buffer_t *buffer, const 
wchar_t *str) +{ + assert(buffer); + assert(str); + + wchar_t *copy = F_WCSDUP(str); + if (copy == NULL) + return FT_MEMORY_ERROR; + + F_FREE(buffer->str.data); + buffer->str.wstr = copy; + buffer->type = W_CHAR_BUF; + + return FT_SUCCESS; +} +#endif /* FT_HAVE_WCHAR */ + +#ifdef FT_HAVE_UTF8 +FT_INTERNAL +f_status fill_buffer_from_u8string(f_string_buffer_t *buffer, const void *str) +{ + assert(buffer); + assert(str); + + void *copy = F_UTF8DUP(str); + if (copy == NULL) + return FT_MEMORY_ERROR; + + F_FREE(buffer->str.u8str); + buffer->str.u8str = copy; + buffer->type = UTF8_BUF; + + return FT_SUCCESS; +} +#endif /* FT_HAVE_UTF8 */ + +FT_INTERNAL +size_t buffer_text_visible_height(const f_string_buffer_t *buffer) +{ + if (buffer == NULL || buffer->str.data == NULL || buf_str_len(buffer) == 0) { + return 0; + } + if (buffer->type == CHAR_BUF) + return 1 + strchr_count(buffer->str.cstr, '\n'); +#ifdef FT_HAVE_WCHAR + else if (buffer->type == W_CHAR_BUF) + return 1 + wstrchr_count(buffer->str.wstr, L'\n'); +#endif /* FT_HAVE_WCHAR */ +#ifdef FT_HAVE_UTF8 + else if (buffer->type == UTF8_BUF) + return 1 + utf8chr_count(buffer->str.u8str, '\n'); +#endif /* FT_HAVE_WCHAR */ + + assert(0); + return 0; +} + +FT_INTERNAL +size_t string_buffer_cod_width_capacity(const f_string_buffer_t *buffer) +{ + return string_buffer_width_capacity(buffer); +} + +FT_INTERNAL +size_t string_buffer_raw_capacity(const f_string_buffer_t *buffer) +{ + return buffer->data_sz; +} + +#ifdef FT_HAVE_UTF8 +/* User provided function to compute utf8 string visible width */ +static int (*_custom_u8strwid)(const void *beg, const void *end, size_t *width) = NULL; + +FT_INTERNAL +void buffer_set_u8strwid_func(int (*u8strwid)(const void *beg, const void *end, size_t *width)) +{ + _custom_u8strwid = u8strwid; +} + +static +size_t utf8_width(const void *beg, const void *end) +{ + if (_custom_u8strwid) { + size_t width = 0; + if (!_custom_u8strwid(beg, end, &width)) + return width; + } + + size_t sz = 
(size_t)((const char *)end - (const char *)beg); + char *tmp = (char *)F_MALLOC(sizeof(char) * (sz + 1)); + // @todo: add check to tmp + assert(tmp); + + memcpy(tmp, beg, sz); + tmp[sz] = '\0'; + size_t result = utf8width(tmp); + F_FREE(tmp); + return result; +} +#endif /* FT_HAVE_WCHAR */ + +FT_INTERNAL +size_t buffer_text_visible_width(const f_string_buffer_t *buffer) +{ + size_t max_length = 0; + if (buffer->type == CHAR_BUF) { + size_t n = 0; + while (1) { + const char *beg = NULL; + const char *end = NULL; + str_n_substring(buffer->str.cstr, '\n', n, &beg, &end); + if (beg == NULL || end == NULL) + return max_length; + + max_length = MAX(max_length, (size_t)(end - beg)); + ++n; + } +#ifdef FT_HAVE_WCHAR + } else if (buffer->type == W_CHAR_BUF) { + size_t n = 0; + while (1) { + const wchar_t *beg = NULL; + const wchar_t *end = NULL; + wstr_n_substring(buffer->str.wstr, L'\n', n, &beg, &end); + if (beg == NULL || end == NULL) + return max_length; + + int line_width = mk_wcswidth(beg, (size_t)(end - beg)); + if (line_width < 0) /* For safety */ + line_width = 0; + max_length = MAX(max_length, (size_t)line_width); + + ++n; + } +#endif /* FT_HAVE_WCHAR */ +#ifdef FT_HAVE_UTF8 + } else if (buffer->type == UTF8_BUF) { + size_t n = 0; + while (1) { + const void *beg = NULL; + const void *end = NULL; + utf8_n_substring(buffer->str.u8str, '\n', n, &beg, &end); + if (beg == NULL || end == NULL) + return max_length; + + max_length = MAX(max_length, (size_t)utf8_width(beg, end)); + ++n; + } +#endif /* FT_HAVE_WCHAR */ + } + + return max_length; /* shouldn't be here */ +} + + +static void +buffer_substring(const f_string_buffer_t *buffer, size_t buffer_row, const void **begin, const void **end, ptrdiff_t *str_it_width) +{ + switch (buffer->type) { + case CHAR_BUF: + str_n_substring(buffer->str.cstr, '\n', buffer_row, (const char **)begin, (const char **)end); + if ((*(const char **)begin) && (*(const char **)end)) + *str_it_width = str_iter_width(*(const char **)begin, 
*(const char **)end); + break; +#ifdef FT_HAVE_WCHAR + case W_CHAR_BUF: + wstr_n_substring(buffer->str.wstr, L'\n', buffer_row, (const wchar_t **)begin, (const wchar_t **)end); + if ((*(const wchar_t **)begin) && (*(const wchar_t **)end)) + *str_it_width = wcs_iter_width(*(const wchar_t **)begin, *(const wchar_t **)end); + break; +#endif /* FT_HAVE_WCHAR */ +#ifdef FT_HAVE_UTF8 + case UTF8_BUF: + utf8_n_substring(buffer->str.u8str, '\n', buffer_row, begin, end); + if ((*(const char **)begin) && (*(const char **)end)) + *str_it_width = utf8_width(*begin, *end); + break; +#endif /* FT_HAVE_UTF8 */ + default: + assert(0); + } +} + + +static int +buffer_print_range(f_conv_context_t *cntx, const void *beg, const void *end) +{ + size_t len; + switch (cntx->b_type) { + case CHAR_BUF: + len = (size_t)((const char *)end - (const char *)beg); + return ft_nprint(cntx, (const char *)beg, len); +#ifdef FT_HAVE_WCHAR + case W_CHAR_BUF: + len = (size_t)((const wchar_t *)end - (const wchar_t *)beg); + return ft_nwprint(cntx, (const wchar_t *)beg, len); +#endif /* FT_HAVE_WCHAR */ +#ifdef FT_HAVE_UTF8 + case UTF8_BUF: + return ft_nu8print(cntx, beg, end); +#endif /* FT_HAVE_UTF8 */ + default: + assert(0); + return -1; + } +} + + +FT_INTERNAL +int buffer_printf(f_string_buffer_t *buffer, size_t buffer_row, f_conv_context_t *cntx, size_t vis_width, + const char *content_style_tag, const char *reset_content_style_tag) +{ + const f_context_t *context = cntx->cntx; + f_table_properties_t *props = context->table_properties; + size_t row = context->row; + size_t column = context->column; + + if (buffer == NULL || buffer->str.data == NULL + || buffer_row >= buffer_text_visible_height(buffer)) { + return -1; + } + + size_t content_width = buffer_text_visible_width(buffer); + if (vis_width < content_width) + return -1; + + size_t left = 0; + size_t right = 0; + switch (get_cell_property_hierarchically(props, row, column, FT_CPROP_TEXT_ALIGN)) { + case FT_ALIGNED_LEFT: + left = 0; + right = 
(vis_width) - content_width; + break; + case FT_ALIGNED_CENTER: + left = ((vis_width) - content_width) / 2; + right = ((vis_width) - content_width) - left; + break; + case FT_ALIGNED_RIGHT: + left = (vis_width) - content_width; + right = 0; + break; + default: + assert(0); + break; + } + + size_t written = 0; + int tmp = 0; + ptrdiff_t str_it_width = 0; + const void *beg = NULL; + const void *end = NULL; + buffer_substring(buffer, buffer_row, &beg, &end, &str_it_width); + if (beg == NULL || end == NULL) + return -1; + if (str_it_width < 0 || content_width < (size_t)str_it_width) + return -1; + + size_t padding = content_width - (size_t)str_it_width; + + CHCK_RSLT_ADD_TO_WRITTEN(print_n_strings(cntx, left, FT_SPACE)); + CHCK_RSLT_ADD_TO_WRITTEN(print_n_strings(cntx, 1, content_style_tag)); + CHCK_RSLT_ADD_TO_WRITTEN(buffer_print_range(cntx, beg, end)); + CHCK_RSLT_ADD_TO_WRITTEN(print_n_strings(cntx, 1, reset_content_style_tag)); + CHCK_RSLT_ADD_TO_WRITTEN(print_n_strings(cntx, padding, FT_SPACE)); + CHCK_RSLT_ADD_TO_WRITTEN(print_n_strings(cntx, right, FT_SPACE)); + return (int)written; + +clear: + return -1; +} + +FT_INTERNAL +size_t string_buffer_width_capacity(const f_string_buffer_t *buffer) +{ + assert(buffer); + switch (buffer->type) { + case CHAR_BUF: + return buffer->data_sz; +#ifdef FT_HAVE_WCHAR + case W_CHAR_BUF: + return buffer->data_sz / sizeof(wchar_t); +#endif +#ifdef FT_HAVE_UTF8 + case UTF8_BUF: + return buffer->data_sz / 4; +#endif + default: + assert(0); + return 0; + } +} + + +FT_INTERNAL +void *buffer_get_data(f_string_buffer_t *buffer) +{ + assert(buffer); + return buffer->str.data; +} + +FT_INTERNAL +int buffer_check_align(f_string_buffer_t *buffer) +{ + assert(buffer); + assert(buffer->str.data); + + switch (buffer->type) { + case CHAR_BUF: + return 1; +#ifdef FT_HAVE_WCHAR + case W_CHAR_BUF: + return (((uintptr_t)buffer->str.data) & (sizeof(wchar_t) - 1)) == 0; +#endif +#ifdef FT_HAVE_UTF8 + case UTF8_BUF: + return 1; +#endif + default: + 
assert(0); + return 0; + } +} + +/******************************************************** + End of file "string_buffer.c" + ********************************************************/ + + +/******************************************************** + Begin of file "table.c" + ********************************************************/ + +/* #include "table.h" */ /* Commented by amalgamation script */ +/* #include "string_buffer.h" */ /* Commented by amalgamation script */ +/* #include "cell.h" */ /* Commented by amalgamation script */ +/* #include "vector.h" */ /* Commented by amalgamation script */ +/* #include "row.h" */ /* Commented by amalgamation script */ + +FT_INTERNAL +f_separator_t *create_separator(int enabled) +{ + f_separator_t *res = (f_separator_t *)F_CALLOC(1, sizeof(f_separator_t)); + if (res == NULL) + return NULL; + res->enabled = enabled; + return res; +} + + +FT_INTERNAL +void destroy_separator(f_separator_t *sep) +{ + F_FREE(sep); +} + + +FT_INTERNAL +f_separator_t *copy_separator(f_separator_t *sep) +{ + assert(sep); + return create_separator(sep->enabled); +} + + +static +f_row_t *get_row_impl(ft_table_t *table, size_t row, enum f_get_policy policy) +{ + if (table == NULL || table->rows == NULL) { + return NULL; + } + + switch (policy) { + case DONT_CREATE_ON_NULL: + if (row < vector_size(table->rows)) { + return VECTOR_AT(table->rows, row, f_row_t *); + } + return NULL; + case CREATE_ON_NULL: + while (row >= vector_size(table->rows)) { + f_row_t *new_row = create_row(); + if (new_row == NULL) + return NULL; + if (FT_IS_ERROR(vector_push(table->rows, &new_row))) { + destroy_row(new_row); + return NULL; + } + } + return VECTOR_AT(table->rows, row, f_row_t *); + } + + assert(0 && "Shouldn't be here!"); + return NULL; +} + + +FT_INTERNAL +f_row_t *get_row(ft_table_t *table, size_t row) +{ + return get_row_impl(table, row, DONT_CREATE_ON_NULL); +} + + +FT_INTERNAL +const f_row_t *get_row_c(const ft_table_t *table, size_t row) +{ + return 
get_row((ft_table_t *)table, row); +} + + +FT_INTERNAL +f_row_t *get_row_and_create_if_not_exists(ft_table_t *table, size_t row) +{ + return get_row_impl(table, row, CREATE_ON_NULL); +} + +FT_INTERNAL +f_string_buffer_t *get_cur_str_buffer_and_create_if_not_exists(ft_table_t *table) +{ + assert(table); + + f_row_t *row = get_row_and_create_if_not_exists(table, table->cur_row); + if (row == NULL) + return NULL; + + f_cell_t *cell = NULL; + fort_entire_table_properties_t *table_props = &table->properties->entire_table_properties; + switch (table_props->add_strategy) { + case FT_STRATEGY_INSERT: + cell = create_cell_in_position(row, table->cur_col); + break; + case FT_STRATEGY_REPLACE: + cell = get_cell_and_create_if_not_exists(row, table->cur_col); + break; + default: + assert(0 && "Unexpected situation inside libfort"); + break; + } + + if (cell == NULL) + return NULL; + + return cell_get_string_buffer(cell); +} + + +/* + * Returns number of cells (rows * cols) + */ +FT_INTERNAL +f_status get_table_sizes(const ft_table_t *table, size_t *rows, size_t *cols) +{ + *rows = 0; + *cols = 0; + if (table && table->rows) { + *rows = vector_size(table->rows); + size_t row_index = 0; + for (row_index = 0; row_index < vector_size(table->rows); ++row_index) { + f_row_t *row = VECTOR_AT(table->rows, row_index, f_row_t *); + size_t cols_in_row = columns_in_row(row); + if (cols_in_row > *cols) + *cols = cols_in_row; + } + } + return FT_SUCCESS; +} + + +FT_INTERNAL +f_status table_rows_and_cols_geometry(const ft_table_t *table, + size_t **col_width_arr_p, size_t *col_width_arr_sz, + size_t **row_height_arr_p, size_t *row_height_arr_sz, + enum f_geometry_type geom) +{ + if (table == NULL) { + return FT_GEN_ERROR; + } + + size_t max_invis_codepoints = 0; + size_t cols = 0; + size_t rows = 0; + int status = get_table_sizes(table, &rows, &cols); + if (FT_IS_ERROR(status)) + return status; + + size_t *col_width_arr = (size_t *)F_CALLOC(cols, sizeof(size_t)); + size_t *row_height_arr = 
(size_t *)F_CALLOC(rows, sizeof(size_t)); + if (col_width_arr == NULL || row_height_arr == NULL) { + F_FREE(col_width_arr); + F_FREE(row_height_arr); + return FT_GEN_ERROR; + } + + int combined_cells_found = 0; + f_context_t context; + context.table_properties = (table->properties ? table->properties : &g_table_properties); + size_t col = 0; + for (col = 0; col < cols; ++col) { + col_width_arr[col] = 0; + size_t row = 0; + for (row = 0; row < rows; ++row) { + const f_row_t *row_p = get_row_c(table, row); + const f_cell_t *cell = get_cell_c(row_p, col); + context.column = col; + context.row = row; + if (cell) { + switch (get_cell_type(cell)) { + case COMMON_CELL: + col_width_arr[col] = MAX(col_width_arr[col], cell_vis_width(cell, &context)); + break; + case GROUP_MASTER_CELL: + combined_cells_found = 1; + break; + case GROUP_SLAVE_CELL: + ; /* Do nothing */ + break; + } + row_height_arr[row] = MAX(row_height_arr[row], hint_height_cell(cell, &context)); + } else { + size_t cell_empty_string_height = get_cell_property_hierarchically(context.table_properties, context.row, context.column, FT_CPROP_EMPTY_STR_HEIGHT); + if (cell_empty_string_height) { + size_t cell_top_padding = get_cell_property_hierarchically(context.table_properties, context.row, context.column, FT_CPROP_TOP_PADDING); + size_t cell_bottom_padding = get_cell_property_hierarchically(context.table_properties, context.row, context.column, FT_CPROP_BOTTOM_PADDING); + row_height_arr[row] = MAX(row_height_arr[row], cell_empty_string_height + cell_top_padding + cell_bottom_padding); + } + } + } + + if (geom == INTERN_REPR_GEOMETRY) { + max_invis_codepoints = 0; + for (row = 0; row < rows; ++row) { + const f_row_t *row_p = get_row_c(table, row); + const f_cell_t *cell = get_cell_c(row_p, col); + if (!cell) + continue; + context.column = col; + context.row = row; + size_t inv_codepoints = cell_invis_codes_width(cell, &context); + max_invis_codepoints = MAX(max_invis_codepoints, inv_codepoints); + } + 
col_width_arr[col] += max_invis_codepoints; + } + } + + if (combined_cells_found) { + for (col = 0; col < cols; ++col) { + size_t row = 0; + for (row = 0; row < rows; ++row) { + const f_row_t *row_p = get_row_c(table, row); + const f_cell_t *cell = get_cell_c(row_p, col); + context.column = col; + context.row = row; + if (cell) { + if (get_cell_type(cell) == GROUP_MASTER_CELL) { + size_t hint_width = cell_vis_width(cell, &context); + if (geom == INTERN_REPR_GEOMETRY) { + hint_width += cell_invis_codes_width(cell, &context); + } + size_t slave_col = col + group_cell_number(row_p, col); + size_t cur_adj_col = col; + size_t group_width = col_width_arr[col]; + size_t i; + for (i = col + 1; i < slave_col; ++i) + group_width += col_width_arr[i] + FORT_COL_SEPARATOR_LENGTH; + /* adjust col. widths */ + while (1) { + if (group_width >= hint_width) + break; + col_width_arr[cur_adj_col] += 1; + group_width++; + cur_adj_col++; + if (cur_adj_col == slave_col) + cur_adj_col = col; + } + } + } + } + } + } + + /* todo: Maybe it is better to move min width checking to a particular cell + * width checking. At the moment min width includes paddings. 
Maybe it is + * better that min width weren't include paddings but be min width of the + * cell content without padding + */ + /* + if (table->properties) { + for (size_t i = 0; i < cols; ++i) { + col_width_arr[i] = MAX((int)col_width_arr[i], fort_props_column_width(table->properties, i)); + } + } + */ + + *col_width_arr_p = col_width_arr; + *col_width_arr_sz = cols; + *row_height_arr_p = row_height_arr; + *row_height_arr_sz = rows; + return FT_SUCCESS; +} + + +/* + * Returns geometry in characters + */ +FT_INTERNAL +f_status table_geometry(const ft_table_t *table, size_t *height, size_t *width) +{ + if (table == NULL) + return FT_GEN_ERROR; + + *height = 0; + *width = 0; + size_t cols = 0; + size_t rows = 0; + size_t *col_width_arr = NULL; + size_t *row_height_arr = NULL; + + int status = table_rows_and_cols_geometry(table, &col_width_arr, &cols, &row_height_arr, &rows, INTERN_REPR_GEOMETRY); + if (FT_IS_ERROR(status)) + return status; + + *width = 1 + (cols == 0 ? 1 : cols) + 1; /* for boundaries (that take 1 symbol) + newline */ + size_t i = 0; + for (i = 0; i < cols; ++i) { + *width += col_width_arr[i]; + } + + /* todo: add check for non printable horizontal row separators */ + *height = 1 + (rows == 0 ? 1 : rows); /* for boundaries (that take 1 symbol) */ + for (i = 0; i < rows; ++i) { + *height += row_height_arr[i]; + } + F_FREE(col_width_arr); + F_FREE(row_height_arr); + + f_table_properties_t *properties = table->properties; + if (properties) { + *height += properties->entire_table_properties.top_margin; + *height += properties->entire_table_properties.bottom_margin; + *width += properties->entire_table_properties.left_margin; + *width += properties->entire_table_properties.right_margin; + } + + /* Take into account that border elements can be more than one byte long */ + f_table_properties_t *table_properties = properties ? 
properties : &g_table_properties; + size_t max_border_elem_len = max_border_elem_strlen(table_properties); + *width *= max_border_elem_len; + + return FT_SUCCESS; +} + +FT_INTERNAL +f_status table_internal_codepoints_geometry(const ft_table_t *table, size_t *height, size_t *width) +{ + return table_geometry(table, height, width); +} + +/******************************************************** + End of file "table.c" + ********************************************************/ + + +/******************************************************** + Begin of file "vector.c" + ********************************************************/ + +/* #include "vector.h" */ /* Commented by amalgamation script */ +#include <assert.h> +#include <string.h> + +struct f_vector { + size_t m_size; + void *m_data; + size_t m_capacity; + size_t m_item_size; +}; + + +static int vector_reallocate_(f_vector_t *vector, size_t new_capacity) +{ + assert(vector); + assert(new_capacity > vector->m_capacity); + + size_t new_size = new_capacity * vector->m_item_size; + vector->m_data = F_REALLOC(vector->m_data, new_size); + if (vector->m_data == NULL) + return -1; + return 0; +} + + +FT_INTERNAL +f_vector_t *create_vector(size_t item_size, size_t capacity) +{ + f_vector_t *vector = (f_vector_t *)F_MALLOC(sizeof(f_vector_t)); + if (vector == NULL) { + return NULL; + } + + size_t init_size = MAX(item_size * capacity, 1); + vector->m_data = F_MALLOC(init_size); + if (vector->m_data == NULL) { + F_FREE(vector); + return NULL; + } + + vector->m_size = 0; + vector->m_capacity = capacity; + vector->m_item_size = item_size; + + return vector; +} + + +FT_INTERNAL +void destroy_vector(f_vector_t *vector) +{ + assert(vector); + F_FREE(vector->m_data); + F_FREE(vector); +} + + +FT_INTERNAL +size_t vector_size(const f_vector_t *vector) +{ + assert(vector); + return vector->m_size; +} + + +FT_INTERNAL +size_t vector_capacity(const f_vector_t *vector) +{ + assert(vector); + return vector->m_capacity; +} + + +FT_INTERNAL 
+int vector_push(f_vector_t *vector, const void *item) +{ + assert(vector); + assert(item); + + if (vector->m_size == vector->m_capacity) { + if (vector_reallocate_(vector, vector->m_capacity * 2) == -1) + return FT_GEN_ERROR; + vector->m_capacity = vector->m_capacity * 2; + } + + size_t offset = vector->m_size * vector->m_item_size; + memcpy((char *)vector->m_data + offset, item, vector->m_item_size); + + ++(vector->m_size); + + return FT_SUCCESS; +} + +FT_INTERNAL +int vector_insert(f_vector_t *vector, const void *item, size_t pos) +{ + assert(vector); + assert(item); + size_t needed_capacity = MAX(pos + 1, vector->m_size + 1); + if (vector->m_capacity < needed_capacity) { + if (vector_reallocate_(vector, needed_capacity) == -1) + return FT_GEN_ERROR; + vector->m_capacity = needed_capacity; + } + size_t offset = pos * vector->m_item_size; + if (pos >= vector->m_size) { + /* Data in the middle are not initialized */ + memcpy((char *)vector->m_data + offset, item, vector->m_item_size); + vector->m_size = pos + 1; + return FT_SUCCESS; + } else { + /* Shift following data by one position */ + memmove((char *)vector->m_data + offset + vector->m_item_size, + (char *)vector->m_data + offset, + vector->m_item_size * (vector->m_size - pos)); + memcpy((char *)vector->m_data + offset, item, vector->m_item_size); + ++(vector->m_size); + return FT_SUCCESS; + } +} + +FT_INTERNAL +f_vector_t *vector_split(f_vector_t *vector, size_t pos) +{ + size_t trailing_sz = vector->m_size > pos ? 
vector->m_size - pos : 0; + f_vector_t *new_vector = create_vector(vector->m_item_size, trailing_sz); + if (!new_vector) + return new_vector; + if (new_vector->m_capacity < trailing_sz) { + destroy_vector(new_vector); + return NULL; + } + + if (trailing_sz == 0) + return new_vector; + + size_t offset = vector->m_item_size * pos; + memcpy(new_vector->m_data, (char *)vector->m_data + offset, + trailing_sz * vector->m_item_size); + new_vector->m_size = trailing_sz; + vector->m_size = pos; + return new_vector; +} + +FT_INTERNAL +const void *vector_at_c(const f_vector_t *vector, size_t index) +{ + if (index >= vector->m_size) + return NULL; + + return (char *)vector->m_data + index * vector->m_item_size; +} + + +FT_INTERNAL +void *vector_at(f_vector_t *vector, size_t index) +{ + if (index >= vector->m_size) + return NULL; + + return (char *)vector->m_data + index * vector->m_item_size; +} + + +FT_INTERNAL +f_status vector_swap(f_vector_t *cur_vec, f_vector_t *mv_vec, size_t pos) +{ + assert(cur_vec); + assert(mv_vec); + assert(cur_vec != mv_vec); + assert(cur_vec->m_item_size == mv_vec->m_item_size); + + size_t cur_sz = vector_size(cur_vec); + size_t mv_sz = vector_size(mv_vec); + if (mv_sz == 0) { + return FT_SUCCESS; + } + + size_t min_targ_size = pos + mv_sz; + if (vector_capacity(cur_vec) < min_targ_size) { + if (vector_reallocate_(cur_vec, min_targ_size) == -1) + return FT_GEN_ERROR; + cur_vec->m_capacity = min_targ_size; + } + + size_t offset = pos * cur_vec->m_item_size; + void *tmp = NULL; + size_t new_mv_sz = 0; + if (cur_sz > pos) { + new_mv_sz = MIN(cur_sz - pos, mv_sz); + tmp = F_MALLOC(cur_vec->m_item_size * new_mv_sz); + if (tmp == NULL) { + return FT_MEMORY_ERROR; + } + } + + if (tmp) { + memcpy(tmp, + (char *)cur_vec->m_data + offset, + cur_vec->m_item_size * new_mv_sz); + } + + memcpy((char *)cur_vec->m_data + offset, + mv_vec->m_data, + cur_vec->m_item_size * mv_sz); + + if (tmp) { + memcpy(mv_vec->m_data, + tmp, + cur_vec->m_item_size * new_mv_sz); + 
} + + cur_vec->m_size = MAX(cur_vec->m_size, min_targ_size); + mv_vec->m_size = new_mv_sz; + F_FREE(tmp); + return FT_SUCCESS; +} + +FT_INTERNAL +void vector_clear(f_vector_t *vector) +{ + vector->m_size = 0; +} + +FT_INTERNAL +int vector_erase(f_vector_t *vector, size_t index) +{ + assert(vector); + + if (vector->m_size == 0 || index >= vector->m_size) + return FT_GEN_ERROR; + + memmove((char *)vector->m_data + vector->m_item_size * index, + (char *)vector->m_data + vector->m_item_size * (index + 1), + (vector->m_size - 1 - index) * vector->m_item_size); + vector->m_size--; + return FT_SUCCESS; +} + +#ifdef FT_TEST_BUILD + +f_vector_t *copy_vector(f_vector_t *v) +{ + if (v == NULL) + return NULL; + + f_vector_t *new_vector = create_vector(v->m_item_size, v->m_capacity); + if (new_vector == NULL) + return NULL; + + memcpy(new_vector->m_data, v->m_data, v->m_item_size * v->m_size); + new_vector->m_size = v->m_size ; + new_vector->m_item_size = v->m_item_size ; + return new_vector; +} + +size_t vector_index_of(const f_vector_t *vector, const void *item) +{ + assert(vector); + assert(item); + + size_t i = 0; + for (i = 0; i < vector->m_size; ++i) { + void *data_pos = (char *)vector->m_data + i * vector->m_item_size; + if (memcmp(data_pos, item, vector->m_item_size) == 0) { + return i; + } + } + return INVALID_VEC_INDEX; +} + +#endif + +/******************************************************** + End of file "vector.c" + ********************************************************/ + + +/******************************************************** + Begin of file "wcwidth.c" + ********************************************************/ + +/* + * This is an implementation of wcwidth() and wcswidth() (defined in + * IEEE Std 1002.1-2001) for Unicode. 
+ * + * http://www.opengroup.org/onlinepubs/007904975/functions/wcwidth.html + * http://www.opengroup.org/onlinepubs/007904975/functions/wcswidth.html + * + * In fixed-width output devices, Latin characters all occupy a single + * "cell" position of equal width, whereas ideographic CJK characters + * occupy two such cells. Interoperability between terminal-line + * applications and (teletype-style) character terminals using the + * UTF-8 encoding requires agreement on which character should advance + * the cursor by how many cell positions. No established formal + * standards exist at present on which Unicode character shall occupy + * how many cell positions on character terminals. These routines are + * a first attempt of defining such behavior based on simple rules + * applied to data provided by the Unicode Consortium. + * + * For some graphical characters, the Unicode standard explicitly + * defines a character-cell width via the definition of the East Asian + * FullWidth (F), Wide (W), Half-width (H), and Narrow (Na) classes. + * In all these cases, there is no ambiguity about which width a + * terminal shall use. For characters in the East Asian Ambiguous (A) + * class, the width choice depends purely on a preference of backward + * compatibility with either historic CJK or Western practice. + * Choosing single-width for these characters is easy to justify as + * the appropriate long-term solution, as the CJK practice of + * displaying these characters as double-width comes from historic + * implementation simplicity (8-bit encoded characters were displayed + * single-width and 16-bit ones double-width, even for Greek, + * Cyrillic, etc.) and not any typographic considerations. + * + * Much less clear is the choice of width for the Not East Asian + * (Neutral) class. Existing practice does not dictate a width for any + * of these characters. 
It would nevertheless make sense + * typographically to allocate two character cells to characters such + * as for instance EM SPACE or VOLUME INTEGRAL, which cannot be + * represented adequately with a single-width glyph. The following + * routines at present merely assign a single-cell width to all + * neutral characters, in the interest of simplicity. This is not + * entirely satisfactory and should be reconsidered before + * establishing a formal standard in this area. At the moment, the + * decision which Not East Asian (Neutral) characters should be + * represented by double-width glyphs cannot yet be answered by + * applying a simple rule from the Unicode database content. Setting + * up a proper standard for the behavior of UTF-8 character terminals + * will require a careful analysis not only of each Unicode character, + * but also of each presentation form, something the author of these + * routines has avoided to do so far. + * + * http://www.unicode.org/unicode/reports/tr11/ + * + * Markus Kuhn -- 2007-05-26 (Unicode 5.0) + * + * Permission to use, copy, modify, and distribute this software + * for any purpose and without fee is hereby granted. The author + * disclaims all warranties with regard to this software. 
+ * + * Latest version: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c + */ + +/* #include "wcwidth.h" */ /* Commented by amalgamation script */ + +#ifdef FT_HAVE_WCHAR + + +struct interval { + int32_t first; + int32_t last; +}; + +/* auxiliary function for binary search in interval table */ +static int bisearch(int32_t ucs, const struct interval *table, int max) +{ + int min = 0; + + if (ucs < table[0].first || ucs > table[max].last) + return 0; + while (max >= min) { + int mid = (min + max) / 2; + if (ucs > table[mid].last) + min = mid + 1; + else if (ucs < table[mid].first) + max = mid - 1; + else + return 1; + } + + return 0; +} + + +/* The following two functions define the column width of an ISO 10646 + * character as follows: + * + * - The null character (U+0000) has a column width of 0. + * + * - Other C0/C1 control characters and DEL will lead to a return + * value of -1. + * + * - Non-spacing and enclosing combining characters (general + * category code Mn or Me in the Unicode database) have a + * column width of 0. + * + * - SOFT HYPHEN (U+00AD) has a column width of 1. + * + * - Other format characters (general category code Cf in the Unicode + * database) and ZERO WIDTH SPACE (U+200B) have a column width of 0. + * + * - Hangul Jamo medial vowels and final consonants (U+1160-U+11FF) + * have a column width of 0. + * + * - Spacing characters in the East Asian Wide (W) or East Asian + * Full-width (F) category as defined in Unicode Technical + * Report #11 have a column width of 2. + * + * - All remaining characters (including all printable + * ISO 8859-1 and WGL4 characters, Unicode control characters, + * etc.) have a column width of 1. + * + * This implementation assumes that wchar_t characters are encoded + * in ISO 10646. 
+ */ + +static int mk_wcwidth(wchar_t wcs) +{ + /* sorted list of non-overlapping intervals of non-spacing characters */ + /* generated by "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c" */ + static const struct interval combining[] = { + { 0x0300, 0x036F }, { 0x0483, 0x0486 }, { 0x0488, 0x0489 }, + { 0x0591, 0x05BD }, { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 }, + { 0x05C4, 0x05C5 }, { 0x05C7, 0x05C7 }, { 0x0600, 0x0603 }, + { 0x0610, 0x0615 }, { 0x064B, 0x065E }, { 0x0670, 0x0670 }, + { 0x06D6, 0x06E4 }, { 0x06E7, 0x06E8 }, { 0x06EA, 0x06ED }, + { 0x070F, 0x070F }, { 0x0711, 0x0711 }, { 0x0730, 0x074A }, + { 0x07A6, 0x07B0 }, { 0x07EB, 0x07F3 }, { 0x0901, 0x0902 }, + { 0x093C, 0x093C }, { 0x0941, 0x0948 }, { 0x094D, 0x094D }, + { 0x0951, 0x0954 }, { 0x0962, 0x0963 }, { 0x0981, 0x0981 }, + { 0x09BC, 0x09BC }, { 0x09C1, 0x09C4 }, { 0x09CD, 0x09CD }, + { 0x09E2, 0x09E3 }, { 0x0A01, 0x0A02 }, { 0x0A3C, 0x0A3C }, + { 0x0A41, 0x0A42 }, { 0x0A47, 0x0A48 }, { 0x0A4B, 0x0A4D }, + { 0x0A70, 0x0A71 }, { 0x0A81, 0x0A82 }, { 0x0ABC, 0x0ABC }, + { 0x0AC1, 0x0AC5 }, { 0x0AC7, 0x0AC8 }, { 0x0ACD, 0x0ACD }, + { 0x0AE2, 0x0AE3 }, { 0x0B01, 0x0B01 }, { 0x0B3C, 0x0B3C }, + { 0x0B3F, 0x0B3F }, { 0x0B41, 0x0B43 }, { 0x0B4D, 0x0B4D }, + { 0x0B56, 0x0B56 }, { 0x0B82, 0x0B82 }, { 0x0BC0, 0x0BC0 }, + { 0x0BCD, 0x0BCD }, { 0x0C3E, 0x0C40 }, { 0x0C46, 0x0C48 }, + { 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 }, { 0x0CBC, 0x0CBC }, + { 0x0CBF, 0x0CBF }, { 0x0CC6, 0x0CC6 }, { 0x0CCC, 0x0CCD }, + { 0x0CE2, 0x0CE3 }, { 0x0D41, 0x0D43 }, { 0x0D4D, 0x0D4D }, + { 0x0DCA, 0x0DCA }, { 0x0DD2, 0x0DD4 }, { 0x0DD6, 0x0DD6 }, + { 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A }, { 0x0E47, 0x0E4E }, + { 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 }, { 0x0EBB, 0x0EBC }, + { 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 }, { 0x0F35, 0x0F35 }, + { 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 }, { 0x0F71, 0x0F7E }, + { 0x0F80, 0x0F84 }, { 0x0F86, 0x0F87 }, { 0x0F90, 0x0F97 }, + { 0x0F99, 0x0FBC }, { 0x0FC6, 0x0FC6 }, { 0x102D, 0x1030 }, + { 0x1032, 
0x1032 }, { 0x1036, 0x1037 }, { 0x1039, 0x1039 }, + { 0x1058, 0x1059 }, { 0x1160, 0x11FF }, { 0x135F, 0x135F }, + { 0x1712, 0x1714 }, { 0x1732, 0x1734 }, { 0x1752, 0x1753 }, + { 0x1772, 0x1773 }, { 0x17B4, 0x17B5 }, { 0x17B7, 0x17BD }, + { 0x17C6, 0x17C6 }, { 0x17C9, 0x17D3 }, { 0x17DD, 0x17DD }, + { 0x180B, 0x180D }, { 0x18A9, 0x18A9 }, { 0x1920, 0x1922 }, + { 0x1927, 0x1928 }, { 0x1932, 0x1932 }, { 0x1939, 0x193B }, + { 0x1A17, 0x1A18 }, { 0x1B00, 0x1B03 }, { 0x1B34, 0x1B34 }, + { 0x1B36, 0x1B3A }, { 0x1B3C, 0x1B3C }, { 0x1B42, 0x1B42 }, + { 0x1B6B, 0x1B73 }, { 0x1DC0, 0x1DCA }, { 0x1DFE, 0x1DFF }, + { 0x200B, 0x200F }, { 0x202A, 0x202E }, { 0x2060, 0x2063 }, + { 0x206A, 0x206F }, { 0x20D0, 0x20EF }, { 0x302A, 0x302F }, + { 0x3099, 0x309A }, { 0xA806, 0xA806 }, { 0xA80B, 0xA80B }, + { 0xA825, 0xA826 }, { 0xFB1E, 0xFB1E }, { 0xFE00, 0xFE0F }, + { 0xFE20, 0xFE23 }, { 0xFEFF, 0xFEFF }, { 0xFFF9, 0xFFFB }, + { 0x10A01, 0x10A03 }, { 0x10A05, 0x10A06 }, { 0x10A0C, 0x10A0F }, + { 0x10A38, 0x10A3A }, { 0x10A3F, 0x10A3F }, { 0x1D167, 0x1D169 }, + { 0x1D173, 0x1D182 }, { 0x1D185, 0x1D18B }, { 0x1D1AA, 0x1D1AD }, + { 0x1D242, 0x1D244 }, { 0xE0001, 0xE0001 }, { 0xE0020, 0xE007F }, + { 0xE0100, 0xE01EF } + }; + + /* We convert wchar_t to int32_t to avoid compiler warnings + * about implicit integer conversions + * https://github.com/seleznevae/libfort/issues/20 + * + * note: didn't test if we can do it + */ + int32_t ucs = (int32_t)wcs; + + /* test for 8-bit control characters */ + if (ucs == 0) + return 0; + if (ucs < 32 || (ucs >= 0x7f && ucs < 0xa0)) + return -1; + + /* binary search in table of non-spacing characters */ + if (bisearch(ucs, combining, + sizeof(combining) / sizeof(struct interval) - 1)) + return 0; + + /* if we arrive here, ucs is not a combining or C0/C1 control character */ + + return 1 + + (ucs >= 0x1100 && + (ucs <= 0x115f || /* Hangul Jamo init. 
consonants */ + ucs == 0x2329 || ucs == 0x232a || + (ucs >= 0x2e80 && ucs <= 0xa4cf && + ucs != 0x303f) || /* CJK ... Yi */ + (ucs >= 0xac00 && ucs <= 0xd7a3) || /* Hangul Syllables */ + (ucs >= 0xf900 && ucs <= 0xfaff) || /* CJK Compatibility Ideographs */ + (ucs >= 0xfe10 && ucs <= 0xfe19) || /* Vertical forms */ + (ucs >= 0xfe30 && ucs <= 0xfe6f) || /* CJK Compatibility Forms */ + (ucs >= 0xff00 && ucs <= 0xff60) || /* Fullwidth Forms */ + (ucs >= 0xffe0 && ucs <= 0xffe6) || + (ucs >= 0x20000 && ucs <= 0x2fffd) || + (ucs >= 0x30000 && ucs <= 0x3fffd))); +} + + +FT_INTERNAL +int mk_wcswidth(const wchar_t *pwcs, size_t n) +{ + int width = 0; + + for (; *pwcs && n-- > 0; pwcs++) { + int w; + if ((w = mk_wcwidth(*pwcs)) < 0) + return -1; + else + width += w; + } + + return width; +} +#endif /* FT_HAVE_WCHAR */ + +/******************************************************** + End of file "wcwidth.c" + ********************************************************/ + diff --git a/fort.h b/fort.h @@ -0,0 +1,1057 @@ +/* +libfort + +MIT License + +Copyright (c) 2017 - 2020 Seleznev Anton + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/ + +/** + * @file fort.h + * @brief Main header file describing libfort API. + * + * This file contains declarations of all libfort functions and macro + * definitions. + */ + +#ifndef LIBFORT_H +#define LIBFORT_H + +#include <stddef.h> +#include <stdlib.h> +#include <stdint.h> +#include <limits.h> + +/***************************************************************************** + * VERSION + *****************************************************************************/ + +#define LIBFORT_MAJOR_VERSION 0 +#define LIBFORT_MINOR_VERSION 5 +#define LIBFORT_REVISION 0 +#define LIBFORT_VERSION_STR "0.5.0" + + +/***************************************************************************** + * Configuration + *****************************************************************************/ + +/** + * libfort configuration macros + * (to disable wchar_t/UTF-8 support this macros should be defined) + */ +/** #define FT_CONGIG_DISABLE_WCHAR */ +/** #define FT_CONGIG_DISABLE_UTF8 */ + +#if !defined(FT_CONGIG_DISABLE_WCHAR) +#define FT_HAVE_WCHAR +#endif + +#if !defined(FT_CONGIG_DISABLE_UTF8) +#define FT_HAVE_UTF8 +#endif + + +/***************************************************************************** + * RETURN CODES + *****************************************************************************/ + +/** + * Operation successfully ended. + */ +#define FT_SUCCESS 0 + +/** + * Memory allocation failed. + */ +#define FT_MEMORY_ERROR -1 + +/** + * Invalid argument. + */ +#define FT_EINVAL -2 + +/** + * Libfort internal logic error. + * + * Usually such errors mean that something is wrong in + * libfort internal logic and in most of cases cause of + * these errors is a library bug. 
+ */ +#define FT_INTERN_ERROR -3 + +/** + * General error. + * + * Different errors that do not belong to the group of errors + * mentioned above. + */ +#define FT_GEN_ERROR -4 + + +#define FT_IS_SUCCESS(arg) ((arg) >= 0) +#define FT_IS_ERROR(arg) ((arg) < 0) + + + + +/** + * @cond HELPER_MACROS + */ + +/***************************************************************************** + * Determine compiler + *****************************************************************************/ + +#if defined(__clang__) +#define FT_CLANG_COMPILER +#elif defined(__GNUC__) +#define FT_GCC_COMPILER +#elif defined(_MSC_VER) +#define FT_MICROSOFT_COMPILER +#else +#define FT_UNDEFINED_COMPILER +#endif + + +/***************************************************************************** + * Declare inline + *****************************************************************************/ + +#if defined(__cplusplus) +#define FT_INLINE inline +#else +#define FT_INLINE __inline +#endif /* if defined(__cplusplus) */ + + +/***************************************************************************** + * C++ needs to know that types and declarations are C, not C++. 
+ *****************************************************************************/ + +#ifdef __cplusplus +# define FT_BEGIN_DECLS extern "C" { +# define FT_END_DECLS } +#else +# define FT_BEGIN_DECLS +# define FT_END_DECLS +#endif + + +/***************************************************************************** + * Helper macros + *****************************************************************************/ + +#define FT_STR_2_CAT_(arg1, arg2) \ + arg1##arg2 +#define FT_STR_2_CAT(arg1, arg2) \ + FT_STR_2_CAT_(arg1, arg2) + +/** + * @interanl + */ +static FT_INLINE int ft_check_if_string_helper(const char *str) +{ + (void)str; + return 0; +} + +/** + * @interanl + */ +static FT_INLINE int ft_check_if_wstring_helper(const wchar_t *str) +{ + (void)str; + return 0; +} + +#define FT_NARGS_IMPL_(x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15,N,...) N +#define FT_EXPAND_(x) x +#define FT_PP_NARG_(...) \ + FT_EXPAND_(FT_NARGS_IMPL_(__VA_ARGS__,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0)) + +#define FT_CHECK_IF_STR_32(checker,arg,...) (checker(arg),FT_EXPAND_(FT_CHECK_IF_STR_31(checker,__VA_ARGS__))) +#define FT_CHECK_IF_STR_31(checker,arg,...) (checker(arg),FT_EXPAND_(FT_CHECK_IF_STR_30(checker,__VA_ARGS__))) +#define FT_CHECK_IF_STR_30(checker,arg,...) (checker(arg),FT_EXPAND_(FT_CHECK_IF_STR_29(checker,__VA_ARGS__))) +#define FT_CHECK_IF_STR_29(checker,arg,...) (checker(arg),FT_EXPAND_(FT_CHECK_IF_STR_28(checker,__VA_ARGS__))) +#define FT_CHECK_IF_STR_28(checker,arg,...) (checker(arg),FT_EXPAND_(FT_CHECK_IF_STR_27(checker,__VA_ARGS__))) +#define FT_CHECK_IF_STR_27(checker,arg,...) (checker(arg),FT_EXPAND_(FT_CHECK_IF_STR_26(checker,__VA_ARGS__))) +#define FT_CHECK_IF_STR_26(checker,arg,...) (checker(arg),FT_EXPAND_(FT_CHECK_IF_STR_25(checker,__VA_ARGS__))) +#define FT_CHECK_IF_STR_25(checker,arg,...) (checker(arg),FT_EXPAND_(FT_CHECK_IF_STR_24(checker,__VA_ARGS__))) +#define FT_CHECK_IF_STR_24(checker,arg,...) 
(checker(arg),FT_EXPAND_(FT_CHECK_IF_STR_23(checker,__VA_ARGS__))) +#define FT_CHECK_IF_STR_23(checker,arg,...) (checker(arg),FT_EXPAND_(FT_CHECK_IF_STR_22(checker,__VA_ARGS__))) +#define FT_CHECK_IF_STR_22(checker,arg,...) (checker(arg),FT_EXPAND_(FT_CHECK_IF_STR_21(checker,__VA_ARGS__))) +#define FT_CHECK_IF_STR_21(checker,arg,...) (checker(arg),FT_EXPAND_(FT_CHECK_IF_STR_20(checker,__VA_ARGS__))) +#define FT_CHECK_IF_STR_20(checker,arg,...) (checker(arg),FT_EXPAND_(FT_CHECK_IF_STR_19(checker,__VA_ARGS__))) +#define FT_CHECK_IF_STR_19(checker,arg,...) (checker(arg),FT_EXPAND_(FT_CHECK_IF_STR_18(checker,__VA_ARGS__))) +#define FT_CHECK_IF_STR_18(checker,arg,...) (checker(arg),FT_EXPAND_(FT_CHECK_IF_STR_17(checker,__VA_ARGS__))) +#define FT_CHECK_IF_STR_17(checker,arg,...) (checker(arg),FT_EXPAND_(FT_CHECK_IF_STR_16(checker,__VA_ARGS__))) +#define FT_CHECK_IF_STR_16(checker,arg,...) (checker(arg),FT_EXPAND_(FT_CHECK_IF_STR_15(checker,__VA_ARGS__))) +#define FT_CHECK_IF_STR_15(checker,arg,...) (checker(arg),FT_EXPAND_(FT_CHECK_IF_STR_14(checker,__VA_ARGS__))) +#define FT_CHECK_IF_STR_14(checker,arg,...) (checker(arg),FT_EXPAND_(FT_CHECK_IF_STR_13(checker,__VA_ARGS__))) +#define FT_CHECK_IF_STR_13(checker,arg,...) (checker(arg),FT_EXPAND_(FT_CHECK_IF_STR_12(checker,__VA_ARGS__))) +#define FT_CHECK_IF_STR_12(checker,arg,...) (checker(arg),FT_EXPAND_(FT_CHECK_IF_STR_11(checker,__VA_ARGS__))) +#define FT_CHECK_IF_STR_11(checker,arg,...) (checker(arg),FT_EXPAND_(FT_CHECK_IF_STR_10(checker,__VA_ARGS__))) +#define FT_CHECK_IF_STR_10(checker,arg,...) (checker(arg),FT_EXPAND_(FT_CHECK_IF_STR_9(checker,__VA_ARGS__))) +#define FT_CHECK_IF_STR_9(checker,arg,...) (checker(arg),FT_EXPAND_(FT_CHECK_IF_STR_8(checker,__VA_ARGS__))) +#define FT_CHECK_IF_STR_8(checker,arg,...) (checker(arg),FT_EXPAND_(FT_CHECK_IF_STR_7(checker,__VA_ARGS__))) +#define FT_CHECK_IF_STR_7(checker,arg,...) 
(checker(arg),FT_EXPAND_(FT_CHECK_IF_STR_6(checker,__VA_ARGS__))) +#define FT_CHECK_IF_STR_6(checker,arg,...) (checker(arg),FT_EXPAND_(FT_CHECK_IF_STR_5(checker,__VA_ARGS__))) +#define FT_CHECK_IF_STR_5(checker,arg,...) (checker(arg),FT_EXPAND_(FT_CHECK_IF_STR_4(checker,__VA_ARGS__))) +#define FT_CHECK_IF_STR_4(checker,arg,...) (checker(arg),FT_EXPAND_(FT_CHECK_IF_STR_3(checker,__VA_ARGS__))) +#define FT_CHECK_IF_STR_3(checker,arg,...) (checker(arg),FT_EXPAND_(FT_CHECK_IF_STR_2(checker,__VA_ARGS__))) +#define FT_CHECK_IF_STR_2(checker,arg,...) (checker(arg),FT_EXPAND_(FT_CHECK_IF_STR_1(checker,__VA_ARGS__))) +#define FT_CHECK_IF_STR_1(checker,arg) (checker(arg)) + +#define FT_CHECK_IF_ARGS_ARE_STRINGS__(checker,func, ...) \ + FT_EXPAND_(func(checker,__VA_ARGS__)) +#define FT_CHECK_IF_ARGS_ARE_STRINGS_(checker,basis, n, ...) \ + FT_CHECK_IF_ARGS_ARE_STRINGS__(checker,FT_STR_2_CAT_(basis, n), __VA_ARGS__) +#define FT_CHECK_IF_ARGS_ARE_STRINGS(...) \ + FT_CHECK_IF_ARGS_ARE_STRINGS_(ft_check_if_string_helper,FT_CHECK_IF_STR_,FT_PP_NARG_(__VA_ARGS__), __VA_ARGS__) + +#ifdef FT_HAVE_WCHAR +#define CHECK_IF_ARGS_ARE_WSTRINGS(...) 
\ + FT_CHECK_IF_ARGS_ARE_STRINGS_(ft_check_if_wstring_helper,FT_CHECK_IF_STR_,FT_PP_NARG_(__VA_ARGS__), __VA_ARGS__) +#endif + +/** + * @endcond + */ + + +/***************************************************************************** + * Attribute format for argument checking + *****************************************************************************/ + +#if defined(FT_CLANG_COMPILER) || defined(FT_GCC_COMPILER) +#define FT_PRINTF_ATTRIBUTE_FORMAT(string_index, first_to_check) \ + __attribute__ ((format (printf, string_index, first_to_check))) +#else +#define FT_PRINTF_ATTRIBUTE_FORMAT(string_index, first_to_check) +#endif /* defined(FT_CLANG_COMPILER) || defined(FT_GCC_COMPILER) */ + + +/***************************************************************************** + * libfort API + *****************************************************************************/ + +FT_BEGIN_DECLS + +/** + * The main structure of libfort containing information about formatted table. + */ +struct ft_table; + +/** + * The main structure of libfort containing information about formatted table. + * + * ft_table_t objects should be created by a call to ft_create_table and + * destroyed with ft_destroy_table. + */ +typedef struct ft_table ft_table_t; + +/** + * Create formatted table. + * + * @return + * The pointer to the newly allocated ft_table_t, on success. NULL on error. + */ +ft_table_t *ft_create_table(void); + +/** + * Destroy formatted table. + * + * Destroy formatted table and free all resources allocated during table creation + * and work with it. + * + * @param table + * Pointer to formatted table previously created with ft_create_table. If + * table is a null pointer, the function does nothing. + */ +void ft_destroy_table(ft_table_t *table); + +/** + * Copy formatted table. + * + * @param table + * Pointer to formatted table previously created with ft_create_table. If + * table is a null pointer, the function returns null. 
+ * @return + * The pointer to the newly allocated ft_table_t, on success. NULL on error. + */ +ft_table_t *ft_copy_table(ft_table_t *table); + +/** + * Move current position to the first cell of the next line(row). + * + * @param table + * Pointer to formatted table. + * @return + * - 0: Success; data were written + * - (<0): In case of error. + * @note + * This function can fail only in case FT_STRATEGY_INSERT adding strategy + * was set for the table. + */ +int ft_ln(ft_table_t *table); + +/** + * Get row number of the current cell. + * + * @param table + * Pointer to formatted table. + * @return + * Row number of the current cell. + */ +size_t ft_cur_row(const ft_table_t *table); + +/** + * Get column number of the current cell. + * + * @param table + * Pointer to formatted table. + * @return + * Column number of the current cell. + */ +size_t ft_cur_col(const ft_table_t *table); + +/** + * Set current cell position. + * + * Current cell - cell that will be edited with all modifying functions + * (ft_printf, ft_write ...). + * + * @param table + * Pointer to formatted table. + * @param row + * New row number for the current cell. + * @param col + * New column number for the current cell. + */ +void ft_set_cur_cell(ft_table_t *table, size_t row, size_t col); + +/** + * Check if table is empty. + * + * @param table + * Pointer to the table. + * @return + * 1 - table is empty + * 0 - some data has been inserted + */ +int ft_is_empty(const ft_table_t *table); + +/** + * Get number of rows in the table. + * + * @param table + * Pointer to formatted table. + * @return + * Number of rows in the table. + */ +size_t ft_row_count(const ft_table_t *table); + +/** + * Get number of columns in the table. + * + * @param table + * Pointer to formatted table. + * @return + * Number of columns in the table. + */ +size_t ft_col_count(const ft_table_t *table); + +/** + * Erase range of cells. 
+ * + * Range of cells is determined by 2 points (top-left and bottom-right) (both + * ends are included). + * + * @param table + * Pointer to formatted table. + * @param top_left_row + * Row number of the top left cell in the range. + * @param top_left_col + * Column number of the top left cell in the range. + * @param bottom_right_row + * Row number of the bottom right cell in the range. + * @param bottom_right_col + * Column number of the bottom right cell in the range. + * @return + * - 0 - Operation was successfully implemented + * - (<0): In case of error + */ +int ft_erase_range(ft_table_t *table, + size_t top_left_row, size_t top_left_col, + size_t bottom_right_row, size_t bottom_right_col); + +#if defined(FT_CLANG_COMPILER) || defined(FT_GCC_COMPILER) + +/** + * Write data formatted according to the format string to a variety of table + * cells. + * + * @param table + * Pointer to formatted table. + * @param fmt + * Pointer to a null-terminated multibyte string specifying how to interpret + * the data. The format string consists of ordinary characters (except % and |), + * which are copied unchanged into the output stream, and conversion + * specifications. Conversion specifications are the same as for standard + * printf function. Character '|' (which can be changed with + * {@link ft_set_default_printf_field_separator}) in the format string is treated as + * a cell separator. + * @param ... + * Arguments specifying data to print. Similarly to standard printf-like + * functions if any argument after default conversions is not the type + * expected by the corresponding conversion specifier, or if there are fewer + * arguments than required by format, the behavior is undefined. If there are + * more arguments than required by format, the extraneous arguments are + * evaluated and ignored. + * @return + * - Number of printed cells + * - (<0): In case of error + */ +int ft_printf(ft_table_t *table, const char *fmt, ...) 
FT_PRINTF_ATTRIBUTE_FORMAT(2, 3); + +/** + * Write data formatted according to the format string to a variety of table + * cells and move current position to the first cell of the next line(row). + * + * @param table + * Pointer to formatted table. + * @param fmt + * Pointer to a null-terminated multibyte string specifying how to interpret + * the data. The format string consists of ordinary characters (except % and |), + * which are copied unchanged into the output stream, and conversion + * specifications. Conversion specifications are the same as for standard + * printf function. Character '|' (which can be changed with + * {@link ft_set_default_printf_field_separator}) in the format string is treated as + * a cell separator. + * @param ... + * Arguments specifying data to print. Similarly to standard printf-like + * functions if any argument after default conversions is not the type + * expected by the corresponding conversion specifier, or if there are fewer + * arguments than required by format, the behavior is undefined. If there are + * more arguments than required by format, the extraneous arguments are + * evaluated and ignored. + * @return + * - Number of printed cells. + * - (<0): In case of error. + */ +int ft_printf_ln(ft_table_t *table, const char *fmt, ...) FT_PRINTF_ATTRIBUTE_FORMAT(2, 3); + +#else + +/** + * @cond IGNORE_DOC + */ + +int ft_printf_impl(ft_table_t *table, const char *fmt, ...) FT_PRINTF_ATTRIBUTE_FORMAT(2, 3); +int ft_printf_ln_impl(ft_table_t *table, const char *fmt, ...) FT_PRINTF_ATTRIBUTE_FORMAT(2, 3); + +#define ft_printf(table, ...) \ + (( 0 ? fprintf(stderr, __VA_ARGS__) : 1), ft_printf_impl(table, __VA_ARGS__)) +#define ft_printf_ln(table, ...) \ + (( 0 ? fprintf(stderr, __VA_ARGS__) : 1), ft_printf_ln_impl(table, __VA_ARGS__)) + +/** + * @endcond + */ +#endif + +/** + * Set field separator for {@link ft_printf}, {@link ft_printf_ln} + * (default separator is '|'). + * + * @param separator + * New separator. 
+ */ +void ft_set_default_printf_field_separator(char separator); + + +/** + * Write strings to the table. + * + * Write specified strings to the same number of consecutive cells in the + * current row. + * + * @param table + * Pointer to formatted table. + * @param ... + * Strings to write. + * @return + * - 0: Success; data were written + * - (<0): In case of error + */ +#define ft_write(table, ...)\ + (0 ? FT_CHECK_IF_ARGS_ARE_STRINGS(__VA_ARGS__) : ft_nwrite(table, FT_PP_NARG_(__VA_ARGS__), __VA_ARGS__)) + +/** + * Write strings to the table and go to the next line. + * + * Write specified strings to the same number of consecutive cells in the + * current row and move current position to the first cell of the next + * line(row). + * + * @param table + * Pointer to formatted table. + * @param ... + * Strings to write. + * @return + * - 0: Success; data were written + * - (<0): In case of error + */ +#define ft_write_ln(table, ...)\ + (0 ? FT_CHECK_IF_ARGS_ARE_STRINGS(__VA_ARGS__) : ft_nwrite_ln(table, FT_PP_NARG_(__VA_ARGS__), __VA_ARGS__)) + +/** + * Write specified number of strings to the table. + * + * Write specified number of strings to the same number of consecutive cells in + * the current row. + * + * @note In most cases it is more preferable to use MACRO @ref ft_write instead + * of @ref ft_nwrite, which is more safe (@ref ft_write automatically counts the + * number of string arguments and at compile check that all passed arguments are + * strings). + * + * @param table + * Pointer to formatted table. + * @param count + * Number of strings to write. + * @param cell_content + * First string to write. + * @param ... + * Other strings to write. + * @return + * - 0: Success; data were written + * - (<0): In case of error + */ +int ft_nwrite(ft_table_t *table, size_t count, const char *cell_content, ...); + +/** + * Write specified number of strings to the table and go to the next line. 
+ * + * Write specified number of strings to the same number of consecutive cells + * in the current row and move current position to the first cell of the next + * line(row). + * + * @note In most cases it is more preferable to use MACRO @ref ft_write instead + * of @ref ft_nwrite, which is more safe (@ref ft_write automatically counts the + * number of string arguments and at compile check that all passed arguments are + * strings). + * + * @param table + * Pointer to formatted table. + * @param count + * Number of strings to write. + * @param cell_content + * First string to write. + * @param ... + * Other strings to write. + * @return + * - 0: Success; data were written + * - (<0): In case of error + */ +int ft_nwrite_ln(ft_table_t *table, size_t count, const char *cell_content, ...); + + + +/** + * Write strings from the array to the table. + * + * Write specified number of strings from the array to the same number of + * consecutive cells in the current row. + * + * @param table + * Pointer to formatted table. + * @param cols + * Number of elements in row_cells. + * @param row_cells + * Array of strings to write. + * @return + * - 0: Success; data were written + * - (<0): In case of error + */ +int ft_row_write(ft_table_t *table, size_t cols, const char *row_cells[]); + +/** + * Write strings from the array to the table and go to the next line. + * + * Write specified number of strings from the array to the same number of + * consecutive cells in the current row and move current position to the first + * cell of the next line(row). + * + * @param table + * Pointer to formatted table. + * @param cols + * Number of elements in row_cells. + * @param row_cells + * Array of strings to write. + * @return + * - 0: Success; data were written + * - (<0): In case of error + */ +int ft_row_write_ln(ft_table_t *table, size_t cols, const char *row_cells[]); + + +/** + * Write strings from the 2D array to the table. 
+ * + * Write specified number of strings from the 2D array to the formatted table. + * + * @param table + * Pointer to formatted table. + * @param rows + * Number of rows in the 2D array. + * @param cols + * Number of columns in the 2D array. + * @param table_cells + * 2D array of strings to write. + * @return + * - 0: Success; data were written + * - (<0): In case of error + */ +int ft_table_write(ft_table_t *table, size_t rows, size_t cols, const char *table_cells[]); + +/** + * Write strings from the 2D array to the table and go to the next line. + * + * Write specified number of strings from the 2D array to the formatted table + * and move current position to the first cell of the next line(row). + * + * @param table + * Pointer to formatted table. + * @param rows + * Number of rows in the 2D array. + * @param cols + * Number of columns in the 2D array. + * @param table_cells + * 2D array of strings to write. + * @return + * - 0: Success; data were written + * - (<0): In case of error + */ +int ft_table_write_ln(ft_table_t *table, size_t rows, size_t cols, const char *table_cells[]); + + +/** + * Add separator after the current row. + * + * @param table + * Formatted table. + * @return + * - 0: Success; separator was added. + * - (<0): In case of error + */ +int ft_add_separator(ft_table_t *table); + + +/** + * Convert table to string representation. + * + * ft_table_t has ownership of the returned pointer. So there is no need to + * free it. To take ownership user should explicitly copy the returned + * string with strdup or similar functions. + * + * Returned pointer may be later invalidated by: + * - Calling ft_destroy_table; + * - Other invocations of ft_to_string. + * + * @param table + * Formatted table. + * @return + * - The pointer to the string representation of formatted table, on success. + * - NULL on error. + */ +const char *ft_to_string(const ft_table_t *table); + + + + + + + +/** + * Structure describing border appearance. 
+ */ +struct ft_border_chars { + const char *top_border_ch; + const char *separator_ch; + const char *bottom_border_ch; + const char *side_border_ch; + const char *out_intersect_ch; + const char *in_intersect_ch; +}; + +/** + * Structure describing border style. + */ +struct ft_border_style { + struct ft_border_chars border_chs; + struct ft_border_chars header_border_chs; + const char *hor_separator_char; +}; + +/** + * @defgroup BasicStyles + * @name Built-in table border styles. + * @note Built-in border styles (FT_BASIC_STYLE, FT_BASIC2_STYLE ...) can be + * used as arguments for @ref ft_set_border_style and + * @ref ft_set_default_border_style, but their fields shouldn't be accessed + * directly because implementation doesn't guarantee that these objects are + * properly initialized. + * @{ + */ +extern const struct ft_border_style *const FT_BASIC_STYLE; +extern const struct ft_border_style *const FT_BASIC2_STYLE; +extern const struct ft_border_style *const FT_SIMPLE_STYLE; +extern const struct ft_border_style *const FT_PLAIN_STYLE; +extern const struct ft_border_style *const FT_DOT_STYLE; +extern const struct ft_border_style *const FT_EMPTY_STYLE; +extern const struct ft_border_style *const FT_EMPTY2_STYLE; +extern const struct ft_border_style *const FT_SOLID_STYLE; +extern const struct ft_border_style *const FT_SOLID_ROUND_STYLE; +extern const struct ft_border_style *const FT_NICE_STYLE; +extern const struct ft_border_style *const FT_DOUBLE_STYLE; +extern const struct ft_border_style *const FT_DOUBLE2_STYLE; +extern const struct ft_border_style *const FT_BOLD_STYLE; +extern const struct ft_border_style *const FT_BOLD2_STYLE; +extern const struct ft_border_style *const FT_FRAME_STYLE; +/** @} */ + + + +/** + * Set default border style for all new formatted tables. + * + * @param style + * Pointer to border style. + * @return + * - 0: Success; default border style was changed. 
+ * - (<0): In case of error + */ +int ft_set_default_border_style(const struct ft_border_style *style); + +/** + * Set border style for the table. + * + * @param table + * A pointer to the ft_table_t structure. + * @param style + * Pointer to border style. + * @return + * - 0: Success; table border style was changed. + * - (<0): In case of error + */ +int ft_set_border_style(ft_table_t *table, const struct ft_border_style *style); + + + +/** + * @name Special macros to define cell position (row and column). + * @{ + */ +#define FT_ANY_COLUMN (UINT_MAX) /**< Any column (can be used to refer to all cells in a row)*/ +#define FT_CUR_COLUMN (UINT_MAX - 1) /**< Current column */ +#define FT_ANY_ROW (UINT_MAX) /**< Any row (can be used to refer to all cells in a column)*/ +#define FT_CUR_ROW (UINT_MAX - 1) /**< Current row */ +/** @} */ + +#define FT_MAX_ROW_INDEX (UINT_MAX - 2) +#define FT_MAX_COL_INDEX (UINT_MAX - 2) + + +/** + * @name Cell properties identifiers. + * @{ + */ +#define FT_CPROP_MIN_WIDTH (0x01U << 0) /**< Minimum width */ +#define FT_CPROP_TEXT_ALIGN (0x01U << 1) /**< Text alignment */ +#define FT_CPROP_TOP_PADDING (0x01U << 2) /**< Top padding for cell content */ +#define FT_CPROP_BOTTOM_PADDING (0x01U << 3) /**< Bottom padding for cell content */ +#define FT_CPROP_LEFT_PADDING (0x01U << 4) /**< Left padding for cell content */ +#define FT_CPROP_RIGHT_PADDING (0x01U << 5) /**< Right padding for cell content */ +#define FT_CPROP_EMPTY_STR_HEIGHT (0x01U << 6) /**< Height of empty cell */ +#define FT_CPROP_ROW_TYPE (0x01U << 7) /**< Row type */ +#define FT_CPROP_CONT_FG_COLOR (0x01U << 8) /**< Cell content foreground text color */ +#define FT_CPROP_CELL_BG_COLOR (0x01U << 9) /**< Cell background color */ +#define FT_CPROP_CONT_BG_COLOR (0x01U << 10) /**< Cell content background color */ +#define FT_CPROP_CELL_TEXT_STYLE (0x01U << 11) /**< Cell text style */ +#define FT_CPROP_CONT_TEXT_STYLE (0x01U << 12) /**< Cell content text style */ +#define 
FT_CPROP_CELL_BG_RGBCOLOR (0x01U << 13) /**< Cell background color */ +/** @} */ + + +/** + * Colors. + */ +enum ft_color { + FT_COLOR_DEFAULT = 0, /**< Default color */ + FT_COLOR_BLACK = 1, /**< Black color*/ + FT_COLOR_RED = 2, /**< Red color */ + FT_COLOR_GREEN = 3, /**< Green color */ + FT_COLOR_YELLOW = 4, /**< Yellow color */ + FT_COLOR_BLUE = 5, /**< Blue color */ + FT_COLOR_MAGENTA = 6, /**< Magenta color */ + FT_COLOR_CYAN = 7, /**< Cyan color */ + FT_COLOR_LIGHT_GRAY = 8, /**< Light gray color */ + FT_COLOR_DARK_GRAY = 9, /**< Dark gray color */ + FT_COLOR_LIGHT_RED = 10, /**< Light red color */ + FT_COLOR_LIGHT_GREEN = 11, /**< Light green color */ + FT_COLOR_LIGHT_YELLOW = 12, /**< Light yellow color */ + FT_COLOR_LIGHT_BLUE = 13, /**< Light blue color */ + FT_COLOR_LIGHT_MAGENTA = 14, /**< Light magenta color */ + FT_COLOR_LIGHT_CYAN = 15, /**< Light cyan color */ + FT_COLOR_LIGHT_WHYTE = 16 /**< Light whyte color */ +}; + +/** + * Text styles. + */ +enum ft_text_style { + FT_TSTYLE_DEFAULT = (1U << 0), /**< Default style */ + FT_TSTYLE_BOLD = (1U << 1), /**< Bold */ + FT_TSTYLE_DIM = (1U << 2), /**< Dim */ + FT_TSTYLE_ITALIC = (1U << 3), /**< Italic */ + FT_TSTYLE_UNDERLINED = (1U << 4), /**< Underlined */ + FT_TSTYLE_BLINK = (1U << 5), /**< Blink */ + FT_TSTYLE_INVERTED = (1U << 6), /**< Reverse (invert the foreground and background colors) */ + FT_TSTYLE_HIDDEN = (1U << 7) /**< Hidden (useful for passwords) */ +}; + + +/** + * Alignment of cell content. + */ +enum ft_text_alignment { + FT_ALIGNED_LEFT = 0, /**< Align left */ + FT_ALIGNED_CENTER, /**< Align center */ + FT_ALIGNED_RIGHT /**< Align right */ +}; + +/** + * Type of table row. Determines appearance of row. + */ +enum ft_row_type { + FT_ROW_COMMON = 0, /**< Common row */ + FT_ROW_HEADER /**< Header row */ +}; + +/** + * Set default cell property for all new formatted tables. + * + * @param property + * Cell property identifier. + * @param value + * Cell property value. 
+ * @return + * - 0: Success; default cell property was changed. + * - (<0): In case of error + */ +int ft_set_default_cell_prop(uint32_t property, int value); + +/** + * Set property for the specified cell of the table. + * + * @param table + * A pointer to the ft_table_t structure. + * @param row + * Cell row. + * @param col + * Cell column. + * @param property + * Cell property identifier. + * @param value + * Cell property value. + * @return + * - 0: Success; cell property was changed. + * - (<0): In case of error + */ +int ft_set_cell_prop(ft_table_t *table, size_t row, size_t col, uint32_t property, int value); + + +/** + * @name Table properties identifiers. + * @{ + */ +#define FT_TPROP_LEFT_MARGIN (0x01U << 0) +#define FT_TPROP_TOP_MARGIN (0x01U << 1) +#define FT_TPROP_RIGHT_MARGIN (0x01U << 2) +#define FT_TPROP_BOTTOM_MARGIN (0x01U << 3) +#define FT_TPROP_ADDING_STRATEGY (0x01U << 4) +/** @} */ + +/** + * Adding strategy. + * + * Determines what happens with old content if current cell is not empty after + * adding data to it. Default strategy is FT_STRATEGY_REPLACE. + */ +enum ft_adding_strategy { + FT_STRATEGY_REPLACE = 0, /**< Replace old content. */ + FT_STRATEGY_INSERT /**< Insert new conten. Old content is shifted. */ +}; + + +/** + * Set default table property. + * + * @param property + * Table property identifier. + * @param value + * Table property value. + * @return + * - 0: Success; default table property was changed. + * - (<0): In case of error + */ +int ft_set_default_tbl_prop(uint32_t property, int value); + +/** + * Set table property. + * + * @param table + * A pointer to the ft_table_t structure. + * @param property + * Table property identifier. + * @param value + * Table property value. + * @return + * - 0: Success; default table property was changed. + * - (<0): In case of error + */ +int ft_set_tbl_prop(ft_table_t *table, uint32_t property, int value); + + +/** + * Set column span for the specified cell of the table. 
+ * + * @param table + * A pointer to the ft_table_t structure. + * @param row + * Cell row. + * @param col + * Cell column. + * @param hor_span + * Column span. + * @return + * - 0: Success; cell span was changed. + * - (<0): In case of error + */ +int ft_set_cell_span(ft_table_t *table, size_t row, size_t col, size_t hor_span); + + +/** + * Set functions for memory allocation and deallocation to be used instead of + * standard ones. + * + * @param f_malloc + * Pointer to a function for memory allocation that should be used instead of + * malloc. + * @param f_free + * Pointer to a function for memory deallocation that should be used instead + * of free. + * @note + * To return memory allocation/deallocation functions to their standard values + * set f_malloc and f_free to NULL. + */ +void ft_set_memory_funcs(void *(*f_malloc)(size_t size), void (*f_free)(void *ptr)); + + +/** + * Return string describing the `error_code`. + * + * @param error_code + * Error code returned by the library. + * @return + * String describing the error. + */ +const char *ft_strerror(int error_code); + + + +#ifdef FT_HAVE_WCHAR + + +int ft_wprintf(ft_table_t *table, const wchar_t *fmt, ...); +int ft_wprintf_ln(ft_table_t *table, const wchar_t *fmt, ...); + + +#define ft_wwrite(table, ...)\ + (0 ? CHECK_IF_ARGS_ARE_WSTRINGS(__VA_ARGS__) : ft_nwwrite(table, FT_PP_NARG_(__VA_ARGS__), __VA_ARGS__)) +#define ft_wwrite_ln(table, ...)\ + (0 ? 
CHECK_IF_ARGS_ARE_WSTRINGS(__VA_ARGS__) : ft_nwwrite_ln(table, FT_PP_NARG_(__VA_ARGS__), __VA_ARGS__)) +int ft_nwwrite(ft_table_t *table, size_t n, const wchar_t *cell_content, ...); +int ft_nwwrite_ln(ft_table_t *table, size_t n, const wchar_t *cell_content, ...); + +int ft_row_wwrite(ft_table_t *table, size_t cols, const wchar_t *row_cells[]); +int ft_row_wwrite_ln(ft_table_t *table, size_t cols, const wchar_t *row_cells[]); + +int ft_table_wwrite(ft_table_t *table, size_t rows, size_t cols, const wchar_t *table_cells[]); +int ft_table_wwrite_ln(ft_table_t *table, size_t rows, size_t cols, const wchar_t *table_cells[]); + +const wchar_t *ft_to_wstring(const ft_table_t *table); +#endif + + + +#ifdef FT_HAVE_UTF8 +#define ft_u8write(table, ...)\ + (ft_u8nwrite(table, FT_PP_NARG_(__VA_ARGS__), __VA_ARGS__)) +#define ft_u8write_ln(table, ...)\ + (ft_u8nwrite_ln(table, FT_PP_NARG_(__VA_ARGS__), __VA_ARGS__)) +int ft_u8nwrite(ft_table_t *table, size_t n, const void *cell_content, ...); +int ft_u8nwrite_ln(ft_table_t *table, size_t n, const void *cell_content, ...); + +int ft_u8printf(ft_table_t *table, const char *fmt, ...) FT_PRINTF_ATTRIBUTE_FORMAT(2, 3); +int ft_u8printf_ln(ft_table_t *table, const char *fmt, ...) FT_PRINTF_ATTRIBUTE_FORMAT(2, 3); + +const void *ft_to_u8string(const ft_table_t *table); + +/** + * Set custom function to compute visible width of UTF-8 string. + * + * libfort internally has a very simple logic to compute visible width of UTF-8 + * strings. It considers that each codepoint will occupy one position on the + * terminal in case of monowidth font (some east asians wide and fullwidth + * characters (see http://www.unicode.org/reports/tr11/tr11-33.html) will occupy + * 2 positions). This logic is very simple and covers wide range of cases. But + * obviously there a lot of cases when it is not sufficient. In such cases user + * should use some external libraries and provide an appropriate function to + * libfort. 
+ * + * @param u8strwid + * User provided function to evaluate width of UTF-8 string ( beg - start of + * UTF-8 string, end - end of UTF-8 string (not included), width - pointer to + * the result). If function succeed it should return 0, otherwise some non- + * zero value. If function returns nonzero value libfort fallbacks to default + * internal algorithm. + */ +void ft_set_u8strwid_func(int (*u8strwid)(const void *beg, const void *end, size_t *width)); + +#endif /* FT_HAVE_UTF8 */ + + +FT_END_DECLS + +#endif /* LIBFORT_H */ diff --git a/genEntities.c b/genEntities.c @@ -0,0 +1,26 @@ +#! /usr/bin/env sheepy +#include "libsheepyObject.h" + +int main(int ARGC, char** ARGV) { + + initLibsheepy(ARGV[0]); + setLogMode(LOG_DATE); + + cleanAllocateSmallJson(e); + readFileG(e, "entities.json"); + + cleanAllocateSmallJson(r); + + iter(e, C) { + cast(smallDictt*,c,C); + cleanFinishSmallArrayP(p) = getG(c, rtSmallArrayt, "codepoints"); + rune uni = getG(p, rtU32, 0); + char utf8Code[10] = init0Var; + pError0(bRune2CodeUTF8(utf8Code, uni)); + logD("%s: %s %d", iterKeyG(e), utf8Code, uni); + setG(r, iterKeyG(e), utf8Code); + } + + writeFileG(r, "e.json"); +} +// vim: set expandtab ts=2 sw=2: diff --git a/md.c b/md.c @@ -3,69 +3,122 @@ #include "libsheepyObject.h" #include "shpPackages/md4c/md4c.h" +#include "fort.h" -int argc; -char **argv; +#include "entities.h" + +/* enable/disable logging */ +#undef pLog +#define pLog(...) 
+ + +typ struct { + MD_BLOCKTYPE type; + unsigned olCount; +} blockt; + +sliceT(blockst, blockt); typedef struct { - smallArrayt *out; + smallArrayt *out; smallStringt *current; - MD_BLOCKTYPE blockquote; - MD_BLOCKTYPE li; - MD_BLOCKTYPE h; - unsigned level; - MD_BLOCKTYPE p; - MD_BLOCKTYPE em; - MD_BLOCKTYPE strong; - MD_BLOCKTYPE a; - MD_BLOCKTYPE img; - MD_BLOCKTYPE code; - MD_BLOCKTYPE del; - MD_BLOCKTYPE blockcode; - MD_BLOCKTYPE ul; - MD_BLOCKTYPE ol; - unsigned olCount; + smallArrayt *span; + smallStringt *rspan; // recursive span string, copy to current on last leave span + MD_BLOCKTYPE blockquote; + MD_BLOCKTYPE h; + MD_BLOCKTYPE p; + MD_BLOCKTYPE blockcode; + MD_BLOCKTYPE tbl; + blockst blocks; + blockst spans; + smallArrayt table; + smallArrayt row; + int bqlevel; + smallJsont *entities; + char colorCode[40]; } outt; -#define BLOCKQUOTE " " +#define BLOCKQUOTE FNT"┃"RST" " #define ADD_BLOCKQUOTE \ if(r->blockquote == MD_BLOCK_QUOTE) {\ - prependG(r->current, BLOCKQUOTE);\ + loop(r->bqlevel) {\ + prependG(r->current, BLOCKQUOTE);\ + }\ } #define ADD_BLOCKCODE \ if(r->blockcode == MD_BLOCK_CODE) {\ - pushG(r->current, BLD BGRED WHT "`````````" RST "\n");\ + pushG(r->current, BLD BGRED WHT "`````````" RST "\n" BLD MGT);\ r->blockcode = 0;\ } + +internal int leave_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata); +internal int leave_all_spans(MD_SPANTYPE type, void* detail, void* userdata); + +char *e2text[50]; // block +char *t2text[50]; // text +char *s2text[50]; // span + internal int enter_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata) { cast(outt *, r, userdata); + logD(e2text[type]); + if (not sliceIsEmpty(&r->blocks)) { + logD("stack %s",e2text[sliceLast(&r->blocks).type]); + } + + + logVarG(r->current); + logVarG(sliceCount(&r->blocks)); + switch(type) { case MD_BLOCK_DOC: /* noop */ break; case MD_BLOCK_QUOTE: r->blockquote = MD_BLOCK_QUOTE; - pushG(r->out, ""); + ADD_BLOCKQUOTE; + pErrorNULL(pushG(r->out, 
r->current)); + rallocG(r->current, ""); + inc r->bqlevel; //puts(MGT "blockquote" RST); break; - case MD_BLOCK_UL: - r->ul = MD_BLOCK_UL; - pushG(r->out, ""); + case MD_BLOCK_UL:; + if (not sliceIsEmpty(&r->blocks) and sliceLast(&r->blocks).type == MD_BLOCK_LI) { + leave_block_callback(MD_BLOCK_LI, null, userdata); + } + cast(MD_BLOCK_UL_DETAIL *, uld, detail); + logVarG(uld->is_tight); + logD("uld->mark=%c",uld->mark); + if (sliceIsEmpty(&r->blocks)) { + pushG(r->out, ""); + } //puts(GRN "ul" RST); + sliceAppend(&r->blocks, (blockt){.type=MD_BLOCK_UL}); break; - case MD_BLOCK_OL: - r->ol = MD_BLOCK_OL; - r->olCount = 1; - pushG(r->out, ""); + case MD_BLOCK_OL:; + if (not sliceIsEmpty(&r->blocks) and sliceLast(&r->blocks).type == MD_BLOCK_LI) { + leave_block_callback(MD_BLOCK_LI, null, userdata); + } + cast(MD_BLOCK_OL_DETAIL *, old, detail); + logVarG(old->start); + logVarG(old->is_tight); + logD("old->mark_delimiter=%c",old->mark_delimiter); + if (sliceIsEmpty(&r->blocks)) { + pushG(r->out, ""); + } //puts(GRN "ol" RST); + blockt t = {.type=MD_BLOCK_OL, .olCount=1}; + sliceAppend(&r->blocks, t); break; - case MD_BLOCK_LI: - r->li = MD_BLOCK_LI; + case MD_BLOCK_LI:; + cast(MD_BLOCK_LI_DETAIL *, lid, detail); + logVarG(lid->is_task); + logD("lid->task_mark=%c",lid->task_mark); + logVarG(lid->task_mark_offset); if(!isEmptyG(r->current)) { ADD_BLOCKQUOTE ADD_BLOCKCODE @@ -74,14 +127,13 @@ enter_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata) rallocG(r->current, ""); } //puts(GRN "li" RST); + sliceAppend(&r->blocks, (blockt){.type=MD_BLOCK_LI}); break; case MD_BLOCK_HR: - pushG(r->out, ""); //puts(BLU "hr" RST); break; case MD_BLOCK_H: r->h = MD_BLOCK_H; - r->level=((MD_BLOCK_H_DETAIL*)detail)->level; if(!isEmptyG(r->current)) { ADD_BLOCKCODE pushG(r->out, r->current); @@ -91,12 +143,12 @@ enter_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata) break; case MD_BLOCK_CODE: r->blockcode = MD_BLOCK_CODE; - pushG(r->out, "\n" BLD BGRED WHT 
"`````````" RST); + pushG(r->out, "\n" BLD BGRED WHT "`````````" RST BLD MGT); //puts(MGT "BLOCK_CODE" RST); break; /* case MD_BLOCK_HTML: #<{(| noop |)}># break; */ case MD_BLOCK_P: - if((r->li != MD_BLOCK_LI)&&(!isEmptyG(r->current))) { + if ((sliceLast(&r->blocks).type != MD_BLOCK_LI) && !isEmptyG(r->current)) { ADD_BLOCKQUOTE ADD_BLOCKCODE pushG(r->out, r->current); @@ -104,12 +156,17 @@ enter_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata) } //puts(GRN "p" RST); break; - /* case MD_BLOCK_TABLE: RENDER_LITERAL(r, "<table>\n"); break; */ + case MD_BLOCK_TABLE: + initiateG(&r->table); + r->tbl = MD_BLOCK_TABLE; + break; /* case MD_BLOCK_THEAD: RENDER_LITERAL(r, "<thead>\n"); break; */ /* case MD_BLOCK_TBODY: RENDER_LITERAL(r, "<tbody>\n"); break; */ - /* case MD_BLOCK_TR: RENDER_LITERAL(r, "<tr>\n"); break; */ - /* case MD_BLOCK_TH: render_open_td_block(r, "th", (MD_BLOCK_TD_DETAIL*)detail); break; */ - /* case MD_BLOCK_TD: render_open_td_block(r, "td", (MD_BLOCK_TD_DETAIL*)detail); break; */ + case MD_BLOCK_TR: + initiateG(&r->row); + break; + /* case MD_BLOCK_TH: render_open_td_block(r, "th", (MD_BLOCK_TD_DETAIL*)detail); break; */ + /* case MD_BLOCK_TD: render_open_td_block(r, "td", (MD_BLOCK_TD_DETAIL*)detail); break; */ } return 0; @@ -119,47 +176,78 @@ internal int leave_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata) { cast(outt *, r, userdata); - static const MD_CHAR* head[6] = { "</h1>\n", "</h2>\n", "</h3>\n", "</h4>\n", "</h5>\n", "</h6>\n" }; /* MD_RENDER_HTML* r = (MD_RENDER_HTML*) userdata; */ + logD(e2text[type]); + if (not sliceIsEmpty(&r->blocks)) { + logD("stack %s",e2text[sliceLast(&r->blocks).type]); + } + + logVarG(r->current); + logVarG(sliceCount(&r->blocks)); + + leave_all_spans(0, null, userdata); + switch(type) { case MD_BLOCK_DOC: /*noop*/ break; case MD_BLOCK_QUOTE: - r->blockquote = 0; - pushG(r->out, ""); + ADD_BLOCKQUOTE; + pErrorNULL(pushG(r->out, r->current)); + rallocG(r->current, ""); + dec 
r->bqlevel; + if (!r->bqlevel) + r->blockquote = 0; //puts(MGT "/blockquote" RST); break; case MD_BLOCK_UL: - r->ul = 0; - pushG(r->out, ""); + if (not sliceIsEmpty(&r->blocks) and sliceLast(&r->blocks).type == MD_BLOCK_UL) { + sliceDelLast(&r->blocks); + } + if (sliceIsEmpty(&r->blocks)) { + pushG(r->out, ""); + } //puts(GRN "/ul" RST); break; case MD_BLOCK_OL: - r->ol = 0; - pushG(r->out, ""); + if (not sliceIsEmpty(&r->blocks) and sliceLast(&r->blocks).type == MD_BLOCK_OL) { + sliceDelLast(&r->blocks); + } + if (sliceIsEmpty(&r->blocks)) { + pushG(r->out, ""); + } //puts(GRN "/ol" RST); break; case MD_BLOCK_LI: + if (not sliceIsEmpty(&r->blocks) and sliceLast(&r->blocks).type == MD_BLOCK_LI) { + sliceDelLast(&r->blocks); + } //puts(GRN "/li" RST); - if (r->li == MD_BLOCK_LI) { - if (r->ul == MD_BLOCK_UL) { + //logD("'%m'", r->current); + if (not isEmptyG(r->current)) { + if (sliceLast(&r->blocks).type == MD_BLOCK_UL) { prependG(r->current, "- "); + loop(sliceCount(&r->blocks)-1/*list level*/) { + prependG(r->current, " "); + } } - if (r->ol == MD_BLOCK_OL) { - char *s = intToS(r->olCount); + if (sliceLast(&r->blocks).type == MD_BLOCK_OL) { + char *s = intToS(sliceLast(&r->blocks).olCount); pErrorNULL(pushG(&s, ". 
")); prependG(r->current, s); free(s); - r->olCount++; + loop(sliceCount(&r->blocks)-1/*list level*/) { + prependG(r->current, " "); + } + sliceLast(&r->blocks).olCount++; } - r->li = 0; ADD_BLOCKQUOTE pushG(r->out, r->current); rallocG(r->current, ""); } break; - case MD_BLOCK_HR: /*noop*/ + case MD_BLOCK_HR: + pushG(r->out, "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"); break; case MD_BLOCK_H: //puts(BLU "</hN>" RST); @@ -171,104 +259,392 @@ leave_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata) } colorG(r->current, BLD YLW); pushG(r->out, r->current); - pushG(r->out,""); rallocG(r->current, ""); break; case MD_BLOCK_CODE: + if (not isEmptyG(r->current)) { + pushG(r->current, RST BLD BGRED WHT "`````````" RST "\n"); + pushG(r->out, r->current); + rallocG(r->current, ""); + } //puts(MGT "/BLOCK_CODE" RST); break; case MD_BLOCK_HTML: /* noop */ break; case MD_BLOCK_P: //puts(GRN "/p" RST); - if (r->li == MD_BLOCK_LI) { - if (r->ul == MD_BLOCK_UL) { + if (sliceLast(&r->blocks).type == MD_BLOCK_LI) { + if (sliceAt(&r->blocks, sliceCount(&r->blocks)-2).type == MD_BLOCK_UL) { prependG(r->current, "- "); + loop(sliceCount(&r->blocks)-2/*list level*/) { + prependG(r->current, " "); + } } - if (r->ol == MD_BLOCK_OL) { - char *s = intToS(r->olCount); + if (sliceAt(&r->blocks, sliceCount(&r->blocks)-2).type == MD_BLOCK_OL) { + char *s = intToS(sliceAt(&r->blocks, sliceCount(&r->blocks)-2).olCount); pErrorNULL(pushG(&s, ". 
")); prependG(r->current, s); free(s); - r->olCount++; + loop(sliceCount(&r->blocks)-2/*list level*/) { + prependG(r->current, " "); + } + sliceAt(&r->blocks, sliceCount(&r->blocks)-2).olCount++; } - r->li = 0; } ADD_BLOCKQUOTE pErrorNULL(pushG(r->out, r->current)); rallocG(r->current, ""); break; - /* case MD_BLOCK_TABLE: RENDER_LITERAL(r, "</table>\n"); break; */ - /* case MD_BLOCK_THEAD: RENDER_LITERAL(r, "</thead>\n"); break; */ - /* case MD_BLOCK_TBODY: RENDER_LITERAL(r, "</tbody>\n"); break; */ - /* case MD_BLOCK_TR: RENDER_LITERAL(r, "</tr>\n"); break; */ - /* case MD_BLOCK_TH: RENDER_LITERAL(r, "</th>\n"); break; */ - /* case MD_BLOCK_TD: RENDER_LITERAL(r, "</td>\n"); break; */ + case MD_BLOCK_TABLE:; + // TODO render with fort like csvFmt + int rows = lenG(&r->table); + int cols = 0; + iter(&r->table, Row) { + cast(smallArrayt*, row, Row); + cols = maxV(cols, lenG(row)); + } + char **t = malloc(rows * cols * sizeof(char*)); + int j = 0; + iter(&r->table, Row) { + cast(smallArrayt*, row, Row); + int i = 0; + iter(row, c) { + *(t + j * cols + i) = ssGet(c); + inc i; + } + if (i < cols) { + while (i < cols) { + *(t + j * cols + i) = ""; + inc i; + } + } + inc j; + } + ft_table_t *table2 = ft_create_table(); + + ft_set_border_style(table2, FT_SIMPLE_STYLE); + //ft_set_border_style(table2, FT_DOUBLE_STYLE); + + ft_table_write(table2, rows, cols, (const char **) t); + + ft_set_cell_prop(table2, 0, FT_ANY_COLUMN, FT_CPROP_ROW_TYPE, FT_ROW_HEADER); + range(i, cols) { + ft_set_cell_prop(table2, 0, i, FT_CPROP_CONT_TEXT_STYLE, FT_TSTYLE_BOLD); + } + range(i, rows) { + ft_set_cell_prop(table2, i, FT_ANY_COLUMN, FT_CPROP_CELL_BG_COLOR, FT_COLOR_DEFAULT); + ft_set_cell_prop(table2, i, FT_ANY_COLUMN, FT_CPROP_CELL_BG_RGBCOLOR, i & 1 ? 
0x2f2f2f : 0x1f1f1f); + } + ft_set_cell_prop(table2, 0, FT_ANY_COLUMN, FT_CPROP_CELL_BG_RGBCOLOR, 0x4867ff); + + // 0x4042f + // 0x2f2f2f + pErrorNULL(pushG(r->out, ft_to_string(table2))); + ft_destroy_table(table2); + free(t); + logG(&r->table); + r->tbl = 0; + freeG(&r->table); + break; + /* case MD_BLOCK_THEAD: RENDER_LITERAL(r, "</thead>\n"); break; */ + /* case MD_BLOCK_TBODY: RENDER_LITERAL(r, "</tbody>\n"); break; */ + case MD_BLOCK_TR: + pushG(&r->table, &r->row); + break; + case MD_BLOCK_TH: + pErrorNULL(pushG(&r->row, r->current)); + rallocG(r->current, ""); + break; + case MD_BLOCK_TD: + pErrorNULL(pushG(&r->row, r->current)); + rallocG(r->current, ""); + break; } return 0; } + +/* Span-open callback. Records the span type on r->spans and puts the terminal escape sequence of the span in front of r->rspan. A link whose destination starts with a hash sign and is shorter than 17 characters is read as a colour request (foreground as #N or #xRRGGBB, optionally followed by a background as #N or #xRRGGBB, or ## followed by a background only); when it parses, the escape sequence is written to r->colorCode and the span type becomes MD_SPAN_COLOR. Always returns 0. */ internal int enter_span_callback(MD_SPANTYPE type, void* detail, void* userdata) { cast(outt *, r, userdata); - /* MD_RENDER_HTML* r = (MD_RENDER_HTML*) userdata; */ - /* */ - /* if(r->image_nesting_level > 0) { */ - /* #<{(| We are inside an image, i.e. rendering the ALT attribute of */ - /* * <IMG> tag. |)}># */ - /* return 0; */ - /* } */ + logD(s2text[type]); + + // check if span a specifies a color + // type is changed to MD_SPAN_COLOR + if (type == MD_SPAN_A) { + MD_SPAN_A_DETAIL *a = detail; + /* logVarG(a->href.text); */ + /* logVarG(a->href.size); */ + // TODO detect font + i32 fg = -1; + i32 bg = -1; + /* test the size before reading text[0]: an empty link destination has no character to read */ + if (a->href.size > 0 and a->href.text[0] == '#' and a->href.size < 17 /* maximum size of hexadecimal rgb colors */) { + if (a->href.size > 1) { + char colorString[17] = init0Var; + if (a->href.text[1] == 'x') { + if (a->href.size > 2) { + memcpy(colorString, a->href.text, a->href.size); + // search for background color + // minimum: #x?#?
+ /* offset of the hash sign that starts the background colour, or -1 when there is none */ + i32 bgoff = -1; + if (a->href.size > 5) { + rangeFrom(i, 3, a->href.size) { + if (a->href.text[i] == '#') { + bgoff = i; + break; + } + } + if (bgoff > 0) { + if (bgoff+2 > a->href.size) + bgoff = -1; + else { + /* the background must start with a digit or with x; the former test (below 0 and above 9 at once) could never be true */ + if ((a->href.text[bgoff+1] < '0' or a->href.text[bgoff+1] > '9') and a->href.text[bgoff+1] != 'x') { + bgoff = -1; + } + elif (a->href.text[bgoff+1] == 'x' and bgoff+3 > a->href.size) + bgoff = -1; + } + } + } + /* colorString holds a terminated copy of the destination; the leading hash sign is replaced by 0 while parseHex reads 0x... and restored afterwards */ + colorString[0] = '0'; + fg = parseHex(colorString); + colorString[0] = '#'; + if (bgoff < 0) { + sprintf(r->colorCode, "\x1b[38;2;%u;%u;%um", fg >> 16, (fg&0xFF00)>>8, fg&0xFF); + } + else { + if (a->href.text[bgoff+1] >= '0' and a->href.text[bgoff+1] <= '9') { + bg = parseInt(a->href.text + bgoff); + if (bg >= 0 and bg < 8) { + sprintf(r->colorCode, "\x1b[38;2;%u;%u;%um\x1B[4%dm", fg >> 16, (fg&0xFF00)>>8, fg&0xFF, bg); + } + else { + /* background index outside 0..7: emit the foreground only instead of reusing whatever r->colorCode held before */ + sprintf(r->colorCode, "\x1b[38;2;%u;%u;%um", fg >> 16, (fg&0xFF00)>>8, fg&0xFF); + } + } + elif (a->href.text[bgoff+1] == 'x') { + colorString[bgoff] = '0'; + bg = parseHex(colorString + bgoff); + colorString[bgoff] = '#'; + sprintf(r->colorCode, "\x1b[38;2;%u;%u;%um\x1b[48;2;%u;%u;%um", fg >> 16, (fg&0xFF00)>>8, fg&0xFF, bg >> 16, (bg&0xFF00)>>8, bg&0xFF); + } + } + type = MD_SPAN_COLOR; + } + } + elif (a->href.text[1] >= '0' and a->href.text[1] <= '9') { + // check background color + // Minimum: #?#?
+ /* offset of the hash sign that starts the background colour, or -1 when there is none */ + i32 bgoff = -1; + if (a->href.size > 3) { + rangeFrom(i, 2, a->href.size) { + if (a->href.text[i] == '#') { + bgoff = i; + break; + } + } + if (bgoff > 0) { + if (bgoff+2 > a->href.size) { + bgoff = -1; + } + else { + /* the background must start with a digit or with x; the former test (below 0 and above 9 at once) could never be true */ + if ((a->href.text[bgoff+1] < '0' or a->href.text[bgoff+1] > '9') and a->href.text[bgoff+1] != 'x') { + bgoff = -1; + } + elif (a->href.text[bgoff+1] == 'x' and bgoff+3 > a->href.size) { + bgoff = -1; + } + } + } + } + fg = parseInt(a->href.text); + /* indexes 8..15 are rendered as the bold variant of colours 0..7 */ + char *bright = ""; + if (fg >= 8 and fg < 16) { + bright = BLD; + fg -= 8; + } + if (fg >= 0 and fg < 8) { + if (bgoff < 0) { + sprintf(r->colorCode, "%s\x1B[3%dm", bright, fg); + } + else { + if (a->href.text[bgoff+1] >= '0' and a->href.text[bgoff+1] <= '9') { + bg = parseInt(a->href.text + bgoff); + if (bg >= 0 and bg < 8) { + sprintf(r->colorCode, "%s\x1B[3%dm\x1B[4%dm", bright, fg, bg); + } + else { + /* background index outside 0..7: emit the foreground only instead of reusing whatever r->colorCode held before */ + sprintf(r->colorCode, "%s\x1B[3%dm", bright, fg); + } + } + elif (a->href.text[bgoff+1] == 'x') { + /* parseHex needs a terminated string starting with 0x: work on a copy and swap the hash sign for 0 while parsing */ + memcpy(colorString, a->href.text, a->href.size); + colorString[bgoff] = '0'; + bg = parseHex(colorString + bgoff); + colorString[bgoff] = '#'; + sprintf(r->colorCode, "%s\x1B[3%dm\x1b[48;2;%u;%u;%um", bright, fg, bg >> 16, (bg&0xFF00)>>8, bg&0xFF); + } + } + type = MD_SPAN_COLOR; + } + } + elif (a->href.size > 2 and a->href.text[1] == '#') { + // ##? + // there is no foreground color + if (a->href.text[2] == 'x' and a->href.size > 3) { + // ##x?
+ /* background only, as 24 bit colour: parse the hex digits from a terminated copy in which the second hash sign is replaced by 0 */ + memcpy(colorString, a->href.text, a->href.size); + colorString[1] = '0'; + bg = parseHex(colorString + 1); + sprintf(r->colorCode, "\x1b[48;2;%u;%u;%um", bg >> 16, (bg&0xFF00)>>8, bg&0xFF); + type = MD_SPAN_COLOR; + } + elif (a->href.text[2] >= '0' and a->href.text[2] <= '9') { + /* NOTE(review): parseInt is given href.text itself, not the terminated copy in colorString; confirm the text is terminated or bounded */ + bg = parseInt(a->href.text); + if (bg >= 0 and bg < 8) { + sprintf(r->colorCode, "\x1B[4%dm", bg); + type = MD_SPAN_COLOR; + } + + } + } + } + } + } - switch(type) { - case MD_SPAN_EM: - r->em = 1; - //puts(RED "em" RST); - break; - case MD_SPAN_STRONG: - r->strong=MD_SPAN_STRONG; - break; - case MD_SPAN_A: - r->a=MD_SPAN_A; - break; - case MD_SPAN_IMG: - r->img = MD_SPAN_IMG; - break; - case MD_SPAN_CODE: - r->code=MD_SPAN_CODE; - break; - case MD_SPAN_DEL: - r->del = MD_SPAN_DEL; - //puts(BLD RED "del" RST); - break; + /* a span is already open: store the text collected so far as its own piece before the nested span starts */ + if (not sliceIsEmpty(&r->spans)) { + pushG(r->span, r->rspan); + rallocG(r->rspan, ""); } + /* remember the span type (possibly rewritten to MD_SPAN_COLOR above) on the span stack */ + sliceAppend(&r->spans, (blockt){.type=type}); + + /* put the escape sequence of the new span in front of the current piece */ + switch(type) { + case MD_SPAN_EM: + prependG(r->rspan, ITL); + break; + case MD_SPAN_STRONG: + prependG(r->rspan, BLD); + break; + case MD_SPAN_U: + prependG(r->rspan, UDL); + break; + case MD_SPAN_A: + prependG(r->rspan, BLD UDL BLU); + break; + case MD_SPAN_IMG: + prependG(r->rspan, UDL RED); + break; + case MD_SPAN_CODE: + prependG(r->rspan, BGBLU); + break; + case MD_SPAN_DEL: + prependG(r->rspan, CRD); + break; + case MD_SPAN_FNT: + prependG(r->rspan, FNT); + break; + case MD_SPAN_INV: + prependG(r->rspan, INV); + break; + case MD_SPAN_COC: + prependG(r->rspan, COC); + break; + case MD_SPAN_BLI: + prependG(r->rspan, BLI); + break; + case MD_SPAN_ANCHOR: + prependG(r->rspan, GRN); + break; + case MD_SPAN_COLOR: + prependG(r->rspan, r->colorCode); + break; + } return 0; } +/* Closes every open span at once: empties the span stack, ends the current piece with RST, joins the pieces stored in r->span and appends the result to r->current. The type and detail parameters are not used. Always returns 0. */ internal int +leave_all_spans(MD_SPANTYPE type, void* detail, void* userdata) +{ + cast(outt *, r, userdata); + + if (not sliceIsEmpty(&r->spans)) { + sliceEmpty(&r->spans); + pushG(r->rspan, RST); + pushG(r->span, r->rspan); + rallocG(r->rspan, ""); +
cleanFinishSmallStringP(s) = joinG(r->span, " "); + freeG(r->span); + pushG(r->current, s); + } + + return 0; +} +/* Span-close callback. Pops the innermost span from r->spans. When that was the last open span, the piece is ended with RST and all pieces stored in r->span are joined and appended to r->current. Otherwise the piece is ended with RST followed by the escape sequences of the spans that are still open, and is stored in r->span. Always returns 0. */ +internal int leave_span_callback(MD_SPANTYPE type, void* detail, void* userdata) { - /* MD_RENDER_HTML* r = (MD_RENDER_HTML*) userdata; */ - /* */ - /* if(r->image_nesting_level > 0) { */ - /* #<{(| We are inside an image, i.e. rendering the ALT attribute of */ - /* * <IMG> tag. |)}># */ - /* if(r->image_nesting_level == 1 && type == MD_SPAN_IMG) */ - /* render_close_img_span(r, (MD_SPAN_IMG_DETAIL*) detail); */ - /* return 0; */ - /* } */ - /* */ - /* switch(type) { */ - /* case MD_SPAN_EM: RENDER_LITERAL(r, "</em>"); break; */ - /* case MD_SPAN_STRONG: RENDER_LITERAL(r, "</strong>"); break; */ - /* case MD_SPAN_A: RENDER_LITERAL(r, "</a>"); break; */ - /* case MD_SPAN_IMG: #<{(|noop, handled above|)}># break; */ - /* case MD_SPAN_CODE: RENDER_LITERAL(r, "</code>"); break; */ - /* case MD_SPAN_DEL: RENDER_LITERAL(r, "</del>"); break; */ - /* } */ + cast(outt *, r, userdata); + + logD(s2text[type]); + + if (not sliceIsEmpty(&r->spans)) { + sliceDelLast(&r->spans); + if (sliceIsEmpty(&r->spans)) { + /* outermost span closed: reset the attributes and flush the joined pieces to r->current */ + pushG(r->rspan, RST); + pushG(r->span, r->rspan); + rallocG(r->rspan, ""); + cleanFinishSmallStringP(s) = joinG(r->span, " "); + freeG(r->span); + pushG(r->current, s); + } + else { + /* a nested span closed: RST clears every attribute, so the sequences of the spans that remain open are emitted again */ + pushG(r->rspan, RST); + sliceForEach(&r->spans, e) { + switch(e->type) { + case MD_SPAN_EM: + pushG(r->rspan, ITL); + break; + case MD_SPAN_STRONG: + pushG(r->rspan, BLD); + break; + case MD_SPAN_U: + pushG(r->rspan, UDL); + break; + case MD_SPAN_A: + pushG(r->rspan, BLD UDL BLU); + break; + case MD_SPAN_IMG: + pushG(r->rspan, UDL RED); + break; + case MD_SPAN_CODE: + pushG(r->rspan, BGBLU); + break; + case MD_SPAN_DEL: + pushG(r->rspan, CRD); + break; + case MD_SPAN_FNT: + pushG(r->rspan, FNT); + break; + case MD_SPAN_INV: + pushG(r->rspan, INV); + break; + case MD_SPAN_COC: + pushG(r->rspan, COC); + break; + case MD_SPAN_BLI: + pushG(r->rspan, BLI); + break; + case
MD_SPAN_ANCHOR: + pushG(r->rspan, GRN); + break; + case MD_SPAN_COLOR: + pushG(r->rspan, r->colorCode); + break; + } + } + pushG(r->span, r->rspan); + rallocG(r->rspan, ""); + } + } + //logG(r->span); return 0; } @@ -280,6 +656,8 @@ text_callback(MD_TEXTTYPE type, const MD_CHAR* text, MD_SIZE size, void* userdat char *s = calloc(1, size+1); smallStringt *t; + logD(t2text[type]); + switch(type) { case MD_TEXT_NULLCHAR: break; @@ -294,34 +672,44 @@ text_callback(MD_TEXTTYPE type, const MD_CHAR* text, MD_SIZE size, void* userdat case MD_TEXT_HTML: strncpy(s, text, size); /*pushG(r->out, s);*/ //puts(BLU "html" RST); break; - /* case MD_TEXT_ENTITY: render_entity(r, text, size, render_html_escaped); break; */ - default: + /* Entity text arrives verbatim (for example the six characters of a decimal reference). It is converted to UTF-8 in utf8Code: numeric references through parseHex or parseIntG, named ones through the r->entities table, which is built on first use. */ + case MD_TEXT_ENTITY: + // spec https://html.spec.whatwg.org/entities.json strncpy(s, text, size); - t = allocG(s); - if (r->em == 1) { - colorG(t, GRN); - r->em = 0; - } - if (r->strong == MD_SPAN_STRONG) { - colorG(t, BLD); - r->strong = 0; - } - if (r->a == MD_SPAN_A) { - colorG(t, BLD UDL BLU); - r->a = 0; - } - if (r->img == MD_SPAN_IMG) { - //puts(BLD "IMG" RST); - colorG(t, UDL RED); - r->img = 0; + logVarG(s); + char utf8Code[10] = init0Var; + if (s[1] == '#') { + // numeric character reference + if (s[2] == 'x' or s[2] == 'X') { + // hexadecimal + s[1] = '0'; + rune c = parseHex(s+1); + pError0(bRune2CodeUTF8(utf8Code, c)); + s[1] = '#'; + } + else { + // decimal + rune c = parseIntG(s); + pError0(bRune2CodeUTF8(utf8Code, c)); + } } - if (r->code == MD_SPAN_CODE) { - colorG(t, BGBLU); - r->code = 0; + else { + // check entity list + if (!r->entities) { + initiateG(&r->entities); + parseG(r->entities, entitiesString); + } + char *u = getG(r->entities, rtChar, s); + if (!u) { + /* unknown entity name: keep the source text instead of dropping it, routed like normal text */ + if (not sliceIsEmpty(&r->spans)) { + pushG(r->rspan, s); + } + else { + pushG(r->current, s); + } + break; + } + strcpy(utf8Code, u); } - if (r->del == MD_SPAN_DEL) { - colorG(t, INV); - r->del = 0; + /* inside a span the text is collected in r->rspan (as the default case does); writing to r->current here would place the character ahead of the span text */ + if (not sliceIsEmpty(&r->spans)) { + pushG(r->rspan, utf8Code); + } + else { + pushG(r->current, utf8Code); + } + break; + default: + strncpy(s, text, size); + t = allocG(s); + if (not sliceIsEmpty(&r->spans)) { + pushNFreeG(r->rspan, t); + break; }
pushNFreeG(r->current, t); //puts(BLU "default" RST); @@ -329,6 +717,7 @@ text_callback(MD_TEXTTYPE type, const MD_CHAR* text, MD_SIZE size, void* userdat break; } + logVarG(s); free(s); return 0; @@ -351,52 +740,100 @@ md_highlight(const char *md_source) { result.out = allocG(rtSmallArrayt); result.current = allocG(""); + /* state for span rendering: the list of finished pieces, the piece being built, and the block and span nesting stacks */ + result.span = allocG(rtSmallArrayt); + result.rspan = allocG(""); + sliceInitCount(&result.blocks, 16); + sliceInitCount(&result.spans, 16); - unsigned parser_flags = MD_DIALECT_GITHUB; + unsigned parser_flags = MD_DIALECT_GITHUB | MD_FLAG_UNDERLINE; unsigned renderer_flags = 0; - MD_RENDERER renderer = { + /* positional initializer: compared with the removed MD_RENDERER form, a leading 0 is added, the flags move from the last position to the second, and a trailing null is added */ + MD_PARSER parser = { + 0, + parser_flags, enter_block_callback, leave_block_callback, enter_span_callback, leave_span_callback, text_callback, debug_log_callback, - parser_flags + null }; - md_parse(md_source, strlen(md_source), &renderer, &result); + md_parse(md_source, strlen(md_source), &parser, &result); smashG(result.current); + /* release the helpers that are only needed while parsing; result.out is what the caller receives */ + terminateG(result.span); + terminateG(result.rspan); + sliceFree(&result.blocks); + sliceFree(&result.spans); + terminateG(result.entities); return result.out; } #ifndef LIB int main(int ARGC, char** ARGV) { - argc = ARGC; - argv = ARGV; + initLibsheepy(ARGV[0]); + setLogMode(LOG_FUNC); - initLibsheepy(argv[0]); - - if (argc < 2) { + if (ARGC < 2) { puts(RED "Give a filename in parameter" RST); XFAILURE } - char *c = readFileG(c, argv[1]); + /* fill the enum value to name tables; s2text and t2text are read by the logD calls in the span and text callbacks */ + e2text[MD_BLOCK_DOC] = "MD_BLOCK_DOC"; + e2text[MD_BLOCK_QUOTE] = "MD_BLOCK_QUOTE"; + e2text[MD_BLOCK_UL] = "MD_BLOCK_UL"; + e2text[MD_BLOCK_OL] = "MD_BLOCK_OL"; + e2text[MD_BLOCK_LI] = "MD_BLOCK_LI"; + e2text[MD_BLOCK_HR] = "MD_BLOCK_HR"; + e2text[MD_BLOCK_H] = "MD_BLOCK_H"; + e2text[MD_BLOCK_CODE] = "MD_BLOCK_CODE"; + e2text[MD_BLOCK_HTML] = "MD_BLOCK_HTML"; + e2text[MD_BLOCK_P] = "MD_BLOCK_P"; + e2text[MD_BLOCK_TABLE] = "MD_BLOCK_TABLE"; + e2text[MD_BLOCK_THEAD] = "MD_BLOCK_THEAD"; + e2text[MD_BLOCK_TBODY] = "MD_BLOCK_TBODY"; + e2text[MD_BLOCK_TR] = "MD_BLOCK_TR"; +
e2text[MD_BLOCK_TH] = "MD_BLOCK_TH"; + e2text[MD_BLOCK_TD] = "MD_BLOCK_TD"; + + t2text[MD_TEXT_NORMAL] = "MD_TEXT_NORMAL"; + t2text[MD_TEXT_NULLCHAR] = "MD_TEXT_NULLCHAR"; + t2text[MD_TEXT_BR] = "MD_TEXT_BR"; + t2text[MD_TEXT_SOFTBR] = "MD_TEXT_SOFTBR"; + t2text[MD_TEXT_ENTITY] = "MD_TEXT_ENTITY"; + t2text[MD_TEXT_CODE] = "MD_TEXT_CODE"; + t2text[MD_TEXT_HTML] = "MD_TEXT_HTML"; + t2text[MD_TEXT_LATEXMATH] = "MD_TEXT_LATEXMATH"; + + s2text[MD_SPAN_EM] = "MD_SPAN_EM"; + s2text[MD_SPAN_STRONG] = "MD_SPAN_STRONG"; + s2text[MD_SPAN_A] = "MD_SPAN_A"; + s2text[MD_SPAN_IMG] = "MD_SPAN_IMG"; + s2text[MD_SPAN_CODE] = "MD_SPAN_CODE"; + s2text[MD_SPAN_DEL] = "MD_SPAN_DEL"; + s2text[MD_SPAN_LATEXMATH] = "MD_SPAN_LATEXMATH"; + s2text[MD_SPAN_LATEXMATH_DISPLAY] = "MD_SPAN_LATEXMATH_DISPLAY"; + s2text[MD_SPAN_WIKILINK] = "MD_SPAN_WIKILINK"; + s2text[MD_SPAN_U] = "MD_SPAN_U"; + s2text[MD_SPAN_FNT] = "MD_SPAN_FNT"; + s2text[MD_SPAN_INV] = "MD_SPAN_INV"; + s2text[MD_SPAN_COC] = "MD_SPAN_COC"; + s2text[MD_SPAN_BLI] = "MD_SPAN_BLI"; + s2text[MD_SPAN_ANCHOR] = "MD_SPAN_ANCHOR"; + + /* read the whole markdown file named by the first argument, then print the highlighted result */ + char *c = readFileG(c, ARGV[1]); if (!c) { puts(RED "Error reading:" RST); - puts(argv[1]); + puts(ARGV[1]); XFAILURE } logG(md_highlight(c)); //logVarG(result.out); - - finalizeLibsheepy(); - } #endif diff --git a/package.yml b/package.yml @@ -1,6 +1,6 @@ --- name: md - version: 0.0.6 + version: 0.0.7 description: cat markdown files with syntax highlighting bin: ./md.c repository: diff --git a/shpPackages/md4c/md4c.c b/shpPackages/md4c/md4c.c @@ -0,0 +1,6706 @@ +/* commit e9ff661ff818ee94a4a231958d9b6768dc6882c9 - mity/md4c repo + * MD4C: Markdown parser for C + * (http://github.com/mity/md4c) + * + * Copyright (c) 2016-2020 Martin Mitas + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify,
merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "md4c.h" + +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + + +/***************************** + *** Miscellaneous Stuff *** + *****************************/ + +#if !defined(__STDC_VERSION__) || __STDC_VERSION__ < 199409L + /* C89/90 or old compilers in general may not understand "inline". */ + #if defined __GNUC__ + #define inline __inline__ + #elif defined _MSC_VER + #define inline __inline + #else + #define inline + #endif +#endif + +/* Make the UTF-8 support the default. */ +#if !defined MD4C_USE_ASCII && !defined MD4C_USE_UTF8 && !defined MD4C_USE_UTF16 + #define MD4C_USE_UTF8 +#endif + +/* Magic for making wide literals with MD4C_USE_UTF16. */ +#ifdef _T + #undef _T +#endif +#if defined MD4C_USE_UTF16 + #define _T(x) L##x +#else + #define _T(x) x +#endif + +/* Misc. macros. 
*/ +#define SIZEOF_ARRAY(a) (sizeof(a) / sizeof(a[0])) + +#define STRINGIZE_(x) #x +#define STRINGIZE(x) STRINGIZE_(x) + +#ifndef TRUE + #define TRUE 1 + #define FALSE 0 +#endif + +#define MD_LOG(msg) \ + do { \ + if(ctx->parser.debug_log != NULL) \ + ctx->parser.debug_log((msg), ctx->userdata); \ + } while(0) + +#ifdef DEBUG + #define MD_ASSERT(cond) \ + do { \ + if(!(cond)) { \ + MD_LOG(__FILE__ ":" STRINGIZE(__LINE__) ": " \ + "Assertion '" STRINGIZE(cond) "' failed."); \ + exit(1); \ + } \ + } while(0) + + #define MD_UNREACHABLE() MD_ASSERT(1 == 0) +#else + #ifdef __GNUC__ + #define MD_ASSERT(cond) do { if(!(cond)) __builtin_unreachable(); } while(0) + #define MD_UNREACHABLE() do { __builtin_unreachable(); } while(0) + #elif defined _MSC_VER && _MSC_VER > 120 + #define MD_ASSERT(cond) do { __assume(cond); } while(0) + #define MD_UNREACHABLE() do { __assume(0); } while(0) + #else + #define MD_ASSERT(cond) do {} while(0) + #define MD_UNREACHABLE() do {} while(0) + #endif +#endif + +/* For falling through case labels in switch statements. */ +#if defined __clang__ && __clang_major__ >= 12 + #define MD_FALLTHROUGH() __attribute__((fallthrough)) +#elif defined __GNUC__ && __GNUC__ >= 7 + #define MD_FALLTHROUGH() __attribute__((fallthrough)) +#else + #define MD_FALLTHROUGH() ((void)0) +#endif + +/* Suppress "unused parameter" warnings. */ +#define MD_UNUSED(x) ((void)x) + + +/************************ + *** Internal Types *** + ************************/ + +/* These are omnipresent so lets save some typing. */ +#define CHAR MD_CHAR +#define SZ MD_SIZE +#define OFF MD_OFFSET + +typedef struct MD_MARK_tag MD_MARK; +typedef struct MD_BLOCK_tag MD_BLOCK; +typedef struct MD_CONTAINER_tag MD_CONTAINER; +typedef struct MD_REF_DEF_tag MD_REF_DEF; + + +/* During analyzes of inline marks, we need to manage some "mark chains", + * of (yet unresolved) openers. This structure holds start/end of the chain. + * The chain internals are then realized through MD_MARK::prev and ::next. 
+ */ +typedef struct MD_MARKCHAIN_tag MD_MARKCHAIN; +struct MD_MARKCHAIN_tag { + int head; /* Index of first mark in the chain, or -1 if empty. */ + int tail; /* Index of last mark in the chain, or -1 if empty. */ +}; + +/* Context propagated through all the parsing. */ +typedef struct MD_CTX_tag MD_CTX; +struct MD_CTX_tag { + /* Immutable stuff (parameters of md_parse()). */ + const CHAR* text; + SZ size; + MD_PARSER parser; + void* userdata; + + /* When this is true, it allows some optimizations. */ + int doc_ends_with_newline; + + /* Helper temporary growing buffer. */ + CHAR* buffer; + unsigned alloc_buffer; + + /* Reference definitions. */ + MD_REF_DEF* ref_defs; + int n_ref_defs; + int alloc_ref_defs; + void** ref_def_hashtable; + int ref_def_hashtable_size; + + /* Stack of inline/span markers. + * This is only used for parsing a single block contents but by storing it + * here we may reuse the stack for subsequent blocks; i.e. we have fewer + * (re)allocations. */ + MD_MARK* marks; + int n_marks; + int alloc_marks; + +#if defined MD4C_USE_UTF16 + char mark_char_map[128]; +#else + char mark_char_map[256]; +#endif + + /* For resolving of inline spans. 
*/ + MD_MARKCHAIN mark_chains[17]; +#define PTR_CHAIN (ctx->mark_chains[0]) +#define TABLECELLBOUNDARIES (ctx->mark_chains[1]) +#define ASTERISK_OPENERS_extraword_mod3_0 (ctx->mark_chains[2]) +#define ASTERISK_OPENERS_extraword_mod3_1 (ctx->mark_chains[3]) +#define ASTERISK_OPENERS_extraword_mod3_2 (ctx->mark_chains[4]) +#define ASTERISK_OPENERS_intraword_mod3_0 (ctx->mark_chains[5]) +#define ASTERISK_OPENERS_intraword_mod3_1 (ctx->mark_chains[6]) +#define ASTERISK_OPENERS_intraword_mod3_2 (ctx->mark_chains[7]) +#define UNDERSCORE_OPENERS (ctx->mark_chains[8]) +#define TILDE_OPENERS_1 (ctx->mark_chains[9]) +#define TILDE_OPENERS_2 (ctx->mark_chains[10]) +#define BRACKET_OPENERS (ctx->mark_chains[11]) +#define DOLLAR_OPENERS (ctx->mark_chains[12]) +#define FAINT_OPENERS (ctx->mark_chains[13]) +#define INVERSE_OPENERS (ctx->mark_chains[14]) +#define CONCEAL_OPENERS (ctx->mark_chains[15]) +#define BLINK_OPENERS (ctx->mark_chains[16]) +#define OPENERS_CHAIN_FIRST 1 +#define OPENERS_CHAIN_LAST 16 + + int n_table_cell_boundaries; + + /* For resolving links. */ + int unresolved_link_head; + int unresolved_link_tail; + + /* For resolving raw HTML. */ + OFF html_comment_horizon; + OFF html_proc_instr_horizon; + OFF html_decl_horizon; + OFF html_cdata_horizon; + + /* For block analysis. + * Notes: + * -- It holds MD_BLOCK as well as MD_LINE structures. After each + * MD_BLOCK, its (multiple) MD_LINE(s) follow. + * -- For MD_BLOCK_HTML and MD_BLOCK_CODE, MD_VERBATIMLINE(s) are used + * instead of MD_LINE(s). + */ + void* block_bytes; + MD_BLOCK* current_block; + int n_block_bytes; + int alloc_block_bytes; + + /* For container block analysis. */ + MD_CONTAINER* containers; + int n_containers; + int alloc_containers; + + /* Minimal indentation to call the block "indented code block". */ + unsigned code_indent_offset; + + /* Contextual info for line analysis. */ + SZ code_fence_length; /* For checking closing fence length. 
*/ + int html_block_type; /* For checking closing raw HTML condition. */ + int last_line_has_list_loosening_effect; + int last_list_item_starts_with_two_blank_lines; +}; + +enum MD_LINETYPE_tag { + MD_LINE_BLANK, + MD_LINE_HR, + MD_LINE_ATXHEADER, + MD_LINE_SETEXTHEADER, + MD_LINE_SETEXTUNDERLINE, + MD_LINE_INDENTEDCODE, + MD_LINE_FENCEDCODE, + MD_LINE_HTML, + MD_LINE_TEXT, + MD_LINE_TABLE, + MD_LINE_TABLEUNDERLINE +}; +typedef enum MD_LINETYPE_tag MD_LINETYPE; + +typedef struct MD_LINE_ANALYSIS_tag MD_LINE_ANALYSIS; +struct MD_LINE_ANALYSIS_tag { + MD_LINETYPE type : 16; + unsigned data : 16; + OFF beg; + OFF end; + unsigned indent; /* Indentation level. */ +}; + +typedef struct MD_LINE_tag MD_LINE; +struct MD_LINE_tag { + OFF beg; + OFF end; +}; + +typedef struct MD_VERBATIMLINE_tag MD_VERBATIMLINE; +struct MD_VERBATIMLINE_tag { + OFF beg; + OFF end; + OFF indent; +}; + + +/***************** + *** Helpers *** + *****************/ + +/* Character accessors. */ +#define CH(off) (ctx->text[(off)]) +#define STR(off) (ctx->text + (off)) + +/* Character classification. + * Note we assume ASCII compatibility of code points < 128 here. 
*/ +#define ISIN_(ch, ch_min, ch_max) ((ch_min) <= (unsigned)(ch) && (unsigned)(ch) <= (ch_max)) +#define ISANYOF_(ch, palette) ((ch) != _T('\0') && md_strchr((palette), (ch)) != NULL) +#define ISANYOF2_(ch, ch1, ch2) ((ch) == (ch1) || (ch) == (ch2)) +#define ISANYOF3_(ch, ch1, ch2, ch3) ((ch) == (ch1) || (ch) == (ch2) || (ch) == (ch3)) +#define ISASCII_(ch) ((unsigned)(ch) <= 127) +#define ISBLANK_(ch) (ISANYOF2_((ch), _T(' '), _T('\t'))) +#define ISNEWLINE_(ch) (ISANYOF2_((ch), _T('\r'), _T('\n'))) +#define ISWHITESPACE_(ch) (ISBLANK_(ch) || ISANYOF2_((ch), _T('\v'), _T('\f'))) +#define ISCNTRL_(ch) ((unsigned)(ch) <= 31 || (unsigned)(ch) == 127) +#define ISPUNCT_(ch) (ISIN_(ch, 33, 47) || ISIN_(ch, 58, 64) || ISIN_(ch, 91, 96) || ISIN_(ch, 123, 126)) +#define ISUPPER_(ch) (ISIN_(ch, _T('A'), _T('Z'))) +#define ISLOWER_(ch) (ISIN_(ch, _T('a'), _T('z'))) +#define ISALPHA_(ch) (ISUPPER_(ch) || ISLOWER_(ch)) +#define ISDIGIT_(ch) (ISIN_(ch, _T('0'), _T('9'))) +#define ISXDIGIT_(ch) (ISDIGIT_(ch) || ISIN_(ch, _T('A'), _T('F')) || ISIN_(ch, _T('a'), _T('f'))) +#define ISALNUM_(ch) (ISALPHA_(ch) || ISDIGIT_(ch)) + +#define ISANYOF(off, palette) ISANYOF_(CH(off), (palette)) +#define ISANYOF2(off, ch1, ch2) ISANYOF2_(CH(off), (ch1), (ch2)) +#define ISANYOF3(off, ch1, ch2, ch3) ISANYOF3_(CH(off), (ch1), (ch2), (ch3)) +#define ISASCII(off) ISASCII_(CH(off)) +#define ISBLANK(off) ISBLANK_(CH(off)) +#define ISNEWLINE(off) ISNEWLINE_(CH(off)) +#define ISWHITESPACE(off) ISWHITESPACE_(CH(off)) +#define ISCNTRL(off) ISCNTRL_(CH(off)) +#define ISPUNCT(off) ISPUNCT_(CH(off)) +#define ISUPPER(off) ISUPPER_(CH(off)) +#define ISLOWER(off) ISLOWER_(CH(off)) +#define ISALPHA(off) ISALPHA_(CH(off)) +#define ISDIGIT(off) ISDIGIT_(CH(off)) +#define ISXDIGIT(off) ISXDIGIT_(CH(off)) +#define ISALNUM(off) ISALNUM_(CH(off)) + + +#if defined MD4C_USE_UTF16 + #define md_strchr wcschr +#else + #define md_strchr strchr +#endif + + +/* Case insensitive check of string equality. 
*/ +static inline int +md_ascii_case_eq(const CHAR* s1, const CHAR* s2, SZ n) +{ + OFF i; + for(i = 0; i < n; i++) { + CHAR ch1 = s1[i]; + CHAR ch2 = s2[i]; + + /* fold ASCII lower case letters to upper case before comparing */ + if(ISLOWER_(ch1)) + ch1 += ('A'-'a'); + if(ISLOWER_(ch2)) + ch2 += ('A'-'a'); + if(ch1 != ch2) + return FALSE; + } + return TRUE; +} + +/* Case sensitive check: non-zero when the first n characters of s1 and s2 are identical (memcmp over n * sizeof(CHAR) bytes). */ +static inline int +md_ascii_eq(const CHAR* s1, const CHAR* s2, SZ n) +{ + return memcmp(s1, s2, n * sizeof(CHAR)) == 0; +} + +/* Passes str (size characters) to the text callback in runs that are split at NUL characters; each NUL found is reported as a separate MD_TEXT_NULLCHAR text of size 1. Returns 0 when all of str is consumed, or the first non-zero value returned by the callback. */ +static int +md_text_with_null_replacement(MD_CTX* ctx, MD_TEXTTYPE type, const CHAR* str, SZ size) +{ + OFF off = 0; + int ret = 0; + + while(1) { + while(off < size && str[off] != _T('\0')) + off++; + + if(off > 0) { + ret = ctx->parser.text(type, str, off, ctx->userdata); + if(ret != 0) + return ret; + + str += off; + size -= off; + off = 0; + } + + if(off >= size) + return 0; + + ret = ctx->parser.text(MD_TEXT_NULLCHAR, _T(""), 1, ctx->userdata); + if(ret != 0) + return ret; + /* NOTE(review): only off is advanced here while str still points at the NUL, so the next run handed to the callback appears to begin with that NUL character; confirm against upstream md4c */ + off++; + } +} + + +/* Evaluates func into the local ret and jumps to the abort label of the calling function when the result is negative. */ +#define MD_CHECK(func) \ + do { \ + ret = (func); \ + if(ret < 0) \ + goto abort; \ + } while(0) + + +/* Grows ctx->buffer to at least sz (one and a half times sz plus 128, rounded down to a multiple of 128) when the current allocation is smaller; on realloc failure logs, sets ret to -1 and jumps to abort. */ +#define MD_TEMP_BUFFER(sz) \ + do { \ + if(sz > ctx->alloc_buffer) { \ + CHAR* new_buffer; \ + SZ new_size = ((sz) + (sz) / 2 + 128) & ~127; \ + \ + new_buffer = realloc(ctx->buffer, new_size); \ + if(new_buffer == NULL) { \ + MD_LOG("realloc() failed."); \ + ret = -1; \ + goto abort; \ + } \ + \ + ctx->buffer = new_buffer; \ + ctx->alloc_buffer = new_size; \ + } \ + } while(0) + + +/* The callback wrappers below store the callback result in ret and, when it is non-zero, log the abort and jump to the abort label. */ +#define MD_ENTER_BLOCK(type, arg) \ + do { \ + ret = ctx->parser.enter_block((type), (arg), ctx->userdata); \ + if(ret != 0) { \ + MD_LOG("Aborted from enter_block() callback."); \ + goto abort; \ + } \ + } while(0) + +#define MD_LEAVE_BLOCK(type, arg) \ + do { \ + ret = ctx->parser.leave_block((type), (arg), ctx->userdata); \ + if(ret != 0) { \ + MD_LOG("Aborted from leave_block() callback."); \ + goto abort; \ + } \ + } while(0) + +#define MD_ENTER_SPAN(type, arg) \ + do { \ + ret = ctx->parser.enter_span((type), (arg), ctx->userdata); \ + if(ret != 0) { \ + MD_LOG("Aborted
from enter_span() callback."); \ + goto abort; \ + } \ + } while(0) + +#define MD_LEAVE_SPAN(type, arg) \ + do { \ + ret = ctx->parser.leave_span((type), (arg), ctx->userdata); \ + if(ret != 0) { \ + MD_LOG("Aborted from leave_span() callback."); \ + goto abort; \ + } \ + } while(0) + +#define MD_TEXT(type, str, size) \ + do { \ + if(size > 0) { \ + ret = ctx->parser.text((type), (str), (size), ctx->userdata); \ + if(ret != 0) { \ + MD_LOG("Aborted from text() callback."); \ + goto abort; \ + } \ + } \ + } while(0) + +#define MD_TEXT_INSECURE(type, str, size) \ + do { \ + if(size > 0) { \ + ret = md_text_with_null_replacement(ctx, type, str, size); \ + if(ret != 0) { \ + MD_LOG("Aborted from text() callback."); \ + goto abort; \ + } \ + } \ + } while(0) + + +/* If the offset falls into a gap between line, we return the following + * line. */ +static const MD_LINE* +md_lookup_line(OFF off, const MD_LINE* lines, int n_lines) +{ + int lo, hi; + int pivot; + const MD_LINE* line; + + lo = 0; + hi = n_lines - 1; + while(lo <= hi) { + pivot = (lo + hi) / 2; + line = &lines[pivot]; + + if(off < line->beg) { + hi = pivot - 1; + if(hi < 0 || lines[hi].end <= off) + return line; + } else if(off > line->end) { + lo = pivot + 1; + } else { + return line; + } + } + + return NULL; +} + + +/************************* + *** Unicode Support *** + *************************/ + +typedef struct MD_UNICODE_FOLD_INFO_tag MD_UNICODE_FOLD_INFO; +struct MD_UNICODE_FOLD_INFO_tag { + unsigned codepoints[3]; + unsigned n_codepoints; +}; + + +#if defined MD4C_USE_UTF16 || defined MD4C_USE_UTF8 + /* Binary search over sorted "map" of codepoints. Consecutive sequences + * of codepoints may be encoded in the map by just using the + * (MIN_CODEPOINT | 0x40000000) and (MAX_CODEPOINT | 0x80000000). + * + * Returns index of the found record in the map (in the case of ranges, + * the minimal value is used); or -1 on failure. 
*/ + static int + md_unicode_bsearch__(unsigned codepoint, const unsigned* map, size_t map_size) + { + int beg, end; + int pivot_beg, pivot_end; + + beg = 0; + end = (int) map_size-1; + while(beg <= end) { + /* Pivot may be a range, not just a single value. */ + pivot_beg = pivot_end = (beg + end) / 2; + if(map[pivot_end] & 0x40000000) + pivot_end++; + if(map[pivot_beg] & 0x80000000) + pivot_beg--; + + if(codepoint < (map[pivot_beg] & 0x00ffffff)) + end = pivot_beg - 1; + else if(codepoint > (map[pivot_end] & 0x00ffffff)) + beg = pivot_end + 1; + else + return pivot_beg; + } + + return -1; + } + + static int + md_is_unicode_whitespace__(unsigned codepoint) + { +#define R(cp_min, cp_max) ((cp_min) | 0x40000000), ((cp_max) | 0x80000000) +#define S(cp) (cp) + /* Unicode "Zs" category. + * (generated by scripts/build_whitespace_map.py) */ + static const unsigned WHITESPACE_MAP[] = { + S(0x0020), S(0x00a0), S(0x1680), R(0x2000,0x200a), S(0x202f), S(0x205f), S(0x3000) + }; +#undef R +#undef S + + /* The ASCII ones are the most frequently used ones, also CommonMark + * specification requests few more in this range. */ + if(codepoint <= 0x7f) + return ISWHITESPACE_(codepoint); + + return (md_unicode_bsearch__(codepoint, WHITESPACE_MAP, SIZEOF_ARRAY(WHITESPACE_MAP)) >= 0); + } + + static int + md_is_unicode_punct__(unsigned codepoint) + { +#define R(cp_min, cp_max) ((cp_min) | 0x40000000), ((cp_max) | 0x80000000) +#define S(cp) (cp) + /* Unicode "Pc", "Pd", "Pe", "Pf", "Pi", "Po", "Ps" categories. 
+ * (generated by scripts/build_punct_map.py) */ + static const unsigned PUNCT_MAP[] = { + R(0x0021,0x0023), R(0x0025,0x002a), R(0x002c,0x002f), R(0x003a,0x003b), R(0x003f,0x0040), + R(0x005b,0x005d), S(0x005f), S(0x007b), S(0x007d), S(0x00a1), S(0x00a7), S(0x00ab), R(0x00b6,0x00b7), + S(0x00bb), S(0x00bf), S(0x037e), S(0x0387), R(0x055a,0x055f), R(0x0589,0x058a), S(0x05be), S(0x05c0), + S(0x05c3), S(0x05c6), R(0x05f3,0x05f4), R(0x0609,0x060a), R(0x060c,0x060d), S(0x061b), R(0x061e,0x061f), + R(0x066a,0x066d), S(0x06d4), R(0x0700,0x070d), R(0x07f7,0x07f9), R(0x0830,0x083e), S(0x085e), + R(0x0964,0x0965), S(0x0970), S(0x09fd), S(0x0a76), S(0x0af0), S(0x0c77), S(0x0c84), S(0x0df4), S(0x0e4f), + R(0x0e5a,0x0e5b), R(0x0f04,0x0f12), S(0x0f14), R(0x0f3a,0x0f3d), S(0x0f85), R(0x0fd0,0x0fd4), + R(0x0fd9,0x0fda), R(0x104a,0x104f), S(0x10fb), R(0x1360,0x1368), S(0x1400), S(0x166e), R(0x169b,0x169c), + R(0x16eb,0x16ed), R(0x1735,0x1736), R(0x17d4,0x17d6), R(0x17d8,0x17da), R(0x1800,0x180a), + R(0x1944,0x1945), R(0x1a1e,0x1a1f), R(0x1aa0,0x1aa6), R(0x1aa8,0x1aad), R(0x1b5a,0x1b60), + R(0x1bfc,0x1bff), R(0x1c3b,0x1c3f), R(0x1c7e,0x1c7f), R(0x1cc0,0x1cc7), S(0x1cd3), R(0x2010,0x2027), + R(0x2030,0x2043), R(0x2045,0x2051), R(0x2053,0x205e), R(0x207d,0x207e), R(0x208d,0x208e), + R(0x2308,0x230b), R(0x2329,0x232a), R(0x2768,0x2775), R(0x27c5,0x27c6), R(0x27e6,0x27ef), + R(0x2983,0x2998), R(0x29d8,0x29db), R(0x29fc,0x29fd), R(0x2cf9,0x2cfc), R(0x2cfe,0x2cff), S(0x2d70), + R(0x2e00,0x2e2e), R(0x2e30,0x2e4f), S(0x2e52), R(0x3001,0x3003), R(0x3008,0x3011), R(0x3014,0x301f), + S(0x3030), S(0x303d), S(0x30a0), S(0x30fb), R(0xa4fe,0xa4ff), R(0xa60d,0xa60f), S(0xa673), S(0xa67e), + R(0xa6f2,0xa6f7), R(0xa874,0xa877), R(0xa8ce,0xa8cf), R(0xa8f8,0xa8fa), S(0xa8fc), R(0xa92e,0xa92f), + S(0xa95f), R(0xa9c1,0xa9cd), R(0xa9de,0xa9df), R(0xaa5c,0xaa5f), R(0xaade,0xaadf), R(0xaaf0,0xaaf1), + S(0xabeb), R(0xfd3e,0xfd3f), R(0xfe10,0xfe19), R(0xfe30,0xfe52), R(0xfe54,0xfe61), S(0xfe63), S(0xfe68), + 
R(0xfe6a,0xfe6b), R(0xff01,0xff03), R(0xff05,0xff0a), R(0xff0c,0xff0f), R(0xff1a,0xff1b), + R(0xff1f,0xff20), R(0xff3b,0xff3d), S(0xff3f), S(0xff5b), S(0xff5d), R(0xff5f,0xff65), R(0x10100,0x10102), + S(0x1039f), S(0x103d0), S(0x1056f), S(0x10857), S(0x1091f), S(0x1093f), R(0x10a50,0x10a58), S(0x10a7f), + R(0x10af0,0x10af6), R(0x10b39,0x10b3f), R(0x10b99,0x10b9c), S(0x10ead), R(0x10f55,0x10f59), + R(0x11047,0x1104d), R(0x110bb,0x110bc), R(0x110be,0x110c1), R(0x11140,0x11143), R(0x11174,0x11175), + R(0x111c5,0x111c8), S(0x111cd), S(0x111db), R(0x111dd,0x111df), R(0x11238,0x1123d), S(0x112a9), + R(0x1144b,0x1144f), R(0x1145a,0x1145b), S(0x1145d), S(0x114c6), R(0x115c1,0x115d7), R(0x11641,0x11643), + R(0x11660,0x1166c), R(0x1173c,0x1173e), S(0x1183b), R(0x11944,0x11946), S(0x119e2), R(0x11a3f,0x11a46), + R(0x11a9a,0x11a9c), R(0x11a9e,0x11aa2), R(0x11c41,0x11c45), R(0x11c70,0x11c71), R(0x11ef7,0x11ef8), + S(0x11fff), R(0x12470,0x12474), R(0x16a6e,0x16a6f), S(0x16af5), R(0x16b37,0x16b3b), S(0x16b44), + R(0x16e97,0x16e9a), S(0x16fe2), S(0x1bc9f), R(0x1da87,0x1da8b), R(0x1e95e,0x1e95f) + }; +#undef R +#undef S + + /* The ASCII ones are the most frequently used ones, also CommonMark + * specification requests few more in this range. */ + if(codepoint <= 0x7f) + return ISPUNCT_(codepoint); + + return (md_unicode_bsearch__(codepoint, PUNCT_MAP, SIZEOF_ARRAY(PUNCT_MAP)) >= 0); + } + + static void + md_get_unicode_fold_info(unsigned codepoint, MD_UNICODE_FOLD_INFO* info) + { +#define R(cp_min, cp_max) ((cp_min) | 0x40000000), ((cp_max) | 0x80000000) +#define S(cp) (cp) + /* Unicode "Pc", "Pd", "Pe", "Pf", "Pi", "Po", "Ps" categories. 
+ * (generated by scripts/build_folding_map.py) */ + static const unsigned FOLD_MAP_1[] = { + R(0x0041,0x005a), S(0x00b5), R(0x00c0,0x00d6), R(0x00d8,0x00de), R(0x0100,0x012e), R(0x0132,0x0136), + R(0x0139,0x0147), R(0x014a,0x0176), S(0x0178), R(0x0179,0x017d), S(0x017f), S(0x0181), S(0x0182), + S(0x0184), S(0x0186), S(0x0187), S(0x0189), S(0x018a), S(0x018b), S(0x018e), S(0x018f), S(0x0190), + S(0x0191), S(0x0193), S(0x0194), S(0x0196), S(0x0197), S(0x0198), S(0x019c), S(0x019d), S(0x019f), + R(0x01a0,0x01a4), S(0x01a6), S(0x01a7), S(0x01a9), S(0x01ac), S(0x01ae), S(0x01af), S(0x01b1), S(0x01b2), + S(0x01b3), S(0x01b5), S(0x01b7), S(0x01b8), S(0x01bc), S(0x01c4), S(0x01c5), S(0x01c7), S(0x01c8), + S(0x01ca), R(0x01cb,0x01db), R(0x01de,0x01ee), S(0x01f1), S(0x01f2), S(0x01f4), S(0x01f6), S(0x01f7), + R(0x01f8,0x021e), S(0x0220), R(0x0222,0x0232), S(0x023a), S(0x023b), S(0x023d), S(0x023e), S(0x0241), + S(0x0243), S(0x0244), S(0x0245), R(0x0246,0x024e), S(0x0345), S(0x0370), S(0x0372), S(0x0376), S(0x037f), + S(0x0386), R(0x0388,0x038a), S(0x038c), S(0x038e), S(0x038f), R(0x0391,0x03a1), R(0x03a3,0x03ab), + S(0x03c2), S(0x03cf), S(0x03d0), S(0x03d1), S(0x03d5), S(0x03d6), R(0x03d8,0x03ee), S(0x03f0), S(0x03f1), + S(0x03f4), S(0x03f5), S(0x03f7), S(0x03f9), S(0x03fa), R(0x03fd,0x03ff), R(0x0400,0x040f), + R(0x0410,0x042f), R(0x0460,0x0480), R(0x048a,0x04be), S(0x04c0), R(0x04c1,0x04cd), R(0x04d0,0x052e), + R(0x0531,0x0556), R(0x10a0,0x10c5), S(0x10c7), S(0x10cd), R(0x13f8,0x13fd), S(0x1c80), S(0x1c81), + S(0x1c82), S(0x1c83), S(0x1c84), S(0x1c85), S(0x1c86), S(0x1c87), S(0x1c88), R(0x1c90,0x1cba), + R(0x1cbd,0x1cbf), R(0x1e00,0x1e94), S(0x1e9b), R(0x1ea0,0x1efe), R(0x1f08,0x1f0f), R(0x1f18,0x1f1d), + R(0x1f28,0x1f2f), R(0x1f38,0x1f3f), R(0x1f48,0x1f4d), S(0x1f59), S(0x1f5b), S(0x1f5d), S(0x1f5f), + R(0x1f68,0x1f6f), S(0x1fb8), S(0x1fb9), S(0x1fba), S(0x1fbb), S(0x1fbe), R(0x1fc8,0x1fcb), S(0x1fd8), + S(0x1fd9), S(0x1fda), S(0x1fdb), S(0x1fe8), S(0x1fe9), S(0x1fea), 
S(0x1feb), S(0x1fec), S(0x1ff8), + S(0x1ff9), S(0x1ffa), S(0x1ffb), S(0x2126), S(0x212a), S(0x212b), S(0x2132), R(0x2160,0x216f), S(0x2183), + R(0x24b6,0x24cf), R(0x2c00,0x2c2e), S(0x2c60), S(0x2c62), S(0x2c63), S(0x2c64), R(0x2c67,0x2c6b), + S(0x2c6d), S(0x2c6e), S(0x2c6f), S(0x2c70), S(0x2c72), S(0x2c75), S(0x2c7e), S(0x2c7f), R(0x2c80,0x2ce2), + S(0x2ceb), S(0x2ced), S(0x2cf2), R(0xa640,0xa66c), R(0xa680,0xa69a), R(0xa722,0xa72e), R(0xa732,0xa76e), + S(0xa779), S(0xa77b), S(0xa77d), R(0xa77e,0xa786), S(0xa78b), S(0xa78d), S(0xa790), S(0xa792), + R(0xa796,0xa7a8), S(0xa7aa), S(0xa7ab), S(0xa7ac), S(0xa7ad), S(0xa7ae), S(0xa7b0), S(0xa7b1), S(0xa7b2), + S(0xa7b3), R(0xa7b4,0xa7be), S(0xa7c2), S(0xa7c4), S(0xa7c5), S(0xa7c6), S(0xa7c7), S(0xa7c9), S(0xa7f5), + R(0xab70,0xabbf), R(0xff21,0xff3a), R(0x10400,0x10427), R(0x104b0,0x104d3), R(0x10c80,0x10cb2), + R(0x118a0,0x118bf), R(0x16e40,0x16e5f), R(0x1e900,0x1e921) + }; + static const unsigned FOLD_MAP_1_DATA[] = { + 0x0061, 0x007a, 0x03bc, 0x00e0, 0x00f6, 0x00f8, 0x00fe, 0x0101, 0x012f, 0x0133, 0x0137, 0x013a, 0x0148, + 0x014b, 0x0177, 0x00ff, 0x017a, 0x017e, 0x0073, 0x0253, 0x0183, 0x0185, 0x0254, 0x0188, 0x0256, 0x0257, + 0x018c, 0x01dd, 0x0259, 0x025b, 0x0192, 0x0260, 0x0263, 0x0269, 0x0268, 0x0199, 0x026f, 0x0272, 0x0275, + 0x01a1, 0x01a5, 0x0280, 0x01a8, 0x0283, 0x01ad, 0x0288, 0x01b0, 0x028a, 0x028b, 0x01b4, 0x01b6, 0x0292, + 0x01b9, 0x01bd, 0x01c6, 0x01c6, 0x01c9, 0x01c9, 0x01cc, 0x01cc, 0x01dc, 0x01df, 0x01ef, 0x01f3, 0x01f3, + 0x01f5, 0x0195, 0x01bf, 0x01f9, 0x021f, 0x019e, 0x0223, 0x0233, 0x2c65, 0x023c, 0x019a, 0x2c66, 0x0242, + 0x0180, 0x0289, 0x028c, 0x0247, 0x024f, 0x03b9, 0x0371, 0x0373, 0x0377, 0x03f3, 0x03ac, 0x03ad, 0x03af, + 0x03cc, 0x03cd, 0x03ce, 0x03b1, 0x03c1, 0x03c3, 0x03cb, 0x03c3, 0x03d7, 0x03b2, 0x03b8, 0x03c6, 0x03c0, + 0x03d9, 0x03ef, 0x03ba, 0x03c1, 0x03b8, 0x03b5, 0x03f8, 0x03f2, 0x03fb, 0x037b, 0x037d, 0x0450, 0x045f, + 0x0430, 0x044f, 0x0461, 0x0481, 0x048b, 0x04bf, 0x04cf, 0x04c2, 
0x04ce, 0x04d1, 0x052f, 0x0561, 0x0586, + 0x2d00, 0x2d25, 0x2d27, 0x2d2d, 0x13f0, 0x13f5, 0x0432, 0x0434, 0x043e, 0x0441, 0x0442, 0x0442, 0x044a, + 0x0463, 0xa64b, 0x10d0, 0x10fa, 0x10fd, 0x10ff, 0x1e01, 0x1e95, 0x1e61, 0x1ea1, 0x1eff, 0x1f00, 0x1f07, + 0x1f10, 0x1f15, 0x1f20, 0x1f27, 0x1f30, 0x1f37, 0x1f40, 0x1f45, 0x1f51, 0x1f53, 0x1f55, 0x1f57, 0x1f60, + 0x1f67, 0x1fb0, 0x1fb1, 0x1f70, 0x1f71, 0x03b9, 0x1f72, 0x1f75, 0x1fd0, 0x1fd1, 0x1f76, 0x1f77, 0x1fe0, + 0x1fe1, 0x1f7a, 0x1f7b, 0x1fe5, 0x1f78, 0x1f79, 0x1f7c, 0x1f7d, 0x03c9, 0x006b, 0x00e5, 0x214e, 0x2170, + 0x217f, 0x2184, 0x24d0, 0x24e9, 0x2c30, 0x2c5e, 0x2c61, 0x026b, 0x1d7d, 0x027d, 0x2c68, 0x2c6c, 0x0251, + 0x0271, 0x0250, 0x0252, 0x2c73, 0x2c76, 0x023f, 0x0240, 0x2c81, 0x2ce3, 0x2cec, 0x2cee, 0x2cf3, 0xa641, + 0xa66d, 0xa681, 0xa69b, 0xa723, 0xa72f, 0xa733, 0xa76f, 0xa77a, 0xa77c, 0x1d79, 0xa77f, 0xa787, 0xa78c, + 0x0265, 0xa791, 0xa793, 0xa797, 0xa7a9, 0x0266, 0x025c, 0x0261, 0x026c, 0x026a, 0x029e, 0x0287, 0x029d, + 0xab53, 0xa7b5, 0xa7bf, 0xa7c3, 0xa794, 0x0282, 0x1d8e, 0xa7c8, 0xa7ca, 0xa7f6, 0x13a0, 0x13ef, 0xff41, + 0xff5a, 0x10428, 0x1044f, 0x104d8, 0x104fb, 0x10cc0, 0x10cf2, 0x118c0, 0x118df, 0x16e60, 0x16e7f, 0x1e922, + 0x1e943 + }; + static const unsigned FOLD_MAP_2[] = { + S(0x00df), S(0x0130), S(0x0149), S(0x01f0), S(0x0587), S(0x1e96), S(0x1e97), S(0x1e98), S(0x1e99), + S(0x1e9a), S(0x1e9e), S(0x1f50), R(0x1f80,0x1f87), R(0x1f88,0x1f8f), R(0x1f90,0x1f97), R(0x1f98,0x1f9f), + R(0x1fa0,0x1fa7), R(0x1fa8,0x1faf), S(0x1fb2), S(0x1fb3), S(0x1fb4), S(0x1fb6), S(0x1fbc), S(0x1fc2), + S(0x1fc3), S(0x1fc4), S(0x1fc6), S(0x1fcc), S(0x1fd6), S(0x1fe4), S(0x1fe6), S(0x1ff2), S(0x1ff3), + S(0x1ff4), S(0x1ff6), S(0x1ffc), S(0xfb00), S(0xfb01), S(0xfb02), S(0xfb05), S(0xfb06), S(0xfb13), + S(0xfb14), S(0xfb15), S(0xfb16), S(0xfb17) + }; + static const unsigned FOLD_MAP_2_DATA[] = { + 0x0073,0x0073, 0x0069,0x0307, 0x02bc,0x006e, 0x006a,0x030c, 0x0565,0x0582, 0x0068,0x0331, 0x0074,0x0308, + 0x0077,0x030a, 
0x0079,0x030a, 0x0061,0x02be, 0x0073,0x0073, 0x03c5,0x0313, 0x1f00,0x03b9, 0x1f07,0x03b9, + 0x1f00,0x03b9, 0x1f07,0x03b9, 0x1f20,0x03b9, 0x1f27,0x03b9, 0x1f20,0x03b9, 0x1f27,0x03b9, 0x1f60,0x03b9, + 0x1f67,0x03b9, 0x1f60,0x03b9, 0x1f67,0x03b9, 0x1f70,0x03b9, 0x03b1,0x03b9, 0x03ac,0x03b9, 0x03b1,0x0342, + 0x03b1,0x03b9, 0x1f74,0x03b9, 0x03b7,0x03b9, 0x03ae,0x03b9, 0x03b7,0x0342, 0x03b7,0x03b9, 0x03b9,0x0342, + 0x03c1,0x0313, 0x03c5,0x0342, 0x1f7c,0x03b9, 0x03c9,0x03b9, 0x03ce,0x03b9, 0x03c9,0x0342, 0x03c9,0x03b9, + 0x0066,0x0066, 0x0066,0x0069, 0x0066,0x006c, 0x0073,0x0074, 0x0073,0x0074, 0x0574,0x0576, 0x0574,0x0565, + 0x0574,0x056b, 0x057e,0x0576, 0x0574,0x056d + }; + static const unsigned FOLD_MAP_3[] = { + S(0x0390), S(0x03b0), S(0x1f52), S(0x1f54), S(0x1f56), S(0x1fb7), S(0x1fc7), S(0x1fd2), S(0x1fd3), + S(0x1fd7), S(0x1fe2), S(0x1fe3), S(0x1fe7), S(0x1ff7), S(0xfb03), S(0xfb04) + }; + static const unsigned FOLD_MAP_3_DATA[] = { + 0x03b9,0x0308,0x0301, 0x03c5,0x0308,0x0301, 0x03c5,0x0313,0x0300, 0x03c5,0x0313,0x0301, + 0x03c5,0x0313,0x0342, 0x03b1,0x0342,0x03b9, 0x03b7,0x0342,0x03b9, 0x03b9,0x0308,0x0300, + 0x03b9,0x0308,0x0301, 0x03b9,0x0308,0x0342, 0x03c5,0x0308,0x0300, 0x03c5,0x0308,0x0301, + 0x03c5,0x0308,0x0342, 0x03c9,0x0342,0x03b9, 0x0066,0x0066,0x0069, 0x0066,0x0066,0x006c + }; +#undef R +#undef S + static const struct { + const unsigned* map; + const unsigned* data; + size_t map_size; + unsigned n_codepoints; + } FOLD_MAP_LIST[] = { + { FOLD_MAP_1, FOLD_MAP_1_DATA, SIZEOF_ARRAY(FOLD_MAP_1), 1 }, + { FOLD_MAP_2, FOLD_MAP_2_DATA, SIZEOF_ARRAY(FOLD_MAP_2), 2 }, + { FOLD_MAP_3, FOLD_MAP_3_DATA, SIZEOF_ARRAY(FOLD_MAP_3), 3 } + }; + + int i; + + /* Fast path for ASCII characters. */ + if(codepoint <= 0x7f) { + info->codepoints[0] = codepoint; + if(ISUPPER_(codepoint)) + info->codepoints[0] += 'a' - 'A'; + info->n_codepoints = 1; + return; + } + + /* Try to locate the codepoint in any of the maps. 
*/ + for(i = 0; i < (int) SIZEOF_ARRAY(FOLD_MAP_LIST); i++) { + int index; + + index = md_unicode_bsearch__(codepoint, FOLD_MAP_LIST[i].map, FOLD_MAP_LIST[i].map_size); + if(index >= 0) { + /* Found the mapping. */ + unsigned n_codepoints = FOLD_MAP_LIST[i].n_codepoints; + const unsigned* map = FOLD_MAP_LIST[i].map; + const unsigned* codepoints = FOLD_MAP_LIST[i].data + (index * n_codepoints); + + memcpy(info->codepoints, codepoints, sizeof(unsigned) * n_codepoints); + info->n_codepoints = n_codepoints; + + if(FOLD_MAP_LIST[i].map[index] != codepoint) { + /* The found mapping maps whole range of codepoints, + * i.e. we have to offset info->codepoints[0] accordingly. */ + if((map[index] & 0x00ffffff)+1 == codepoints[0]) { + /* Alternating type of the range. */ + info->codepoints[0] = codepoint + ((codepoint & 0x1) == (map[index] & 0x1) ? 1 : 0); + } else { + /* Range to range kind of mapping. */ + info->codepoints[0] += (codepoint - (map[index] & 0x00ffffff)); + } + } + + return; + } + } + + /* No mapping found. Map the codepoint to itself. 
*/ + info->codepoints[0] = codepoint; + info->n_codepoints = 1; + } +#endif + + +#if defined MD4C_USE_UTF16 + #define IS_UTF16_SURROGATE_HI(word) (((WORD)(word) & 0xfc00) == 0xd800) + #define IS_UTF16_SURROGATE_LO(word) (((WORD)(word) & 0xfc00) == 0xdc00) + #define UTF16_DECODE_SURROGATE(hi, lo) (0x10000 + ((((unsigned)(hi) & 0x3ff) << 10) | (((unsigned)(lo) & 0x3ff) << 0))) + + static unsigned + md_decode_utf16le__(const CHAR* str, SZ str_size, SZ* p_size) + { + if(IS_UTF16_SURROGATE_HI(str[0])) { + if(1 < str_size && IS_UTF16_SURROGATE_LO(str[1])) { + if(p_size != NULL) + *p_size = 2; + return UTF16_DECODE_SURROGATE(str[0], str[1]); + } + } + + if(p_size != NULL) + *p_size = 1; + return str[0]; + } + + static unsigned + md_decode_utf16le_before__(MD_CTX* ctx, OFF off) + { + if(off > 2 && IS_UTF16_SURROGATE_HI(CH(off-2)) && IS_UTF16_SURROGATE_LO(CH(off-1))) + return UTF16_DECODE_SURROGATE(CH(off-2), CH(off-1)); + + return CH(off); + } + + /* No whitespace uses surrogates, so no decoding needed here. 
*/ + #define ISUNICODEWHITESPACE_(codepoint) md_is_unicode_whitespace__(codepoint) + #define ISUNICODEWHITESPACE(off) md_is_unicode_whitespace__(CH(off)) + #define ISUNICODEWHITESPACEBEFORE(off) md_is_unicode_whitespace__(CH((off)-1)) + + #define ISUNICODEPUNCT(off) md_is_unicode_punct__(md_decode_utf16le__(STR(off), ctx->size - (off), NULL)) + #define ISUNICODEPUNCTBEFORE(off) md_is_unicode_punct__(md_decode_utf16le_before__(ctx, off)) + + static inline int + md_decode_unicode(const CHAR* str, OFF off, SZ str_size, SZ* p_char_size) + { + return md_decode_utf16le__(str+off, str_size-off, p_char_size); + } +#elif defined MD4C_USE_UTF8 + #define IS_UTF8_LEAD1(byte) ((unsigned char)(byte) <= 0x7f) + #define IS_UTF8_LEAD2(byte) (((unsigned char)(byte) & 0xe0) == 0xc0) + #define IS_UTF8_LEAD3(byte) (((unsigned char)(byte) & 0xf0) == 0xe0) + #define IS_UTF8_LEAD4(byte) (((unsigned char)(byte) & 0xf8) == 0xf0) + #define IS_UTF8_TAIL(byte) (((unsigned char)(byte) & 0xc0) == 0x80) + + static unsigned + md_decode_utf8__(const CHAR* str, SZ str_size, SZ* p_size) + { + if(!IS_UTF8_LEAD1(str[0])) { + if(IS_UTF8_LEAD2(str[0])) { + if(1 < str_size && IS_UTF8_TAIL(str[1])) { + if(p_size != NULL) + *p_size = 2; + + return (((unsigned int)str[0] & 0x1f) << 6) | + (((unsigned int)str[1] & 0x3f) << 0); + } + } else if(IS_UTF8_LEAD3(str[0])) { + if(2 < str_size && IS_UTF8_TAIL(str[1]) && IS_UTF8_TAIL(str[2])) { + if(p_size != NULL) + *p_size = 3; + + return (((unsigned int)str[0] & 0x0f) << 12) | + (((unsigned int)str[1] & 0x3f) << 6) | + (((unsigned int)str[2] & 0x3f) << 0); + } + } else if(IS_UTF8_LEAD4(str[0])) { + if(3 < str_size && IS_UTF8_TAIL(str[1]) && IS_UTF8_TAIL(str[2]) && IS_UTF8_TAIL(str[3])) { + if(p_size != NULL) + *p_size = 4; + + return (((unsigned int)str[0] & 0x07) << 18) | + (((unsigned int)str[1] & 0x3f) << 12) | + (((unsigned int)str[2] & 0x3f) << 6) | + (((unsigned int)str[3] & 0x3f) << 0); + } + } + } + + if(p_size != NULL) + *p_size = 1; + return (unsigned) 
str[0]; + } + + static unsigned + md_decode_utf8_before__(MD_CTX* ctx, OFF off) + { + if(!IS_UTF8_LEAD1(CH(off-1))) { + if(off > 1 && IS_UTF8_LEAD2(CH(off-2)) && IS_UTF8_TAIL(CH(off-1))) + return (((unsigned int)CH(off-2) & 0x1f) << 6) | + (((unsigned int)CH(off-1) & 0x3f) << 0); + + if(off > 2 && IS_UTF8_LEAD3(CH(off-3)) && IS_UTF8_TAIL(CH(off-2)) && IS_UTF8_TAIL(CH(off-1))) + return (((unsigned int)CH(off-3) & 0x0f) << 12) | + (((unsigned int)CH(off-2) & 0x3f) << 6) | + (((unsigned int)CH(off-1) & 0x3f) << 0); + + if(off > 3 && IS_UTF8_LEAD4(CH(off-4)) && IS_UTF8_TAIL(CH(off-3)) && IS_UTF8_TAIL(CH(off-2)) && IS_UTF8_TAIL(CH(off-1))) + return (((unsigned int)CH(off-4) & 0x07) << 18) | + (((unsigned int)CH(off-3) & 0x3f) << 12) | + (((unsigned int)CH(off-2) & 0x3f) << 6) | + (((unsigned int)CH(off-1) & 0x3f) << 0); + } + + return (unsigned) CH(off-1); + } + + #define ISUNICODEWHITESPACE_(codepoint) md_is_unicode_whitespace__(codepoint) + #define ISUNICODEWHITESPACE(off) md_is_unicode_whitespace__(md_decode_utf8__(STR(off), ctx->size - (off), NULL)) + #define ISUNICODEWHITESPACEBEFORE(off) md_is_unicode_whitespace__(md_decode_utf8_before__(ctx, off)) + + #define ISUNICODEPUNCT(off) md_is_unicode_punct__(md_decode_utf8__(STR(off), ctx->size - (off), NULL)) + #define ISUNICODEPUNCTBEFORE(off) md_is_unicode_punct__(md_decode_utf8_before__(ctx, off)) + + static inline unsigned + md_decode_unicode(const CHAR* str, OFF off, SZ str_size, SZ* p_char_size) + { + return md_decode_utf8__(str+off, str_size-off, p_char_size); + } +#else + #define ISUNICODEWHITESPACE_(codepoint) ISWHITESPACE_(codepoint) + #define ISUNICODEWHITESPACE(off) ISWHITESPACE(off) + #define ISUNICODEWHITESPACEBEFORE(off) ISWHITESPACE((off)-1) + + #define ISUNICODEPUNCT(off) ISPUNCT(off) + #define ISUNICODEPUNCTBEFORE(off) ISPUNCT((off)-1) + + static inline void + md_get_unicode_fold_info(unsigned codepoint, MD_UNICODE_FOLD_INFO* info) + { + info->codepoints[0] = codepoint; + if(ISUPPER_(codepoint)) + 
info->codepoints[0] += 'a' - 'A'; + info->n_codepoints = 1; + } + + static inline unsigned + md_decode_unicode(const CHAR* str, OFF off, SZ str_size, SZ* p_size) + { + *p_size = 1; + return (unsigned) str[off]; + } +#endif + + +/************************************* + *** Helper string manipulations *** + *************************************/ + +/* Fill buffer with copy of the string between 'beg' and 'end' but replace any + * line breaks with given replacement character. + * + * NOTE: Caller is responsible to make sure the buffer is large enough. + * (Given the output is always shorter then input, (end - beg) is good idea + * what the caller should allocate.) + */ +static void +md_merge_lines(MD_CTX* ctx, OFF beg, OFF end, const MD_LINE* lines, int n_lines, + CHAR line_break_replacement_char, CHAR* buffer, SZ* p_size) +{ + CHAR* ptr = buffer; + int line_index = 0; + OFF off = beg; + + MD_UNUSED(n_lines); + + while(1) { + const MD_LINE* line = &lines[line_index]; + OFF line_end = line->end; + if(end < line_end) + line_end = end; + + while(off < line_end) { + *ptr = CH(off); + ptr++; + off++; + } + + if(off >= end) { + *p_size = (MD_SIZE)(ptr - buffer); + return; + } + + *ptr = line_break_replacement_char; + ptr++; + + line_index++; + off = lines[line_index].beg; + } +} + +/* Wrapper of md_merge_lines() which allocates new buffer for the output string. 
+ */ +static int +md_merge_lines_alloc(MD_CTX* ctx, OFF beg, OFF end, const MD_LINE* lines, int n_lines, + CHAR line_break_replacement_char, CHAR** p_str, SZ* p_size) +{ + CHAR* buffer; + + buffer = (CHAR*) malloc(sizeof(CHAR) * (end - beg)); + if(buffer == NULL) { + MD_LOG("malloc() failed."); + return -1; + } + + md_merge_lines(ctx, beg, end, lines, n_lines, + line_break_replacement_char, buffer, p_size); + + *p_str = buffer; + return 0; +} + +static OFF +md_skip_unicode_whitespace(const CHAR* label, OFF off, SZ size) +{ + SZ char_size; + unsigned codepoint; + + while(off < size) { + codepoint = md_decode_unicode(label, off, size, &char_size); + if(!ISUNICODEWHITESPACE_(codepoint) && !ISNEWLINE_(label[off])) + break; + off += char_size; + } + + return off; +} + + +/****************************** + *** Recognizing raw HTML *** + ******************************/ + +/* md_is_html_tag() may be called when processing inlines (inline raw HTML) + * or when breaking document to blocks (checking for start of HTML block type 7). + * + * When breaking document to blocks, we do not yet know line boundaries, but + * in that case the whole tag has to live on a single line. We distinguish this + * by n_lines == 0. + */ +static int +md_is_html_tag(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end) +{ + int attr_state; + OFF off = beg; + OFF line_end = (n_lines > 0) ? lines[0].end : ctx->size; + int i = 0; + + MD_ASSERT(CH(beg) == _T('<')); + + if(off + 1 >= line_end) + return FALSE; + off++; + + /* For parsing attributes, we need a little state automaton below. + * State -1: no attributes are allowed. + * State 0: attribute could follow after some whitespace. + * State 1: after a whitespace (attribute name may follow). + * State 2: after attribute name ('=' MAY follow). + * State 3: after '=' (value specification MUST follow). + * State 41: in middle of unquoted attribute value. + * State 42: in middle of single-quoted attribute value. 
+ * State 43: in middle of double-quoted attribute value. + */ + attr_state = 0; + + if(CH(off) == _T('/')) { + /* Closer tag "</ ... >". No attributes may be present. */ + attr_state = -1; + off++; + } + + /* Tag name */ + if(off >= line_end || !ISALPHA(off)) + return FALSE; + off++; + while(off < line_end && (ISALNUM(off) || CH(off) == _T('-'))) + off++; + + /* (Optional) attributes (if not closer), (optional) '/' (if not closer) + * and final '>'. */ + while(1) { + while(off < line_end && !ISNEWLINE(off)) { + if(attr_state > 40) { + if(attr_state == 41 && (ISBLANK(off) || ISANYOF(off, _T("\"'=<>`")))) { + attr_state = 0; + off--; /* Put the char back for re-inspection in the new state. */ + } else if(attr_state == 42 && CH(off) == _T('\'')) { + attr_state = 0; + } else if(attr_state == 43 && CH(off) == _T('"')) { + attr_state = 0; + } + off++; + } else if(ISWHITESPACE(off)) { + if(attr_state == 0) + attr_state = 1; + off++; + } else if(attr_state <= 2 && CH(off) == _T('>')) { + /* End. */ + goto done; + } else if(attr_state <= 2 && CH(off) == _T('/') && off+1 < line_end && CH(off+1) == _T('>')) { + /* End with digraph '/>' */ + off++; + goto done; + } else if((attr_state == 1 || attr_state == 2) && (ISALPHA(off) || CH(off) == _T('_') || CH(off) == _T(':'))) { + off++; + /* Attribute name */ + while(off < line_end && (ISALNUM(off) || ISANYOF(off, _T("_.:-")))) + off++; + attr_state = 2; + } else if(attr_state == 2 && CH(off) == _T('=')) { + /* Attribute assignment sign */ + off++; + attr_state = 3; + } else if(attr_state == 3) { + /* Expecting start of attribute value. */ + if(CH(off) == _T('"')) + attr_state = 43; + else if(CH(off) == _T('\'')) + attr_state = 42; + else if(!ISANYOF(off, _T("\"'=<>`")) && !ISNEWLINE(off)) + attr_state = 41; + else + return FALSE; + off++; + } else { + /* Anything unexpected. */ + return FALSE; + } + } + + /* We have to be on a single line. See definition of start condition + * of HTML block, type 7. 
*/ + if(n_lines == 0) + return FALSE; + + i++; + if(i >= n_lines) + return FALSE; + + off = lines[i].beg; + line_end = lines[i].end; + + if(attr_state == 0 || attr_state == 41) + attr_state = 1; + + if(off >= max_end) + return FALSE; + } + +done: + if(off >= max_end) + return FALSE; + + *p_end = off+1; + return TRUE; +} + +static int +md_scan_for_html_closer(MD_CTX* ctx, const MD_CHAR* str, MD_SIZE len, + const MD_LINE* lines, int n_lines, + OFF beg, OFF max_end, OFF* p_end, + OFF* p_scan_horizon) +{ + OFF off = beg; + int i = 0; + + if(off < *p_scan_horizon && *p_scan_horizon >= max_end - len) { + /* We have already scanned the range up to the max_end so we know + * there is nothing to see. */ + return FALSE; + } + + while(TRUE) { + while(off + len <= lines[i].end && off + len <= max_end) { + if(md_ascii_eq(STR(off), str, len)) { + /* Success. */ + *p_end = off + len; + return TRUE; + } + off++; + } + + i++; + if(off >= max_end || i >= n_lines) { + /* Failure. */ + *p_scan_horizon = off; + return FALSE; + } + + off = lines[i].beg; + } +} + +static int +md_is_html_comment(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end) +{ + OFF off = beg; + + MD_ASSERT(CH(beg) == _T('<')); + + if(off + 4 >= lines[0].end) + return FALSE; + if(CH(off+1) != _T('!') || CH(off+2) != _T('-') || CH(off+3) != _T('-')) + return FALSE; + off += 4; + + /* ">" and "->" must not follow the opening. */ + if(off < lines[0].end && CH(off) == _T('>')) + return FALSE; + if(off+1 < lines[0].end && CH(off) == _T('-') && CH(off+1) == _T('>')) + return FALSE; + + /* HTML comment must not contain "--", so we scan just for "--" instead + * of "-->" and verify manually that '>' follows. 
*/ + if(md_scan_for_html_closer(ctx, _T("--"), 2, + lines, n_lines, off, max_end, p_end, &ctx->html_comment_horizon)) + { + if(*p_end < max_end && CH(*p_end) == _T('>')) { + *p_end = *p_end + 1; + return TRUE; + } + } + + return FALSE; +} + +static int +md_is_html_processing_instruction(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end) +{ + OFF off = beg; + + if(off + 2 >= lines[0].end) + return FALSE; + if(CH(off+1) != _T('?')) + return FALSE; + off += 2; + + return md_scan_for_html_closer(ctx, _T("?>"), 2, + lines, n_lines, off, max_end, p_end, &ctx->html_proc_instr_horizon); +} + +static int +md_is_html_declaration(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end) +{ + OFF off = beg; + + if(off + 2 >= lines[0].end) + return FALSE; + if(CH(off+1) != _T('!')) + return FALSE; + off += 2; + + /* Declaration name. */ + if(off >= lines[0].end || !ISALPHA(off)) + return FALSE; + off++; + while(off < lines[0].end && ISALPHA(off)) + off++; + if(off < lines[0].end && !ISWHITESPACE(off)) + return FALSE; + + return md_scan_for_html_closer(ctx, _T(">"), 1, + lines, n_lines, off, max_end, p_end, &ctx->html_decl_horizon); +} + +static int +md_is_html_cdata(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end) +{ + static const CHAR open_str[] = _T("<![CDATA["); + static const SZ open_size = SIZEOF_ARRAY(open_str) - 1; + + OFF off = beg; + + if(off + open_size >= lines[0].end) + return FALSE; + if(memcmp(STR(off), open_str, open_size) != 0) + return FALSE; + off += open_size; + + if(lines[n_lines-1].end < max_end) + max_end = lines[n_lines-1].end - 2; + + return md_scan_for_html_closer(ctx, _T("]]>"), 3, + lines, n_lines, off, max_end, p_end, &ctx->html_cdata_horizon); +} + +static int +md_is_html_any(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end) +{ + MD_ASSERT(CH(beg) == _T('<')); + return (md_is_html_tag(ctx, lines, n_lines, beg, max_end, p_end) || + 
md_is_html_comment(ctx, lines, n_lines, beg, max_end, p_end) || + md_is_html_processing_instruction(ctx, lines, n_lines, beg, max_end, p_end) || + md_is_html_declaration(ctx, lines, n_lines, beg, max_end, p_end) || + md_is_html_cdata(ctx, lines, n_lines, beg, max_end, p_end)); +} + + +/**************************** + *** Recognizing Entity *** + ****************************/ + +static int +md_is_hex_entity_contents(MD_CTX* ctx, const CHAR* text, OFF beg, OFF max_end, OFF* p_end) +{ + OFF off = beg; + MD_UNUSED(ctx); + + while(off < max_end && ISXDIGIT_(text[off]) && off - beg <= 8) + off++; + + if(1 <= off - beg && off - beg <= 6) { + *p_end = off; + return TRUE; + } else { + return FALSE; + } +} + +static int +md_is_dec_entity_contents(MD_CTX* ctx, const CHAR* text, OFF beg, OFF max_end, OFF* p_end) +{ + OFF off = beg; + MD_UNUSED(ctx); + + while(off < max_end && ISDIGIT_(text[off]) && off - beg <= 8) + off++; + + if(1 <= off - beg && off - beg <= 7) { + *p_end = off; + return TRUE; + } else { + return FALSE; + } +} + +static int +md_is_named_entity_contents(MD_CTX* ctx, const CHAR* text, OFF beg, OFF max_end, OFF* p_end) +{ + OFF off = beg; + MD_UNUSED(ctx); + + if(off < max_end && ISALPHA_(text[off])) + off++; + else + return FALSE; + + while(off < max_end && ISALNUM_(text[off]) && off - beg <= 48) + off++; + + if(2 <= off - beg && off - beg <= 48) { + *p_end = off; + return TRUE; + } else { + return FALSE; + } +} + +static int +md_is_entity_str(MD_CTX* ctx, const CHAR* text, OFF beg, OFF max_end, OFF* p_end) +{ + int is_contents; + OFF off = beg; + + MD_ASSERT(text[off] == _T('&')); + off++; + + if(off+2 < max_end && text[off] == _T('#') && (text[off+1] == _T('x') || text[off+1] == _T('X'))) + is_contents = md_is_hex_entity_contents(ctx, text, off+2, max_end, &off); + else if(off+1 < max_end && text[off] == _T('#')) + is_contents = md_is_dec_entity_contents(ctx, text, off+1, max_end, &off); + else + is_contents = md_is_named_entity_contents(ctx, text, off, 
max_end, &off); + + if(is_contents && off < max_end && text[off] == _T(';')) { + *p_end = off+1; + return TRUE; + } else { + return FALSE; + } +} + +static inline int +md_is_entity(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end) +{ + return md_is_entity_str(ctx, ctx->text, beg, max_end, p_end); +} + + +/****************************** + *** Attribute Management *** + ******************************/ + +typedef struct MD_ATTRIBUTE_BUILD_tag MD_ATTRIBUTE_BUILD; +struct MD_ATTRIBUTE_BUILD_tag { + CHAR* text; + MD_TEXTTYPE* substr_types; + OFF* substr_offsets; + int substr_count; + int substr_alloc; + MD_TEXTTYPE trivial_types[1]; + OFF trivial_offsets[2]; +}; + + +#define MD_BUILD_ATTR_NO_ESCAPES 0x0001 + +static int +md_build_attr_append_substr(MD_CTX* ctx, MD_ATTRIBUTE_BUILD* build, + MD_TEXTTYPE type, OFF off) +{ + if(build->substr_count >= build->substr_alloc) { + MD_TEXTTYPE* new_substr_types; + OFF* new_substr_offsets; + + build->substr_alloc = (build->substr_alloc > 0 + ? build->substr_alloc + build->substr_alloc / 2 + : 8); + new_substr_types = (MD_TEXTTYPE*) realloc(build->substr_types, + build->substr_alloc * sizeof(MD_TEXTTYPE)); + if(new_substr_types == NULL) { + MD_LOG("realloc() failed."); + return -1; + } + /* Note +1 to reserve space for final offset (== raw_size). 
*/ + new_substr_offsets = (OFF*) realloc(build->substr_offsets, + (build->substr_alloc+1) * sizeof(OFF)); + if(new_substr_offsets == NULL) { + MD_LOG("realloc() failed."); + free(new_substr_types); + return -1; + } + + build->substr_types = new_substr_types; + build->substr_offsets = new_substr_offsets; + } + + build->substr_types[build->substr_count] = type; + build->substr_offsets[build->substr_count] = off; + build->substr_count++; + return 0; +} + +static void +md_free_attribute(MD_CTX* ctx, MD_ATTRIBUTE_BUILD* build) +{ + MD_UNUSED(ctx); + + if(build->substr_alloc > 0) { + free(build->text); + free(build->substr_types); + free(build->substr_offsets); + } +} + +static int +md_build_attribute(MD_CTX* ctx, const CHAR* raw_text, SZ raw_size, + unsigned flags, MD_ATTRIBUTE* attr, MD_ATTRIBUTE_BUILD* build) +{ + OFF raw_off, off; + int is_trivial; + int ret = 0; + + memset(build, 0, sizeof(MD_ATTRIBUTE_BUILD)); + + /* If there is no backslash and no ampersand, build trivial attribute + * without any malloc(). */ + is_trivial = TRUE; + for(raw_off = 0; raw_off < raw_size; raw_off++) { + if(ISANYOF3_(raw_text[raw_off], _T('\\'), _T('&'), _T('\0'))) { + is_trivial = FALSE; + break; + } + } + + if(is_trivial) { + build->text = (CHAR*) (raw_size ? 
raw_text : NULL); + build->substr_types = build->trivial_types; + build->substr_offsets = build->trivial_offsets; + build->substr_count = 1; + build->substr_alloc = 0; + build->trivial_types[0] = MD_TEXT_NORMAL; + build->trivial_offsets[0] = 0; + build->trivial_offsets[1] = raw_size; + off = raw_size; + } else { + build->text = (CHAR*) malloc(raw_size * sizeof(CHAR)); + if(build->text == NULL) { + MD_LOG("malloc() failed."); + goto abort; + } + + raw_off = 0; + off = 0; + + while(raw_off < raw_size) { + if(raw_text[raw_off] == _T('\0')) { + MD_CHECK(md_build_attr_append_substr(ctx, build, MD_TEXT_NULLCHAR, off)); + memcpy(build->text + off, raw_text + raw_off, 1); + off++; + raw_off++; + continue; + } + + if(raw_text[raw_off] == _T('&')) { + OFF ent_end; + + if(md_is_entity_str(ctx, raw_text, raw_off, raw_size, &ent_end)) { + MD_CHECK(md_build_attr_append_substr(ctx, build, MD_TEXT_ENTITY, off)); + memcpy(build->text + off, raw_text + raw_off, ent_end - raw_off); + off += ent_end - raw_off; + raw_off = ent_end; + continue; + } + } + + if(build->substr_count == 0 || build->substr_types[build->substr_count-1] != MD_TEXT_NORMAL) + MD_CHECK(md_build_attr_append_substr(ctx, build, MD_TEXT_NORMAL, off)); + + if(!(flags & MD_BUILD_ATTR_NO_ESCAPES) && + raw_text[raw_off] == _T('\\') && raw_off+1 < raw_size && + (ISPUNCT_(raw_text[raw_off+1]) || ISNEWLINE_(raw_text[raw_off+1]))) + raw_off++; + + build->text[off++] = raw_text[raw_off++]; + } + build->substr_offsets[build->substr_count] = off; + } + + attr->text = build->text; + attr->size = off; + attr->substr_offsets = build->substr_offsets; + attr->substr_types = build->substr_types; + return 0; + +abort: + md_free_attribute(ctx, build); + return -1; +} + + +/********************************************* + *** Dictionary of Reference Definitions *** + *********************************************/ + +#define MD_FNV1A_BASE 2166136261U +#define MD_FNV1A_PRIME 16777619U + +static inline unsigned +md_fnv1a(unsigned base, const 
void* data, size_t n) +{ + const unsigned char* buf = (const unsigned char*) data; + unsigned hash = base; + size_t i; + + for(i = 0; i < n; i++) { + hash ^= buf[i]; + hash *= MD_FNV1A_PRIME; + } + + return hash; +} + + +struct MD_REF_DEF_tag { + CHAR* label; + CHAR* title; + unsigned hash; + SZ label_size; + SZ title_size; + OFF dest_beg; + OFF dest_end; + unsigned char label_needs_free : 1; + unsigned char title_needs_free : 1; +}; + +/* Label equivalence is quite complicated with regards to whitespace and case + * folding. This complicates computing a hash of it as well as direct comparison + * of two labels. */ + +static unsigned +md_link_label_hash(const CHAR* label, SZ size) +{ + unsigned hash = MD_FNV1A_BASE; + OFF off; + unsigned codepoint; + int is_whitespace = FALSE; + + off = md_skip_unicode_whitespace(label, 0, size); + while(off < size) { + SZ char_size; + + codepoint = md_decode_unicode(label, off, size, &char_size); + is_whitespace = ISUNICODEWHITESPACE_(codepoint) || ISNEWLINE_(label[off]); + + if(is_whitespace) { + codepoint = ' '; + hash = md_fnv1a(hash, &codepoint, sizeof(unsigned)); + off = md_skip_unicode_whitespace(label, off, size); + } else { + MD_UNICODE_FOLD_INFO fold_info; + + md_get_unicode_fold_info(codepoint, &fold_info); + hash = md_fnv1a(hash, fold_info.codepoints, fold_info.n_codepoints * sizeof(unsigned)); + off += char_size; + } + } + + return hash; +} + +static OFF +md_link_label_cmp_load_fold_info(const CHAR* label, OFF off, SZ size, + MD_UNICODE_FOLD_INFO* fold_info) +{ + unsigned codepoint; + SZ char_size; + + if(off >= size) { + /* Treat end of a link label as a whitespace. */ + goto whitespace; + } + + codepoint = md_decode_unicode(label, off, size, &char_size); + off += char_size; + if(ISUNICODEWHITESPACE_(codepoint)) { + /* Treat all whitespace as equivalent */ + goto whitespace; + } + + /* Get real folding info. 
*/ + md_get_unicode_fold_info(codepoint, fold_info); + return off; + +whitespace: + fold_info->codepoints[0] = _T(' '); + fold_info->n_codepoints = 1; + return md_skip_unicode_whitespace(label, off, size); +} + +static int +md_link_label_cmp(const CHAR* a_label, SZ a_size, const CHAR* b_label, SZ b_size) +{ + OFF a_off; + OFF b_off; + MD_UNICODE_FOLD_INFO a_fi = { { 0 }, 0 }; + MD_UNICODE_FOLD_INFO b_fi = { { 0 }, 0 }; + OFF a_fi_off = 0; + OFF b_fi_off = 0; + int cmp; + + a_off = md_skip_unicode_whitespace(a_label, 0, a_size); + b_off = md_skip_unicode_whitespace(b_label, 0, b_size); + while(a_off < a_size || a_fi_off < a_fi.n_codepoints || + b_off < b_size || b_fi_off < b_fi.n_codepoints) + { + /* If needed, load fold info for next char. */ + if(a_fi_off >= a_fi.n_codepoints) { + a_fi_off = 0; + a_off = md_link_label_cmp_load_fold_info(a_label, a_off, a_size, &a_fi); + } + if(b_fi_off >= b_fi.n_codepoints) { + b_fi_off = 0; + b_off = md_link_label_cmp_load_fold_info(b_label, b_off, b_size, &b_fi); + } + + cmp = b_fi.codepoints[b_fi_off] - a_fi.codepoints[a_fi_off]; + if(cmp != 0) + return cmp; + + a_fi_off++; + b_fi_off++; + } + + return 0; +} + +typedef struct MD_REF_DEF_LIST_tag MD_REF_DEF_LIST; +struct MD_REF_DEF_LIST_tag { + int n_ref_defs; + int alloc_ref_defs; + MD_REF_DEF* ref_defs[]; /* Valid items always point into ctx->ref_defs[] */ +}; + +static int +md_ref_def_cmp(const void* a, const void* b) +{ + const MD_REF_DEF* a_ref = *(const MD_REF_DEF**)a; + const MD_REF_DEF* b_ref = *(const MD_REF_DEF**)b; + + if(a_ref->hash < b_ref->hash) + return -1; + else if(a_ref->hash > b_ref->hash) + return +1; + else + return md_link_label_cmp(a_ref->label, a_ref->label_size, b_ref->label, b_ref->label_size); +} + +static int +md_ref_def_cmp_for_sort(const void* a, const void* b) +{ + int cmp; + + cmp = md_ref_def_cmp(a, b); + + /* Ensure stability of the sorting. 
*/ + if(cmp == 0) { + const MD_REF_DEF* a_ref = *(const MD_REF_DEF**)a; + const MD_REF_DEF* b_ref = *(const MD_REF_DEF**)b; + + if(a_ref < b_ref) + cmp = -1; + else if(a_ref > b_ref) + cmp = +1; + else + cmp = 0; + } + + return cmp; +} + +static int +md_build_ref_def_hashtable(MD_CTX* ctx) +{ + int i, j; + + if(ctx->n_ref_defs == 0) + return 0; + + ctx->ref_def_hashtable_size = (ctx->n_ref_defs * 5) / 4; + ctx->ref_def_hashtable = malloc(ctx->ref_def_hashtable_size * sizeof(void*)); + if(ctx->ref_def_hashtable == NULL) { + MD_LOG("malloc() failed."); + goto abort; + } + memset(ctx->ref_def_hashtable, 0, ctx->ref_def_hashtable_size * sizeof(void*)); + + /* Each member of ctx->ref_def_hashtable[] can be: + * -- NULL, + * -- pointer to the MD_REF_DEF in ctx->ref_defs[], or + * -- pointer to a MD_REF_DEF_LIST, which holds multiple pointers to + * such MD_REF_DEFs. + */ + for(i = 0; i < ctx->n_ref_defs; i++) { + MD_REF_DEF* def = &ctx->ref_defs[i]; + void* bucket; + MD_REF_DEF_LIST* list; + + def->hash = md_link_label_hash(def->label, def->label_size); + bucket = ctx->ref_def_hashtable[def->hash % ctx->ref_def_hashtable_size]; + + if(bucket == NULL) { + /* The bucket is empty. Make it just point to the def. */ + ctx->ref_def_hashtable[def->hash % ctx->ref_def_hashtable_size] = def; + continue; + } + + if(ctx->ref_defs <= (MD_REF_DEF*) bucket && (MD_REF_DEF*) bucket < ctx->ref_defs + ctx->n_ref_defs) { + /* The bucket already contains one ref. def. Lets see whether it + * is the same label (ref. def. duplicate) or different one + * (hash conflict). */ + MD_REF_DEF* old_def = (MD_REF_DEF*) bucket; + + if(md_link_label_cmp(def->label, def->label_size, old_def->label, old_def->label_size) == 0) { + /* Duplicate label: Ignore this ref. def. */ + continue; + } + + /* Make the bucket complex, i.e. able to hold more ref. defs. 
*/ + list = (MD_REF_DEF_LIST*) malloc(sizeof(MD_REF_DEF_LIST) + 2 * sizeof(MD_REF_DEF*)); + if(list == NULL) { + MD_LOG("malloc() failed."); + goto abort; + } + list->ref_defs[0] = old_def; + list->ref_defs[1] = def; + list->n_ref_defs = 2; + list->alloc_ref_defs = 2; + ctx->ref_def_hashtable[def->hash % ctx->ref_def_hashtable_size] = list; + continue; + } + + /* Append the def to the complex bucket list. + * + * Note in this case we ignore potential duplicates to avoid expensive + * iterating over the complex bucket. Below, we revisit all the complex + * buckets and handle it more cheaply after the complex bucket contents + * is sorted. */ + list = (MD_REF_DEF_LIST*) bucket; + if(list->n_ref_defs >= list->alloc_ref_defs) { + int alloc_ref_defs = list->alloc_ref_defs + list->alloc_ref_defs / 2; + MD_REF_DEF_LIST* list_tmp = (MD_REF_DEF_LIST*) realloc(list, + sizeof(MD_REF_DEF_LIST) + alloc_ref_defs * sizeof(MD_REF_DEF*)); + if(list_tmp == NULL) { + MD_LOG("realloc() failed."); + goto abort; + } + list = list_tmp; + list->alloc_ref_defs = alloc_ref_defs; + ctx->ref_def_hashtable[def->hash % ctx->ref_def_hashtable_size] = list; + } + + list->ref_defs[list->n_ref_defs] = def; + list->n_ref_defs++; + } + + /* Sort the complex buckets so we can use bsearch() with them. */ + for(i = 0; i < ctx->ref_def_hashtable_size; i++) { + void* bucket = ctx->ref_def_hashtable[i]; + MD_REF_DEF_LIST* list; + + if(bucket == NULL) + continue; + if(ctx->ref_defs <= (MD_REF_DEF*) bucket && (MD_REF_DEF*) bucket < ctx->ref_defs + ctx->n_ref_defs) + continue; + + list = (MD_REF_DEF_LIST*) bucket; + qsort(list->ref_defs, list->n_ref_defs, sizeof(MD_REF_DEF*), md_ref_def_cmp_for_sort); + + /* Disable all duplicates in the complex bucket by forcing all such + * records to point to the 1st such ref. def. I.e. no matter which + * record is found during the lookup, it will always point to the right + * ref. def. in ctx->ref_defs[]. 
*/ + for(j = 1; j < list->n_ref_defs; j++) { + if(md_ref_def_cmp(&list->ref_defs[j-1], &list->ref_defs[j]) == 0) + list->ref_defs[j] = list->ref_defs[j-1]; + } + } + + return 0; + +abort: + return -1; +} + +static void +md_free_ref_def_hashtable(MD_CTX* ctx) +{ + if(ctx->ref_def_hashtable != NULL) { + int i; + + for(i = 0; i < ctx->ref_def_hashtable_size; i++) { + void* bucket = ctx->ref_def_hashtable[i]; + if(bucket == NULL) + continue; + if(ctx->ref_defs <= (MD_REF_DEF*) bucket && (MD_REF_DEF*) bucket < ctx->ref_defs + ctx->n_ref_defs) + continue; + free(bucket); + } + + free(ctx->ref_def_hashtable); + } +} + +static const MD_REF_DEF* +md_lookup_ref_def(MD_CTX* ctx, const CHAR* label, SZ label_size) +{ + unsigned hash; + void* bucket; + + if(ctx->ref_def_hashtable_size == 0) + return NULL; + + hash = md_link_label_hash(label, label_size); + bucket = ctx->ref_def_hashtable[hash % ctx->ref_def_hashtable_size]; + + if(bucket == NULL) { + return NULL; + } else if(ctx->ref_defs <= (MD_REF_DEF*) bucket && (MD_REF_DEF*) bucket < ctx->ref_defs + ctx->n_ref_defs) { + const MD_REF_DEF* def = (MD_REF_DEF*) bucket; + + if(md_link_label_cmp(def->label, def->label_size, label, label_size) == 0) + return def; + else + return NULL; + } else { + MD_REF_DEF_LIST* list = (MD_REF_DEF_LIST*) bucket; + MD_REF_DEF key_buf; + const MD_REF_DEF* key = &key_buf; + const MD_REF_DEF** ret; + + key_buf.label = (CHAR*) label; + key_buf.label_size = label_size; + key_buf.hash = md_link_label_hash(key_buf.label, key_buf.label_size); + + ret = (const MD_REF_DEF**) bsearch(&key, list->ref_defs, + list->n_ref_defs, sizeof(MD_REF_DEF*), md_ref_def_cmp); + if(ret != NULL) + return *ret; + else + return NULL; + } +} + + +/*************************** + *** Recognizing Links *** + ***************************/ + +/* Note this code is partially shared between processing inlines and blocks + * as reference definitions and links share some helper parser functions. 
+ */ + +typedef struct MD_LINK_ATTR_tag MD_LINK_ATTR; +struct MD_LINK_ATTR_tag { + OFF dest_beg; + OFF dest_end; + + CHAR* title; + SZ title_size; + int title_needs_free; +}; + + +static int +md_is_link_label(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, + OFF* p_end, int* p_beg_line_index, int* p_end_line_index, + OFF* p_contents_beg, OFF* p_contents_end) +{ + OFF off = beg; + OFF contents_beg = 0; + OFF contents_end = 0; + int line_index = 0; + int len = 0; + + if(CH(off) != _T('[')) + return FALSE; + off++; + + while(1) { + OFF line_end = lines[line_index].end; + + while(off < line_end) { + if(CH(off) == _T('\\') && off+1 < ctx->size && (ISPUNCT(off+1) || ISNEWLINE(off+1))) { + if(contents_end == 0) { + contents_beg = off; + *p_beg_line_index = line_index; + } + contents_end = off + 2; + off += 2; + } else if(CH(off) == _T('[')) { + return FALSE; + } else if(CH(off) == _T(']')) { + if(contents_beg < contents_end) { + /* Success. */ + *p_contents_beg = contents_beg; + *p_contents_end = contents_end; + *p_end = off+1; + *p_end_line_index = line_index; + return TRUE; + } else { + /* Link label must have some non-whitespace contents. 
*/ + return FALSE; + } + } else { + unsigned codepoint; + SZ char_size; + + codepoint = md_decode_unicode(ctx->text, off, ctx->size, &char_size); + if(!ISUNICODEWHITESPACE_(codepoint)) { + if(contents_end == 0) { + contents_beg = off; + *p_beg_line_index = line_index; + } + contents_end = off + char_size; + } + + off += char_size; + } + + len++; + if(len > 999) + return FALSE; + } + + line_index++; + len++; + if(line_index < n_lines) + off = lines[line_index].beg; + else + break; + } + + return FALSE; +} + +static int +md_is_link_destination_A(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end, + OFF* p_contents_beg, OFF* p_contents_end) +{ + OFF off = beg; + + if(off >= max_end || CH(off) != _T('<')) + return FALSE; + off++; + + while(off < max_end) { + if(CH(off) == _T('\\') && off+1 < max_end && ISPUNCT(off+1)) { + off += 2; + continue; + } + + if(ISNEWLINE(off) || CH(off) == _T('<')) + return FALSE; + + if(CH(off) == _T('>')) { + /* Success. */ + *p_contents_beg = beg+1; + *p_contents_end = off; + *p_end = off+1; + return TRUE; + } + + off++; + } + + return FALSE; +} + +static int +md_is_link_destination_B(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end, + OFF* p_contents_beg, OFF* p_contents_end) +{ + OFF off = beg; + int parenthesis_level = 0; + + while(off < max_end) { + if(CH(off) == _T('\\') && off+1 < max_end && ISPUNCT(off+1)) { + off += 2; + continue; + } + + if(ISWHITESPACE(off) || ISCNTRL(off)) + break; + + /* Link destination may include balanced pairs of unescaped '(' ')'. + * Note we limit the maximal nesting level by 32 to protect us from + * https://github.com/jgm/cmark/issues/214 */ + if(CH(off) == _T('(')) { + parenthesis_level++; + if(parenthesis_level > 32) + return FALSE; + } else if(CH(off) == _T(')')) { + if(parenthesis_level == 0) + break; + parenthesis_level--; + } + + off++; + } + + if(parenthesis_level != 0 || off == beg) + return FALSE; + + /* Success. 
*/ + *p_contents_beg = beg; + *p_contents_end = off; + *p_end = off; + return TRUE; +} + +static inline int +md_is_link_destination(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end, + OFF* p_contents_beg, OFF* p_contents_end) +{ + if(CH(beg) == _T('<')) + return md_is_link_destination_A(ctx, beg, max_end, p_end, p_contents_beg, p_contents_end); + else + return md_is_link_destination_B(ctx, beg, max_end, p_end, p_contents_beg, p_contents_end); +} + +static int +md_is_link_title(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, + OFF* p_end, int* p_beg_line_index, int* p_end_line_index, + OFF* p_contents_beg, OFF* p_contents_end) +{ + OFF off = beg; + CHAR closer_char; + int line_index = 0; + + /* White space with up to one line break. */ + while(off < lines[line_index].end && ISWHITESPACE(off)) + off++; + if(off >= lines[line_index].end) { + line_index++; + if(line_index >= n_lines) + return FALSE; + off = lines[line_index].beg; + } + if(off == beg) + return FALSE; + + *p_beg_line_index = line_index; + + /* First char determines how to detect end of it. */ + switch(CH(off)) { + case _T('"'): closer_char = _T('"'); break; + case _T('\''): closer_char = _T('\''); break; + case _T('('): closer_char = _T(')'); break; + default: return FALSE; + } + off++; + + *p_contents_beg = off; + + while(line_index < n_lines) { + OFF line_end = lines[line_index].end; + + while(off < line_end) { + if(CH(off) == _T('\\') && off+1 < ctx->size && (ISPUNCT(off+1) || ISNEWLINE(off+1))) { + off++; + } else if(CH(off) == closer_char) { + /* Success. */ + *p_contents_end = off; + *p_end = off+1; + *p_end_line_index = line_index; + return TRUE; + } else if(closer_char == _T(')') && CH(off) == _T('(')) { + /* ()-style title cannot contain (unescaped '(')) */ + return FALSE; + } + + off++; + } + + line_index++; + } + + return FALSE; +} + +/* Returns 0 if it is not a reference definition. + * + * Returns N > 0 if it is a reference definition. 
N then corresponds to the + * number of lines forming it). In this case the definition is stored for + * resolving any links referring to it. + * + * Returns -1 in case of an error (out of memory). + */ +static int +md_is_link_reference_definition(MD_CTX* ctx, const MD_LINE* lines, int n_lines) +{ + OFF label_contents_beg; + OFF label_contents_end; + int label_contents_line_index = -1; + int label_is_multiline = FALSE; + OFF dest_contents_beg; + OFF dest_contents_end; + OFF title_contents_beg; + OFF title_contents_end; + int title_contents_line_index; + int title_is_multiline = FALSE; + OFF off; + int line_index = 0; + int tmp_line_index; + MD_REF_DEF* def = NULL; + int ret = 0; + + /* Link label. */ + if(!md_is_link_label(ctx, lines, n_lines, lines[0].beg, + &off, &label_contents_line_index, &line_index, + &label_contents_beg, &label_contents_end)) + return FALSE; + label_is_multiline = (label_contents_line_index != line_index); + + /* Colon. */ + if(off >= lines[line_index].end || CH(off) != _T(':')) + return FALSE; + off++; + + /* Optional white space with up to one line break. */ + while(off < lines[line_index].end && ISWHITESPACE(off)) + off++; + if(off >= lines[line_index].end) { + line_index++; + if(line_index >= n_lines) + return FALSE; + off = lines[line_index].beg; + } + + /* Link destination. */ + if(!md_is_link_destination(ctx, off, lines[line_index].end, + &off, &dest_contents_beg, &dest_contents_end)) + return FALSE; + + /* (Optional) title. Note we interpret it as an title only if nothing + * more follows on its last line. */ + if(md_is_link_title(ctx, lines + line_index, n_lines - line_index, off, + &off, &title_contents_line_index, &tmp_line_index, + &title_contents_beg, &title_contents_end) + && off >= lines[line_index + tmp_line_index].end) + { + title_is_multiline = (tmp_line_index != title_contents_line_index); + title_contents_line_index += line_index; + line_index += tmp_line_index; + } else { + /* Not a title. 
*/ + title_is_multiline = FALSE; + title_contents_beg = off; + title_contents_end = off; + title_contents_line_index = 0; + } + + /* Nothing more can follow on the last line. */ + if(off < lines[line_index].end) + return FALSE; + + /* So, it _is_ a reference definition. Remember it. */ + if(ctx->n_ref_defs >= ctx->alloc_ref_defs) { + MD_REF_DEF* new_defs; + + ctx->alloc_ref_defs = (ctx->alloc_ref_defs > 0 + ? ctx->alloc_ref_defs + ctx->alloc_ref_defs / 2 + : 16); + new_defs = (MD_REF_DEF*) realloc(ctx->ref_defs, ctx->alloc_ref_defs * sizeof(MD_REF_DEF)); + if(new_defs == NULL) { + MD_LOG("realloc() failed."); + goto abort; + } + + ctx->ref_defs = new_defs; + } + def = &ctx->ref_defs[ctx->n_ref_defs]; + memset(def, 0, sizeof(MD_REF_DEF)); + + if(label_is_multiline) { + MD_CHECK(md_merge_lines_alloc(ctx, label_contents_beg, label_contents_end, + lines + label_contents_line_index, n_lines - label_contents_line_index, + _T(' '), &def->label, &def->label_size)); + def->label_needs_free = TRUE; + } else { + def->label = (CHAR*) STR(label_contents_beg); + def->label_size = label_contents_end - label_contents_beg; + } + + if(title_is_multiline) { + MD_CHECK(md_merge_lines_alloc(ctx, title_contents_beg, title_contents_end, + lines + title_contents_line_index, n_lines - title_contents_line_index, + _T('\n'), &def->title, &def->title_size)); + def->title_needs_free = TRUE; + } else { + def->title = (CHAR*) STR(title_contents_beg); + def->title_size = title_contents_end - title_contents_beg; + } + + def->dest_beg = dest_contents_beg; + def->dest_end = dest_contents_end; + + /* Success. */ + ctx->n_ref_defs++; + return line_index + 1; + +abort: + /* Failure. 
*/ + if(def != NULL && def->label_needs_free) + free(def->label); + if(def != NULL && def->title_needs_free) + free(def->title); + return ret; +} + +static int +md_is_link_reference(MD_CTX* ctx, const MD_LINE* lines, int n_lines, + OFF beg, OFF end, MD_LINK_ATTR* attr) +{ + const MD_REF_DEF* def; + const MD_LINE* beg_line; + int is_multiline; + CHAR* label; + SZ label_size; + int ret; + + MD_ASSERT(CH(beg) == _T('[') || CH(beg) == _T('!')); + MD_ASSERT(CH(end-1) == _T(']')); + + beg += (CH(beg) == _T('!') ? 2 : 1); + end--; + + /* Find lines corresponding to the beg and end positions. */ + beg_line = md_lookup_line(beg, lines, n_lines); + is_multiline = (end > beg_line->end); + + if(is_multiline) { + MD_CHECK(md_merge_lines_alloc(ctx, beg, end, beg_line, + (int)(n_lines - (beg_line - lines)), _T(' '), &label, &label_size)); + } else { + label = (CHAR*) STR(beg); + label_size = end - beg; + } + + def = md_lookup_ref_def(ctx, label, label_size); + if(def != NULL) { + attr->dest_beg = def->dest_beg; + attr->dest_end = def->dest_end; + attr->title = def->title; + attr->title_size = def->title_size; + attr->title_needs_free = FALSE; + } + + if(is_multiline) + free(label); + + ret = (def != NULL); + +abort: + return ret; +} + +static int +md_is_inline_link_spec(MD_CTX* ctx, const MD_LINE* lines, int n_lines, + OFF beg, OFF* p_end, MD_LINK_ATTR* attr) +{ + int line_index = 0; + int tmp_line_index; + OFF title_contents_beg; + OFF title_contents_end; + int title_contents_line_index; + int title_is_multiline; + OFF off = beg; + int ret = FALSE; + + while(off >= lines[line_index].end) + line_index++; + + MD_ASSERT(CH(off) == _T('(')); + off++; + + /* Optional white space with up to one line break. 
*/ + while(off < lines[line_index].end && ISWHITESPACE(off)) + off++; + if(off >= lines[line_index].end && (off >= ctx->size || ISNEWLINE(off))) { + line_index++; + if(line_index >= n_lines) + return FALSE; + off = lines[line_index].beg; + } + + /* Link destination may be omitted, but only when not also having a title. */ + if(off < ctx->size && CH(off) == _T(')')) { + attr->dest_beg = off; + attr->dest_end = off; + attr->title = NULL; + attr->title_size = 0; + attr->title_needs_free = FALSE; + off++; + *p_end = off; + return TRUE; + } + + /* Link destination. */ + if(!md_is_link_destination(ctx, off, lines[line_index].end, + &off, &attr->dest_beg, &attr->dest_end)) + return FALSE; + + /* (Optional) title. */ + if(md_is_link_title(ctx, lines + line_index, n_lines - line_index, off, + &off, &title_contents_line_index, &tmp_line_index, + &title_contents_beg, &title_contents_end)) + { + title_is_multiline = (tmp_line_index != title_contents_line_index); + title_contents_line_index += line_index; + line_index += tmp_line_index; + } else { + /* Not a title. */ + title_is_multiline = FALSE; + title_contents_beg = off; + title_contents_end = off; + title_contents_line_index = 0; + } + + /* Optional whitespace followed with final ')'. 
*/ + while(off < lines[line_index].end && ISWHITESPACE(off)) + off++; + if (off >= lines[line_index].end && (off >= ctx->size || ISNEWLINE(off))) { + line_index++; + if(line_index >= n_lines) + return FALSE; + off = lines[line_index].beg; + } + if(CH(off) != _T(')')) + goto abort; + off++; + + if(title_contents_beg >= title_contents_end) { + attr->title = NULL; + attr->title_size = 0; + attr->title_needs_free = FALSE; + } else if(!title_is_multiline) { + attr->title = (CHAR*) STR(title_contents_beg); + attr->title_size = title_contents_end - title_contents_beg; + attr->title_needs_free = FALSE; + } else { + MD_CHECK(md_merge_lines_alloc(ctx, title_contents_beg, title_contents_end, + lines + title_contents_line_index, n_lines - title_contents_line_index, + _T('\n'), &attr->title, &attr->title_size)); + attr->title_needs_free = TRUE; + } + + *p_end = off; + ret = TRUE; + +abort: + return ret; +} + +static void +md_free_ref_defs(MD_CTX* ctx) +{ + int i; + + for(i = 0; i < ctx->n_ref_defs; i++) { + MD_REF_DEF* def = &ctx->ref_defs[i]; + + if(def->label_needs_free) + free(def->label); + if(def->title_needs_free) + free(def->title); + } + + free(ctx->ref_defs); +} + + +/****************************************** + *** Processing Inlines (a.k.a Spans) *** + ******************************************/ + +/* We process inlines in few phases: + * + * (1) We go through the block text and collect all significant characters + * which may start/end a span or some other significant position into + * ctx->marks[]. Core of this is what md_collect_marks() does. + * + * We also do some very brief preliminary context-less analysis, whether + * it might be opener or closer (e.g. of an emphasis span). + * + * This speeds the other steps as we do not need to re-iterate over all + * characters anymore. + * + * (2) We analyze each potential mark types, in order by their precedence. 
+ * + * In each md_analyze_XXX() function, we re-iterate list of the marks, + * skipping already resolved regions (in preceding precedences) and try to + * resolve them. + * + * (2.1) For trivial marks, which are single (e.g. HTML entity), we just mark + * them as resolved. + * + * (2.2) For range-type marks, we analyze whether the mark could be closer + * and, if yes, whether there is some preceding opener it could satisfy. + * + * If not we check whether it could be really an opener and if yes, we + * remember it so subsequent closers may resolve it. + * + * (3) Finally, when all marks were analyzed, we render the block contents + * by calling MD_RENDERER::text() callback, interrupting by ::enter_span() + * or ::close_span() whenever we reach a resolved mark. + */ + + +/* The mark structure. + * + * '\\': Maybe escape sequence. + * '\0': NULL char. + * '*': Maybe (strong) emphasis start/end. + * '_': Maybe (strong) emphasis start/end. + * '~': Maybe strikethrough start/end (needs MD_FLAG_STRIKETHROUGH). + * '`': Maybe code span start/end. + * '&': Maybe start of entity. + * ';': Maybe end of entity. + * '<': Maybe start of raw HTML or autolink. + * '>': Maybe end of raw HTML or autolink. + * '[': Maybe start of link label or link text. + * '!': Equivalent of '[' for image. + * ']': Maybe end of link label or link text. + * '@': Maybe permissive e-mail auto-link (needs MD_FLAG_PERMISSIVEEMAILAUTOLINKS). + * ':': Maybe permissive URL auto-link (needs MD_FLAG_PERMISSIVEURLAUTOLINKS). + * '.': Maybe permissive WWW auto-link (needs MD_FLAG_PERMISSIVEWWWAUTOLINKS). + * 'D': Dummy mark, it reserves a space for splitting a previous mark + * (e.g. emphasis) or to make more space for storing some special data + * related to the preceding mark (e.g. link). + * + * Note that not all instances of these chars in the text imply creation of the + * structure. Only those which have (or may have, after we see more context) + * the special meaning. 
+ * + * (Keep this struct as small as possible to fit as much of them into CPU + * cache line.) + */ +struct MD_MARK_tag { + OFF beg; + OFF end; + + /* For unresolved openers, 'prev' and 'next' form the chain of open openers + * of given type 'ch'. + * + * During resolving, we disconnect from the chain and point to the + * corresponding counterpart so opener points to its closer and vice versa. + */ + int prev; + int next; + CHAR ch; + unsigned char flags; +}; + +/* Mark flags (these apply to ALL mark types). */ +#define MD_MARK_POTENTIAL_OPENER 0x01 /* Maybe opener. */ +#define MD_MARK_POTENTIAL_CLOSER 0x02 /* Maybe closer. */ +#define MD_MARK_OPENER 0x04 /* Definitely opener. */ +#define MD_MARK_CLOSER 0x08 /* Definitely closer. */ +#define MD_MARK_RESOLVED 0x10 /* Resolved in any definite way. */ + +/* Mark flags specific for various mark types (so they can share bits). */ +#define MD_MARK_EMPH_INTRAWORD 0x20 /* Helper for the "rule of 3". */ +#define MD_MARK_EMPH_MOD3_0 0x40 +#define MD_MARK_EMPH_MOD3_1 0x80 +#define MD_MARK_EMPH_MOD3_2 (0x40 | 0x80) +#define MD_MARK_EMPH_MOD3_MASK (0x40 | 0x80) +#define MD_MARK_AUTOLINK 0x20 /* Distinguisher for '<', '>'. */ +#define MD_MARK_VALIDPERMISSIVEAUTOLINK 0x20 /* For permissive autolinks. 
*/ +#define MD_MARK_HASNESTEDBRACKETS 0x20 /* For '[' to rule out invalid link labels early */ + +static MD_MARKCHAIN* +md_asterisk_chain(MD_CTX* ctx, unsigned flags) +{ + switch(flags & (MD_MARK_EMPH_INTRAWORD | MD_MARK_EMPH_MOD3_MASK)) { + case MD_MARK_EMPH_INTRAWORD | MD_MARK_EMPH_MOD3_0: return &ASTERISK_OPENERS_intraword_mod3_0; + case MD_MARK_EMPH_INTRAWORD | MD_MARK_EMPH_MOD3_1: return &ASTERISK_OPENERS_intraword_mod3_1; + case MD_MARK_EMPH_INTRAWORD | MD_MARK_EMPH_MOD3_2: return &ASTERISK_OPENERS_intraword_mod3_2; + case MD_MARK_EMPH_MOD3_0: return &ASTERISK_OPENERS_extraword_mod3_0; + case MD_MARK_EMPH_MOD3_1: return &ASTERISK_OPENERS_extraword_mod3_1; + case MD_MARK_EMPH_MOD3_2: return &ASTERISK_OPENERS_extraword_mod3_2; + default: MD_UNREACHABLE(); + } + return NULL; +} + +static MD_MARKCHAIN* +md_mark_chain(MD_CTX* ctx, int mark_index) +{ + MD_MARK* mark = &ctx->marks[mark_index]; + + switch(mark->ch) { + case _T('*'): return md_asterisk_chain(ctx, mark->flags); + case _T('_'): return &UNDERSCORE_OPENERS; + case _T('~'): return (mark->end - mark->beg == 1) ? &TILDE_OPENERS_1 : &TILDE_OPENERS_2; + /* case _T('!'): MD_FALLTHROUGH(); */ + case _T('['): return &BRACKET_OPENERS; + case _T('|'): return &TABLECELLBOUNDARIES; + case _T('-'): return &FAINT_OPENERS; + case _T('%'): return &INVERSE_OPENERS; + case _T('!'): return &CONCEAL_OPENERS; + case _T('^'): return &BLINK_OPENERS; + default: return NULL; + } +} + +static MD_MARK* +md_push_mark(MD_CTX* ctx) +{ + if(ctx->n_marks >= ctx->alloc_marks) { + MD_MARK* new_marks; + + ctx->alloc_marks = (ctx->alloc_marks > 0 + ? 
ctx->alloc_marks + ctx->alloc_marks / 2 + : 64); + new_marks = realloc(ctx->marks, ctx->alloc_marks * sizeof(MD_MARK)); + if(new_marks == NULL) { + MD_LOG("realloc() failed."); + return NULL; + } + + ctx->marks = new_marks; + } + + return &ctx->marks[ctx->n_marks++]; +} + +#define PUSH_MARK_() \ + do { \ + mark = md_push_mark(ctx); \ + if(mark == NULL) { \ + ret = -1; \ + goto abort; \ + } \ + } while(0) + +#define PUSH_MARK(ch_, beg_, end_, flags_) \ + do { \ + PUSH_MARK_(); \ + mark->beg = (beg_); \ + mark->end = (end_); \ + mark->prev = -1; \ + mark->next = -1; \ + mark->ch = (char)(ch_); \ + mark->flags = (flags_); \ + } while(0) + + +static void +md_mark_chain_append(MD_CTX* ctx, MD_MARKCHAIN* chain, int mark_index) +{ + if(chain->tail >= 0) + ctx->marks[chain->tail].next = mark_index; + else + chain->head = mark_index; + + ctx->marks[mark_index].prev = chain->tail; + ctx->marks[mark_index].next = -1; + chain->tail = mark_index; +} + +/* Sometimes, we need to store a pointer into the mark. It is quite rare + * so we do not bother to make MD_MARK use union, and it can only happen + * for dummy marks. */ +static inline void +md_mark_store_ptr(MD_CTX* ctx, int mark_index, void* ptr) +{ + MD_MARK* mark = &ctx->marks[mark_index]; + MD_ASSERT(mark->ch == 'D'); + + /* Check only members beg and end are misused for this. */ + MD_ASSERT(sizeof(void*) <= 2 * sizeof(OFF)); + memcpy(mark, &ptr, sizeof(void*)); +} + +static inline void* +md_mark_get_ptr(MD_CTX* ctx, int mark_index) +{ + void* ptr; + MD_MARK* mark = &ctx->marks[mark_index]; + MD_ASSERT(mark->ch == 'D'); + memcpy(&ptr, mark, sizeof(void*)); + return ptr; +} + +static void +md_resolve_range(MD_CTX* ctx, MD_MARKCHAIN* chain, int opener_index, int closer_index) +{ + MD_MARK* opener = &ctx->marks[opener_index]; + MD_MARK* closer = &ctx->marks[closer_index]; + + /* Remove opener from the list of openers. 
*/ + if(chain != NULL) { + if(opener->prev >= 0) + ctx->marks[opener->prev].next = opener->next; + else + chain->head = opener->next; + + if(opener->next >= 0) + ctx->marks[opener->next].prev = opener->prev; + else + chain->tail = opener->prev; + } + + /* Interconnect opener and closer and mark both as resolved. */ + opener->next = closer_index; + opener->flags |= MD_MARK_OPENER | MD_MARK_RESOLVED; + closer->prev = opener_index; + closer->flags |= MD_MARK_CLOSER | MD_MARK_RESOLVED; +} + + +#define MD_ROLLBACK_ALL 0 +#define MD_ROLLBACK_CROSSING 1 + +/* In the range ctx->marks[opener_index] ... [closer_index], undo some or all + * resolvings accordingly to these rules: + * + * (1) All openers BEFORE the range corresponding to any closer inside the + * range are un-resolved and they are re-added to their respective chains + * of unresolved openers. This ensures we can reuse the opener for closers + * AFTER the range. + * + * (2) If 'how' is MD_ROLLBACK_ALL, then ALL resolved marks inside the range + * are discarded. + * + * (3) If 'how' is MD_ROLLBACK_CROSSING, only closers with openers handled + * in (1) are discarded. I.e. pairs of openers and closers which are both + * inside the range are retained as well as any unpaired marks. + */ +static void +md_rollback(MD_CTX* ctx, int opener_index, int closer_index, int how) +{ + int i; + int mark_index; + + /* Cut all unresolved openers at the mark index. */ + for(i = OPENERS_CHAIN_FIRST; i < OPENERS_CHAIN_LAST+1; i++) { + MD_MARKCHAIN* chain = &ctx->mark_chains[i]; + + while(chain->tail >= opener_index) { + int same = chain->tail == opener_index; + chain->tail = ctx->marks[chain->tail].prev; + if (same) break; + } + + if(chain->tail >= 0) + ctx->marks[chain->tail].next = -1; + else + chain->head = -1; + } + + /* Go backwards so that unresolved openers are re-added into their + * respective chains, in the right order. 
*/ + mark_index = closer_index - 1; + while(mark_index > opener_index) { + MD_MARK* mark = &ctx->marks[mark_index]; + int mark_flags = mark->flags; + int discard_flag = (how == MD_ROLLBACK_ALL); + + if(mark->flags & MD_MARK_CLOSER) { + int mark_opener_index = mark->prev; + + /* Undo opener BEFORE the range. */ + if(mark_opener_index < opener_index) { + MD_MARK* mark_opener = &ctx->marks[mark_opener_index]; + MD_MARKCHAIN* chain; + + mark_opener->flags &= ~(MD_MARK_OPENER | MD_MARK_CLOSER | MD_MARK_RESOLVED); + chain = md_mark_chain(ctx, opener_index); + if(chain != NULL) { + md_mark_chain_append(ctx, chain, mark_opener_index); + discard_flag = 1; + } + } + } + + /* And reset our flags. */ + if(discard_flag) { + /* Make zero-length closer a dummy mark as that's how it was born */ + if((mark->flags & MD_MARK_CLOSER) && mark->beg == mark->end) + mark->ch = 'D'; + + mark->flags &= ~(MD_MARK_OPENER | MD_MARK_CLOSER | MD_MARK_RESOLVED); + } + + /* Jump as far as we can over unresolved or non-interesting marks. */ + switch(how) { + case MD_ROLLBACK_CROSSING: + if((mark_flags & MD_MARK_CLOSER) && mark->prev > opener_index) { + /* If we are closer with opener INSIDE the range, there may + * not be any other crosser inside the subrange. 
*/ + mark_index = mark->prev; + break; + } + MD_FALLTHROUGH(); + default: + mark_index--; + break; + } + } +} + +static void +md_build_mark_char_map(MD_CTX* ctx) +{ + memset(ctx->mark_char_map, 0, sizeof(ctx->mark_char_map)); + + ctx->mark_char_map['\\'] = 1; + ctx->mark_char_map['^'] = 1; + ctx->mark_char_map['%'] = 1; + ctx->mark_char_map['-'] = 1; + ctx->mark_char_map['*'] = 1; + ctx->mark_char_map['_'] = 1; + ctx->mark_char_map['`'] = 1; + ctx->mark_char_map['&'] = 1; + ctx->mark_char_map[';'] = 1; + ctx->mark_char_map['<'] = 1; + ctx->mark_char_map['>'] = 1; + ctx->mark_char_map['['] = 1; + ctx->mark_char_map['!'] = 1; + ctx->mark_char_map[']'] = 1; + ctx->mark_char_map['\0'] = 1; + + if(ctx->parser.flags & MD_FLAG_STRIKETHROUGH) + ctx->mark_char_map['~'] = 1; + + if(ctx->parser.flags & MD_FLAG_LATEXMATHSPANS) + ctx->mark_char_map['$'] = 1; + + if(ctx->parser.flags & MD_FLAG_PERMISSIVEEMAILAUTOLINKS) + ctx->mark_char_map['@'] = 1; + + if(ctx->parser.flags & MD_FLAG_PERMISSIVEURLAUTOLINKS) + ctx->mark_char_map[':'] = 1; + + if(ctx->parser.flags & MD_FLAG_PERMISSIVEWWWAUTOLINKS) + ctx->mark_char_map['.'] = 1; + + if((ctx->parser.flags & MD_FLAG_TABLES) || (ctx->parser.flags & MD_FLAG_WIKILINKS)) + ctx->mark_char_map['|'] = 1; + + if(ctx->parser.flags & MD_FLAG_COLLAPSEWHITESPACE) { + int i; + + for(i = 0; i < (int) sizeof(ctx->mark_char_map); i++) { + if(ISWHITESPACE_(i)) + ctx->mark_char_map[i] = 1; + } + } +} + +/* We limit code span marks to lower than 32 backticks. This solves the + * pathologic case of too many openers, each of different length: Their + * resolving would be then O(n^2). 
*/ +#define CODESPAN_MARK_MAXLEN 32 + +static int +md_is_code_span(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, + OFF* p_opener_beg, OFF* p_opener_end, + OFF* p_closer_beg, OFF* p_closer_end, + OFF last_potential_closers[CODESPAN_MARK_MAXLEN], + int* p_reached_paragraph_end) +{ + OFF opener_beg = beg; + OFF opener_end; + OFF closer_beg; + OFF closer_end; + SZ mark_len; + OFF line_end; + int has_space_after_opener = FALSE; + int has_eol_after_opener = FALSE; + int has_space_before_closer = FALSE; + int has_eol_before_closer = FALSE; + int has_only_space = TRUE; + int line_index = 0; + + line_end = lines[0].end; + opener_end = opener_beg; + while(opener_end < line_end && CH(opener_end) == _T('`')) + opener_end++; + has_space_after_opener = (opener_end < line_end && CH(opener_end) == _T(' ')); + has_eol_after_opener = (opener_end == line_end); + + /* The caller needs to know end of the opening mark even if we fail. */ + *p_opener_end = opener_end; + + mark_len = opener_end - opener_beg; + if(mark_len > CODESPAN_MARK_MAXLEN) + return FALSE; + + /* Check whether we already know there is no closer of this length. + * If so, re-scan does no sense. This fixes issue #59. */ + if(last_potential_closers[mark_len-1] >= lines[n_lines-1].end || + (*p_reached_paragraph_end && last_potential_closers[mark_len-1] < opener_end)) + return FALSE; + + closer_beg = opener_end; + closer_end = opener_end; + + /* Find closer mark. */ + while(TRUE) { + while(closer_beg < line_end && CH(closer_beg) != _T('`')) { + if(CH(closer_beg) != _T(' ')) + has_only_space = FALSE; + closer_beg++; + } + closer_end = closer_beg; + while(closer_end < line_end && CH(closer_end) == _T('`')) + closer_end++; + + if(closer_end - closer_beg == mark_len) { + /* Success. 
*/ + has_space_before_closer = (closer_beg > lines[line_index].beg && CH(closer_beg-1) == _T(' ')); + has_eol_before_closer = (closer_beg == lines[line_index].beg); + break; + } + + if(closer_end - closer_beg > 0) { + /* We have found a back-tick which is not part of the closer. */ + has_only_space = FALSE; + + /* But if we eventually fail, remember it as a potential closer + * of its own length for future attempts. This mitigates needs for + * rescans. */ + if(closer_end - closer_beg < CODESPAN_MARK_MAXLEN) { + if(closer_beg > last_potential_closers[closer_end - closer_beg - 1]) + last_potential_closers[closer_end - closer_beg - 1] = closer_beg; + } + } + + if(closer_end >= line_end) { + line_index++; + if(line_index >= n_lines) { + /* Reached end of the paragraph and still nothing. */ + *p_reached_paragraph_end = TRUE; + return FALSE; + } + /* Try on the next line. */ + line_end = lines[line_index].end; + closer_beg = lines[line_index].beg; + } else { + closer_beg = closer_end; + } + } + + /* If there is a space or a new line both after and before the opener + * (and if the code span is not made of spaces only), consume one initial + * and one trailing space as part of the marks. */ + if(!has_only_space && + (has_space_after_opener || has_eol_after_opener) && + (has_space_before_closer || has_eol_before_closer)) + { + if(has_space_after_opener) + opener_end++; + else + opener_end = lines[1].beg; + + if(has_space_before_closer) + closer_beg--; + else { + closer_beg = lines[line_index-1].end; + /* We need to eat the preceding "\r\n" but not any line trailing + * spaces. 
*/ + while(closer_beg < ctx->size && ISBLANK(closer_beg)) + closer_beg++; + } + } + + *p_opener_beg = opener_beg; + *p_opener_end = opener_end; + *p_closer_beg = closer_beg; + *p_closer_end = closer_end; + return TRUE; +} + +/* detect anchors with syntax: [|anchorId] */ +static int +md_is_anchor_span(MD_CTX* ctx, const MD_LINE* lines, OFF off, OFF* p_closer_beg) +{ + OFF line_end = lines[0].end; + // Smallest anchor is [|x] + // An anchor must be on a single line + if (off+4 >= line_end) + return FALSE; + off += 2; + + // Find closer mark + int opener_end = off; + while (off < line_end) { + if (CH(off) == _T(']')) { + // Check if there an id for the anchor + if (off == opener_end) + return FALSE; + *p_closer_beg = off; + return TRUE; + } + off++; + } + return FALSE; +} + +#ifdef MD4C_USE_UTF16 + /* For UTF-16, mark_char_map[] covers only ASCII. */ + #define IS_MARK_CHAR(off) ((CH(off) < SIZEOF_ARRAY(ctx->mark_char_map)) && \ + (ctx->mark_char_map[(unsigned char) CH(off)])) +#else + /* For 8-bit encodings, mark_char_map[] covers all 256 elements. 
*/ + #define IS_MARK_CHAR(off) (ctx->mark_char_map[(unsigned char) CH(off)]) +#endif + +/* detect faint effect: -text text- */ +static int +md_is_faint_span(MD_CTX* ctx, const MD_LINE* lines, OFF beg, OFF* p_closer_beg) +{ + OFF tmp; + OFF line_end; + + line_end = lines[0].end; + if (beg+2 >= line_end) + return FALSE; + if (ISUNICODEWHITESPACE(beg+1)) + return FALSE; + tmp = beg+2; + while (tmp < line_end) { + if (CH(tmp) == _T('-') && (tmp+1 == line_end || ISUNICODEWHITESPACE(tmp+1) || IS_MARK_CHAR(tmp+1)) + && (!ISUNICODEWHITESPACE(tmp-1))) { + *p_closer_beg = tmp; + return TRUE; + } + tmp++; + } + + return FALSE; +} + +/* detect inverse effect: %text text% */ +static int +md_is_inverse_span(MD_CTX* ctx, const MD_LINE* lines, OFF beg, OFF* p_closer_beg) +{ + OFF tmp; + OFF line_end; + + line_end = lines[0].end; + if (beg+2 >= line_end) + return FALSE; + if (ISUNICODEWHITESPACE(beg+1)) + return FALSE; + tmp = beg+2; + while (tmp < line_end) { + if (CH(tmp) == _T('%') && (tmp+1 == line_end || ISUNICODEWHITESPACE(tmp+1) || IS_MARK_CHAR(tmp+1)) + && (!ISUNICODEWHITESPACE(tmp-1))) { + *p_closer_beg = tmp; + return TRUE; + } + tmp++; + } + + return FALSE; +} + +/* detect conceal effect: !text text! 
*/ +static int +md_is_conceal_span(MD_CTX* ctx, const MD_LINE* lines, OFF beg, OFF* p_closer_beg) +{ + OFF tmp; + OFF line_end; + + line_end = lines[0].end; + if (beg+2 >= line_end) + return FALSE; + if (ISUNICODEWHITESPACE(beg+1)) + return FALSE; + tmp = beg+2; + while (tmp < line_end) { + if (CH(tmp) == _T('!') && (tmp+1 == line_end || ISUNICODEWHITESPACE(tmp+1) || IS_MARK_CHAR(tmp+1)) + && (!ISUNICODEWHITESPACE(tmp-1))) { + *p_closer_beg = tmp; + return TRUE; + } + tmp++; + } + + return FALSE; +} + +/* detect blink effect: ^text text^ */ +static int +md_is_blink_span(MD_CTX* ctx, const MD_LINE* lines, OFF beg, OFF* p_closer_beg) +{ + OFF tmp; + OFF line_end; + + line_end = lines[0].end; + if (beg+2 >= line_end) + return FALSE; + if (ISUNICODEWHITESPACE(beg+1)) + return FALSE; + tmp = beg+2; + while (tmp < line_end) { + if (CH(tmp) == _T('^') && (tmp+1 == line_end || ISUNICODEWHITESPACE(tmp+1) || IS_MARK_CHAR(tmp+1)) + && (!ISUNICODEWHITESPACE(tmp-1))) { + *p_closer_beg = tmp; + return TRUE; + } + tmp++; + } + + return FALSE; +} + +static int +md_is_autolink_uri(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end) +{ + OFF off = beg+1; + + MD_ASSERT(CH(beg) == _T('<')); + + /* Check for scheme. */ + if(off >= max_end || !ISASCII(off)) + return FALSE; + off++; + while(1) { + if(off >= max_end) + return FALSE; + if(off - beg > 32) + return FALSE; + if(CH(off) == _T(':') && off - beg >= 3) + break; + if(!ISALNUM(off) && CH(off) != _T('+') && CH(off) != _T('-') && CH(off) != _T('.')) + return FALSE; + off++; + } + + /* Check the path after the scheme. 
*/ + while(off < max_end && CH(off) != _T('>')) { + if(ISWHITESPACE(off) || ISCNTRL(off) || CH(off) == _T('<')) + return FALSE; + off++; + } + + if(off >= max_end) + return FALSE; + + MD_ASSERT(CH(off) == _T('>')); + *p_end = off+1; + return TRUE; +} + +static int +md_is_autolink_email(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end) +{ + OFF off = beg + 1; + int label_len; + + MD_ASSERT(CH(beg) == _T('<')); + + /* The code should correspond to this regexp: + /^[a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+ + @[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])? + (?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$/ + */ + + /* Username (before '@'). */ + while(off < max_end && (ISALNUM(off) || ISANYOF(off, _T(".!#$%&'*+/=?^_`{|}~-")))) + off++; + if(off <= beg+1) + return FALSE; + + /* '@' */ + if(off >= max_end || CH(off) != _T('@')) + return FALSE; + off++; + + /* Labels delimited with '.'; each label is sequence of 1 - 63 alnum + * characters or '-', but '-' is not allowed as first or last char. */ + label_len = 0; + while(off < max_end) { + if(ISALNUM(off)) + label_len++; + else if(CH(off) == _T('-') && label_len > 0) + label_len++; + else if(CH(off) == _T('.') && label_len > 0 && CH(off-1) != _T('-')) + label_len = 0; + else + break; + + if(label_len > 63) + return FALSE; + + off++; + } + + if(label_len <= 0 || off >= max_end || CH(off) != _T('>') || CH(off-1) == _T('-')) + return FALSE; + + *p_end = off+1; + return TRUE; +} + +static int +md_is_autolink(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end, int* p_missing_mailto) +{ + if(md_is_autolink_uri(ctx, beg, max_end, p_end)) { + *p_missing_mailto = FALSE; + return TRUE; + } + + if(md_is_autolink_email(ctx, beg, max_end, p_end)) { + *p_missing_mailto = TRUE; + return TRUE; + } + + return FALSE; +} + +static int +md_collect_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines, int table_mode) +{ + const MD_LINE* line_term = lines + n_lines; + const MD_LINE* line; + int ret = 0; + MD_MARK* mark; + OFF 
codespan_last_potential_closers[CODESPAN_MARK_MAXLEN] = { 0 }; + int codespan_scanned_till_paragraph_end = FALSE; + + for(line = lines; line < line_term; line++) { + OFF off = line->beg; + OFF line_end = line->end; + + while(TRUE) { + CHAR ch; + + /* Optimization: Use some loop unrolling. */ + while(off + 3 < line_end && !IS_MARK_CHAR(off+0) && !IS_MARK_CHAR(off+1) + && !IS_MARK_CHAR(off+2) && !IS_MARK_CHAR(off+3)) + off += 4; + while(off < line_end && !IS_MARK_CHAR(off+0)) + off++; + + if(off >= line_end) + break; + + ch = CH(off); + + /* A backslash escape. + * It can go beyond line->end as it may involve escaped new + * line to form a hard break. */ + if(ch == _T('\\') && off+1 < ctx->size && (ISPUNCT(off+1) || ISNEWLINE(off+1))) { + /* Hard-break cannot be on the last line of the block. */ + if(!ISNEWLINE(off+1) || line+1 < line_term) + PUSH_MARK(ch, off, off+2, MD_MARK_RESOLVED); + off += 2; + continue; + } + + /* A potential (string) emphasis start/end. */ + if(ch == _T('*') || ch == _T('_')) { + OFF tmp = off+1; + int left_level; /* What precedes: 0 = whitespace; 1 = punctuation; 2 = other char. */ + int right_level; /* What follows: 0 = whitespace; 1 = punctuation; 2 = other char. */ + + while(tmp < line_end && CH(tmp) == ch) + tmp++; + + if(off == line->beg || ISUNICODEWHITESPACEBEFORE(off)) + left_level = 0; + else if(ISUNICODEPUNCTBEFORE(off)) + left_level = 1; + else + left_level = 2; + + if(tmp == line_end || ISUNICODEWHITESPACE(tmp)) + right_level = 0; + else if(ISUNICODEPUNCT(tmp)) + right_level = 1; + else + right_level = 2; + + /* Intra-word underscore doesn't have special meaning. 
*/ + if(ch == _T('_') && left_level == 2 && right_level == 2) { + left_level = 0; + right_level = 0; + } + + if(left_level != 0 || right_level != 0) { + unsigned flags = 0; + + if(left_level > 0 && left_level >= right_level) + flags |= MD_MARK_POTENTIAL_CLOSER; + if(right_level > 0 && right_level >= left_level) + flags |= MD_MARK_POTENTIAL_OPENER; + if(left_level == 2 && right_level == 2) + flags |= MD_MARK_EMPH_INTRAWORD; + + /* For "the rule of three" we need to remember the original + * size of the mark (modulo three), before we potentially + * split the mark when being later resolved partially by some + * shorter closer. */ + switch((tmp - off) % 3) { + case 0: flags |= MD_MARK_EMPH_MOD3_0; break; + case 1: flags |= MD_MARK_EMPH_MOD3_1; break; + case 2: flags |= MD_MARK_EMPH_MOD3_2; break; + } + + PUSH_MARK(ch, off, tmp, flags); + + /* During resolving, multiple asterisks may have to be + * split into independent span start/ends. Consider e.g. + * "**foo* bar*". Therefore we push also some empty dummy + * marks to have enough space for that. */ + off++; + while(off < tmp) { + PUSH_MARK('D', off, off, 0); + off++; + } + continue; + } + + off = tmp; + continue; + } + + /* A potential code span start/end. */ + if(ch == _T('`')) { + OFF opener_beg, opener_end; + OFF closer_beg, closer_end; + int is_code_span; + + is_code_span = md_is_code_span(ctx, line, line_term - line, off, + &opener_beg, &opener_end, &closer_beg, &closer_end, + codespan_last_potential_closers, + &codespan_scanned_till_paragraph_end); + if(is_code_span) { + PUSH_MARK(_T('`'), opener_beg, opener_end, MD_MARK_OPENER | MD_MARK_RESOLVED); + PUSH_MARK(_T('`'), closer_beg, closer_end, MD_MARK_CLOSER | MD_MARK_RESOLVED); + ctx->marks[ctx->n_marks-2].next = ctx->n_marks-1; + ctx->marks[ctx->n_marks-1].prev = ctx->n_marks-2; + + off = closer_end; + + /* Advance the current line accordingly. 
*/ + if(off > line_end) { + line = md_lookup_line(off, line, line_term - line); + line_end = line->end; + } + continue; + } + + off = opener_end; + continue; + } + + /* A potential faint span start/end. */ + if(ch == _T('-')) { + OFF closer_beg; + int is_faint_span; + + if (off == line->beg || ISUNICODEWHITESPACEBEFORE(off) || ISUNICODEPUNCTBEFORE(off) + || IS_MARK_CHAR(off-1)) { + + is_faint_span = md_is_faint_span(ctx, line, off, &closer_beg); + if(is_faint_span) { + PUSH_MARK(_T('-'), off, off+1, MD_MARK_OPENER | MD_MARK_RESOLVED); + PUSH_MARK(_T('-'), closer_beg, closer_beg+1, MD_MARK_CLOSER | MD_MARK_RESOLVED); + ctx->marks[ctx->n_marks-2].next = ctx->n_marks-1; + ctx->marks[ctx->n_marks-1].prev = ctx->n_marks-2; + } + } + off++; + continue; + } + + /* A potential inverse span start/end. */ + if(ch == _T('%')) { + OFF closer_beg; + int is_inverse_span; + + if (off == line->beg || ISUNICODEWHITESPACEBEFORE(off) || ISUNICODEPUNCTBEFORE(off) + || IS_MARK_CHAR(off-1)) { + + is_inverse_span = md_is_inverse_span(ctx, line, off, &closer_beg); + if(is_inverse_span) { + PUSH_MARK(_T('%'), off, off+1, MD_MARK_OPENER | MD_MARK_RESOLVED); + PUSH_MARK(_T('%'), closer_beg, closer_beg+1, MD_MARK_CLOSER | MD_MARK_RESOLVED); + ctx->marks[ctx->n_marks-2].next = ctx->n_marks-1; + ctx->marks[ctx->n_marks-1].prev = ctx->n_marks-2; + + } + } + off++; + continue; + } + + /* A potential conceal span start/end. 
*/ + if(ch == _T('!')) { + OFF closer_beg; + int is_conceal_span; + + if (off == line->beg || ISUNICODEWHITESPACEBEFORE(off) || ISUNICODEPUNCTBEFORE(off) + || IS_MARK_CHAR(off-1)) { + + is_conceal_span = md_is_conceal_span(ctx, line, off, &closer_beg); + if(is_conceal_span) { + PUSH_MARK(_T('!'), off, off+1, MD_MARK_OPENER | MD_MARK_RESOLVED); + PUSH_MARK(_T('!'), closer_beg, closer_beg+1, MD_MARK_CLOSER | MD_MARK_RESOLVED); + ctx->marks[ctx->n_marks-2].next = ctx->n_marks-1; + ctx->marks[ctx->n_marks-1].prev = ctx->n_marks-2; + + } + } + off++; + continue; + } + + /* A potential blink span start/end. */ + if(ch == _T('^')) { + OFF closer_beg; + int is_blink_span; + + if (off == line->beg || ISUNICODEWHITESPACEBEFORE(off) || ISUNICODEPUNCTBEFORE(off) + || IS_MARK_CHAR(off-1)) { + + is_blink_span = md_is_blink_span(ctx, line, off, &closer_beg); + if(is_blink_span) { + PUSH_MARK(_T('^'), off, off+1, MD_MARK_OPENER | MD_MARK_RESOLVED); + PUSH_MARK(_T('^'), closer_beg, closer_beg+1, MD_MARK_CLOSER | MD_MARK_RESOLVED); + ctx->marks[ctx->n_marks-2].next = ctx->n_marks-1; + ctx->marks[ctx->n_marks-1].prev = ctx->n_marks-2; + + } + } + off++; + continue; + } + + /* A potential entity start. */ + if(ch == _T('&')) { + PUSH_MARK(ch, off, off+1, MD_MARK_POTENTIAL_OPENER); + off++; + continue; + } + + /* A potential entity end. */ + if(ch == _T(';')) { + /* We surely cannot be entity unless the previous mark is '&'. */ + if(ctx->n_marks > 0 && ctx->marks[ctx->n_marks-1].ch == _T('&')) + PUSH_MARK(ch, off, off+1, MD_MARK_POTENTIAL_CLOSER); + + off++; + continue; + } + + /* A potential autolink or raw HTML start/end. */ + if(ch == _T('<')) { + int is_autolink; + OFF autolink_end; + int missing_mailto; + + if(!(ctx->parser.flags & MD_FLAG_NOHTMLSPANS)) { + int is_html; + OFF html_end; + + /* Given the nature of the raw HTML, we have to recognize + * it here. Doing so later in md_analyze_lt_gt() could + * open can of worms of quadratic complexity. 
*/ + is_html = md_is_html_any(ctx, line, line_term - line, off, + lines[n_lines-1].end, &html_end); + if(is_html) { + PUSH_MARK(_T('<'), off, off, MD_MARK_OPENER | MD_MARK_RESOLVED); + PUSH_MARK(_T('>'), html_end, html_end, MD_MARK_CLOSER | MD_MARK_RESOLVED); + ctx->marks[ctx->n_marks-2].next = ctx->n_marks-1; + ctx->marks[ctx->n_marks-1].prev = ctx->n_marks-2; + off = html_end; + + /* Advance the current line accordingly. */ + if(off > line_end) { + line = md_lookup_line(off, line, line_term - line); + line_end = line->end; + } + continue; + } + } + + is_autolink = md_is_autolink(ctx, off, lines[n_lines-1].end, + &autolink_end, &missing_mailto); + if(is_autolink) { + PUSH_MARK((missing_mailto ? _T('@') : _T('<')), off, off+1, + MD_MARK_OPENER | MD_MARK_RESOLVED | MD_MARK_AUTOLINK); + PUSH_MARK(_T('>'), autolink_end-1, autolink_end, + MD_MARK_CLOSER | MD_MARK_RESOLVED | MD_MARK_AUTOLINK); + ctx->marks[ctx->n_marks-2].next = ctx->n_marks-1; + ctx->marks[ctx->n_marks-1].prev = ctx->n_marks-2; + off = autolink_end; + continue; + } + + off++; + continue; + } + + /* A potential anchor */ + if(ch == _T('[') && off+1 < line_end && CH(off+1) == _T('|')) { + OFF closer_beg; + int is_anchor_span = md_is_anchor_span(ctx, line, off, &closer_beg); + if (is_anchor_span) { + PUSH_MARK(_T('['), off, off+2, MD_MARK_OPENER | MD_MARK_RESOLVED); + PUSH_MARK(_T(']'), closer_beg, closer_beg+1, MD_MARK_CLOSER | MD_MARK_RESOLVED); + ctx->marks[ctx->n_marks-2].next = ctx->n_marks-1; + ctx->marks[ctx->n_marks-1].prev = ctx->n_marks-2; + off = closer_beg+1; + continue; + } + // continue analyzing [ mark + } + + /* A potential link or its part. */ + if(ch == _T('[') || (ch == _T('!') && off+1 < line_end && CH(off+1) == _T('['))) { + OFF tmp = (ch == _T('[') ? off+1 : off+2); + PUSH_MARK(ch, off, tmp, MD_MARK_POTENTIAL_OPENER); + off = tmp; + /* Two dummies to make enough place for data we need if it is + * a link. 
*/ + PUSH_MARK('D', off, off, 0); + PUSH_MARK('D', off, off, 0); + continue; + } + if(ch == _T(']')) { + PUSH_MARK(ch, off, off+1, MD_MARK_POTENTIAL_CLOSER); + off++; + continue; + } + + /* A potential permissive e-mail autolink. */ + if(ch == _T('@')) { + if(line->beg + 1 <= off && ISALNUM(off-1) && + off + 3 < line->end && ISALNUM(off+1)) + { + PUSH_MARK(ch, off, off+1, MD_MARK_POTENTIAL_OPENER); + /* Push a dummy as a reserve for a closer. */ + PUSH_MARK('D', off, off, 0); + } + + off++; + continue; + } + + /* A potential permissive URL autolink. */ + if(ch == _T(':')) { + static struct { + const CHAR* scheme; + SZ scheme_size; + const CHAR* suffix; + SZ suffix_size; + } scheme_map[] = { + /* In the order from the most frequently used, arguably. */ + { _T("https"), 5, _T("//"), 2 }, + { _T("gemini"), 6, _T("//"), 2 }, + { _T("http"), 4, _T("//"), 2 }, + { _T("gopher"), 6, _T("//"), 2 }, + { _T("spartan"), 7, _T("//"), 2 }, + { _T("ftp"), 3, _T("//"), 2 } + }; + int scheme_index; + + for(scheme_index = 0; scheme_index < (int) SIZEOF_ARRAY(scheme_map); scheme_index++) { + const CHAR* scheme = scheme_map[scheme_index].scheme; + const SZ scheme_size = scheme_map[scheme_index].scheme_size; + const CHAR* suffix = scheme_map[scheme_index].suffix; + const SZ suffix_size = scheme_map[scheme_index].suffix_size; + + if(line->beg + scheme_size <= off && md_ascii_eq(STR(off-scheme_size), scheme, scheme_size) && + (line->beg + scheme_size == off || ISWHITESPACE(off-scheme_size-1) || ISANYOF(off-scheme_size-1, _T("*_~(["))) && + off + 1 + suffix_size < line->end && md_ascii_eq(STR(off+1), suffix, suffix_size)) + { + PUSH_MARK(ch, off-scheme_size, off+1+suffix_size, MD_MARK_POTENTIAL_OPENER); + /* Push a dummy as a reserve for a closer. */ + PUSH_MARK('D', off, off, 0); + off += 1 + suffix_size; + break; + } + } + + off++; + continue; + } + + /* A potential permissive WWW autolink. 
*/ + if(ch == _T('.')) { + if(line->beg + 3 <= off && md_ascii_eq(STR(off-3), _T("www"), 3) && + (line->beg + 3 == off || ISWHITESPACE(off-4) || ISANYOF(off-4, _T("*_~(["))) && + off + 1 < line_end) + { + PUSH_MARK(ch, off-3, off+1, MD_MARK_POTENTIAL_OPENER); + /* Push a dummy as a reserve for a closer. */ + PUSH_MARK('D', off, off, 0); + off++; + continue; + } + + off++; + continue; + } + + /* A potential table cell boundary or wiki link label delimiter. */ + if((table_mode || ctx->parser.flags & MD_FLAG_WIKILINKS) && ch == _T('|')) { + PUSH_MARK(ch, off, off+1, 0); + off++; + continue; + } + + /* A potential strikethrough start/end. */ + if(ch == _T('~')) { + OFF tmp = off+1; + + while(tmp < line_end && CH(tmp) == _T('~')) + tmp++; + + if(tmp - off < 3) { + unsigned flags = 0; + + if(tmp < line_end && !ISUNICODEWHITESPACE(tmp)) + flags |= MD_MARK_POTENTIAL_OPENER; + if(off > line->beg && !ISUNICODEWHITESPACEBEFORE(off)) + flags |= MD_MARK_POTENTIAL_CLOSER; + if(flags != 0) + PUSH_MARK(ch, off, tmp, flags); + } + + off = tmp; + continue; + } + + /* A potential equation start/end */ + if(ch == _T('$')) { + /* We can have at most two consecutive $ signs, + * where two dollar signs signify a display equation. */ + OFF tmp = off+1; + + while(tmp < line_end && CH(tmp) == _T('$')) + tmp++; + + if (tmp - off <= 2) + PUSH_MARK(ch, off, tmp, MD_MARK_POTENTIAL_OPENER | MD_MARK_POTENTIAL_CLOSER); + off = tmp; + continue; + } + + /* Turn non-trivial whitespace into single space. */ + if(ISWHITESPACE_(ch)) { + OFF tmp = off+1; + + while(tmp < line_end && ISWHITESPACE(tmp)) + tmp++; + + if(tmp - off > 1 || ch != _T(' ')) + PUSH_MARK(ch, off, tmp, MD_MARK_RESOLVED); + + off = tmp; + continue; + } + + /* NULL character. */ + if(ch == _T('\0')) { + PUSH_MARK(ch, off, off+1, MD_MARK_RESOLVED); + off++; + continue; + } + + off++; + } + } + + /* Add a dummy mark at the end of the mark vector to simplify + * process_inlines(). 
*/ + PUSH_MARK(127, ctx->size, ctx->size, MD_MARK_RESOLVED); + +abort: + return ret; +} + +static void +md_analyze_bracket(MD_CTX* ctx, int mark_index) +{ + /* We cannot really resolve links here as for that we would need + * more context. E.g. a following pair of brackets (reference link), + * or enclosing pair of brackets (if the inner is the link, the outer + * one cannot be.) + * + * Therefore we here only construct a list of '[' ']' pairs ordered by + * position of the closer. This allows us to analyze what is or is not + * link in the right order, from inside to outside in case of nested + * brackets. + * + * The resolving itself is deferred to md_resolve_links(). + */ + + MD_MARK* mark = &ctx->marks[mark_index]; + + if(mark->flags & MD_MARK_POTENTIAL_OPENER) { + if(BRACKET_OPENERS.head != -1) + ctx->marks[BRACKET_OPENERS.tail].flags |= MD_MARK_HASNESTEDBRACKETS; + + md_mark_chain_append(ctx, &BRACKET_OPENERS, mark_index); + return; + } + + if(BRACKET_OPENERS.tail >= 0) { + /* Pop the opener from the chain. */ + int opener_index = BRACKET_OPENERS.tail; + MD_MARK* opener = &ctx->marks[opener_index]; + if(opener->prev >= 0) + ctx->marks[opener->prev].next = -1; + else + BRACKET_OPENERS.head = -1; + BRACKET_OPENERS.tail = opener->prev; + + /* Interconnect the opener and closer. */ + opener->next = mark_index; + mark->prev = opener_index; + + /* Add the pair into chain of potential links for md_resolve_links(). + * Note we misuse opener->prev for this as opener->next points to its + * closer. */ + if(ctx->unresolved_link_tail >= 0) + ctx->marks[ctx->unresolved_link_tail].prev = opener_index; + else + ctx->unresolved_link_head = opener_index; + ctx->unresolved_link_tail = opener_index; + opener->prev = -1; + } +} + +/* Forward declaration. 
*/ +static void md_analyze_link_contents(MD_CTX* ctx, const MD_LINE* lines, int n_lines, + int mark_beg, int mark_end); + +static int +md_resolve_links(MD_CTX* ctx, const MD_LINE* lines, int n_lines) +{ + int opener_index = ctx->unresolved_link_head; + OFF last_link_beg = 0; + OFF last_link_end = 0; + OFF last_img_beg = 0; + OFF last_img_end = 0; + + while(opener_index >= 0) { + MD_MARK* opener = &ctx->marks[opener_index]; + int closer_index = opener->next; + MD_MARK* closer = &ctx->marks[closer_index]; + int next_index = opener->prev; + MD_MARK* next_opener; + MD_MARK* next_closer; + MD_LINK_ATTR attr; + int is_link = FALSE; + + if(next_index >= 0) { + next_opener = &ctx->marks[next_index]; + next_closer = &ctx->marks[next_opener->next]; + } else { + next_opener = NULL; + next_closer = NULL; + } + + /* If nested ("[ [ ] ]"), we need to make sure that: + * - The outer does not end inside of (...) belonging to the inner. + * - The outer cannot be link if the inner is link (i.e. not image). + * + * (Note we here analyze from inner to outer as the marks are ordered + * by closer->beg.) + */ + if((opener->beg < last_link_beg && closer->end < last_link_end) || + (opener->beg < last_img_beg && closer->end < last_img_end) || + (opener->beg < last_link_end && opener->ch == '[')) + { + opener_index = next_index; + continue; + } + + /* Recognize and resolve wiki links. + * Wiki-links maybe '[[destination]]' or '[[destination|label]]'. 
+ */ + if ((ctx->parser.flags & MD_FLAG_WIKILINKS) && + (opener->end - opener->beg == 1) && /* not image */ + next_opener != NULL && /* double '[' opener */ + next_opener->ch == '[' && + (next_opener->beg == opener->beg - 1) && + (next_opener->end - next_opener->beg == 1) && + next_closer != NULL && /* double ']' closer */ + next_closer->ch == ']' && + (next_closer->beg == closer->beg + 1) && + (next_closer->end - next_closer->beg == 1)) + { + MD_MARK* delim = NULL; + int delim_index; + OFF dest_beg, dest_end; + + is_link = TRUE; + + /* We don't allow destination to be longer than 100 characters. + * Lets scan to see whether there is '|'. (If not then the whole + * wiki-link has to be below the 100 characters.) */ + delim_index = opener_index + 1; + while(delim_index < closer_index) { + MD_MARK* m = &ctx->marks[delim_index]; + if(m->ch == '|') { + delim = m; + break; + } + if(m->ch != 'D' && m->beg - opener->end > 100) + break; + delim_index++; + } + dest_beg = opener->end; + dest_end = (delim != NULL) ? delim->beg : closer->beg; + if(dest_end - dest_beg == 0 || dest_end - dest_beg > 100) + is_link = FALSE; + + /* There may not be any new line in the destination. 
*/ + if(is_link) { + OFF off; + for(off = dest_beg; off < dest_end; off++) { + if(ISNEWLINE(off)) { + is_link = FALSE; + break; + } + } + } + + if(is_link) { + if(delim != NULL) { + if(delim->end < closer->beg) { + md_rollback(ctx, opener_index, delim_index, MD_ROLLBACK_ALL); + md_rollback(ctx, delim_index, closer_index, MD_ROLLBACK_CROSSING); + delim->flags |= MD_MARK_RESOLVED; + opener->end = delim->beg; + } else { + /* The pipe is just before the closer: [[foo|]] */ + md_rollback(ctx, opener_index, closer_index, MD_ROLLBACK_ALL); + closer->beg = delim->beg; + delim = NULL; + } + } + + opener->beg = next_opener->beg; + opener->next = closer_index; + opener->flags |= MD_MARK_OPENER | MD_MARK_RESOLVED; + + closer->end = next_closer->end; + closer->prev = opener_index; + closer->flags |= MD_MARK_CLOSER | MD_MARK_RESOLVED; + + last_link_beg = opener->beg; + last_link_end = closer->end; + + if(delim != NULL) + md_analyze_link_contents(ctx, lines, n_lines, delim_index+1, closer_index); + + opener_index = next_opener->prev; + continue; + } + } + + if(next_opener != NULL && next_opener->beg == closer->end) { + if(next_closer->beg > closer->end + 1) { + /* Might be full reference link. */ + if(!(next_opener->flags & MD_MARK_HASNESTEDBRACKETS)) + is_link = md_is_link_reference(ctx, lines, n_lines, next_opener->beg, next_closer->end, &attr); + } else { + /* Might be shortcut reference link. */ + if(!(opener->flags & MD_MARK_HASNESTEDBRACKETS)) + is_link = md_is_link_reference(ctx, lines, n_lines, opener->beg, closer->end, &attr); + } + + if(is_link < 0) + return -1; + + if(is_link) { + /* Eat the 2nd "[...]". */ + closer->end = next_closer->end; + + /* Do not analyze the label as a standalone link in the next + * iteration. */ + next_index = ctx->marks[next_index].prev; + } + } else { + if(closer->end < ctx->size && CH(closer->end) == _T('(')) { + /* Might be inline link. 
*/ + OFF inline_link_end = UINT_MAX; + + is_link = md_is_inline_link_spec(ctx, lines, n_lines, closer->end, &inline_link_end, &attr); + if(is_link < 0) + return -1; + + /* Check the closing ')' is not inside an already resolved range + * (i.e. a range with a higher priority), e.g. a code span. */ + if(is_link) { + int i = closer_index + 1; + + while(i < ctx->n_marks) { + MD_MARK* mark = &ctx->marks[i]; + + if(mark->beg >= inline_link_end) + break; + if((mark->flags & (MD_MARK_OPENER | MD_MARK_RESOLVED)) == (MD_MARK_OPENER | MD_MARK_RESOLVED)) { + if(ctx->marks[mark->next].beg >= inline_link_end) { + /* Cancel the link status. */ + if(attr.title_needs_free) + free(attr.title); + is_link = FALSE; + break; + } + + i = mark->next + 1; + } else { + i++; + } + } + } + + if(is_link) { + /* Eat the "(...)" */ + closer->end = inline_link_end; + } + } + + if(!is_link) { + /* Might be collapsed reference link. */ + if(!(opener->flags & MD_MARK_HASNESTEDBRACKETS)) + is_link = md_is_link_reference(ctx, lines, n_lines, opener->beg, closer->end, &attr); + if(is_link < 0) + return -1; + } + } + + if(is_link) { + /* Resolve the brackets as a link. */ + opener->flags |= MD_MARK_OPENER | MD_MARK_RESOLVED; + closer->flags |= MD_MARK_CLOSER | MD_MARK_RESOLVED; + + /* If it is a link, we store the destination and title in the two + * dummy marks after the opener. */ + MD_ASSERT(ctx->marks[opener_index+1].ch == 'D'); + ctx->marks[opener_index+1].beg = attr.dest_beg; + ctx->marks[opener_index+1].end = attr.dest_end; + + MD_ASSERT(ctx->marks[opener_index+2].ch == 'D'); + md_mark_store_ptr(ctx, opener_index+2, attr.title); + /* The title might or might not have been allocated for us. 
*/ + if(attr.title_needs_free) + md_mark_chain_append(ctx, &PTR_CHAIN, opener_index+2); + ctx->marks[opener_index+2].prev = attr.title_size; + + if(opener->ch == '[') { + last_link_beg = opener->beg; + last_link_end = closer->end; + } else { + last_img_beg = opener->beg; + last_img_end = closer->end; + } + + md_analyze_link_contents(ctx, lines, n_lines, opener_index+1, closer_index); + + /* If the link text is formed by nothing but permissive autolink, + * suppress the autolink. + * See https://github.com/mity/md4c/issues/152 for more info. */ + if(ctx->parser.flags & MD_FLAG_PERMISSIVEAUTOLINKS) { + MD_MARK* first_nested; + MD_MARK* last_nested; + + first_nested = opener + 1; + while(first_nested->ch == _T('D') && first_nested < closer) + first_nested++; + + last_nested = closer - 1; + while(first_nested->ch == _T('D') && last_nested > opener) + last_nested--; + + if((first_nested->flags & MD_MARK_RESOLVED) && + first_nested->beg == opener->end && + ISANYOF_(first_nested->ch, _T("@:.")) && + first_nested->next == (last_nested - ctx->marks) && + last_nested->end == closer->beg) + { + first_nested->ch = _T('D'); + first_nested->flags &= ~MD_MARK_RESOLVED; + last_nested->ch = _T('D'); + last_nested->flags &= ~MD_MARK_RESOLVED; + } + } + } + + opener_index = next_index; + } + + return 0; +} + +/* Analyze whether the mark '&' starts a HTML entity. + * If so, update its flags as well as flags of corresponding closer ';'. */ +static void +md_analyze_entity(MD_CTX* ctx, int mark_index) +{ + MD_MARK* opener = &ctx->marks[mark_index]; + MD_MARK* closer; + OFF off; + + /* Cannot be entity if there is no closer as the next mark. + * (Any other mark between would mean strange character which cannot be + * part of the entity. + * + * So we can do all the work on '&' and do not call this later for the + * closing mark ';'. 
+ */ + if(mark_index + 1 >= ctx->n_marks) + return; + closer = &ctx->marks[mark_index+1]; + if(closer->ch != ';') + return; + + if(md_is_entity(ctx, opener->beg, closer->end, &off)) { + MD_ASSERT(off == closer->end); + + md_resolve_range(ctx, NULL, mark_index, mark_index+1); + opener->end = closer->end; + } +} + +static void +md_analyze_table_cell_boundary(MD_CTX* ctx, int mark_index) +{ + MD_MARK* mark = &ctx->marks[mark_index]; + mark->flags |= MD_MARK_RESOLVED; + + md_mark_chain_append(ctx, &TABLECELLBOUNDARIES, mark_index); + ctx->n_table_cell_boundaries++; +} + +/* Split a longer mark into two. The new mark takes the given count of + * characters. May only be called if an adequate number of dummy 'D' marks + * follows. + */ +static int +md_split_emph_mark(MD_CTX* ctx, int mark_index, SZ n) +{ + MD_MARK* mark = &ctx->marks[mark_index]; + int new_mark_index = mark_index + (mark->end - mark->beg - n); + MD_MARK* dummy = &ctx->marks[new_mark_index]; + + MD_ASSERT(mark->end - mark->beg > n); + MD_ASSERT(dummy->ch == 'D'); + + memcpy(dummy, mark, sizeof(MD_MARK)); + mark->end -= n; + dummy->beg = mark->end; + + return new_mark_index; +} + +static void +md_analyze_emph(MD_CTX* ctx, int mark_index) +{ + MD_MARK* mark = &ctx->marks[mark_index]; + MD_MARKCHAIN* chain = md_mark_chain(ctx, mark_index); + + /* If we can be a closer, try to resolve with the preceding opener. */ + if(mark->flags & MD_MARK_POTENTIAL_CLOSER) { + MD_MARK* opener = NULL; + int opener_index = 0; + + if(mark->ch == _T('*')) { + MD_MARKCHAIN* opener_chains[6]; + int i, n_opener_chains; + unsigned flags = mark->flags; + + /* Apply the "rule of three". 
*/ + n_opener_chains = 0; + opener_chains[n_opener_chains++] = &ASTERISK_OPENERS_intraword_mod3_0; + if((flags & MD_MARK_EMPH_MOD3_MASK) != MD_MARK_EMPH_MOD3_2) + opener_chains[n_opener_chains++] = &ASTERISK_OPENERS_intraword_mod3_1; + if((flags & MD_MARK_EMPH_MOD3_MASK) != MD_MARK_EMPH_MOD3_1) + opener_chains[n_opener_chains++] = &ASTERISK_OPENERS_intraword_mod3_2; + opener_chains[n_opener_chains++] = &ASTERISK_OPENERS_extraword_mod3_0; + if(!(flags & MD_MARK_EMPH_INTRAWORD) || (flags & MD_MARK_EMPH_MOD3_MASK) != MD_MARK_EMPH_MOD3_2) + opener_chains[n_opener_chains++] = &ASTERISK_OPENERS_extraword_mod3_1; + if(!(flags & MD_MARK_EMPH_INTRAWORD) || (flags & MD_MARK_EMPH_MOD3_MASK) != MD_MARK_EMPH_MOD3_1) + opener_chains[n_opener_chains++] = &ASTERISK_OPENERS_extraword_mod3_2; + + /* Opener is the most recent mark from the allowed chains. */ + for(i = 0; i < n_opener_chains; i++) { + if(opener_chains[i]->tail >= 0) { + int tmp_index = opener_chains[i]->tail; + MD_MARK* tmp_mark = &ctx->marks[tmp_index]; + if(opener == NULL || tmp_mark->end > opener->end) { + opener_index = tmp_index; + opener = tmp_mark; + } + } + } + } else { + /* Simple emph. mark */ + if(chain->tail >= 0) { + opener_index = chain->tail; + opener = &ctx->marks[opener_index]; + } + } + + /* Resolve, if we have found matching opener. */ + if(opener != NULL) { + SZ opener_size = opener->end - opener->beg; + SZ closer_size = mark->end - mark->beg; + MD_MARKCHAIN* opener_chain = md_mark_chain(ctx, opener_index); + + if(opener_size > closer_size) { + opener_index = md_split_emph_mark(ctx, opener_index, closer_size); + md_mark_chain_append(ctx, opener_chain, opener_index); + } else if(opener_size < closer_size) { + md_split_emph_mark(ctx, mark_index, closer_size - opener_size); + } + + md_rollback(ctx, opener_index, mark_index, MD_ROLLBACK_CROSSING); + md_resolve_range(ctx, opener_chain, opener_index, mark_index); + return; + } + } + + /* If we could not resolve as closer, we may be yet be an opener. 
*/ + if(mark->flags & MD_MARK_POTENTIAL_OPENER) + md_mark_chain_append(ctx, chain, mark_index); +} + +static void +md_analyze_tilde(MD_CTX* ctx, int mark_index) +{ + MD_MARK* mark = &ctx->marks[mark_index]; + MD_MARKCHAIN* chain = md_mark_chain(ctx, mark_index); + + /* We attempt to be Github Flavored Markdown compatible here. GFM accepts + * only tildes sequences of length 1 and 2, and the length of the opener + * and closer has to match. */ + + if((mark->flags & MD_MARK_POTENTIAL_CLOSER) && chain->head >= 0) { + int opener_index = chain->head; + + md_rollback(ctx, opener_index, mark_index, MD_ROLLBACK_CROSSING); + md_resolve_range(ctx, chain, opener_index, mark_index); + return; + } + + if(mark->flags & MD_MARK_POTENTIAL_OPENER) + md_mark_chain_append(ctx, chain, mark_index); +} + +static void +md_analyze_dollar(MD_CTX* ctx, int mark_index) +{ + /* This should mimic the way inline equations work in LaTeX, so there + * can only ever be one item in the chain (i.e. the dollars can't be + * nested). This is basically the same as the md_analyze_tilde function, + * except that we require matching openers and closers to be of the same + * length. 
+ * + * E.g.: $abc$$def$$ => abc (display equation) def (end equation) */ + if(DOLLAR_OPENERS.head >= 0) { + /* If the potential closer has a non-matching number of $, discard */ + MD_MARK* open = &ctx->marks[DOLLAR_OPENERS.head]; + MD_MARK* close = &ctx->marks[mark_index]; + + int opener_index = DOLLAR_OPENERS.head; + md_rollback(ctx, opener_index, mark_index, MD_ROLLBACK_ALL); + if (open->end - open->beg == close->end - close->beg) { + /* We are the matching closer */ + md_resolve_range(ctx, &DOLLAR_OPENERS, opener_index, mark_index); + return; + } + } + + md_mark_chain_append(ctx, &DOLLAR_OPENERS, mark_index); +} + +static void +md_analyze_permissive_url_autolink(MD_CTX* ctx, int mark_index) +{ + MD_MARK* opener = &ctx->marks[mark_index]; + int closer_index = mark_index + 1; + MD_MARK* closer = &ctx->marks[closer_index]; + MD_MARK* next_resolved_mark; + OFF off = opener->end; + int n_dots = FALSE; + int has_underscore_in_last_seg = FALSE; + int has_underscore_in_next_to_last_seg = FALSE; + int n_opened_parenthesis = 0; + int n_excess_parenthesis = 0; + + /* Check for domain. */ + while(off < ctx->size) { + if(ISALNUM(off) || CH(off) == _T('-')) { + off++; + } else if(CH(off) == _T('.')) { + /* We must see at least one period. */ + n_dots++; + has_underscore_in_next_to_last_seg = has_underscore_in_last_seg; + has_underscore_in_last_seg = FALSE; + off++; + } else if(CH(off) == _T('_')) { + /* No underscore may be present in the last two domain segments. */ + has_underscore_in_last_seg = TRUE; + off++; + } else { + break; + } + } + if(off > opener->end && CH(off-1) == _T('.')) { + off--; + n_dots--; + } + if(off <= opener->end || n_dots == 0 || has_underscore_in_next_to_last_seg || has_underscore_in_last_seg) + return; + + /* Check for path. 
*/ + next_resolved_mark = closer + 1; + while(next_resolved_mark->ch == 'D' || !(next_resolved_mark->flags & MD_MARK_RESOLVED)) + next_resolved_mark++; + while(off < next_resolved_mark->beg && CH(off) != _T('<') && !ISWHITESPACE(off) && !ISNEWLINE(off)) { + /* Parenthesis must be balanced. */ + if(CH(off) == _T('(')) { + n_opened_parenthesis++; + } else if(CH(off) == _T(')')) { + if(n_opened_parenthesis > 0) + n_opened_parenthesis--; + else + n_excess_parenthesis++; + } + + off++; + } + + /* Trim a trailing punctuation from the end. */ + while(TRUE) { + if(ISANYOF(off-1, _T("?!.,:*_~"))) { + off--; + } else if(CH(off-1) == ')' && n_excess_parenthesis > 0) { + /* Unmatched ')' can be in an interior of the path but not at the + * of it, so the auto-link may be safely nested in a parenthesis + * pair. */ + off--; + n_excess_parenthesis--; + } else { + break; + } + } + + /* Ok. Lets call it an auto-link. Adapt opener and create closer to zero + * length so all the contents becomes the link text. */ + MD_ASSERT(closer->ch == 'D' || + ((ctx->parser.flags & MD_FLAG_PERMISSIVEWWWAUTOLINKS) && + (closer->ch == '.' || closer->ch == ':' || closer->ch == '@'))); + opener->end = opener->beg; + closer->ch = opener->ch; + closer->beg = off; + closer->end = off; + md_resolve_range(ctx, NULL, mark_index, closer_index); +} + +/* The permissive autolinks do not have to be enclosed in '<' '>' but we + * instead impose stricter rules what is understood as an e-mail address + * here. Actually any non-alphanumeric characters with exception of '.' + * are prohibited both in username and after '@'. */ +static void +md_analyze_permissive_email_autolink(MD_CTX* ctx, int mark_index) +{ + MD_MARK* opener = &ctx->marks[mark_index]; + int closer_index; + MD_MARK* closer; + OFF beg = opener->beg; + OFF end = opener->end; + int dot_count = 0; + + MD_ASSERT(opener->ch == _T('@')); + + /* Scan for name before '@'. 
*/ + while(beg > 0 && (ISALNUM(beg-1) || ISANYOF(beg-1, _T(".-_+")))) + beg--; + + /* Scan for domain after '@'. */ + while(end < ctx->size && (ISALNUM(end) || ISANYOF(end, _T(".-_")))) { + if(CH(end) == _T('.')) + dot_count++; + end++; + } + if(CH(end-1) == _T('.')) { /* Final '.' not part of it. */ + dot_count--; + end--; + } + else if(ISANYOF2(end-1, _T('-'), _T('_'))) /* These are forbidden at the end. */ + return; + if(CH(end-1) == _T('@') || dot_count == 0) + return; + + /* Ok. Lets call it auto-link. Adapt opener and create closer to zero + * length so all the contents becomes the link text. */ + closer_index = mark_index + 1; + closer = &ctx->marks[closer_index]; + if (closer->ch != 'D') return; + + opener->beg = beg; + opener->end = beg; + closer->ch = opener->ch; + closer->beg = end; + closer->end = end; + md_resolve_range(ctx, NULL, mark_index, closer_index); +} + +static inline void +md_analyze_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines, + int mark_beg, int mark_end, const CHAR* mark_chars) +{ + int i = mark_beg; + MD_UNUSED(lines); + MD_UNUSED(n_lines); + + while(i < mark_end) { + MD_MARK* mark = &ctx->marks[i]; + + /* Skip resolved spans. */ + if(mark->flags & MD_MARK_RESOLVED) { + if(mark->flags & MD_MARK_OPENER) { + MD_ASSERT(i < mark->next); + i = mark->next + 1; + } else { + i++; + } + continue; + } + + /* Skip marks we do not want to deal with. */ + if(!ISANYOF_(mark->ch, mark_chars)) { + i++; + continue; + } + + /* Analyze the mark. */ + switch(mark->ch) { + case '[': /* Pass through. */ + case '!': /* Pass through. */ + case ']': md_analyze_bracket(ctx, i); break; + case '&': md_analyze_entity(ctx, i); break; + case '|': md_analyze_table_cell_boundary(ctx, i); break; + case '_': /* Pass through. */ + case '*': md_analyze_emph(ctx, i); break; + case '~': md_analyze_tilde(ctx, i); break; + case '$': md_analyze_dollar(ctx, i); break; + case '.': /* Pass through. 
*/ + case ':': md_analyze_permissive_url_autolink(ctx, i); break; + case '@': md_analyze_permissive_email_autolink(ctx, i); break; + } + + i++; + } +} + +/* Analyze marks (build ctx->marks). */ +static int +md_analyze_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines, int table_mode) +{ + int ret; + + /* Reset the previously collected stack of marks. */ + ctx->n_marks = 0; + + /* Collect all marks. */ + MD_CHECK(md_collect_marks(ctx, lines, n_lines, table_mode)); + + /* (1) Links. */ + md_analyze_marks(ctx, lines, n_lines, 0, ctx->n_marks, _T("[]!")); + MD_CHECK(md_resolve_links(ctx, lines, n_lines)); + BRACKET_OPENERS.head = -1; + BRACKET_OPENERS.tail = -1; + ctx->unresolved_link_head = -1; + ctx->unresolved_link_tail = -1; + + if(table_mode) { + /* (2) Analyze table cell boundaries. + * Note we reset TABLECELLBOUNDARIES chain prior to the call md_analyze_marks(), + * not after, because caller may need it. */ + MD_ASSERT(n_lines == 1); + TABLECELLBOUNDARIES.head = -1; + TABLECELLBOUNDARIES.tail = -1; + ctx->n_table_cell_boundaries = 0; + md_analyze_marks(ctx, lines, n_lines, 0, ctx->n_marks, _T("|")); + return ret; + } + + /* (3) Emphasis and strong emphasis; permissive autolinks. 
*/ + md_analyze_link_contents(ctx, lines, n_lines, 0, ctx->n_marks); + +abort: + return ret; +} + +static void +md_analyze_link_contents(MD_CTX* ctx, const MD_LINE* lines, int n_lines, + int mark_beg, int mark_end) +{ + int i; + + md_analyze_marks(ctx, lines, n_lines, mark_beg, mark_end, _T("&")); + md_analyze_marks(ctx, lines, n_lines, mark_beg, mark_end, _T("*_~$@:.")); + + for(i = OPENERS_CHAIN_FIRST; i <= OPENERS_CHAIN_LAST; i++) { + ctx->mark_chains[i].head = -1; + ctx->mark_chains[i].tail = -1; + } +} + +static int +md_enter_leave_span_a(MD_CTX* ctx, int enter, MD_SPANTYPE type, + const CHAR* dest, SZ dest_size, int prohibit_escapes_in_dest, + const CHAR* title, SZ title_size) +{ + MD_ATTRIBUTE_BUILD href_build = { 0 }; + MD_ATTRIBUTE_BUILD title_build = { 0 }; + MD_SPAN_A_DETAIL det; + int ret = 0; + + /* Note we here rely on fact that MD_SPAN_A_DETAIL and + * MD_SPAN_IMG_DETAIL are binary-compatible. */ + memset(&det, 0, sizeof(MD_SPAN_A_DETAIL)); + MD_CHECK(md_build_attribute(ctx, dest, dest_size, + (prohibit_escapes_in_dest ? MD_BUILD_ATTR_NO_ESCAPES : 0), + &det.href, &href_build)); + MD_CHECK(md_build_attribute(ctx, title, title_size, 0, &det.title, &title_build)); + + if(enter) + MD_ENTER_SPAN(type, &det); + else + MD_LEAVE_SPAN(type, &det); + +abort: + md_free_attribute(ctx, &href_build); + md_free_attribute(ctx, &title_build); + return ret; +} + +static int +md_enter_leave_span_wikilink(MD_CTX* ctx, int enter, const CHAR* target, SZ target_size) +{ + MD_ATTRIBUTE_BUILD target_build = { 0 }; + MD_SPAN_WIKILINK_DETAIL det; + int ret = 0; + + memset(&det, 0, sizeof(MD_SPAN_WIKILINK_DETAIL)); + MD_CHECK(md_build_attribute(ctx, target, target_size, 0, &det.target, &target_build)); + + if (enter) + MD_ENTER_SPAN(MD_SPAN_WIKILINK, &det); + else + MD_LEAVE_SPAN(MD_SPAN_WIKILINK, &det); + +abort: + md_free_attribute(ctx, &target_build); + return ret; +} + + +/* Render the output, accordingly to the analyzed ctx->marks. 
*/ +static int +md_process_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines) +{ + MD_TEXTTYPE text_type; + const MD_LINE* line = lines; + MD_MARK* prev_mark = NULL; + MD_MARK* mark; + OFF off = lines[0].beg; + OFF end = lines[n_lines-1].end; + int enforce_hardbreak = 0; + int ret = 0; + + /* Find first resolved mark. Note there is always at least one resolved + * mark, the dummy last one after the end of the latest line we actually + * never really reach. This saves us of a lot of special checks and cases + * in this function. */ + mark = ctx->marks; + while(!(mark->flags & MD_MARK_RESOLVED)) + mark++; + + text_type = MD_TEXT_NORMAL; + + while(1) { + /* Process the text up to the next mark or end-of-line. */ + OFF tmp = (line->end < mark->beg ? line->end : mark->beg); + if(tmp > off) { + MD_TEXT(text_type, STR(off), tmp - off); + off = tmp; + } + + /* If reached the mark, process it and move to next one. */ + if(off >= mark->beg) { + switch(mark->ch) { + case '\\': /* Backslash escape. */ + if(ISNEWLINE(mark->beg+1)) + enforce_hardbreak = 1; + else + MD_TEXT(text_type, STR(mark->beg+1), 1); + break; + + case ' ': /* Non-trivial space. */ + MD_TEXT(text_type, _T(" "), 1); + break; + + case '`': /* Code span. */ + if(mark->flags & MD_MARK_OPENER) { + MD_ENTER_SPAN(MD_SPAN_CODE, NULL); + text_type = MD_TEXT_CODE; + } else { + MD_LEAVE_SPAN(MD_SPAN_CODE, NULL); + text_type = MD_TEXT_NORMAL; + } + break; + + case '-': /* faint */ + if(mark->flags & MD_MARK_OPENER) { + MD_ENTER_SPAN(MD_SPAN_FNT, NULL); + } else { + MD_LEAVE_SPAN(MD_SPAN_FNT, NULL); + } + break; + + case '%': /* inverse */ + if(mark->flags & MD_MARK_OPENER) { + MD_ENTER_SPAN(MD_SPAN_INV, NULL); + } else { + MD_LEAVE_SPAN(MD_SPAN_INV, NULL); + } + break; + + case '^': /* blink */ + if(mark->flags & MD_MARK_OPENER) { + MD_ENTER_SPAN(MD_SPAN_BLI, NULL); + } else { + MD_LEAVE_SPAN(MD_SPAN_BLI, NULL); + } + break; + + case '_': /* Underline (or emphasis if we fall through). 
*/ + if(ctx->parser.flags & MD_FLAG_UNDERLINE) { + if(mark->flags & MD_MARK_OPENER) { + /* while(off < mark->end) { */ + /* MD_ENTER_SPAN(MD_SPAN_U, NULL); */ + /* off++; */ + /* } */ + if((mark->end - off) % 2) { + MD_ENTER_SPAN(MD_SPAN_U, NULL); + off++; + } + while(off + 1 < mark->end) { + MD_ENTER_SPAN(MD_SPAN_STRONG, NULL); + off += 2; + } + } else { + /* while(off < mark->end) { */ + /* MD_LEAVE_SPAN(MD_SPAN_U, NULL); */ + /* off++; */ + /* } */ + while(off + 1 < mark->end) { + MD_LEAVE_SPAN(MD_SPAN_STRONG, NULL); + off += 2; + } + if((mark->end - off) % 2) { + MD_LEAVE_SPAN(MD_SPAN_U, NULL); + off++; + } + } + break; + } + MD_FALLTHROUGH(); + + case '*': /* Emphasis, strong emphasis. */ + if(mark->flags & MD_MARK_OPENER) { + if((mark->end - off) % 2) { + MD_ENTER_SPAN(MD_SPAN_EM, NULL); + off++; + } + while(off + 1 < mark->end) { + MD_ENTER_SPAN(MD_SPAN_STRONG, NULL); + off += 2; + } + } else { + while(off + 1 < mark->end) { + MD_LEAVE_SPAN(MD_SPAN_STRONG, NULL); + off += 2; + } + if((mark->end - off) % 2) { + MD_LEAVE_SPAN(MD_SPAN_EM, NULL); + off++; + } + } + break; + + case '~': /* crossed */ + if(mark->flags & MD_MARK_OPENER) + MD_ENTER_SPAN(MD_SPAN_DEL, NULL); + else + MD_LEAVE_SPAN(MD_SPAN_DEL, NULL); + break; + + case '$': + if(mark->flags & MD_MARK_OPENER) { + MD_ENTER_SPAN((mark->end - off) % 2 ? MD_SPAN_LATEXMATH : MD_SPAN_LATEXMATH_DISPLAY, NULL); + text_type = MD_TEXT_LATEXMATH; + } else { + MD_LEAVE_SPAN((mark->end - off) % 2 ? MD_SPAN_LATEXMATH : MD_SPAN_LATEXMATH_DISPLAY, NULL); + text_type = MD_TEXT_NORMAL; + } + break; + + case '!': /* conceal/hidden */ + if (mark->prev == -1) { + if(mark->flags & MD_MARK_OPENER) { + MD_ENTER_SPAN(MD_SPAN_COC, NULL); + } else { + MD_LEAVE_SPAN(MD_SPAN_COC, NULL); + } + break; + } + case '[': /* Link, wiki link, image, anchor. */ + case ']': + { + const MD_MARK* opener = (mark->ch != ']' ? 
mark : &ctx->marks[mark->prev]); + const MD_MARK* closer = &ctx->marks[opener->next]; + const MD_MARK* dest_mark; + const MD_MARK* title_mark; + + if ((opener->ch == '[' && closer->ch == ']') && + opener->end - opener->beg >= 2 && + closer->end - closer->beg >= 2) + { + int has_label = (opener->end - opener->beg > 2); + SZ target_sz; + + if(has_label) + target_sz = opener->end - (opener->beg+2); + else + target_sz = closer->beg - opener->end; + + MD_CHECK(md_enter_leave_span_wikilink(ctx, (mark->ch != ']'), + has_label ? STR(opener->beg+2) : STR(opener->end), + target_sz)); + + break; + } + + if ((opener->ch == '[' && closer->ch == ']') && + opener->end - opener->beg == 2 && + closer->end - closer->beg == 1 && + CH(opener->beg+1) == _T('|')) + { + if(mark->flags & MD_MARK_OPENER) { + MD_ENTER_SPAN(MD_SPAN_ANCHOR, NULL); + } else { + MD_LEAVE_SPAN(MD_SPAN_ANCHOR, NULL); + } + } + + dest_mark = opener+1; + MD_ASSERT(dest_mark->ch == 'D'); + title_mark = opener+2; + if (title_mark->ch != 'D') break; + + MD_CHECK(md_enter_leave_span_a(ctx, (mark->ch != ']'), + (opener->ch == '!' ? MD_SPAN_IMG : MD_SPAN_A), + STR(dest_mark->beg), dest_mark->end - dest_mark->beg, FALSE, + md_mark_get_ptr(ctx, (int)(title_mark - ctx->marks)), + title_mark->prev)); + + /* link/image closer may span multiple lines. */ + if(mark->ch == ']') { + while(mark->end > line->end) + line++; + } + + break; + } + + case '<': + case '>': /* Autolink or raw HTML. */ + if(!(mark->flags & MD_MARK_AUTOLINK)) { + /* Raw HTML. */ + if(mark->flags & MD_MARK_OPENER) + text_type = MD_TEXT_HTML; + else + text_type = MD_TEXT_NORMAL; + break; + } + /* Pass through, if auto-link. */ + MD_FALLTHROUGH(); + + case '@': /* Permissive e-mail autolink. */ + case ':': /* Permissive URL autolink. */ + case '.': /* Permissive WWW autolink. */ + { + MD_MARK* opener = ((mark->flags & MD_MARK_OPENER) ? 
mark : &ctx->marks[mark->prev]); + MD_MARK* closer = &ctx->marks[opener->next]; + const CHAR* dest = STR(opener->end); + SZ dest_size = closer->beg - opener->end; + + /* For permissive auto-links we do not know closer mark + * position at the time of md_collect_marks(), therefore + * it can be out-of-order in ctx->marks[]. + * + * With this flag, we make sure that we output the closer + * only if we processed the opener. */ + if(mark->flags & MD_MARK_OPENER) + closer->flags |= MD_MARK_VALIDPERMISSIVEAUTOLINK; + + if(opener->ch == '@' || opener->ch == '.') { + dest_size += 7; + MD_TEMP_BUFFER(dest_size * sizeof(CHAR)); + memcpy(ctx->buffer, + (opener->ch == '@' ? _T("mailto:") : _T("http://")), + 7 * sizeof(CHAR)); + memcpy(ctx->buffer + 7, dest, (dest_size-7) * sizeof(CHAR)); + dest = ctx->buffer; + } + + if(closer->flags & MD_MARK_VALIDPERMISSIVEAUTOLINK) + MD_CHECK(md_enter_leave_span_a(ctx, (mark->flags & MD_MARK_OPENER), + MD_SPAN_A, dest, dest_size, TRUE, NULL, 0)); + break; + } + + case '&': /* Entity. */ + MD_TEXT(MD_TEXT_ENTITY, STR(mark->beg), mark->end - mark->beg); + break; + + case '\0': + MD_TEXT(MD_TEXT_NULLCHAR, _T(""), 1); + break; + + case 127: + goto abort; + } + + off = mark->end; + + /* Move to next resolved mark. */ + prev_mark = mark; + mark++; + while(!(mark->flags & MD_MARK_RESOLVED) || mark->beg < off) + mark++; + } + + /* If reached end of line, move to next one. */ + if(off >= line->end) { + /* If it is the last line, we are done. */ + if(off >= end) + break; + + if(text_type == MD_TEXT_CODE || text_type == MD_TEXT_LATEXMATH) { + OFF tmp; + + MD_ASSERT(prev_mark != NULL); + MD_ASSERT(ISANYOF2_(prev_mark->ch, '`', '$') && (prev_mark->flags & MD_MARK_OPENER)); + MD_ASSERT(ISANYOF2_(mark->ch, '`', '$') && (mark->flags & MD_MARK_CLOSER)); + + /* Inside a code span, trailing line whitespace has to be + * outputted. 
*/ + tmp = off; + while(off < ctx->size && ISBLANK(off)) + off++; + if(off > tmp) + MD_TEXT(text_type, STR(tmp), off-tmp); + + /* and new lines are transformed into single spaces. */ + if(prev_mark->end < off && off < mark->beg) + MD_TEXT(text_type, _T(" "), 1); + } else if(text_type == MD_TEXT_HTML) { + /* Inside raw HTML, we output the new line verbatim, including + * any trailing spaces. */ + OFF tmp = off; + + while(tmp < end && ISBLANK(tmp)) + tmp++; + if(tmp > off) + MD_TEXT(MD_TEXT_HTML, STR(off), tmp - off); + MD_TEXT(MD_TEXT_HTML, _T("\n"), 1); + } else { + /* Output soft or hard line break. */ + MD_TEXTTYPE break_type = MD_TEXT_SOFTBR; + + if(text_type == MD_TEXT_NORMAL) { + if(enforce_hardbreak) + break_type = MD_TEXT_BR; + else if((CH(line->end) == _T(' ') && CH(line->end+1) == _T(' '))) + break_type = MD_TEXT_BR; + } + + MD_TEXT(break_type, _T("\n"), 1); + } + + /* Move to the next line. */ + line++; + off = line->beg; + + enforce_hardbreak = 0; + } + } + +abort: + return ret; +} + + +/*************************** + *** Processing Tables *** + ***************************/ + +static void +md_analyze_table_alignment(MD_CTX* ctx, OFF beg, OFF end, MD_ALIGN* align, int n_align) +{ + static const MD_ALIGN align_map[] = { MD_ALIGN_DEFAULT, MD_ALIGN_LEFT, MD_ALIGN_RIGHT, MD_ALIGN_CENTER }; + OFF off = beg; + + while(n_align > 0) { + int index = 0; /* index into align_map[] */ + + while(CH(off) != _T('-')) + off++; + if(off > beg && CH(off-1) == _T(':')) + index |= 1; + while(off < end && CH(off) == _T('-')) + off++; + if(off < end && CH(off) == _T(':')) + index |= 2; + + *align = align_map[index]; + align++; + n_align--; + } + +} + +/* Forward declaration. 
*/ +static int md_process_normal_block_contents(MD_CTX* ctx, const MD_LINE* lines, int n_lines); + +static int +md_process_table_cell(MD_CTX* ctx, MD_BLOCKTYPE cell_type, MD_ALIGN align, OFF beg, OFF end) +{ + MD_LINE line; + MD_BLOCK_TD_DETAIL det; + int ret = 0; + + while(beg < end && ISWHITESPACE(beg)) + beg++; + while(end > beg && ISWHITESPACE(end-1)) + end--; + + det.align = align; + line.beg = beg; + line.end = end; + + MD_ENTER_BLOCK(cell_type, &det); + MD_CHECK(md_process_normal_block_contents(ctx, &line, 1)); + MD_LEAVE_BLOCK(cell_type, &det); + +abort: + return ret; +} + +static int +md_process_table_row(MD_CTX* ctx, MD_BLOCKTYPE cell_type, OFF beg, OFF end, + const MD_ALIGN* align, int col_count) +{ + MD_LINE line; + OFF* pipe_offs = NULL; + int i, j, k, n; + int ret = 0; + + line.beg = beg; + line.end = end; + + /* Break the line into table cells by identifying pipe characters who + * form the cell boundary. */ + MD_CHECK(md_analyze_inlines(ctx, &line, 1, TRUE)); + + /* We have to remember the cell boundaries in local buffer because + * ctx->marks[] shall be reused during cell contents processing. */ + n = ctx->n_table_cell_boundaries + 2; + pipe_offs = (OFF*) malloc(n * sizeof(OFF)); + if(pipe_offs == NULL) { + MD_LOG("malloc() failed."); + ret = -1; + goto abort; + } + j = 0; + pipe_offs[j++] = beg; + for(i = TABLECELLBOUNDARIES.head; i >= 0; i = ctx->marks[i].next) { + MD_MARK* mark = &ctx->marks[i]; + pipe_offs[j++] = mark->end; + } + pipe_offs[j++] = end+1; + + /* Process cells. */ + MD_ENTER_BLOCK(MD_BLOCK_TR, NULL); + k = 0; + for(i = 0; i < j-1 && k < col_count; i++) { + if(pipe_offs[i] < pipe_offs[i+1]-1) + MD_CHECK(md_process_table_cell(ctx, cell_type, align[k++], pipe_offs[i], pipe_offs[i+1]-1)); + } + /* Make sure we call enough table cells even if the current table contains + * too few of them. 
*/ + while(k < col_count) + MD_CHECK(md_process_table_cell(ctx, cell_type, align[k++], 0, 0)); + MD_LEAVE_BLOCK(MD_BLOCK_TR, NULL); + +abort: + free(pipe_offs); + + /* Free any temporary memory blocks stored within some dummy marks. */ + for(i = PTR_CHAIN.head; i >= 0; i = ctx->marks[i].next) + free(md_mark_get_ptr(ctx, i)); + PTR_CHAIN.head = -1; + PTR_CHAIN.tail = -1; + + return ret; +} + +static int +md_process_table_block_contents(MD_CTX* ctx, int col_count, const MD_LINE* lines, int n_lines) +{ + MD_ALIGN* align; + int i; + int ret = 0; + + /* At least two lines have to be present: The column headers and the line + * with the underlines. */ + MD_ASSERT(n_lines >= 2); + + align = malloc(col_count * sizeof(MD_ALIGN)); + if(align == NULL) { + MD_LOG("malloc() failed."); + ret = -1; + goto abort; + } + + md_analyze_table_alignment(ctx, lines[1].beg, lines[1].end, align, col_count); + + MD_ENTER_BLOCK(MD_BLOCK_THEAD, NULL); + MD_CHECK(md_process_table_row(ctx, MD_BLOCK_TH, + lines[0].beg, lines[0].end, align, col_count)); + MD_LEAVE_BLOCK(MD_BLOCK_THEAD, NULL); + + if(n_lines > 2) { + MD_ENTER_BLOCK(MD_BLOCK_TBODY, NULL); + for(i = 2; i < n_lines; i++) { + MD_CHECK(md_process_table_row(ctx, MD_BLOCK_TD, + lines[i].beg, lines[i].end, align, col_count)); + } + MD_LEAVE_BLOCK(MD_BLOCK_TBODY, NULL); + } + +abort: + free(align); + return ret; +} + + +/************************** + *** Processing Block *** + **************************/ + +#define MD_BLOCK_CONTAINER_OPENER 0x01 +#define MD_BLOCK_CONTAINER_CLOSER 0x02 +#define MD_BLOCK_CONTAINER (MD_BLOCK_CONTAINER_OPENER | MD_BLOCK_CONTAINER_CLOSER) +#define MD_BLOCK_LOOSE_LIST 0x04 +#define MD_BLOCK_SETEXT_HEADER 0x08 + +struct MD_BLOCK_tag { + MD_BLOCKTYPE type : 8; + unsigned flags : 8; + + /* MD_BLOCK_H: Header level (1 - 6) + * MD_BLOCK_CODE: Non-zero if fenced, zero if indented. + * MD_BLOCK_LI: Task mark character (0 if not task list item, 'x', 'X' or ' '). 
+ * MD_BLOCK_TABLE: Column count (as determined by the table underline). + */ + unsigned data : 16; + + /* Leaf blocks: Count of lines (MD_LINE or MD_VERBATIMLINE) on the block. + * MD_BLOCK_LI: Task mark offset in the input doc. + * MD_BLOCK_OL: Start item number. + */ + unsigned n_lines; +}; + +struct MD_CONTAINER_tag { + CHAR ch; + unsigned is_loose : 8; + unsigned is_task : 8; + unsigned start; + unsigned mark_indent; + unsigned contents_indent; + OFF block_byte_off; + OFF task_mark_off; +}; + + +static int +md_process_normal_block_contents(MD_CTX* ctx, const MD_LINE* lines, int n_lines) +{ + int i; + int ret; + + MD_CHECK(md_analyze_inlines(ctx, lines, n_lines, FALSE)); + MD_CHECK(md_process_inlines(ctx, lines, n_lines)); + +abort: + /* Free any temporary memory blocks stored within some dummy marks. */ + for(i = PTR_CHAIN.head; i >= 0; i = ctx->marks[i].next) + free(md_mark_get_ptr(ctx, i)); + PTR_CHAIN.head = -1; + PTR_CHAIN.tail = -1; + + return ret; +} + +static int +md_process_verbatim_block_contents(MD_CTX* ctx, MD_TEXTTYPE text_type, const MD_VERBATIMLINE* lines, int n_lines) +{ + static const CHAR indent_chunk_str[] = _T(" "); + static const SZ indent_chunk_size = SIZEOF_ARRAY(indent_chunk_str) - 1; + + int i; + int ret = 0; + + for(i = 0; i < n_lines; i++) { + const MD_VERBATIMLINE* line = &lines[i]; + int indent = line->indent; + + MD_ASSERT(indent >= 0); + + /* Output code indentation. */ + while(indent > (int) indent_chunk_size) { + MD_TEXT(text_type, indent_chunk_str, indent_chunk_size); + indent -= indent_chunk_size; + } + if(indent > 0) + MD_TEXT(text_type, indent_chunk_str, indent); + + /* Output the code line itself. */ + MD_TEXT_INSECURE(text_type, STR(line->beg), line->end - line->beg); + + /* Enforce end-of-line. 
*/ + MD_TEXT(text_type, _T("\n"), 1); + } + +abort: + return ret; +} + +static int +md_process_code_block_contents(MD_CTX* ctx, int is_fenced, const MD_VERBATIMLINE* lines, int n_lines) +{ + if(is_fenced) { + /* Skip the first line in case of fenced code: It is the fence. + * (Only the starting fence is present due to logic in md_analyze_line().) */ + lines++; + n_lines--; + } else { + /* Ignore blank lines at start/end of indented code block. */ + while(n_lines > 0 && lines[0].beg == lines[0].end) { + lines++; + n_lines--; + } + while(n_lines > 0 && lines[n_lines-1].beg == lines[n_lines-1].end) { + n_lines--; + } + } + + if(n_lines == 0) + return 0; + + return md_process_verbatim_block_contents(ctx, MD_TEXT_CODE, lines, n_lines); +} + +static int +md_setup_fenced_code_detail(MD_CTX* ctx, const MD_BLOCK* block, MD_BLOCK_CODE_DETAIL* det, + MD_ATTRIBUTE_BUILD* info_build, MD_ATTRIBUTE_BUILD* lang_build) +{ + const MD_VERBATIMLINE* fence_line = (const MD_VERBATIMLINE*)(block + 1); + OFF beg = fence_line->beg; + OFF end = fence_line->end; + OFF lang_end; + CHAR fence_ch = CH(fence_line->beg); + int ret = 0; + + /* Skip the fence itself. */ + while(beg < ctx->size && CH(beg) == fence_ch) + beg++; + /* Trim initial spaces. */ + while(beg < ctx->size && CH(beg) == _T(' ')) + beg++; + + /* Trim trailing spaces. */ + while(end > beg && CH(end-1) == _T(' ')) + end--; + + /* Build info string attribute (the whole trimmed text after the fence). */ + MD_CHECK(md_build_attribute(ctx, STR(beg), end - beg, 0, &det->info, info_build)); + + /* Build lang string attribute (the info string up to its first whitespace). 
*/ + lang_end = beg; + while(lang_end < end && !ISWHITESPACE(lang_end)) + lang_end++; + MD_CHECK(md_build_attribute(ctx, STR(beg), lang_end - beg, 0, &det->lang, lang_build)); + + det->fence_char = fence_ch; + +abort: + return ret; +} + +static int +md_process_leaf_block(MD_CTX* ctx, const MD_BLOCK* block) +{ + union { + MD_BLOCK_H_DETAIL header; + MD_BLOCK_CODE_DETAIL code; + MD_BLOCK_TABLE_DETAIL table; + } det; + MD_ATTRIBUTE_BUILD info_build; + MD_ATTRIBUTE_BUILD lang_build; + int is_in_tight_list; + int clean_fence_code_detail = FALSE; + int ret = 0; + + memset(&det, 0, sizeof(det)); + + if(ctx->n_containers == 0) + is_in_tight_list = FALSE; + else + is_in_tight_list = !ctx->containers[ctx->n_containers-1].is_loose; + + switch(block->type) { + case MD_BLOCK_H: + det.header.level = block->data; + break; + + case MD_BLOCK_CODE: + /* For fenced code block (non-zero block->data), we need to set the info and lang strings. */ + if(block->data != 0) { + memset(&det.code, 0, sizeof(MD_BLOCK_CODE_DETAIL)); + clean_fence_code_detail = TRUE; + MD_CHECK(md_setup_fenced_code_detail(ctx, block, &det.code, &info_build, &lang_build)); + } + break; + + case MD_BLOCK_TABLE: + det.table.col_count = block->data; + det.table.head_row_count = 1; + det.table.body_row_count = block->n_lines - 2; + break; + + default: + /* Noop. */ + break; + } + + if(!is_in_tight_list || block->type != MD_BLOCK_P) + MD_ENTER_BLOCK(block->type, (void*) &det); + + /* Process the block contents according to its type. 
*/ + switch(block->type) { + case MD_BLOCK_HR: + /* noop */ + break; + + case MD_BLOCK_CODE: + MD_CHECK(md_process_code_block_contents(ctx, (block->data != 0), + (const MD_VERBATIMLINE*)(block + 1), block->n_lines)); + break; + + case MD_BLOCK_HTML: + MD_CHECK(md_process_verbatim_block_contents(ctx, MD_TEXT_HTML, + (const MD_VERBATIMLINE*)(block + 1), block->n_lines)); + break; + + case MD_BLOCK_TABLE: + MD_CHECK(md_process_table_block_contents(ctx, block->data, + (const MD_LINE*)(block + 1), block->n_lines)); + break; + + default: + MD_CHECK(md_process_normal_block_contents(ctx, + (const MD_LINE*)(block + 1), block->n_lines)); + break; + } + + if(!is_in_tight_list || block->type != MD_BLOCK_P) + MD_LEAVE_BLOCK(block->type, (void*) &det); + +abort: + if(clean_fence_code_detail) { + md_free_attribute(ctx, &info_build); + md_free_attribute(ctx, &lang_build); + } + return ret; +} + +static int +md_process_all_blocks(MD_CTX* ctx) +{ + int byte_off = 0; + int ret = 0; + + /* ctx->containers now is not needed for detection of lists and list items + * so we reuse it for tracking what lists are loose or tight. We rely + * on the fact the vector is large enough to hold the deepest nesting + * level of lists. */ + ctx->n_containers = 0; + + while(byte_off < ctx->n_block_bytes) { + MD_BLOCK* block = (MD_BLOCK*)((char*)ctx->block_bytes + byte_off); + union { + MD_BLOCK_UL_DETAIL ul; + MD_BLOCK_OL_DETAIL ol; + MD_BLOCK_LI_DETAIL li; + } det; + + switch(block->type) { + case MD_BLOCK_UL: + det.ul.is_tight = (block->flags & MD_BLOCK_LOOSE_LIST) ? FALSE : TRUE; + det.ul.mark = (CHAR) block->data; + break; + + case MD_BLOCK_OL: + det.ol.start = block->n_lines; + det.ol.is_tight = (block->flags & MD_BLOCK_LOOSE_LIST) ? 
FALSE : TRUE; + det.ol.mark_delimiter = (CHAR) block->data; + break; + + case MD_BLOCK_LI: + det.li.is_task = (block->data != 0); + det.li.task_mark = (CHAR) block->data; + det.li.task_mark_offset = (OFF) block->n_lines; + break; + + default: + /* noop */ + break; + } + + if(block->flags & MD_BLOCK_CONTAINER) { + if(block->flags & MD_BLOCK_CONTAINER_CLOSER) { + MD_LEAVE_BLOCK(block->type, &det); + + if(block->type == MD_BLOCK_UL || block->type == MD_BLOCK_OL || block->type == MD_BLOCK_QUOTE) + ctx->n_containers--; + } + + if(block->flags & MD_BLOCK_CONTAINER_OPENER) { + MD_ENTER_BLOCK(block->type, &det); + + if(block->type == MD_BLOCK_UL || block->type == MD_BLOCK_OL) { + ctx->containers[ctx->n_containers].is_loose = (block->flags & MD_BLOCK_LOOSE_LIST); + ctx->n_containers++; + } else if(block->type == MD_BLOCK_QUOTE) { + /* This causes that any text in a block quote, even if + * nested inside a tight list item, is wrapped with + * <p>...</p>. */ + ctx->containers[ctx->n_containers].is_loose = TRUE; + ctx->n_containers++; + } + } + } else { + MD_CHECK(md_process_leaf_block(ctx, block)); + + if(block->type == MD_BLOCK_CODE || block->type == MD_BLOCK_HTML) + byte_off += block->n_lines * sizeof(MD_VERBATIMLINE); + else + byte_off += block->n_lines * sizeof(MD_LINE); + } + + byte_off += sizeof(MD_BLOCK); + } + + ctx->n_block_bytes = 0; + +abort: + return ret; +} + + +/************************************ + *** Grouping Lines into Blocks *** + ************************************/ + +static void* +md_push_block_bytes(MD_CTX* ctx, int n_bytes) +{ + void* ptr; + + if(ctx->n_block_bytes + n_bytes > ctx->alloc_block_bytes) { + void* new_block_bytes; + + ctx->alloc_block_bytes = (ctx->alloc_block_bytes > 0 + ? 
ctx->alloc_block_bytes + ctx->alloc_block_bytes / 2 + : 512); + new_block_bytes = realloc(ctx->block_bytes, ctx->alloc_block_bytes); + if(new_block_bytes == NULL) { + MD_LOG("realloc() failed."); + return NULL; + } + + /* Fix the ->current_block after the reallocation. */ + if(ctx->current_block != NULL) { + OFF off_current_block = (OFF) ((char*) ctx->current_block - (char*) ctx->block_bytes); + ctx->current_block = (MD_BLOCK*) ((char*) new_block_bytes + off_current_block); + } + + ctx->block_bytes = new_block_bytes; + } + + ptr = (char*)ctx->block_bytes + ctx->n_block_bytes; + ctx->n_block_bytes += n_bytes; + return ptr; +} + +static int +md_start_new_block(MD_CTX* ctx, const MD_LINE_ANALYSIS* line) +{ + MD_BLOCK* block; + + MD_ASSERT(ctx->current_block == NULL); + + block = (MD_BLOCK*) md_push_block_bytes(ctx, sizeof(MD_BLOCK)); + if(block == NULL) + return -1; + + switch(line->type) { + case MD_LINE_HR: + block->type = MD_BLOCK_HR; + break; + + case MD_LINE_ATXHEADER: + case MD_LINE_SETEXTHEADER: + block->type = MD_BLOCK_H; + break; + + case MD_LINE_FENCEDCODE: + case MD_LINE_INDENTEDCODE: + block->type = MD_BLOCK_CODE; + break; + + case MD_LINE_TEXT: + block->type = MD_BLOCK_P; + break; + + case MD_LINE_HTML: + block->type = MD_BLOCK_HTML; + break; + + case MD_LINE_BLANK: + case MD_LINE_SETEXTUNDERLINE: + case MD_LINE_TABLEUNDERLINE: + default: + MD_UNREACHABLE(); + break; + } + + block->flags = 0; + block->data = line->data; + block->n_lines = 0; + + ctx->current_block = block; + return 0; +} + +/* Eat from start of current (textual) block any reference definitions and + * remember them so we can resolve any links referring to them. + * + * (Reference definitions can only be at start of it as they cannot break + * a paragraph.) 
+ */ +static int +md_consume_link_reference_definitions(MD_CTX* ctx) +{ + MD_LINE* lines = (MD_LINE*) (ctx->current_block + 1); + int n_lines = ctx->current_block->n_lines; + int n = 0; + + /* Compute how many lines at the start of the block form one or more + * reference definitions. */ + while(n < n_lines) { + int n_link_ref_lines; + + n_link_ref_lines = md_is_link_reference_definition(ctx, + lines + n, n_lines - n); + /* Not a reference definition? */ + if(n_link_ref_lines == 0) + break; + + /* We fail if it is the ref. def. but it could not be stored due + * a memory allocation error. */ + if(n_link_ref_lines < 0) + return -1; + + n += n_link_ref_lines; + } + + /* If there was at least one reference definition, we need to remove + * its lines from the block, or perhaps even the whole block. */ + if(n > 0) { + if(n == n_lines) { + /* Remove complete block. */ + ctx->n_block_bytes -= n * sizeof(MD_LINE); + ctx->n_block_bytes -= sizeof(MD_BLOCK); + ctx->current_block = NULL; + } else { + /* Remove just some initial lines from the block. */ + memmove(lines, lines + n, (n_lines - n) * sizeof(MD_LINE)); + ctx->current_block->n_lines -= n; + ctx->n_block_bytes -= n * sizeof(MD_LINE); + } + } + + return 0; +} + +static int +md_end_current_block(MD_CTX* ctx) +{ + int ret = 0; + + if(ctx->current_block == NULL) + return ret; + + /* Check whether there is a reference definition. (We do this here instead + * of in md_analyze_line() because reference definition can take multiple + * lines.) 
*/ + if(ctx->current_block->type == MD_BLOCK_P || + (ctx->current_block->type == MD_BLOCK_H && (ctx->current_block->flags & MD_BLOCK_SETEXT_HEADER))) + { + MD_LINE* lines = (MD_LINE*) (ctx->current_block + 1); + if(CH(lines[0].beg) == _T('[')) { + MD_CHECK(md_consume_link_reference_definitions(ctx)); + if(ctx->current_block == NULL) + return ret; + } + } + + if(ctx->current_block->type == MD_BLOCK_H && (ctx->current_block->flags & MD_BLOCK_SETEXT_HEADER)) { + int n_lines = ctx->current_block->n_lines; + + if(n_lines > 1) { + /* Get rid of the underline. */ + ctx->current_block->n_lines--; + ctx->n_block_bytes -= sizeof(MD_LINE); + } else { + /* Only the underline has left after eating the ref. defs. + * Keep the line as beginning of a new ordinary paragraph. */ + ctx->current_block->type = MD_BLOCK_P; + return 0; + } + } + + /* Mark we are not building any block anymore. */ + ctx->current_block = NULL; + +abort: + return ret; +} + +static int +md_add_line_into_current_block(MD_CTX* ctx, const MD_LINE_ANALYSIS* analysis) +{ + MD_ASSERT(ctx->current_block != NULL); + + if(ctx->current_block->type == MD_BLOCK_CODE || ctx->current_block->type == MD_BLOCK_HTML) { + MD_VERBATIMLINE* line; + + line = (MD_VERBATIMLINE*) md_push_block_bytes(ctx, sizeof(MD_VERBATIMLINE)); + if(line == NULL) + return -1; + + line->indent = analysis->indent; + line->beg = analysis->beg; + line->end = analysis->end; + } else { + MD_LINE* line; + + line = (MD_LINE*) md_push_block_bytes(ctx, sizeof(MD_LINE)); + if(line == NULL) + return -1; + + line->beg = analysis->beg; + line->end = analysis->end; + } + ctx->current_block->n_lines++; + + return 0; +} + +static int +md_push_container_bytes(MD_CTX* ctx, MD_BLOCKTYPE type, unsigned start, + unsigned data, unsigned flags) +{ + MD_BLOCK* block; + int ret = 0; + + MD_CHECK(md_end_current_block(ctx)); + + block = (MD_BLOCK*) md_push_block_bytes(ctx, sizeof(MD_BLOCK)); + if(block == NULL) + return -1; + + block->type = type; + block->flags = flags; + 
block->data = data; + block->n_lines = start; + +abort: + return ret; +} + + + +/*********************** + *** Line Analysis *** + ***********************/ + +static int +md_is_hr_line(MD_CTX* ctx, OFF beg, OFF* p_end, OFF* p_killer) +{ + OFF off = beg + 1; + int n = 1; + + while(off < ctx->size && (CH(off) == CH(beg) || CH(off) == _T(' ') || CH(off) == _T('\t'))) { + if(CH(off) == CH(beg)) + n++; + off++; + } + + if(n < 3) { + *p_killer = off; + return FALSE; + } + + /* Nothing else can be present on the line. */ + if(off < ctx->size && !ISNEWLINE(off)) { + *p_killer = off; + return FALSE; + } + + *p_end = off; + return TRUE; +} + +static int +md_is_atxheader_line(MD_CTX* ctx, OFF beg, OFF* p_beg, OFF* p_end, unsigned* p_level) +{ + int n; + OFF off = beg + 1; + + while(off < ctx->size && CH(off) == _T('#') && off - beg < 7) + off++; + n = off - beg; + + if(n > 6) + return FALSE; + *p_level = n; + + if(!(ctx->parser.flags & MD_FLAG_PERMISSIVEATXHEADERS) && off < ctx->size && + CH(off) != _T(' ') && CH(off) != _T('\t') && !ISNEWLINE(off)) + return FALSE; + + while(off < ctx->size && CH(off) == _T(' ')) + off++; + *p_beg = off; + *p_end = off; + return TRUE; +} + +static int +md_is_setext_underline(MD_CTX* ctx, OFF beg, OFF* p_end, unsigned* p_level) +{ + OFF off = beg + 1; + + while(off < ctx->size && CH(off) == CH(beg)) + off++; + + /* Optionally, space(s) can follow. */ + while(off < ctx->size && CH(off) == _T(' ')) + off++; + + /* But nothing more is allowed on the line. */ + if(off < ctx->size && !ISNEWLINE(off)) + return FALSE; + + *p_level = (CH(beg) == _T('=') ? 
1 : 2); + *p_end = off; + return TRUE; +} + +static int +md_is_table_underline(MD_CTX* ctx, OFF beg, OFF* p_end, unsigned* p_col_count) +{ + OFF off = beg; + int found_pipe = FALSE; + unsigned col_count = 0; + + if(off < ctx->size && CH(off) == _T('|')) { + found_pipe = TRUE; + off++; + while(off < ctx->size && ISWHITESPACE(off)) + off++; + } + + while(1) { + int delimited = FALSE; + + /* Cell underline ("-----", ":----", "----:" or ":----:") */ + if(off < ctx->size && CH(off) == _T(':')) + off++; + if(off >= ctx->size || CH(off) != _T('-')) + return FALSE; + while(off < ctx->size && CH(off) == _T('-')) + off++; + if(off < ctx->size && CH(off) == _T(':')) + off++; + + col_count++; + + /* Pipe delimiter (optional at the end of line). */ + while(off < ctx->size && ISWHITESPACE(off)) + off++; + if(off < ctx->size && CH(off) == _T('|')) { + delimited = TRUE; + found_pipe = TRUE; + off++; + while(off < ctx->size && ISWHITESPACE(off)) + off++; + } + + /* Success, if we reach end of line. */ + if(off >= ctx->size || ISNEWLINE(off)) + break; + + if(!delimited) + return FALSE; + } + + if(!found_pipe) + return FALSE; + + *p_end = off; + *p_col_count = col_count; + return TRUE; +} + +static int +md_is_opening_code_fence(MD_CTX* ctx, OFF beg, OFF* p_end) +{ + OFF off = beg; + + while(off < ctx->size && CH(off) == CH(beg)) + off++; + + /* Fence must have at least three characters. */ + if(off - beg < 3) + return FALSE; + + ctx->code_fence_length = off - beg; + + /* Optionally, space(s) can follow. */ + while(off < ctx->size && CH(off) == _T(' ')) + off++; + + /* Optionally, an info string can follow. */ + while(off < ctx->size && !ISNEWLINE(off)) { + /* Backtick-based fence must not contain '`' in the info string. 
*/ + if(CH(beg) == _T('`') && CH(off) == _T('`')) + return FALSE; + off++; + } + + *p_end = off; + return TRUE; +} + +static int +md_is_closing_code_fence(MD_CTX* ctx, CHAR ch, OFF beg, OFF* p_end) +{ + OFF off = beg; + int ret = FALSE; + + /* Closing fence must have at least the same length and use same char as + * opening one. */ + while(off < ctx->size && CH(off) == ch) + off++; + if(off - beg < ctx->code_fence_length) + goto out; + + /* Optionally, space(s) can follow */ + while(off < ctx->size && CH(off) == _T(' ')) + off++; + + /* But nothing more is allowed on the line. */ + if(off < ctx->size && !ISNEWLINE(off)) + goto out; + + ret = TRUE; + +out: + /* Note we set *p_end even on failure: If we are not closing fence, caller + * would eat the line anyway without any parsing. */ + *p_end = off; + return ret; +} + +/* Returns type of the raw HTML block, or FALSE if it is not HTML block. + * (Refer to CommonMark specification for details about the types.) + */ +static int +md_is_html_block_start_condition(MD_CTX* ctx, OFF beg) +{ + typedef struct TAG_tag TAG; + struct TAG_tag { + const CHAR* name; + unsigned len : 8; + }; + + /* Type 6 is started by a long list of allowed tags. We use two-level + * tree to speed-up the search. 
*/ +#ifdef X + #undef X +#endif +#define X(name) { _T(name), (sizeof(name)-1) / sizeof(CHAR) } +#define Xend { NULL, 0 } + static const TAG t1[] = { X("pre"), X("script"), X("style"), X("textarea"), Xend }; + + static const TAG a6[] = { X("address"), X("article"), X("aside"), Xend }; + static const TAG b6[] = { X("base"), X("basefont"), X("blockquote"), X("body"), Xend }; + static const TAG c6[] = { X("caption"), X("center"), X("col"), X("colgroup"), Xend }; + static const TAG d6[] = { X("dd"), X("details"), X("dialog"), X("dir"), + X("div"), X("dl"), X("dt"), Xend }; + static const TAG f6[] = { X("fieldset"), X("figcaption"), X("figure"), X("footer"), + X("form"), X("frame"), X("frameset"), Xend }; + static const TAG h6[] = { X("h1"), X("head"), X("header"), X("hr"), X("html"), Xend }; + static const TAG i6[] = { X("iframe"), Xend }; + static const TAG l6[] = { X("legend"), X("li"), X("link"), Xend }; + static const TAG m6[] = { X("main"), X("menu"), X("menuitem"), Xend }; + static const TAG n6[] = { X("nav"), X("noframes"), Xend }; + static const TAG o6[] = { X("ol"), X("optgroup"), X("option"), Xend }; + static const TAG p6[] = { X("p"), X("param"), Xend }; + static const TAG s6[] = { X("section"), X("source"), X("summary"), Xend }; + static const TAG t6[] = { X("table"), X("tbody"), X("td"), X("tfoot"), X("th"), + X("thead"), X("title"), X("tr"), X("track"), Xend }; + static const TAG u6[] = { X("ul"), Xend }; + static const TAG xx[] = { Xend }; +#undef X + + static const TAG* map6[26] = { + a6, b6, c6, d6, xx, f6, xx, h6, i6, xx, xx, l6, m6, + n6, o6, p6, xx, xx, s6, t6, u6, xx, xx, xx, xx, xx + }; + OFF off = beg + 1; + int i; + + /* Check for type 1: <script, <pre, or <style */ + for(i = 0; t1[i].name != NULL; i++) { + if(off + t1[i].len <= ctx->size) { + if(md_ascii_case_eq(STR(off), t1[i].name, t1[i].len)) + return 1; + } + } + + /* Check for type 2: <!-- */ + if(off + 3 < ctx->size && CH(off) == _T('!') && CH(off+1) == _T('-') && CH(off+2) == _T('-')) + 
return 2; + + /* Check for type 3: <? */ + if(off < ctx->size && CH(off) == _T('?')) + return 3; + + /* Check for type 4 or 5: <! */ + if(off < ctx->size && CH(off) == _T('!')) { + /* Check for type 4: <! followed by uppercase letter. */ + if(off + 1 < ctx->size && ISASCII(off+1)) + return 4; + + /* Check for type 5: <![CDATA[ */ + if(off + 8 < ctx->size) { + if(md_ascii_eq(STR(off), _T("![CDATA["), 8)) + return 5; + } + } + + /* Check for type 6: Many possible starting tags listed above. */ + if(off + 1 < ctx->size && (ISALPHA(off) || (CH(off) == _T('/') && ISALPHA(off+1)))) { + int slot; + const TAG* tags; + + if(CH(off) == _T('/')) + off++; + + slot = (ISUPPER(off) ? CH(off) - 'A' : CH(off) - 'a'); + tags = map6[slot]; + + for(i = 0; tags[i].name != NULL; i++) { + if(off + tags[i].len <= ctx->size) { + if(md_ascii_case_eq(STR(off), tags[i].name, tags[i].len)) { + OFF tmp = off + tags[i].len; + if(tmp >= ctx->size) + return 6; + if(ISBLANK(tmp) || ISNEWLINE(tmp) || CH(tmp) == _T('>')) + return 6; + if(tmp+1 < ctx->size && CH(tmp) == _T('/') && CH(tmp+1) == _T('>')) + return 6; + break; + } + } + } + } + + /* Check for type 7: any COMPLETE other opening or closing tag. */ + if(off + 1 < ctx->size) { + OFF end; + + if(md_is_html_tag(ctx, NULL, 0, beg, ctx->size, &end)) { + /* Only optional whitespace and new line may follow. */ + while(end < ctx->size && ISWHITESPACE(end)) + end++; + if(end >= ctx->size || ISNEWLINE(end)) + return 7; + } + } + + return FALSE; +} + +/* Case sensitive check whether there is a substring 'what' between 'beg' + * and end of line. */ +static int +md_line_contains(MD_CTX* ctx, OFF beg, const CHAR* what, SZ what_len, OFF* p_end) +{ + OFF i; + for(i = beg; i + what_len < ctx->size; i++) { + if(ISNEWLINE(i)) + break; + if(memcmp(STR(i), what, what_len * sizeof(CHAR)) == 0) { + *p_end = i + what_len; + return TRUE; + } + } + + *p_end = i; + return FALSE; +} + +/* Returns type of HTML block end condition or FALSE if not an end condition. 
+ * + * Note it fills p_end even when it is not end condition as the caller + * does not need to analyze contents of a raw HTML block. + */ +static int +md_is_html_block_end_condition(MD_CTX* ctx, OFF beg, OFF* p_end) +{ + switch(ctx->html_block_type) { + case 1: + { + OFF off = beg; + + while(off < ctx->size && !ISNEWLINE(off)) { + if(CH(off) == _T('<')) { + #define FIND_TAG_END(string, length) \ + if(off + length <= ctx->size && \ + md_ascii_case_eq(STR(off), _T(string), length)) { \ + *p_end = off + length; \ + return TRUE; \ + } + FIND_TAG_END("</script>", 9) + FIND_TAG_END("</style>", 8) + FIND_TAG_END("</pre>", 6) + #undef FIND_TAG_END + } + + off++; + } + *p_end = off; + return FALSE; + } + + case 2: + return (md_line_contains(ctx, beg, _T("-->"), 3, p_end) ? 2 : FALSE); + + case 3: + return (md_line_contains(ctx, beg, _T("?>"), 2, p_end) ? 3 : FALSE); + + case 4: + return (md_line_contains(ctx, beg, _T(">"), 1, p_end) ? 4 : FALSE); + + case 5: + return (md_line_contains(ctx, beg, _T("]]>"), 3, p_end) ? 5 : FALSE); + + case 6: /* Pass through */ + case 7: + *p_end = beg; + return (beg >= ctx->size || ISNEWLINE(beg) ? ctx->html_block_type : FALSE); + + default: + MD_UNREACHABLE(); + } + return FALSE; +} + + +static int +md_is_container_compatible(const MD_CONTAINER* pivot, const MD_CONTAINER* container) +{ + /* Block quote has no "items" like lists. */ + if(container->ch == _T('>')) + return FALSE; + + if(container->ch != pivot->ch) + return FALSE; + if(container->mark_indent > pivot->contents_indent) + return FALSE; + + return TRUE; +} + +static int +md_push_container(MD_CTX* ctx, const MD_CONTAINER* container) +{ + if(ctx->n_containers >= ctx->alloc_containers) { + MD_CONTAINER* new_containers; + + ctx->alloc_containers = (ctx->alloc_containers > 0 + ? 
ctx->alloc_containers + ctx->alloc_containers / 2 + : 16); + new_containers = realloc(ctx->containers, ctx->alloc_containers * sizeof(MD_CONTAINER)); + if(new_containers == NULL) { + MD_LOG("realloc() failed."); + return -1; + } + + ctx->containers = new_containers; + } + + memcpy(&ctx->containers[ctx->n_containers++], container, sizeof(MD_CONTAINER)); + return 0; +} + +static int +md_enter_child_containers(MD_CTX* ctx, int n_children) +{ + int i; + int ret = 0; + + for(i = ctx->n_containers - n_children; i < ctx->n_containers; i++) { + MD_CONTAINER* c = &ctx->containers[i]; + int is_ordered_list = FALSE; + + switch(c->ch) { + case _T(')'): + case _T('.'): + is_ordered_list = TRUE; + MD_FALLTHROUGH(); + + case _T('-'): + case _T('+'): + case _T('*'): + /* Remember offset in ctx->block_bytes so we can revisit the + * block if we detect it is a loose list. */ + md_end_current_block(ctx); + c->block_byte_off = ctx->n_block_bytes; + + MD_CHECK(md_push_container_bytes(ctx, + (is_ordered_list ? MD_BLOCK_OL : MD_BLOCK_UL), + c->start, c->ch, MD_BLOCK_CONTAINER_OPENER)); + MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_LI, + c->task_mark_off, + (c->is_task ? CH(c->task_mark_off) : 0), + MD_BLOCK_CONTAINER_OPENER)); + break; + + case _T('>'): + MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_QUOTE, 0, 0, MD_BLOCK_CONTAINER_OPENER)); + break; + + default: + MD_UNREACHABLE(); + break; + } + } + +abort: + return ret; +} + +static int +md_leave_child_containers(MD_CTX* ctx, int n_keep) +{ + int ret = 0; + + while(ctx->n_containers > n_keep) { + MD_CONTAINER* c = &ctx->containers[ctx->n_containers-1]; + int is_ordered_list = FALSE; + + switch(c->ch) { + case _T(')'): + case _T('.'): + is_ordered_list = TRUE; + MD_FALLTHROUGH(); + + case _T('-'): + case _T('+'): + case _T('*'): + MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_LI, + c->task_mark_off, (c->is_task ? CH(c->task_mark_off) : 0), + MD_BLOCK_CONTAINER_CLOSER)); + MD_CHECK(md_push_container_bytes(ctx, + (is_ordered_list ? 
MD_BLOCK_OL : MD_BLOCK_UL), 0, + c->ch, MD_BLOCK_CONTAINER_CLOSER)); + break; + + case _T('>'): + MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_QUOTE, 0, + 0, MD_BLOCK_CONTAINER_CLOSER)); + break; + + default: + MD_UNREACHABLE(); + break; + } + + ctx->n_containers--; + } + +abort: + return ret; +} + +static int +md_is_container_mark(MD_CTX* ctx, unsigned indent, OFF beg, OFF* p_end, MD_CONTAINER* p_container) +{ + OFF off = beg; + OFF max_end; + + if(off >= ctx->size || indent >= ctx->code_indent_offset) + return FALSE; + + /* Check for block quote mark. */ + if(CH(off) == _T('>')) { + off++; + p_container->ch = _T('>'); + p_container->is_loose = FALSE; + p_container->is_task = FALSE; + p_container->mark_indent = indent; + p_container->contents_indent = indent + 1; + *p_end = off; + return TRUE; + } + + /* Check for list item bullet mark. */ + if(ISANYOF(off, _T("-+*")) && (off+1 >= ctx->size || ISBLANK(off+1) || ISNEWLINE(off+1))) { + p_container->ch = CH(off); + p_container->is_loose = FALSE; + p_container->is_task = FALSE; + p_container->mark_indent = indent; + p_container->contents_indent = indent + 1; + *p_end = off+1; + return TRUE; + } + + /* Check for ordered list item marks. 
*/ + max_end = off + 9; + if(max_end > ctx->size) + max_end = ctx->size; + p_container->start = 0; + while(off < max_end && ISDIGIT(off)) { + p_container->start = p_container->start * 10 + CH(off) - _T('0'); + off++; + } + if(off > beg && + off < ctx->size && + (CH(off) == _T('.') || CH(off) == _T(')')) && + (off+1 >= ctx->size || ISBLANK(off+1) || ISNEWLINE(off+1))) + { + p_container->ch = CH(off); + p_container->is_loose = FALSE; + p_container->is_task = FALSE; + p_container->mark_indent = indent; + p_container->contents_indent = indent + off - beg + 1; + *p_end = off+1; + return TRUE; + } + + return FALSE; +} + +static unsigned +md_line_indentation(MD_CTX* ctx, unsigned total_indent, OFF beg, OFF* p_end) +{ + OFF off = beg; + unsigned indent = total_indent; + + while(off < ctx->size && ISBLANK(off)) { + if(CH(off) == _T('\t')) + indent = (indent + 4) & ~3; + else + indent++; + off++; + } + + *p_end = off; + return indent - total_indent; +} + +static const MD_LINE_ANALYSIS md_dummy_blank_line = { MD_LINE_BLANK, 0, 0, 0, 0 }; + +/* Analyze type of the line and find some its properties. This serves as a + * main input for determining type and boundaries of a block. */ +static int +md_analyze_line(MD_CTX* ctx, OFF beg, OFF* p_end, + const MD_LINE_ANALYSIS* pivot_line, MD_LINE_ANALYSIS* line) +{ + unsigned total_indent = 0; + int n_parents = 0; + int n_brothers = 0; + int n_children = 0; + MD_CONTAINER container = { 0 }; + int prev_line_has_list_loosening_effect = ctx->last_line_has_list_loosening_effect; + OFF off = beg; + OFF hr_killer = 0; + int ret = 0; + + line->indent = md_line_indentation(ctx, total_indent, off, &off); + total_indent += line->indent; + line->beg = off; + + /* Given the indentation and block quote marks '>', determine how many of + * the current containers are our parents. 
*/ + while(n_parents < ctx->n_containers) { + MD_CONTAINER* c = &ctx->containers[n_parents]; + + if(c->ch == _T('>') && line->indent < ctx->code_indent_offset && + off < ctx->size && CH(off) == _T('>')) + { + /* Block quote mark. */ + off++; + total_indent++; + line->indent = md_line_indentation(ctx, total_indent, off, &off); + total_indent += line->indent; + + /* The optional 1st space after '>' is part of the block quote mark. */ + if(line->indent > 0) + line->indent--; + + line->beg = off; + + } else if(c->ch != _T('>') && line->indent >= c->contents_indent) { + /* List. */ + line->indent -= c->contents_indent; + } else { + break; + } + + n_parents++; + } + + if(off >= ctx->size || ISNEWLINE(off)) { + /* Blank line does not need any real indentation to be nested inside + * a list. */ + if(n_brothers + n_children == 0) { + while(n_parents < ctx->n_containers && ctx->containers[n_parents].ch != _T('>')) + n_parents++; + } + } + + while(TRUE) { + /* Check whether we are fenced code continuation. */ + if(pivot_line->type == MD_LINE_FENCEDCODE) { + line->beg = off; + + /* We are another MD_LINE_FENCEDCODE unless we are closing fence + * which we transform into MD_LINE_BLANK. */ + if(line->indent < ctx->code_indent_offset) { + if(md_is_closing_code_fence(ctx, CH(pivot_line->beg), off, &off)) { + line->type = MD_LINE_BLANK; + ctx->last_line_has_list_loosening_effect = FALSE; + break; + } + } + + /* Change indentation accordingly to the initial code fence. */ + if(n_parents == ctx->n_containers) { + if(line->indent > pivot_line->indent) + line->indent -= pivot_line->indent; + else + line->indent = 0; + + line->type = MD_LINE_FENCEDCODE; + break; + } + } + + /* Check whether we are HTML block continuation. */ + if(pivot_line->type == MD_LINE_HTML && ctx->html_block_type > 0) { + if(n_parents < ctx->n_containers) { + /* HTML block is implicitly ended if the enclosing container + * block ends. 
*/ + ctx->html_block_type = 0; + } else { + int html_block_type; + + html_block_type = md_is_html_block_end_condition(ctx, off, &off); + if(html_block_type > 0) { + MD_ASSERT(html_block_type == ctx->html_block_type); + + /* Make sure this is the last line of the block. */ + ctx->html_block_type = 0; + + /* Some end conditions serve as blank lines at the same time. */ + if(html_block_type == 6 || html_block_type == 7) { + line->type = MD_LINE_BLANK; + line->indent = 0; + break; + } + } + + line->type = MD_LINE_HTML; + n_parents = ctx->n_containers; + break; + } + } + + /* Check for blank line. */ + if(off >= ctx->size || ISNEWLINE(off)) { + if(pivot_line->type == MD_LINE_INDENTEDCODE && n_parents == ctx->n_containers) { + line->type = MD_LINE_INDENTEDCODE; + if(line->indent > ctx->code_indent_offset) + line->indent -= ctx->code_indent_offset; + else + line->indent = 0; + ctx->last_line_has_list_loosening_effect = FALSE; + } else { + line->type = MD_LINE_BLANK; + ctx->last_line_has_list_loosening_effect = (n_parents > 0 && + n_brothers + n_children == 0 && + ctx->containers[n_parents-1].ch != _T('>')); + + #if 1 + /* See https://github.com/mity/md4c/issues/6 + * + * This ugly checking tests we are in (yet empty) list item but + * not its very first line (i.e. not the line with the list + * item mark). + * + * If we are such a blank line, then any following non-blank + * line which would be part of the list item actually has to + * end the list because according to the specification, "a list + * item can begin with at most one blank line." 
+ */ + if(n_parents > 0 && ctx->containers[n_parents-1].ch != _T('>') && + n_brothers + n_children == 0 && ctx->current_block == NULL && + ctx->n_block_bytes > (int) sizeof(MD_BLOCK)) + { + MD_BLOCK* top_block = (MD_BLOCK*) ((char*)ctx->block_bytes + ctx->n_block_bytes - sizeof(MD_BLOCK)); + if(top_block->type == MD_BLOCK_LI) + ctx->last_list_item_starts_with_two_blank_lines = TRUE; + } + #endif + } + break; + } else { + #if 1 + /* This is the 2nd half of the hack. If the flag is set (i.e. there + * was a 2nd blank line at the beginning of the list item) and if + * we would otherwise still belong to the list item, we enforce + * the end of the list. */ + ctx->last_line_has_list_loosening_effect = FALSE; + if(ctx->last_list_item_starts_with_two_blank_lines) { + if(n_parents > 0 && ctx->containers[n_parents-1].ch != _T('>') && + n_brothers + n_children == 0 && ctx->current_block == NULL && + ctx->n_block_bytes > (int) sizeof(MD_BLOCK)) + { + MD_BLOCK* top_block = (MD_BLOCK*) ((char*)ctx->block_bytes + ctx->n_block_bytes - sizeof(MD_BLOCK)); + if(top_block->type == MD_BLOCK_LI) + n_parents--; + } + + ctx->last_list_item_starts_with_two_blank_lines = FALSE; + } + #endif + } + + /* Check whether we are Setext underline. */ + if(line->indent < ctx->code_indent_offset && pivot_line->type == MD_LINE_TEXT + && off < ctx->size && ISANYOF2(off, _T('='), _T('-')) + && (n_parents == ctx->n_containers)) + { + unsigned level; + + if(md_is_setext_underline(ctx, off, &off, &level)) { + line->type = MD_LINE_SETEXTUNDERLINE; + line->data = level; + break; + } + } + + /* Check for thematic break line. */ + if(line->indent < ctx->code_indent_offset + && off < ctx->size && off >= hr_killer + && ISANYOF(off, _T("-_*"))) + { + if(md_is_hr_line(ctx, off, &off, &hr_killer)) { + line->type = MD_LINE_HR; + break; + } + } + + /* Check for "brother" container. I.e. whether we are another list item + * in already started list. 
*/ + if(n_parents < ctx->n_containers && n_brothers + n_children == 0) { + OFF tmp; + + if(md_is_container_mark(ctx, line->indent, off, &tmp, &container) && + md_is_container_compatible(&ctx->containers[n_parents], &container)) + { + pivot_line = &md_dummy_blank_line; + + off = tmp; + + total_indent += container.contents_indent - container.mark_indent; + line->indent = md_line_indentation(ctx, total_indent, off, &off); + total_indent += line->indent; + line->beg = off; + + /* Some of the following whitespace actually still belongs to the mark. */ + if(off >= ctx->size || ISNEWLINE(off)) { + container.contents_indent++; + } else if(line->indent <= ctx->code_indent_offset) { + container.contents_indent += line->indent; + line->indent = 0; + } else { + container.contents_indent += 1; + line->indent--; + } + + ctx->containers[n_parents].mark_indent = container.mark_indent; + ctx->containers[n_parents].contents_indent = container.contents_indent; + + n_brothers++; + continue; + } + } + + /* Check for indented code. + * Note indented code block cannot interrupt a paragraph. */ + if(line->indent >= ctx->code_indent_offset && + (pivot_line->type == MD_LINE_BLANK || pivot_line->type == MD_LINE_INDENTEDCODE)) + { + line->type = MD_LINE_INDENTEDCODE; + MD_ASSERT(line->indent >= ctx->code_indent_offset); + line->indent -= ctx->code_indent_offset; + line->data = 0; + break; + } + + /* Check for start of a new container block. */ + if(line->indent < ctx->code_indent_offset && + md_is_container_mark(ctx, line->indent, off, &off, &container)) + { + if(pivot_line->type == MD_LINE_TEXT && n_parents == ctx->n_containers && + (off >= ctx->size || ISNEWLINE(off)) && container.ch != _T('>')) + { + /* Noop. List mark followed by a blank line cannot interrupt a paragraph. */ + } else if(pivot_line->type == MD_LINE_TEXT && n_parents == ctx->n_containers && + ISANYOF2_(container.ch, _T('.'), _T(')')) && container.start != 1) + { + /* Noop. 
Ordered list cannot interrupt a paragraph unless the start index is 1. */ + } else { + total_indent += container.contents_indent - container.mark_indent; + line->indent = md_line_indentation(ctx, total_indent, off, &off); + total_indent += line->indent; + + line->beg = off; + line->data = container.ch; + + /* Some of the following whitespace actually still belongs to the mark. */ + if(off >= ctx->size || ISNEWLINE(off)) { + container.contents_indent++; + } else if(line->indent <= ctx->code_indent_offset) { + container.contents_indent += line->indent; + line->indent = 0; + } else { + container.contents_indent += 1; + line->indent--; + } + + if(n_brothers + n_children == 0) + pivot_line = &md_dummy_blank_line; + + if(n_children == 0) + MD_CHECK(md_leave_child_containers(ctx, n_parents + n_brothers)); + + n_children++; + MD_CHECK(md_push_container(ctx, &container)); + continue; + } + } + + /* Check whether we are table continuation. */ + if(pivot_line->type == MD_LINE_TABLE && n_parents == ctx->n_containers) { + line->type = MD_LINE_TABLE; + break; + } + + /* Check for ATX header. */ + if(line->indent < ctx->code_indent_offset && + off < ctx->size && CH(off) == _T('#')) + { + unsigned level; + + if(md_is_atxheader_line(ctx, off, &line->beg, &off, &level)) { + line->type = MD_LINE_ATXHEADER; + line->data = level; + break; + } + } + + /* Check whether we are starting code fence. */ + if(off < ctx->size && ISANYOF2(off, _T('`'), _T('~'))) { + if(md_is_opening_code_fence(ctx, off, &off)) { + line->type = MD_LINE_FENCEDCODE; + line->data = 1; + break; + } + } + + /* Check for start of raw HTML block. */ + if(off < ctx->size && CH(off) == _T('<') + && !(ctx->parser.flags & MD_FLAG_NOHTMLBLOCKS)) + { + ctx->html_block_type = md_is_html_block_start_condition(ctx, off); + + /* HTML block type 7 cannot interrupt paragraph. 
*/ + if(ctx->html_block_type == 7 && pivot_line->type == MD_LINE_TEXT) + ctx->html_block_type = 0; + + if(ctx->html_block_type > 0) { + /* The line itself also may immediately close the block. */ + if(md_is_html_block_end_condition(ctx, off, &off) == ctx->html_block_type) { + /* Make sure this is the last line of the block. */ + ctx->html_block_type = 0; + } + + line->type = MD_LINE_HTML; + break; + } + } + + /* Check for table underline. */ + if((ctx->parser.flags & MD_FLAG_TABLES) && pivot_line->type == MD_LINE_TEXT + && off < ctx->size && ISANYOF3(off, _T('|'), _T('-'), _T(':')) + && n_parents == ctx->n_containers) + { + unsigned col_count; + + if(ctx->current_block != NULL && ctx->current_block->n_lines == 1 && + md_is_table_underline(ctx, off, &off, &col_count)) + { + line->data = col_count; + line->type = MD_LINE_TABLEUNDERLINE; + break; + } + } + + /* By default, we are normal text line. */ + line->type = MD_LINE_TEXT; + if(pivot_line->type == MD_LINE_TEXT && n_brothers + n_children == 0) { + /* Lazy continuation. */ + n_parents = ctx->n_containers; + } + + /* Check for task mark. */ + if((ctx->parser.flags & MD_FLAG_TASKLISTS) && n_brothers + n_children > 0 && + ISANYOF_(ctx->containers[ctx->n_containers-1].ch, _T("-+*.)"))) + { + OFF tmp = off; + + while(tmp < ctx->size && tmp < off + 3 && ISBLANK(tmp)) + tmp++; + if(tmp + 2 < ctx->size && CH(tmp) == _T('[') && + ISANYOF(tmp+1, _T("xX ")) && CH(tmp+2) == _T(']') && + (tmp + 3 == ctx->size || ISBLANK(tmp+3) || ISNEWLINE(tmp+3))) + { + MD_CONTAINER* task_container = (n_children > 0 ? &ctx->containers[ctx->n_containers-1] : &container); + task_container->is_task = TRUE; + task_container->task_mark_off = tmp + 1; + off = tmp + 3; + while(off < ctx->size && ISWHITESPACE(off)) + off++; + if (off == ctx->size) break; + line->beg = off; + } + } + + break; + } + + /* Scan for end of the line. + * + * Note this is quite a bottleneck of the parsing as we here iterate almost + * over compete document. 
+ */ +#if defined __linux__ && !defined MD4C_USE_UTF16 + /* Recent glibc versions have superbly optimized strcspn(), even using + * vectorization if available. */ + if(ctx->doc_ends_with_newline && off < ctx->size) { + while(TRUE) { + off += (OFF) strcspn(STR(off), "\r\n"); + + /* strcspn() can stop on zero terminator; but that can appear + * anywhere in the Markfown input... */ + if(CH(off) == _T('\0')) + off++; + else + break; + } + } else +#endif + { + /* Optimization: Use some loop unrolling. */ + while(off + 3 < ctx->size && !ISNEWLINE(off+0) && !ISNEWLINE(off+1) + && !ISNEWLINE(off+2) && !ISNEWLINE(off+3)) + off += 4; + while(off < ctx->size && !ISNEWLINE(off)) + off++; + } + + /* Set end of the line. */ + line->end = off; + + /* But for ATX header, we should exclude the optional trailing mark. */ + if(line->type == MD_LINE_ATXHEADER) { + OFF tmp = line->end; + while(tmp > line->beg && CH(tmp-1) == _T(' ')) + tmp--; + while(tmp > line->beg && CH(tmp-1) == _T('#')) + tmp--; + if(tmp == line->beg || CH(tmp-1) == _T(' ') || (ctx->parser.flags & MD_FLAG_PERMISSIVEATXHEADERS)) + line->end = tmp; + } + + /* Trim trailing spaces. */ + if(line->type != MD_LINE_INDENTEDCODE && line->type != MD_LINE_FENCEDCODE) { + while(line->end > line->beg && CH(line->end-1) == _T(' ')) + line->end--; + } + + /* Eat also the new line. */ + if(off < ctx->size && CH(off) == _T('\r')) + off++; + if(off < ctx->size && CH(off) == _T('\n')) + off++; + + *p_end = off; + + /* If we belong to a list after seeing a blank line, the list is loose. */ + if(prev_line_has_list_loosening_effect && line->type != MD_LINE_BLANK && n_parents + n_brothers > 0) { + MD_CONTAINER* c = &ctx->containers[n_parents + n_brothers - 1]; + if(c->ch != _T('>')) { + MD_BLOCK* block = (MD_BLOCK*) (((char*)ctx->block_bytes) + c->block_byte_off); + block->flags |= MD_BLOCK_LOOSE_LIST; + } + } + + /* Leave any containers we are not part of anymore. 
*/ + if(n_children == 0 && n_parents + n_brothers < ctx->n_containers) + MD_CHECK(md_leave_child_containers(ctx, n_parents + n_brothers)); + + /* Enter any container we found a mark for. */ + if(n_brothers > 0) { + MD_ASSERT(n_brothers == 1); + MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_LI, + ctx->containers[n_parents].task_mark_off, + (ctx->containers[n_parents].is_task ? CH(ctx->containers[n_parents].task_mark_off) : 0), + MD_BLOCK_CONTAINER_CLOSER)); + MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_LI, + container.task_mark_off, + (container.is_task ? CH(container.task_mark_off) : 0), + MD_BLOCK_CONTAINER_OPENER)); + ctx->containers[n_parents].is_task = container.is_task; + ctx->containers[n_parents].task_mark_off = container.task_mark_off; + } + + if(n_children > 0) + MD_CHECK(md_enter_child_containers(ctx, n_children)); + +abort: + return ret; +} +/* Feed one already analyzed line into the block being built in ctx. + * + * Depending on line->type the line ends the current leaf block (blank line), + * forms a single-line block of its own (MD_LINE_HR, MD_LINE_ATXHEADER), + * retypes the current block (MD_LINE_SETEXTUNDERLINE -> MD_BLOCK_H, + * MD_LINE_TABLEUNDERLINE -> MD_BLOCK_TABLE), or is appended to the current + * block, which is first ended when the line type differs from the pivot + * line's type and (re)started when no block is open. + * + * p_pivot_line is in/out: it is reset to the dummy blank line whenever a + * block has been closed, and set to 'line' when 'line' starts a new block. + * + * Returns 'ret' (initially 0). MD_CHECK() is defined outside this chunk; + * presumably it stores a failing result in 'ret' and jumps to 'abort' -- + * TODO confirm. + */ +static int +md_process_line(MD_CTX* ctx, const MD_LINE_ANALYSIS** p_pivot_line, MD_LINE_ANALYSIS* line) +{ + const MD_LINE_ANALYSIS* pivot_line = *p_pivot_line; + int ret = 0; + + /* Blank line ends current leaf block. */ + if(line->type == MD_LINE_BLANK) { + MD_CHECK(md_end_current_block(ctx)); + *p_pivot_line = &md_dummy_blank_line; + return 0; + } + + /* Some line types form block on their own. */ + if(line->type == MD_LINE_HR || line->type == MD_LINE_ATXHEADER) { + MD_CHECK(md_end_current_block(ctx)); + + /* Add our single-line block. */ + MD_CHECK(md_start_new_block(ctx, line)); + MD_CHECK(md_add_line_into_current_block(ctx, line)); + MD_CHECK(md_end_current_block(ctx)); + *p_pivot_line = &md_dummy_blank_line; + return 0; + } + + /* MD_LINE_SETEXTUNDERLINE changes meaning of the current block and ends it. 
*/ + if(line->type == MD_LINE_SETEXTUNDERLINE) { + MD_ASSERT(ctx->current_block != NULL); + ctx->current_block->type = MD_BLOCK_H; + ctx->current_block->data = line->data; + ctx->current_block->flags |= MD_BLOCK_SETEXT_HEADER; + MD_CHECK(md_add_line_into_current_block(ctx, line)); + MD_CHECK(md_end_current_block(ctx)); + if(ctx->current_block == NULL) { + *p_pivot_line = &md_dummy_blank_line; + } else { + /* This happens if we have consumed all the body as link ref. defs. + * and downgraded the underline into start of a new paragraph block. */ + line->type = MD_LINE_TEXT; + *p_pivot_line = line; + } + return 0; + } + + /* MD_LINE_TABLEUNDERLINE changes meaning of the current block. */ + if(line->type == MD_LINE_TABLEUNDERLINE) { + MD_ASSERT(ctx->current_block != NULL); + MD_ASSERT(ctx->current_block->n_lines == 1); + ctx->current_block->type = MD_BLOCK_TABLE; + ctx->current_block->data = line->data; + MD_ASSERT(pivot_line != &md_dummy_blank_line); + ((MD_LINE_ANALYSIS*)pivot_line)->type = MD_LINE_TABLE; + MD_CHECK(md_add_line_into_current_block(ctx, line)); + return 0; + } + + /* The current block also ends if the line has different type. */ + if(line->type != pivot_line->type) + MD_CHECK(md_end_current_block(ctx)); + + /* The current line may start a new block. */ + if(ctx->current_block == NULL) { + MD_CHECK(md_start_new_block(ctx, line)); + *p_pivot_line = line; + } + + /* In all other cases the line is just a continuation of the current block. */ + MD_CHECK(md_add_line_into_current_block(ctx, line)); + +abort: + return ret; +} +/* Drive the parse of the whole document held in ctx. + * + * The input is analyzed and processed one line at a time. Two + * MD_LINE_ANALYSIS buffers are alternated so that the buffer currently + * referenced as the pivot line is never reused for the next line. After the + * last line the current block is ended, the reference definition hashtable + * is built, all containers are left and the collected blocks are processed. + * The work is bracketed by MD_ENTER_BLOCK/MD_LEAVE_BLOCK for MD_BLOCK_DOC. + * + * Returns 'ret' (initially 0); see the MD_CHECK()-ed calls for the failure + * paths. + */ +static int +md_process_doc(MD_CTX *ctx) +{ + const MD_LINE_ANALYSIS* pivot_line = &md_dummy_blank_line; + MD_LINE_ANALYSIS line_buf[2]; + MD_LINE_ANALYSIS* line = &line_buf[0]; + OFF off = 0; + int ret = 0; + + MD_ENTER_BLOCK(MD_BLOCK_DOC, NULL); + + while(off < ctx->size) { + if(line == pivot_line) + line = (line == &line_buf[0] ? 
&line_buf[1] : &line_buf[0]); + + MD_CHECK(md_analyze_line(ctx, off, &off, pivot_line, line)); + MD_CHECK(md_process_line(ctx, &pivot_line, line)); + } + + md_end_current_block(ctx); + + MD_CHECK(md_build_ref_def_hashtable(ctx)); + + /* Process all blocks. */ + MD_CHECK(md_leave_child_containers(ctx, 0)); + MD_CHECK(md_process_all_blocks(ctx)); + + MD_LEAVE_BLOCK(MD_BLOCK_DOC, NULL); + +abort: + +#if 0 + /* Output some memory consumption statistics. */ + { + char buffer[256]; + sprintf(buffer, "Alloced %u bytes for block buffer.", + (unsigned)(ctx->alloc_block_bytes)); + MD_LOG(buffer); + + sprintf(buffer, "Alloced %u bytes for containers buffer.", + (unsigned)(ctx->alloc_containers * sizeof(MD_CONTAINER))); + MD_LOG(buffer); + + sprintf(buffer, "Alloced %u bytes for marks buffer.", + (unsigned)(ctx->alloc_marks * sizeof(MD_MARK))); + MD_LOG(buffer); + + sprintf(buffer, "Alloced %u bytes for aux. buffer.", + (unsigned)(ctx->alloc_buffer * sizeof(MD_CHAR))); + MD_LOG(buffer); + } +#endif + + return ret; +} + + +/******************** + *** Public API *** + ********************/ + +int +md_parse(const MD_CHAR* text, MD_SIZE size, const MD_PARSER* parser, void* userdata) +{ + MD_CTX ctx = {.text = text, + .size = size, + .userdata = userdata, + .code_indent_offset = (ctx.parser.flags & MD_FLAG_NOINDENTEDCODEBLOCKS) ? (OFF)(-1) : 4, + .doc_ends_with_newline = (size > 0 && ISNEWLINE_(text[size-1]))}; + int i; + int ret; + + if(parser->abi_version != 0) { + if(parser->debug_log != NULL) + parser->debug_log("Unsupported abi_version.", userdata); + return -1; + } + + /* Setup context structure. */ + memcpy(&ctx.parser, parser, sizeof(MD_PARSER)); + md_build_mark_char_map(&ctx); + + /* Reset all unresolved opener mark chains. */ + for(i = 0; i < (int) SIZEOF_ARRAY(ctx.mark_chains); i++) { + ctx.mark_chains[i].head = -1; + ctx.mark_chains[i].tail = -1; + } + ctx.unresolved_link_head = -1; + ctx.unresolved_link_tail = -1; + + /* All the work. 
*/ + ret = md_process_doc(&ctx); + + /* Clean-up. */ + md_free_ref_defs(&ctx); + md_free_ref_def_hashtable(&ctx); + free(ctx.buffer); + free(ctx.marks); + free(ctx.block_bytes); + free(ctx.containers); + + return ret; +} diff --git a/shpPackages/md4c/md4c.h b/shpPackages/md4c/md4c.h @@ -0,0 +1,417 @@ +/* + * MD4C: Markdown parser for C + * (http://github.com/mity/md4c) + * + * Copyright (c) 2016-2020 Martin Mitas + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef MD4C_H +#define MD4C_H + +#ifdef __cplusplus + extern "C" { +#endif + +#if defined MD4C_USE_UTF16 + /* Magic to support UTF-16. Note that in order to use it, you have to define + * the macro MD4C_USE_UTF16 both when building MD4C as well as when + * including this header in your code. */ + #ifdef _WIN32 + #include <windows.h> + typedef WCHAR MD_CHAR; + #else + #error MD4C_USE_UTF16 is only supported on Windows. 
+ #endif +#else + typedef char MD_CHAR; +#endif + +typedef unsigned MD_SIZE; +typedef unsigned MD_OFFSET; + + +/* Block represents a part of document hierarchy structure like a paragraph + * or list item. + */ +typedef enum MD_BLOCKTYPE { + /* <body>...</body> */ + MD_BLOCK_DOC = 0, + + /* <blockquote>...</blockquote> */ + MD_BLOCK_QUOTE, + + /* <ul>...</ul> + * Detail: Structure MD_BLOCK_UL_DETAIL. */ + MD_BLOCK_UL, + + /* <ol>...</ol> + * Detail: Structure MD_BLOCK_OL_DETAIL. */ + MD_BLOCK_OL, + + /* <li>...</li> + * Detail: Structure MD_BLOCK_LI_DETAIL. */ + MD_BLOCK_LI, + + /* <hr> */ + MD_BLOCK_HR, + + /* <h1>...</h1> (for levels up to 6) + * Detail: Structure MD_BLOCK_H_DETAIL. */ + MD_BLOCK_H, + + /* <pre><code>...</code></pre> + * Note the text lines within code blocks are terminated with '\n' + * instead of explicit MD_TEXT_BR. */ + MD_BLOCK_CODE, + + /* Raw HTML block. This itself does not correspond to any particular HTML + * tag. The contents of it _is_ raw HTML source intended to be put + * in verbatim form to the HTML output. */ + MD_BLOCK_HTML, + + /* <p>...</p> */ + MD_BLOCK_P, + + /* <table>...</table> and its contents. + * Detail: Structure MD_BLOCK_TABLE_DETAIL (for MD_BLOCK_TABLE), + * structure MD_BLOCK_TD_DETAIL (for MD_BLOCK_TH and MD_BLOCK_TD) + * Note all of these are used only if extension MD_FLAG_TABLES is enabled. */ + MD_BLOCK_TABLE, + MD_BLOCK_THEAD, + MD_BLOCK_TBODY, + MD_BLOCK_TR, + MD_BLOCK_TH, + MD_BLOCK_TD +} MD_BLOCKTYPE; + +/* Span represents an in-line piece of a document which should be rendered with + * the same font, color and other attributes. A sequence of spans forms a block + * like paragraph or list item. */ +typedef enum MD_SPANTYPE { + /* <em>...</em> */ + MD_SPAN_EM, + + /* <strong>...</strong> */ + MD_SPAN_STRONG, + + /* <a href="xxx">...</a> + * Detail: Structure MD_SPAN_A_DETAIL. */ + MD_SPAN_A, + + /* <img src="xxx" alt="..."> + * Detail: Structure MD_SPAN_IMG_DETAIL. 
+ * Note: Image text can contain nested spans and even nested images. + * If rendered into ALT attribute of HTML <IMG> tag, it's the responsibility + * of the parser to deal with it. + */ + MD_SPAN_IMG, + + /* <code>...</code> */ + MD_SPAN_CODE, + + /* <del>...</del> + * Note: Recognized only when MD_FLAG_STRIKETHROUGH is enabled. + */ + MD_SPAN_DEL, + + /* For recognizing inline ($) and display ($$) equations + * Note: Recognized only when MD_FLAG_LATEXMATHSPANS is enabled. + */ + MD_SPAN_LATEXMATH, + MD_SPAN_LATEXMATH_DISPLAY, + + /* Wiki links + * Note: Recognized only when MD_FLAG_WIKILINKS is enabled. + */ + MD_SPAN_WIKILINK, + + /* <u>...</u> + * Note: Recognized only when MD_FLAG_UNDERLINE is enabled. */ + MD_SPAN_U, + MD_SPAN_FNT, /* Fork addition; presumably the "faint" terminal effect -- TODO confirm in md.c */ + MD_SPAN_INV, /* Fork addition; presumably the "inverse" terminal effect -- TODO confirm */ + MD_SPAN_COC, /* Fork addition; presumably the "conceal/hidden" terminal effect -- TODO confirm */ + MD_SPAN_BLI, /* Fork addition; presumably the "blink" terminal effect -- TODO confirm */ + MD_SPAN_ANCHOR, /* Fork addition; presumably an anchor destination with an id -- TODO confirm */ + /* This span type is issued by md4c + * MD_SPAN_COLOR allows supporting RGB colors: + * [text with colors](#1#13) + * md4c treats colors as MD_SPAN_A and the parsing of the color + * is done by the user. + * NOTE(review): the first and the last sentence seem to conflict; the + * first one probably means this type is NOT issued by md4c itself -- + * confirm against md4c.c and md.c. + */ + MD_SPAN_COLOR, +} MD_SPANTYPE; + +/* Text is the actual textual contents of span. */ +typedef enum MD_TEXTTYPE { + /* Normal text. */ + MD_TEXT_NORMAL = 0, + + /* NULL character. CommonMark requires replacing NULL character with + * the replacement char U+FFFD, so this allows caller to do that easily. */ + MD_TEXT_NULLCHAR, + + /* Line breaks. + * Note these are not sent from blocks with verbatim output (MD_BLOCK_CODE + * or MD_BLOCK_HTML). In such cases, '\n' is part of the text itself. */ + MD_TEXT_BR, /* <br> (hard break) */ + MD_TEXT_SOFTBR, /* '\n' in source text where it is not semantically meaningful (soft break) */ + + /* Entity. + * (a) Named entity, e.g. &nbsp; + * (Note MD4C does not have a list of known entities. + * Anything matching the regexp /&[A-Za-z][A-Za-z0-9]{1,47};/ is + * treated as a named entity.) + * (b) Numerical entity, e.g. &#1234; + * (c) Hexadecimal entity, e.g. 
&#x12AB; + * + * As MD4C is mostly encoding agnostic, application gets the verbatim + * entity text into the MD_PARSER::text() callback. */ + MD_TEXT_ENTITY, + + /* Text in a code block (inside MD_BLOCK_CODE) or inlined code (`code`). + * If it is inside MD_BLOCK_CODE, it includes spaces for indentation and + * '\n' for new lines. MD_TEXT_BR and MD_TEXT_SOFTBR are not sent for this + * kind of text. */ + MD_TEXT_CODE, + + /* Text is a raw HTML. If it is contents of a raw HTML block (i.e. not + * an inline raw HTML), then MD_TEXT_BR and MD_TEXT_SOFTBR are not used. + * The text contains verbatim '\n' for the new lines. */ + MD_TEXT_HTML, + + /* Text is inside an equation. This is processed the same way as inlined code + * spans (`code`). */ + MD_TEXT_LATEXMATH +} MD_TEXTTYPE; + + +/* Alignment enumeration. */ +typedef enum MD_ALIGN { + MD_ALIGN_DEFAULT = 0, /* When unspecified. */ + MD_ALIGN_LEFT, + MD_ALIGN_CENTER, + MD_ALIGN_RIGHT +} MD_ALIGN; + + +/* String attribute. + * + * This wraps strings which are outside of a normal text flow and which are + * propagated within various detailed structures, but which still may contain + * string portions of different types like e.g. entities. + * + * So, for example, let's consider this image: + * + * ![image alt text](http://example.org/image.png 'foo &quot; bar') + * + * The image alt text is propagated as a normal text via the MD_PARSER::text() + * callback. However, the image title ('foo &quot; bar') is propagated as + * MD_ATTRIBUTE in MD_SPAN_IMG_DETAIL::title. 
+ * + * Then the attribute MD_SPAN_IMG_DETAIL::title shall provide the following: + * -- [0]: "foo " (substr_types[0] == MD_TEXT_NORMAL; substr_offsets[0] == 0) + * -- [1]: "&quot;" (substr_types[1] == MD_TEXT_ENTITY; substr_offsets[1] == 4) + * -- [2]: " bar" (substr_types[2] == MD_TEXT_NORMAL; substr_offsets[2] == 10) + * -- [3]: (n/a) (n/a ; substr_offsets[3] == 14) + * + * Note that these invariants are always guaranteed: + * -- substr_offsets[0] == 0 + * -- substr_offsets[LAST+1] == size + * -- Currently, only MD_TEXT_NORMAL, MD_TEXT_ENTITY, MD_TEXT_NULLCHAR + * substrings can appear. This could change only if the specification + * changes. + */ +typedef struct MD_ATTRIBUTE { + const MD_CHAR* text; + MD_SIZE size; + const MD_TEXTTYPE* substr_types; + const MD_OFFSET* substr_offsets; +} MD_ATTRIBUTE; + + +/* Detailed info for MD_BLOCK_UL. */ +typedef struct MD_BLOCK_UL_DETAIL { + int is_tight; /* Non-zero if tight list, zero if loose. */ + MD_CHAR mark; /* Item bullet character in MarkDown source of the list, e.g. '-', '+', '*'. */ +} MD_BLOCK_UL_DETAIL; + +/* Detailed info for MD_BLOCK_OL. */ +typedef struct MD_BLOCK_OL_DETAIL { + unsigned start; /* Start index of the ordered list. */ + int is_tight; /* Non-zero if tight list, zero if loose. */ + MD_CHAR mark_delimiter; /* Character delimiting the item marks in MarkDown source, e.g. '.' or ')' */ +} MD_BLOCK_OL_DETAIL; + +/* Detailed info for MD_BLOCK_LI. */ +typedef struct MD_BLOCK_LI_DETAIL { + int is_task; /* Can be non-zero only with MD_FLAG_TASKLISTS */ + MD_CHAR task_mark; /* If is_task, then one of 'x', 'X' or ' '. Undefined otherwise. */ + MD_OFFSET task_mark_offset; /* If is_task, then offset in the input of the char between '[' and ']'. */ +} MD_BLOCK_LI_DETAIL; + +/* Detailed info for MD_BLOCK_H. */ +typedef struct MD_BLOCK_H_DETAIL { + unsigned level; /* Header level (1 - 6) */ +} MD_BLOCK_H_DETAIL; + +/* Detailed info for MD_BLOCK_CODE. 
*/ +typedef struct MD_BLOCK_CODE_DETAIL { + MD_ATTRIBUTE info; + MD_ATTRIBUTE lang; + MD_CHAR fence_char; /* The character used for fenced code block; or zero for indented code block. */ +} MD_BLOCK_CODE_DETAIL; + +/* Detailed info for MD_BLOCK_TABLE. */ +typedef struct MD_BLOCK_TABLE_DETAIL { + unsigned col_count; /* Count of columns in the table. */ + unsigned head_row_count; /* Count of rows in the table header (currently always 1) */ + unsigned body_row_count; /* Count of rows in the table body */ +} MD_BLOCK_TABLE_DETAIL; + +/* Detailed info for MD_BLOCK_TH and MD_BLOCK_TD. */ +typedef struct MD_BLOCK_TD_DETAIL { + MD_ALIGN align; +} MD_BLOCK_TD_DETAIL; + +/* Detailed info for MD_SPAN_A. */ +typedef struct MD_SPAN_A_DETAIL { + MD_ATTRIBUTE href; + MD_ATTRIBUTE title; +} MD_SPAN_A_DETAIL; + +/* Detailed info for MD_SPAN_IMG. */ +typedef struct MD_SPAN_IMG_DETAIL { + MD_ATTRIBUTE src; + MD_ATTRIBUTE title; +} MD_SPAN_IMG_DETAIL; + +/* Detailed info for MD_SPAN_WIKILINK. */ +typedef struct MD_SPAN_WIKILINK { + MD_ATTRIBUTE target; +} MD_SPAN_WIKILINK_DETAIL; + +/* Flags specifying extensions/deviations from CommonMark specification. + * + * By default (when MD_PARSER::flags == 0), we follow CommonMark specification. + * The following flags may allow some extensions or deviations from it. + */ +#define MD_FLAG_COLLAPSEWHITESPACE 0x0001 /* In MD_TEXT_NORMAL, collapse non-trivial whitespace into single ' ' */ +#define MD_FLAG_PERMISSIVEATXHEADERS 0x0002 /* Do not require space in ATX headers ( ###header ) */ +#define MD_FLAG_PERMISSIVEURLAUTOLINKS 0x0004 /* Recognize URLs as autolinks even without '<', '>' */ +#define MD_FLAG_PERMISSIVEEMAILAUTOLINKS 0x0008 /* Recognize e-mails as autolinks even without '<', '>' and 'mailto:' */ +#define MD_FLAG_NOINDENTEDCODEBLOCKS 0x0010 /* Disable indented code blocks. (Only fenced code works.) */ +#define MD_FLAG_NOHTMLBLOCKS 0x0020 /* Disable raw HTML blocks. */ +#define MD_FLAG_NOHTMLSPANS 0x0040 /* Disable raw HTML (inline). 
*/ +#define MD_FLAG_TABLES 0x0100 /* Enable tables extension. */ +#define MD_FLAG_STRIKETHROUGH 0x0200 /* Enable strikethrough extension. */ +#define MD_FLAG_PERMISSIVEWWWAUTOLINKS 0x0400 /* Enable WWW autolinks (even without any scheme prefix, if they begin with 'www.') */ +#define MD_FLAG_TASKLISTS 0x0800 /* Enable task list extension. */ +#define MD_FLAG_LATEXMATHSPANS 0x1000 /* Enable $ and $$ containing LaTeX equations. */ +#define MD_FLAG_WIKILINKS 0x2000 /* Enable wiki links extension. */ +#define MD_FLAG_UNDERLINE 0x4000 /* Enable underline extension (and disables '_' for normal emphasis). */ + +#define MD_FLAG_PERMISSIVEAUTOLINKS (MD_FLAG_PERMISSIVEEMAILAUTOLINKS | MD_FLAG_PERMISSIVEURLAUTOLINKS | MD_FLAG_PERMISSIVEWWWAUTOLINKS) +#define MD_FLAG_NOHTML (MD_FLAG_NOHTMLBLOCKS | MD_FLAG_NOHTMLSPANS) + +/* Convenient sets of flags corresponding to well-known Markdown dialects. + * + * Note we may only support subset of features of the referred dialect. + * The constant just enables those extensions which bring us as close as + * possible given what features we implement. + * + * ABI compatibility note: Meaning of these can change in time as new + * extensions, bringing the dialect closer to the original, are implemented. + */ +#define MD_DIALECT_COMMONMARK 0 +#define MD_DIALECT_GITHUB (MD_FLAG_PERMISSIVEAUTOLINKS | MD_FLAG_TABLES | MD_FLAG_STRIKETHROUGH | MD_FLAG_TASKLISTS) + +/* Parser structure. + */ +typedef struct MD_PARSER { + /* Reserved. Set to zero. + */ + unsigned abi_version; + + /* Dialect options. Bitmask of MD_FLAG_xxxx values. + */ + unsigned flags; + + /* Caller-provided rendering callbacks. + * + * For some block/span types, more detailed information is provided in a + * type-specific structure pointed by the argument 'detail'. + * + * The last argument of all callbacks, 'userdata', is just propagated from + * md_parse() and is available for any use by the application. 
+ * + * Note any strings provided to the callbacks as their arguments or as + * members of any detail structure are generally not zero-terminated. + * Application has to take the respective size information into account. + * + * Any rendering callback may abort further parsing of the document by + * returning non-zero. + */ + int (*enter_block)(MD_BLOCKTYPE /*type*/, void* /*detail*/, void* /*userdata*/); + int (*leave_block)(MD_BLOCKTYPE /*type*/, void* /*detail*/, void* /*userdata*/); + + int (*enter_span)(MD_SPANTYPE /*type*/, void* /*detail*/, void* /*userdata*/); + int (*leave_span)(MD_SPANTYPE /*type*/, void* /*detail*/, void* /*userdata*/); + + int (*text)(MD_TEXTTYPE /*type*/, const MD_CHAR* /*text*/, MD_SIZE /*size*/, void* /*userdata*/); + + /* Debug callback. Optional (may be NULL). + * + * If provided and something goes wrong, this function gets called. + * This is intended for debugging and problem diagnosis for developers; + * it is not intended to provide any errors suitable for displaying to an + * end user. + */ + void (*debug_log)(const char* /*msg*/, void* /*userdata*/); + + /* Reserved. Set to NULL. + */ + void (*syntax)(void); +} MD_PARSER; + + +/* For backward compatibility. Do not use in new code. + */ +typedef MD_PARSER MD_RENDERER; + + +/* Parse the Markdown document stored in the string 'text' of size 'size'. + * The parser provides callbacks to be called during the parsing so the + * caller can render the document on the screen or convert the Markdown + * to another format. + * + * Zero is returned on success. If a runtime error occurs (e.g. a memory + * allocation fails), -1 is returned. If the processing is aborted due to any callback + * returning non-zero, the return value of the callback is returned. 
+ */ +int md_parse(const MD_CHAR* text, MD_SIZE size, const MD_PARSER* parser, void* userdata); + + +#ifdef __cplusplus + } /* extern "C" { */ +#endif + +#endif /* MD4C_H */ diff --git a/shpPackages/md4c/package.yml b/shpPackages/md4c/package.yml @@ -0,0 +1,18 @@ +--- + name: md4c + version: 0.0.1 + description: "md4c is a markdown parser library (forked from mity/md4c)" + bin: ./md4c.c + scripts: + test: echo "Error no test specified" && exit 1 + repository: + type: git + url: git+https://github.com/mity/md4c.git + keywords: + - library + - markdown + author: Remy Noulin + license: MIT + bugs: + url: https://github.com/mity/md4c/issues + homepage: https://github.com/mity/md4c#readme