// ribbit/test/hopdown.test.ts
// (file-viewer metadata: 537 lines, 23 KiB, TypeScript)

import { ribbit } from './setup';
// Shared fixtures: a single HopDown converter plus short aliases used by the tests.
const lib = ribbit();
const hopdown = new lib.HopDown();

/** Renders Markdown source to HTML with the shared converter. */
function H(md: string) {
  return hopdown.toHTML(md);
}

/** Converts HTML back to Markdown with the shared converter. */
function M(html: string) {
  return hopdown.toMarkdown(html);
}

/** Round-trips Markdown: Markdown → HTML → Markdown. */
function rt(md: string) {
  const html = H(md);
  return M(html);
}
// Markdown → HTML conversion, grouped by syntax feature.
describe('Markdown → HTML', () => {
  /** Registers a test asserting that `md` renders to exactly `html`. */
  const rendersAs = (title: string, md: string, html: string): void => {
    it(title, () => {
      expect(H(md)).toBe(html);
    });
  };

  /** Registers a test asserting that the HTML rendered from `md` contains `fragment`. */
  const rendersWith = (title: string, md: string, fragment: string): void => {
    it(title, () => {
      expect(H(md)).toContain(fragment);
    });
  };

  describe('inline formatting', () => {
    rendersAs('bold', '**bold**', '<p><strong>bold</strong></p>');
    rendersAs('italic', '*italic*', '<p><em>italic</em></p>');
    rendersAs('inline code', '`code`', '<p><code>code</code></p>');
    rendersAs('link', '[t](http://x)', '<p><a href="http://x">t</a></p>');
    rendersAs('bold+italic', '***bi***', '<p><em><strong>bi</strong></em></p>');
    rendersAs('mixed', 'a **b** *c* `d`', '<p>a <strong>b</strong> <em>c</em> <code>d</code></p>');
    rendersAs('code before bold', '`a` **b**', '<p><code>a</code> <strong>b</strong></p>');
  });

  describe('headings', () => {
    // One test per ATX heading level; only the opening tag prefix is checked.
    it.each([1, 2, 3, 4, 5, 6])('h%i', (level) => {
      const hashes = '#'.repeat(level);
      const html = H(`${hashes} Sub`);
      expect(html).toContain(`<h${level}`);
    });
    rendersWith('heading id', '## Hello World', "id='HelloWorld'");
    rendersWith('heading inline md', '## **Bold** text', '<strong>Bold</strong>');
  });

  describe('horizontal rules', () => {
    // The test title doubles as the Markdown input.
    for (const rule of ['***', '---', '___']) {
      rendersAs(rule, rule, '<hr>');
    }
  });

  describe('lists', () => {
    rendersAs('ul *', '* a\n* b', '<ul><li>a</li><li>b</li></ul>');
    rendersAs('ul -', '- a\n- b', '<ul><li>a</li><li>b</li></ul>');
    rendersAs('ol', '1. a\n2. b', '<ol><li>a</li><li>b</li></ol>');
    rendersWith('ul inline', '* **bold** item', '<strong>bold</strong>');
  });

  describe('blockquotes', () => {
    rendersWith('basic', '> text', '<blockquote>');
    rendersWith('content', '> hello', 'hello');
    rendersWith('multi-line', '> a\n> b', 'a');
  });

  describe('fenced code', () => {
    rendersWith('basic', '```\nx = 1\n```', '<pre><code>');
    rendersWith('content', '```\nx = 1\n```', 'x = 1');
    rendersWith('language', '```js\nvar x;\n```', 'language-js');
    rendersWith('escapes html', '```\n<div>\n```', '&lt;div&gt;');
    it('no lang when none', () => {
      const html = H('```\nplain\n```');
      expect(html).not.toContain('language-');
    });
  });

  describe('tables', () => {
    const tbl = '| a | b |\n|---|---|\n| 1 | 2 |';
    rendersWith('table tag', tbl, '<table>');
    rendersWith('thead', tbl, '<thead>');
    rendersWith('th cells', tbl, '<th>a</th>');
    rendersWith('td cells', tbl, '<td>1</td>');
    rendersWith('center align', '| C |\n|:--:|\n| x |', 'text-align:center');
    rendersWith('right align', '| R |\n|--:|\n| x |', 'text-align:right');
    rendersWith('inline md', '| **b** |\n|---|\n| x |', '<strong>b</strong>');
  });

  describe('paragraphs', () => {
    rendersAs('single', 'hello', '<p>hello</p>');
    rendersAs('two', 'a\n\nb', '<p>a</p>\n<p>b</p>');
    rendersAs('soft break', 'a\nb', '<p>a\nb</p>');
  });

  describe('edge cases', () => {
    rendersAs('empty', '', '');
    rendersAs('whitespace', ' ', '');
    rendersWith('html entities', 'a & b < c', '&amp;');
    rendersWith('html in code', '`<div>`', '&lt;div&gt;');
    rendersWith('para then heading', 'text\n\n## H', '<h2');
    rendersWith('list then para', '- a\n\ntext', '<p>text</p>');
  });
});
// HTML → Markdown conversion: each case feeds an HTML snippet to M() and checks the Markdown.
describe('HTML → Markdown', () => {
  /** Registers a test asserting that `html` converts to exactly `md`. */
  const convertsTo = (title: string, html: string, md: string): void => {
    it(title, () => {
      expect(M(html)).toBe(md);
    });
  };

  /** Registers a test asserting that the Markdown produced from `html` contains `fragment`. */
  const convertsWith = (title: string, html: string, fragment: string): void => {
    it(title, () => {
      expect(M(html)).toContain(fragment);
    });
  };

  convertsTo('strong→**', '<p><strong>b</strong></p>', '**b**');
  convertsTo('em→*', '<p><em>i</em></p>', '*i*');
  convertsTo('code→`', '<p><code>c</code></p>', '`c`');
  convertsTo('a→[]', '<a href="http://x">t</a>', '[t](http://x)');
  convertsTo('h1→#', '<h1>T</h1>', '# T');
  convertsTo('hr→---', '<hr>', '---');
  convertsTo('ul→-', '<ul><li>a</li><li>b</li></ul>', '- a\n- b');
  convertsTo('ol→1.', '<ol><li>a</li><li>b</li></ol>', '1. a\n2. b');
  convertsWith('bq→>', '<blockquote><p>q</p></blockquote>', '> ');
  convertsWith('pre→```', '<pre><code>x</code></pre>', '```');
  convertsWith('pre lang', '<pre><code class="language-py">x</code></pre>', '```py');
  convertsWith(
    'table→pipes',
    '<table><thead><tr><th>a</th><th>b</th></tr></thead><tbody><tr><td>1</td><td>2</td></tr></tbody></table>',
    '| a | b |',
  );
});
// Markdown → HTML → Markdown round-trips through rt().
describe('Round-trips', () => {
  // These inputs must come back byte-for-byte identical.
  it.each([
    ['paragraph', 'Hello world'],
    ['bold', '**bold**'],
    ['italic', '*italic*'],
    ['code', '`code`'],
    ['link', '[t](http://x)'],
    ['h1', '# Title'],
    ['h2', '## Sub'],
    ['ul', '- a\n- b'],
    ['ol', '1. a\n2. b'],
  ])('%s', (_label, markdown) => {
    const roundTripped = rt(markdown);
    expect(roundTripped).toBe(markdown);
  });

  it('hr', () => {
    const rule = '---';
    expect(rt(rule)).toBe('---');
  });

  // The remaining cases only check that the characteristic syntax survives.
  it('blockquote', () => {
    const roundTripped = rt('> quoted');
    expect(roundTripped).toContain('> ');
  });

  it('code block', () => {
    const roundTripped = rt('```\nx = 1\n```');
    expect(roundTripped).toContain('```');
  });

  it('table', () => {
    const roundTripped = rt('| a | b |\n|---|---|\n| 1 | 2 |');
    expect(roundTripped).toContain('| a | b |');
  });
});
// Inline constructs nested inside one another; every case pins the exact HTML output.
describe('Nested inline', () => {
  // Each row is [test title, Markdown input, expected HTML].
  const cases: ReadonlyArray<readonly [string, string, string]> = [
    ['bold wraps italic', '**a *b* c**', '<p><strong>a <em>b</em> c</strong></p>'],
    ['italic wraps bold', '*a **b** c*', '<p><em>a <strong>b</strong> c</em></p>'],
    ['bold wraps code', '**a `b` c**', '<p><strong>a <code>b</code> c</strong></p>'],
    ['bold wraps link', '**[t](u)**', '<p><strong><a href="u">t</a></strong></p>'],
    ['link with bold', '[**t**](u)', '<p><a href="u"><strong>t</strong></a></p>'],
    ['link with code', '[`t`](u)', '<p><a href="u"><code>t</code></a></p>'],
  ];

  for (const [title, markdown, expectedHtml] of cases) {
    it(title, () => {
      expect(H(markdown)).toBe(expectedHtml);
    });
  }
});
// Block and inline constructs nested inside other blocks; each case checks for a fragment only.
describe('Nested blocks', () => {
  // Each row is [test title, Markdown input, HTML fragment that must appear].
  const cases: ReadonlyArray<readonly [string, string, string]> = [
    ['bq > heading', '> # Title', '<h1'],
    ['bq > list', '> - a\n> - b', '<ul>'],
    ['bq > bq', '> > nested', '<blockquote>'],
    ['li > bold', '- **bold**', '<strong>bold</strong>'],
    ['heading > code', '## `code`', '<code>code</code>'],
    ['table > bold', '| **b** |\n|---|\n| x |', '<strong>b</strong>'],
  ];

  for (const [title, markdown, fragment] of cases) {
    it(title, () => {
      expect(H(markdown)).toContain(fragment);
    });
  }
});
// Nested list rendering and round-tripping.
//
// Nesting depth is expressed with explicit indentation: a child item is
// indented to the content column of its parent (two spaces under "- ",
// three under "1. "), and a grandchild is indented once more. Previously the
// 'ul > ul' and '3-level' cases passed byte-identical Markdown while
// expecting different HTML, so they could not both pass.
//
// NOTE(review): the round-trip case assumes toMarkdown() emits two spaces per
// nesting level — confirm against the serializer.
describe('Nested lists', () => {
  it('ul > ul', () => {
    const html = H('- a\n  - b\n  - c\n- d');
    expect(html).toBe('<ul><li>a<ul><li>b</li><li>c</li></ul></li><li>d</li></ul>');
  });

  it('ol > ol', () => {
    const html = H('1. a\n   1. b\n   1. c\n2. d');
    expect(html).toBe('<ol><li>a<ol><li>b</li><li>c</li></ol></li><li>d</li></ol>');
  });

  it('ul > ol', () => {
    const html = H('- a\n  1. b\n  2. c\n- d');
    expect(html).toBe('<ul><li>a<ol><li>b</li><li>c</li></ol></li><li>d</li></ul>');
  });

  it('3-level', () => {
    // "c" is indented one level deeper than "b", making it a grandchild of "a".
    const html = H('- a\n  - b\n    - c\n- d');
    expect(html).toBe('<ul><li>a<ul><li>b<ul><li>c</li></ul></li></ul></li><li>d</li></ul>');
  });

  it('ul>ul rt', () => {
    const markdown = '- a\n  - b\n  - c\n- d';
    expect(rt(markdown)).toBe(markdown);
  });
});
// Inline Markdown inside table cells, both rendered to HTML and round-tripped.
describe('Tables with nested markdown', () => {
  it('td bold', () => {
    const html = H('| h |\n|---|\n| **b** |');
    expect(html).toContain('<td><strong>b</strong></td>');
  });

  it('td link>bold', () => {
    const html = H('| h |\n|---|\n| [**t**](u) |');
    expect(html).toContain('<a href="u"><strong>t</strong></a>');
  });

  // Round-trips come back with the separator row written as "| --- |".
  it('td bold rt', () => {
    const roundTripped = rt('| h |\n|---|\n| **b** |');
    expect(roundTripped).toBe('| h |\n| --- |\n| **b** |');
  });

  it('multi-cell rt', () => {
    const roundTripped = rt('| **a** | *b* |\n|---|---|\n| `c` | [d](e) |');
    expect(roundTripped).toBe('| **a** | *b* |\n| --- | --- |\n| `c` | [d](e) |');
  });
});
// Backslash-escaped delimiters must render as literal characters.
describe('Backslash escapes', () => {
  // Each row is [test title, Markdown input, expected HTML].
  const literalCases: ReadonlyArray<readonly [string, string, string]> = [
    ['escaped asterisk', '\\*not italic\\*', '<p>*not italic*</p>'],
    ['escaped backslash', 'a \\\\ b', '<p>a \\ b</p>'],
    ['escaped backtick', '\\`not code\\`', '<p>`not code`</p>'],
  ];

  for (const [title, markdown, expectedHtml] of literalCases) {
    it(title, () => {
      expect(H(markdown)).toBe(expectedHtml);
    });
  }

  // Despite its title, this case only inspects the Markdown → HTML output.
  it('round-trip preserves escape', () => {
    const rendered = H('\\*literal\\*');
    expect(rendered).toContain('*literal*');
    expect(rendered).not.toContain('<em>');
  });
});
// Strikethrough (~~text~~ ↔ <del>) in both directions.
describe('Strikethrough', () => {
  it('md→html', () => {
    const html = H('~~deleted~~');
    expect(html).toBe('<p><del>deleted</del></p>');
  });

  it('html→md', () => {
    const markdown = M('<p><del>gone</del></p>');
    expect(markdown).toBe('~~gone~~');
  });

  it('round-trip', () => {
    const roundTripped = rt('~~struck~~');
    expect(roundTripped).toBe('~~struck~~');
  });

  it('mixed with bold', () => {
    const html = H('**bold** and ~~struck~~');
    expect(html).toContain('<del>struck</del>');
  });
});
// Inline links carrying a quoted title.
describe('Link titles', () => {
  it('link with title', () => {
    const html = H('[t](http://x "My Title")');
    expect(html).toBe('<p><a href="http://x" title="My Title">t</a></p>');
  });

  it('title round-trip', () => {
    const roundTripped = rt('[t](http://x "My Title")');
    expect(roundTripped).toBe('[t](http://x "My Title")');
  });
});
// Reference-style links: [text][ref] resolved against "[ref]: url" definitions.
describe('Reference links', () => {
  // Each row is [test title, Markdown input, HTML fragment that must appear].
  const fragmentCases: ReadonlyArray<readonly [string, string, string]> = [
    ['basic reference', '[text][ref]\n\n[ref]: http://x', '<a href="http://x">text</a>'],
    ['shortcut reference', '[ref][]\n\n[ref]: http://x', '<a href="http://x">ref</a>'],
    ['reference with title', '[t][r]\n\n[r]: http://x "T"', 'title="T"'],
    ['case insensitive', '[t][REF]\n\n[ref]: http://x', '<a href="http://x">'],
    ['undefined reference passes through', '[t][missing]', '[t][missing]'],
  ];

  for (const [title, markdown, fragment] of fragmentCases) {
    it(title, () => {
      expect(H(markdown)).toContain(fragment);
    });
  }

  it('definition not rendered', () => {
    const html = H('[ref]: http://x\n\ntext');
    expect(html).toBe('<p>text</p>');
  });
});
// Raw HTML embedded in Markdown should reach the output unescaped.
describe('HTML passthrough', () => {
  it('inline html preserved', () => {
    const html = H('a <span class="x">b</span> c');
    expect(html).toContain('<span class="x">b</span>');
  });

  it('self-closing tag', () => {
    const html = H('a <br/> b');
    expect(html).toContain('<br/>');
  });

  it('html not double-escaped', () => {
    const html = H('<em>hi</em>');
    expect(html).not.toContain('&lt;');
  });
});
// Autolinks: <url> in angle brackets and bare URLs in running text.
describe('Autolinks', () => {
  it('angle bracket autolink', () => {
    const html = H('<https://example.com>');
    expect(html).toContain('<a href="https://example.com">');
  });

  it('bare URL', () => {
    const html = H('visit https://example.com today');
    expect(html).toContain('<a href="https://example.com">');
  });

  it('URL not matched inside link', () => {
    const html = H('[text](https://example.com)');
    // Exactly one opening <a> tag means the URL was not auto-linked a second time.
    const anchors = html.match(/<a /g);
    const count = anchors ? anchors.length : 0;
    expect(count).toBe(1);
  });
});
// Alternate spellings that are accepted on input but written back in one canonical form.
describe('Alternate syntax (parse-only, canonical output)', () => {
  describe('underscore emphasis', () => {
    it('_italic_ → *italic*', () => {
      const source = '_italic_';
      expect(H(source)).toBe('<p><em>italic</em></p>');
      expect(rt(source)).toBe('*italic*');
    });

    it('__bold__ → **bold**', () => {
      const source = '__bold__';
      expect(H(source)).toBe('<p><strong>bold</strong></p>');
      expect(rt(source)).toBe('**bold**');
    });

    it('___both___ → ***both***', () => {
      const source = '___both___';
      expect(H(source)).toContain('<em><strong>both</strong></em>');
      expect(rt(source)).toBe('***both***');
    });

    it('mid-word _ not converted', () => {
      const html = H('foo_bar_baz');
      expect(html).toBe('<p>foo_bar_baz</p>');
    });
  });

  describe('setext headings', () => {
    it('=== underline → h1', () => {
      const source = 'Title\n=====';
      expect(H(source)).toContain('<h1');
      expect(H(source)).toContain('Title');
    });

    it('--- underline → h2', () => {
      const html = H('Sub\n---');
      expect(html).toContain('<h2');
    });

    it('round-trips to ATX', () => {
      expect(rt('Title\n=====')).toBe('# Title');
      expect(rt('Sub\n---')).toBe('## Sub');
    });
  });

  describe('ATX closing hashes', () => {
    const source = '## Title ##';

    it('## Title ## → h2', () => {
      expect(H(source)).toContain('<h2');
      expect(H(source)).toContain('Title');
    });

    it('round-trips without closing', () => {
      expect(rt(source)).toBe('## Title');
    });
  });

  describe('tilde fenced code', () => {
    const source = '~~~\ncode\n~~~';

    it('~~~ fence accepted', () => {
      expect(H(source)).toContain('<code>code</code>');
    });

    it('round-trips to backtick', () => {
      expect(rt(source)).toContain('```');
    });
  });

  describe('plus list marker', () => {
    const source = '+ item';

    it('+ item accepted', () => {
      expect(H(source)).toContain('<li>');
    });

    it('round-trips to -', () => {
      expect(rt(source)).toContain('- item');
    });
  });
});
// Direct tests of the delimiter-matching helpers exposed on the HopDown instance.
describe('HopDown delimiter matching API', () => {
  describe('findCompletePair', () => {
    it('finds bold pair', () => {
      const pair = hopdown.findCompletePair('hello **world** end');
      expect(pair).not.toBeNull();
      expect(pair!.htmlTag).toBe('strong');
      expect(pair!.content).toBe('world');
      expect(pair!.delimiter).toBe('**');
    });

    it('finds italic pair', () => {
      const pair = hopdown.findCompletePair('hello *world* end');
      expect(pair).not.toBeNull();
      expect(pair!.htmlTag).toBe('em');
    });

    it('finds strikethrough pair', () => {
      const pair = hopdown.findCompletePair('hello ~~gone~~ end');
      expect(pair).not.toBeNull();
      expect(pair!.htmlTag).toBe('del');
    });

    it('returns null when no pair exists', () => {
      const pair = hopdown.findCompletePair('hello world');
      expect(pair).toBeNull();
    });

    it('skips sentinel-wrapped content', () => {
      // The span between \x01 and \x02 already holds rendered HTML.
      const pair = hopdown.findCompletePair('hello \x01<strong>world</strong>\x02 end');
      expect(pair).toBeNull();
    });

    it('respects precedence (boldItalic before bold)', () => {
      const pair = hopdown.findCompletePair('***both***');
      expect(pair).not.toBeNull();
      expect(pair!.htmlTag).toBe('em');
      expect(pair!.tag.name).toBe('boldItalic');
    });
  });

  describe('findUnmatchedOpener', () => {
    it('finds unclosed bold', () => {
      const opener = hopdown.findUnmatchedOpener('hello **world');
      expect(opener).not.toBeNull();
      expect(opener!.htmlTag).toBe('strong');
      expect(opener!.content).toBe('world');
    });

    it('returns null when no opener exists', () => {
      const opener = hopdown.findUnmatchedOpener('hello world end');
      expect(opener).toBeNull();
    });

    it('returns null for plain text', () => {
      const opener = hopdown.findUnmatchedOpener('hello world');
      expect(opener).toBeNull();
    });
  });

  describe('getTagForElement', () => {
    it('returns tag for strong element', () => {
      const strongEl = document.createElement('strong');
      const tag = hopdown.getTagForElement(strongEl);
      expect(tag).not.toBeNull();
      expect(tag!.name).toBe('bold');
      expect(tag!.delimiter).toBe('**');
    });

    it('returns tag for em element', () => {
      const emEl = document.createElement('em');
      const tag = hopdown.getTagForElement(emEl);
      expect(tag).not.toBeNull();
      expect(tag!.name).toBe('italic');
    });

    it('returns null for div element', () => {
      const divEl = document.createElement('div');
      const tag = hopdown.getTagForElement(divEl);
      expect(tag).toBeNull();
    });
  });

  describe('getEditableSelector', () => {
    it('returns a non-empty string', () => {
      const selector = hopdown.getEditableSelector();
      expect(selector.length).toBeGreaterThan(0);
    });

    it('includes inline tag selectors', () => {
      const selector = hopdown.getEditableSelector();
      for (const tagName of ['strong', 'em', 'code']) {
        expect(selector).toContain(tagName);
      }
    });

    it('includes block tag selectors', () => {
      const selector = hopdown.getEditableSelector();
      for (const tagName of ['pre', 'blockquote']) {
        expect(selector).toContain(tagName);
      }
    });
  });
});
// Hard line breaks: a line ending in two (or more) spaces, or in a backslash,
// renders as <br>; a single trailing space is an ordinary soft break.
//
// The two-space and single-space cases previously used the same input string
// while asserting opposite results, so they could not both pass. The inputs
// below are spelled with the exact number of trailing spaces each case needs.
describe('Hard line breaks', () => {
  const TWO_SPACE_BREAK = 'line one  \nline two';
  const ONE_SPACE_SOFT_BREAK = 'line one \nline two';

  it('trailing two spaces', () => {
    expect(H(TWO_SPACE_BREAK)).toContain('<br>');
  });

  it('trailing backslash', () => {
    expect(H('line one\\\nline two')).toContain('<br>');
  });

  it('single space does not break', () => {
    expect(H(ONE_SPACE_SOFT_BREAK)).not.toContain('<br>');
  });

  it('round-trip', () => {
    const html = H(TWO_SPACE_BREAK);
    const markdown = M(html);
    // Two spaces before the newline are required for the break to survive;
    // a single space would only be a soft break.
    expect(markdown).toContain('  \n');
  });
});
// Links must never end up nested inside other links.
describe('Link nesting prevention', () => {
  const nestedLinkSource = '[outer [inner](http://b)](http://a)';

  it('nested brackets prevent link match', () => {
    const rendered = H(nestedLinkSource);
    // The outer "[" cannot form a single link, so the inner link is the one
    // that matches and the outer brackets remain literal text.
    expect(rendered).toContain('<a href="http://b">inner</a>');
  });

  it('preserves inner link text', () => {
    const rendered = H(nestedLinkSource);
    expect(rendered).toContain('inner');
  });

  it('autolink inside link is stripped', () => {
    const rendered = H('[see <https://b.com>](http://a)');
    // Count opening anchor tags; only the outer link may produce one.
    const anchors = rendered.match(/<a /g);
    const linkCount = anchors ? anchors.length : 0;
    expect(linkCount).toBe(1);
  });
});
// Emphasis delimiter runs: matched runs render, mismatched 1+2 / 2+1 runs do not.
describe('Multiple-of-3 emphasis rule', () => {
  /** Registers a test asserting that `md` produces neither <em> nor <strong>. */
  const rejectsEmphasis = (title: string, md: string): void => {
    it(title, () => {
      const html = H(md);
      expect(html).not.toContain('<em>');
      expect(html).not.toContain('<strong>');
    });
  };

  it('***foo*** is bold-italic', () => {
    const html = H('***foo***');
    expect(html).toContain('<em><strong>foo</strong></em>');
  });

  it('**foo** is bold', () => {
    const html = H('**foo**');
    expect(html).toBe('<p><strong>foo</strong></p>');
  });

  it('*foo* is italic', () => {
    const html = H('*foo*');
    expect(html).toBe('<p><em>foo</em></p>');
  });

  rejectsEmphasis('*foo** does not match (1+2=3, rule applies)', '*foo**');
  rejectsEmphasis('**foo* does not match (2+1=3, rule applies)', '**foo*');
});
// Character references in Markdown input. The expected values are the HTML
// output, so &, < and > appear in their escaped form there.
describe('HTML entity resolution', () => {
  // Each row is [test title, Markdown input, expected HTML].
  const exactCases: ReadonlyArray<readonly [string, string, string]> = [
    ['&amp; resolves to &', 'a &amp; b', '<p>a &amp; b</p>'],
    ['&lt; resolves to <', 'a &lt; b', '<p>a &lt; b</p>'],
    ['&gt; resolves to >', 'a &gt; b', '<p>a &gt; b</p>'],
    ['&#123; resolves to {', '&#123;', '<p>{</p>'],
    ['&#x7B; resolves to {', '&#x7B;', '<p>{</p>'],
  ];

  for (const [title, markdown, expectedHtml] of exactCases) {
    it(title, () => {
      expect(H(markdown)).toBe(expectedHtml);
    });
  }

  it('unknown entity passes through', () => {
    const html = H('&unknown;');
    expect(html).toContain('&amp;unknown;');
  });
});
// Nested inline formatting: Markdown → HTML, plus HTML → Markdown → HTML stability.
describe('Nested inline scenarios', () => {
  /** Registers a test asserting that `html` is unchanged after HTML → Markdown → HTML. */
  const survivesRoundTrip = (title: string, html: string): void => {
    it(title, () => {
      expect(H(M(html))).toBe(html);
    });
  };

  describe('markdown → HTML nesting', () => {
    it('strikethrough wraps bold', () => {
      const rendered = H('~~**bold** struck~~');
      expect(rendered).toBe('<p><del><strong>bold</strong> struck</del></p>');
    });

    it('bold wraps strikethrough', () => {
      const rendered = H('**~~struck~~ bold**');
      expect(rendered).toBe('<p><strong><del>struck</del> bold</strong></p>');
    });

    it('italic wraps link', () => {
      const rendered = H('*[text](http://x)*');
      expect(rendered).toContain('<em><a href="http://x">text</a></em>');
    });

    it('code inside strikethrough', () => {
      const rendered = H('~~`code` struck~~');
      expect(rendered).toContain('<del><code>code</code> struck</del>');
    });

    it('adjacent bold and italic', () => {
      const rendered = H('**bold***italic*');
      expect(rendered).toContain('<strong>bold</strong>');
      expect(rendered).toContain('<em>italic</em>');
    });
  });

  describe('HTML → markdown → HTML round-trip nesting', () => {
    survivesRoundTrip('bold wraps italic', '<p><strong>a <em>b</em> c</strong></p>');
    survivesRoundTrip('italic wraps bold', '<p><em>a <strong>b</strong> c</em></p>');
    survivesRoundTrip('bold wraps code', '<p><strong>a <code>b</code> c</strong></p>');
    survivesRoundTrip('bold wraps link', '<p><strong><a href="http://x">t</a></strong></p>');
    survivesRoundTrip('strikethrough wraps bold', '<p><del><strong>bold</strong> struck</del></p>');
    survivesRoundTrip('italic wraps link', '<p><em><a href="http://x">t</a></em></p>');
  });

  describe('literal delimiters in text round-trip', () => {
    // Delimiter characters that appear as plain text must not turn into
    // formatting after a trip through Markdown.
    survivesRoundTrip('literal * in bold', '<p><strong>a * b</strong></p>');
    survivesRoundTrip('literal ~ in strikethrough', '<p><del>a ~ b</del></p>');
    survivesRoundTrip('literal ` adjacent to code', '<p>a ` b <code>c</code></p>');
    survivesRoundTrip('literal * in plain text', '<p>hello * world</p>');
    survivesRoundTrip('literal ** in plain text', '<p>hello ** world</p>');
    survivesRoundTrip('literal _ in plain text', '<p>hello _ world</p>');
  });
});
// A backslash before "<" must keep the tag from becoming a real element.
describe('Backslash-escaped HTML tags', () => {
  it('\\<em> does not produce a real em element', () => {
    const rendered = H('\\<em>text');
    expect(rendered).not.toContain('<em>');
    expect(rendered).toContain('&lt;em&gt;');
  });

  it('\\<b> does not produce a real b element', () => {
    const rendered = H('\\<b>text');
    expect(rendered).not.toContain('<b>');
  });

  it('round-trip of escaped HTML tag in text', () => {
    const seedHtml = '<p>~~\\<em>---\\<b></em></p>';
    // Two successive HTML → Markdown → HTML passes; the second pass must
    // produce the same HTML as the first.
    const firstPass = H(M(seedHtml));
    const secondPass = H(M(firstPass));
    expect(firstPass).toBe(secondPass);
  });
});