Rework Markdown parsing (#719)

* Switch markdown parser

* Add inline maths

* Basic plain text rendering

* Add display math support

* Remove unnecessary <p> tag

* Fix spoiler not working

* Add spoiler reason input support

* Make paragraphs display with newline in between

* Handle single newlines

* Fix typo when allowing start attribute

* Cleanup for merge

* Remove unused import
This commit is contained in:
ginnyTheCat 2022-08-21 16:04:09 +02:00 committed by GitHub
parent 76c16ce294
commit 80aa55b706
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 206 additions and 1404 deletions

1271
package-lock.json generated

File diff suppressed because it is too large. Load diff

View file

@ -17,6 +17,7 @@
"dependencies": { "dependencies": {
"@fontsource/inter": "^4.5.12", "@fontsource/inter": "^4.5.12",
"@fontsource/roboto": "^4.5.8", "@fontsource/roboto": "^4.5.8",
"@khanacademy/simple-markdown": "^0.8.3",
"@matrix-org/olm": "https://gitlab.matrix.org/api/v4/projects/27/packages/npm/@matrix-org/olm/-/@matrix-org/olm-3.2.12.tgz", "@matrix-org/olm": "https://gitlab.matrix.org/api/v4/projects/27/packages/npm/@matrix-org/olm/-/@matrix-org/olm-3.2.12.tgz",
"@tippyjs/react": "^4.2.6", "@tippyjs/react": "^4.2.6",
"babel-polyfill": "^6.26.0", "babel-polyfill": "^6.26.0",
@ -32,12 +33,6 @@
"linkify-html": "^4.0.0-beta.5", "linkify-html": "^4.0.0-beta.5",
"linkifyjs": "^4.0.0-beta.5", "linkifyjs": "^4.0.0-beta.5",
"matrix-js-sdk": "^19.2.0", "matrix-js-sdk": "^19.2.0",
"micromark": "^3.0.10",
"micromark-extension-gfm": "^2.0.1",
"micromark-extension-math": "^2.0.2",
"micromark-util-chunked": "^1.0.0",
"micromark-util-resolve-all": "^1.0.0",
"micromark-util-symbol": "^1.0.1",
"prop-types": "^15.8.1", "prop-types": "^15.8.1",
"react": "^17.0.2", "react": "^17.0.2",
"react-autosize-textarea": "^7.1.0", "react-autosize-textarea": "^7.1.0",

View file

@ -101,7 +101,6 @@
} }
} }
.message__header { .message__header {
display: flex; display: flex;
align-items: baseline; align-items: baseline;
@ -115,10 +114,16 @@
@extend .cp-txt__ellipsis; @extend .cp-txt__ellipsis;
color: inherit; color: inherit;
} }
& > span:last-child { display: none; } & > span:last-child {
display: none;
}
&:hover { &:hover {
& > span:first-child { display: none; } & > span:first-child {
& > span:last-child { display: block; } display: none;
}
& > span:last-child {
display: block;
}
} }
} }
@ -169,6 +174,10 @@
white-space: initial !important; white-space: initial !important;
} }
& p:not(:last-child) {
margin-bottom: var(--sp-normal);
}
& span[data-mx-pill] { & span[data-mx-pill] {
background-color: hsla(0, 0%, 64%, 0.15); background-color: hsla(0, 0%, 64%, 0.15);
padding: 0 2px; padding: 0 2px;
@ -255,7 +264,7 @@
} }
&-count { &-count {
margin: 0 var(--sp-ultra-tight); margin: 0 var(--sp-ultra-tight);
color: var(--tc-surface-normal) color: var(--tc-surface-normal);
} }
&-tooltip .react-emoji { &-tooltip .react-emoji {
width: 16px; width: 16px;
@ -270,7 +279,7 @@
} }
} }
&:active { &:active {
background-color: var(--bg-surface-active) background-color: var(--bg-surface-active);
} }
&--active { &--active {
@ -282,7 +291,7 @@
} }
} }
&:active { &:active {
background-color: var(--bg-caution-active) background-color: var(--bg-caution-active);
} }
} }
} }
@ -303,7 +312,12 @@
// markdown formatting // markdown formatting
.message__body { .message__body {
& h1, h2, h3, h4, h5, h6 { & h1,
h2,
h3,
h4,
h5,
h6 {
margin: 0; margin: 0;
margin-bottom: var(--sp-ultra-tight); margin-bottom: var(--sp-ultra-tight);
font-weight: var(--fw-medium); font-weight: var(--fw-medium);
@ -424,7 +438,8 @@
@include scrollbar.scroll__h; @include scrollbar.scroll__h;
@include scrollbar.scroll--auto-hide; @include scrollbar.scroll--auto-hide;
& td, & th { & td,
& th {
padding: var(--sp-extra-tight); padding: var(--sp-extra-tight);
border: 1px solid var(--bg-surface-border); border: 1px solid var(--bg-surface-border);
border-width: 0 1px 1px 0; border-width: 0 1px 1px 0;
@ -432,11 +447,11 @@
&:last-child { &:last-child {
border-width: 0; border-width: 0;
border-bottom-width: 1px; border-bottom-width: 1px;
[dir=rtl] & { [dir='rtl'] & {
border-width: 0 1px 1px 0; border-width: 0 1px 1px 0;
} }
} }
[dir=rtl] &:first-child { [dir='rtl'] &:first-child {
border-width: 0; border-width: 0;
border-bottom-width: 1px; border-bottom-width: 1px;
} }

View file

@ -1,15 +1,12 @@
import EventEmitter from 'events'; import EventEmitter from 'events';
import { micromark } from 'micromark';
import { gfm, gfmHtml } from 'micromark-extension-gfm';
import encrypt from 'browser-encrypt-attachment'; import encrypt from 'browser-encrypt-attachment';
import { math } from 'micromark-extension-math';
import { encode } from 'blurhash'; import { encode } from 'blurhash';
import { getShortcodeToEmoji } from '../../app/organisms/emoji-board/custom-emoji'; import { getShortcodeToEmoji } from '../../app/organisms/emoji-board/custom-emoji';
import { mathExtensionHtml, spoilerExtension, spoilerExtensionHtml } from '../../util/markdown';
import { getBlobSafeMimeType } from '../../util/mimetypes'; import { getBlobSafeMimeType } from '../../util/mimetypes';
import { sanitizeText } from '../../util/sanitize'; import { sanitizeText } from '../../util/sanitize';
import cons from './cons'; import cons from './cons';
import settings from './settings'; import settings from './settings';
import { htmlOutput, parser } from '../../util/markdown';
const blurhashField = 'xyz.amorgan.blurhash'; const blurhashField = 'xyz.amorgan.blurhash';
const MXID_REGEX = /\B@\S+:\S+\.\S+[^.,:;?!\s]/g; const MXID_REGEX = /\B@\S+:\S+\.\S+[^.,:;?!\s]/g;
@ -104,14 +101,11 @@ function getVideoThumbnail(video, width, height, mimeType) {
} }
function getFormattedBody(markdown) { function getFormattedBody(markdown) {
const result = micromark(markdown, { let content = parser(markdown);
extensions: [gfm(), spoilerExtension(), math()], if (content.length === 1 && content[0].type === 'paragraph') {
htmlExtensions: [gfmHtml(), spoilerExtensionHtml, mathExtensionHtml], content = content[0].content;
}); }
const bodyParts = result.match(/^(<p>)(.*)(<\/p>)$/); return htmlOutput(content);
if (bodyParts === null) return result;
if (bodyParts[2].indexOf('</p>') >= 0) return result;
return bodyParts[2];
} }
function getReplyFormattedBody(roomId, reply) { function getReplyFormattedBody(roomId, reply) {

View file

@ -1,198 +1,89 @@
/* eslint-disable no-param-reassign */ import SimpleMarkdown from '@khanacademy/simple-markdown';
/* eslint-disable no-plusplus */
/* eslint-disable no-continue */
import { codes } from 'micromark-util-symbol/codes'; const {
import { types } from 'micromark-util-symbol/types'; defaultRules, parserFor, outputFor, anyScopeRegex, blockRegex, inlineRegex, htmlTag, sanitizeText,
import { resolveAll } from 'micromark-util-resolve-all'; } = SimpleMarkdown;
import { splice } from 'micromark-util-chunked';
function inlineExtension(marker, len, key) { function mathHtml(wrap, node) {
const keySeq = `${key}Sequence`; return htmlTag(wrap, htmlTag('code', sanitizeText(node.content)), { 'data-mx-maths': node.content });
const keySeqTmp = `${keySeq}Temporary`;
return () => {
function tokenize(effects, ok, nok) {
const { previous, events } = this;
let size = 0;
function more(code) {
// consume more markers if the maximum length hasn't been reached yet
if (code === marker && size < len) {
effects.consume(code);
size += 1;
return more;
} }
// check for minimum length const rules = {
if (size < len) return nok(code); ...defaultRules,
Array: {
effects.exit(keySeqTmp); ...defaultRules.Array,
return ok(code); plain: (arr, output, state) => arr.map((node) => output(node, state)).join(''),
}
function start(code) {
// ignore code if it's not a marker
if (code !== marker) return nok(code);
if (previous === marker
&& events[events.length - 1][1].type !== types.characterEscape) return nok(code);
effects.enter(keySeqTmp);
return more(code);
}
return start;
}
function resolve(events, context) {
let i = -1;
while (++i < events.length) {
if (events[i][0] !== 'enter' || events[i][1].type !== keySeqTmp) continue;
let open = i;
while (open--) {
if (events[open][0] !== 'exit' || events[open][1].type !== keySeqTmp) continue;
events[i][1].type = keySeq;
events[open][1].type = keySeq;
const border = {
type: key,
start: { ...events[open][1].start },
end: { ...events[i][1].end },
};
const text = {
type: `${key}Text`,
start: { ...events[open][1].end },
end: { ...events[i][1].start },
};
const nextEvents = [
['enter', border, context],
['enter', events[open][1], context],
['exit', events[open][1], context],
['enter', text, context],
];
splice(
nextEvents,
nextEvents.length,
0,
resolveAll(
context.parser.constructs.insideSpan.null,
events.slice(open + 1, i),
context,
),
);
splice(nextEvents, nextEvents.length, 0, [
['exit', text, context],
['enter', events[i][1], context],
['exit', events[i][1], context],
['exit', border, context],
]);
splice(events, open - 1, i - open + 3, nextEvents);
i = open + nextEvents.length - 2;
break;
}
}
events.forEach((event) => {
if (event[1].type === keySeqTmp) {
event[1].type = types.data;
}
});
return events;
}
const tokenizer = {
tokenize,
resolveAll: resolve,
};
return {
text: { [marker]: tokenizer },
insideSpan: { null: [tokenizer] },
attentionMarkers: { null: [marker] },
};
};
}
const spoilerExtension = inlineExtension(codes.verticalBar, 2, 'spoiler');
const spoilerExtensionHtml = {
enter: {
spoiler() {
this.tag('<span data-mx-spoiler>');
}, },
displayMath: {
order: defaultRules.list.order + 0.5,
match: blockRegex(/^\$\$\n*([\s\S]+?)\n*\$\$/),
parse: (capture) => ({ content: capture[1] }),
plain: (node) => `$$\n${node.content}\n$$`,
html: (node) => mathHtml('div', node),
}, },
exit: { newline: {
spoiler() { ...defaultRules.newline,
this.tag('</span>'); plain: () => '\n',
}, },
paragraph: {
...defaultRules.paragraph,
plain: (node, output, state) => `${output(node.content, state)}\n\n`,
html: (node, output, state) => htmlTag('p', output(node.content, state)),
},
escape: {
...defaultRules.escape,
plain: (node, output, state) => `\\${output(node.content, state)}`,
},
em: {
...defaultRules.em,
plain: (node, output, state) => `_${output(node.content, state)}_`,
},
strong: {
...defaultRules.strong,
plain: (node, output, state) => `**${output(node.content, state)}**`,
},
u: {
...defaultRules.u,
plain: (node, output, state) => `__${output(node.content, state)}__`,
},
del: {
...defaultRules.del,
plain: (node, output, state) => `~~${output(node.content, state)}~~`,
},
spoiler: {
order: defaultRules.em.order - 0.5,
match: inlineRegex(/^\|\|([\s\S]+?)\|\|(?:\(([\s\S]+?)\))?/),
parse: (capture, parse, state) => ({
content: parse(capture[1], state),
reason: capture[2],
}),
plain: (node) => `[spoiler${node.reason ? `: ${node.reason}` : ''}](mxc://somewhere)`,
html: (node, output, state) => `<span data-mx-spoiler${node.reason ? `="${sanitizeText(node.reason)}"` : ''}>${output(node.content, state)}</span>`,
},
inlineMath: {
order: defaultRules.del.order + 0.5,
match: inlineRegex(/^\$(\S[\s\S]+?\S|\S)\$(?!\d)/),
parse: (capture) => ({ content: capture[1] }),
plain: (node) => `$${node.content}$`,
html: (node) => mathHtml('span', node),
},
br: {
...defaultRules.br,
match: anyScopeRegex(/^ *\n/),
plain: () => '\n',
},
text: {
...defaultRules.text,
match: anyScopeRegex(/^[\s\S]+?(?=[^0-9A-Za-z\s\u00c0-\uffff]| *\n|\w+:\S|$)/),
plain: (node) => node.content,
}, },
}; };
const mathExtensionHtml = { const parser = parserFor(rules);
enter: {
mathFlow() { const plainOutput = outputFor(rules, 'plain');
this.lineEndingIfNeeded(); const htmlOutput = outputFor(rules, 'html');
},
mathFlowFenceMeta() {
this.buffer();
},
mathText() {
this.buffer();
},
},
exit: {
mathFlow() {
const value = this.encode(this.resume().replace(/(?:\r?\n|\r)$/, ''));
this.tag('<div data-mx-maths="');
this.tag(value);
this.tag('"><code>');
this.raw(value);
this.tag('</code></div>');
this.setData('mathFlowOpen');
this.setData('slurpOneLineEnding');
},
mathFlowFence() {
// After the first fence.
if (!this.getData('mathFlowOpen')) {
this.setData('mathFlowOpen', true);
this.setData('slurpOneLineEnding', true);
this.buffer();
}
},
mathFlowFenceMeta() {
this.resume();
},
mathFlowValue(token) {
this.raw(this.sliceSerialize(token));
},
mathText() {
const value = this.encode(this.resume());
this.tag('<span data-mx-maths="');
this.tag(value);
this.tag('"><code>');
this.raw(value);
this.tag('</code></span>');
},
mathTextData(token) {
this.raw(this.sliceSerialize(token));
},
},
};
export { export {
inlineExtension, parser, plainOutput, htmlOutput,
spoilerExtension, spoilerExtensionHtml,
mathExtensionHtml,
}; };

View file

@ -19,7 +19,7 @@ const permittedTagToAttributes = {
div: ['data-mx-maths'], div: ['data-mx-maths'],
a: ['name', 'target', 'href', 'rel'], a: ['name', 'target', 'href', 'rel'],
img: ['width', 'height', 'alt', 'title', 'src', 'data-mx-emoticon'], img: ['width', 'height', 'alt', 'title', 'src', 'data-mx-emoticon'],
o: ['start'], ol: ['start'],
code: ['class'], code: ['class'],
}; };