diff --git a/changelog_unreleased/markdown/16619.md b/changelog_unreleased/markdown/16619.md new file mode 100644 index 000000000000..f9822600cfcc --- /dev/null +++ b/changelog_unreleased/markdown/16619.md @@ -0,0 +1,35 @@ +#### Preserve non-ASCII whitespace at the end of the line and beginning of the next line (#16619 by @tats-u) + +Previously, Prettier removed non-ASCII whitespace at the end of a line and at the beginning of the next line. However, this behavior is not consistent with the CommonMark spec. + +https://spec.commonmark.org/0.31.2/#soft-line-breaks + +> Spaces at the end of the line and beginning of the next line are removed: + +https://spec.commonmark.org/0.31.2/#unicode-whitespace-character + +> A Unicode whitespace character is a character in the Unicode Zs general category, or a tab (U+0009), line feed (U+000A), form feed (U+000C), or carriage return (U+000D). + +> Unicode whitespace is a sequence of one or more Unicode whitespace characters. + +> A space is U+0020. + +The soft line break rule only mentions spaces (U+0020), not other Unicode whitespace, so removing non-ASCII whitespace changes the content of the Markdown document. 
+ + +```md + + EM Space (U+2003) EM Space  + + 全角スペース (U+3000) 全形空白  + + +EM Space (U+2003) EM Space + +全角スペース (U+3000) 全形空白 + + + EM Space (U+2003) EM Space  + + 全角スペース (U+3000) 全形空白  +``` diff --git a/cspell.json b/cspell.json index 5c4e2dc743f7..972595f8a92c 100644 --- a/cspell.json +++ b/cspell.json @@ -335,7 +335,8 @@ "author: \".*?\"", "authorURL: \".*?\"", "\"author\": \".*?\"", - "(long|after){3,}" + "(long|after){3,}", + "\\\\u\\{[0-9a-fA-F]{4,6}\\}" ], "files": [ "*", diff --git a/eslint.config.js b/eslint.config.js index 9a9307c62ab7..6f0cc84b2af6 100644 --- a/eslint.config.js +++ b/eslint.config.js @@ -19,15 +19,15 @@ const toPath = (file) => url.fileURLToPath(new URL(file, import.meta.url)); const ignores = ` .tmp +test*.* # Ignore directories and files in 'tests/format' tests/format/**/* # Unignore directories and 'jsfmt.spec.js', 'format.test.js' file !tests/format/**/ !tests/format/**/format.test.js -# TODO: Remove this in 2025 -!tests/format/**/jsfmt.spec.js +# TODO: Remove this in 2025, somehow '!tests/format/**/jsfmt.spec.js' does not work +!tests/format/**/jsfmt.*.js tests/integration/cli/ -test*.* scripts/release/node_modules coverage/ dist*/ diff --git a/src/language-markdown/print-preprocess.js b/src/language-markdown/print-preprocess.js index 2b80bc458b5b..14a5543f9b82 100644 --- a/src/language-markdown/print-preprocess.js +++ b/src/language-markdown/print-preprocess.js @@ -1,3 +1,4 @@ +import htmlWhitespaceUtils from "../utils/html-whitespace-utils.js"; import { getOrderedListItemInfo, mapAst, splitText } from "./utils.js"; // 0x0 ~ 0x10ffff @@ -72,11 +73,13 @@ function splitTextIntoSentences(ast) { let { value } = node; if (parentNode.type === "paragraph") { + // CommonMark doesn't remove trailing/leading \f, but it should be + // removed in the HTML rendering process if (index === 0) { - value = value.trimStart(); + value = htmlWhitespaceUtils.trimStart(value); } if (index === parentNode.children.length - 1) { - value = value.trimEnd(); + 
value = htmlWhitespaceUtils.trimEnd(value); } } diff --git a/tests/format/markdown/trim-space/format.test.js b/tests/format/markdown/trim-space/format.test.js new file mode 100644 index 000000000000..95bcec5e2c60 --- /dev/null +++ b/tests/format/markdown/trim-space/format.test.js @@ -0,0 +1,67 @@ +function leadingTestCase() { + const paragraphs = Array.from( + { length: 3 }, + (_, i) => " ".repeat(i + 1) + "This is not a code block.\n", + ); + return { + name: "Trim leading U+0020 less than 4", + code: paragraphs.join("\n"), + output: paragraphs.map((p) => p.replace(/^ +/u, "")).join("\n"), + }; +} + +function trailingTestCase() { + const line = "The trailing space not producing hard break should be removed."; + // trailing tab is not treated as 4 spaces because trailing spaces don't + // define block structure. (https://spec.commonmark.org/0.30/#tabs) + // 2 or more spaces are treated as hard line break. + const spaces = [" ", "\t", "\t\t", "\t \t "]; + return { + name: "Trim trailing U+0020 or tab that don't produce hard break", + code: spaces.map((sp) => line + sp + "\n").join("\n"), + output: spaces.map(() => line + "\n").join("\n"), + }; +} + +function preserveVariantSpacesTestCase() { + const variantSpaces = `\v\vvertical tab\v\v + +\u{a0}\u{a0}NBSP\u{a0}\u{a0} + +\u{2002}\u{2002}en space\u{2002}\u{2002} + +\u{2003}\u{2003}em space\u{2003}\u{2003} + +\u{2004}\u{2004}1/3em\u{2004}\u{2004} + +\u{2005}\u{2005}1/4em\u{2005}\u{2005} + +\u{2028}\u{2028}line separator\u{2028}\u{2028} + +\u{2029}\u{2029}paragraph separator\u{2029}\u{2029} + +\u{3000}\u{3000}全角空白\u{3000}\u{3000} + +\u{feff}\u{feff}zero width NBSP\u{feff}\u{feff} +`; + return { + name: "Preserve non-ASCII Unicode spaces / line terminators, and vertical tab", + code: variantSpaces, + output: variantSpaces, + }; +} + +runFormatTest( + { + importMeta: import.meta, + snippets: [ + leadingTestCase(), + trailingTestCase(), + preserveVariantSpacesTestCase(), + ], + }, + ["markdown"], + { + proseWrap: 
"always", + }, +);