From b833f7034ccd58699251e2341d21368d3b13abf8 Mon Sep 17 00:00:00 2001 From: Joshua Chen Date: Wed, 30 Oct 2024 22:19:37 -0400 Subject: [PATCH] Reorganize custom dictionaries, better spell checking infra (#36255) * Reorganize custom dictionaries, better spell checking infra * Update scripts/sort_and_unique_file_lines.js * Reorg files * Updates * Apply suggestions from code review Co-authored-by: Onkar Khadangale <87750369+OnkarRuikar@users.noreply.github.com> * Typo * Fix action * Fix checkout * Update scripts/sort_and_unique_file_lines.js Co-authored-by: Onkar Khadangale <87750369+OnkarRuikar@users.noreply.github.com> * Update .vscode/cspell.json Co-authored-by: Onkar Khadangale <87750369+OnkarRuikar@users.noreply.github.com> * Add docs * Update files/en-us/mdn/writing_guidelines/writing_style_guide/index.md Co-authored-by: Onkar Khadangale <87750369+OnkarRuikar@users.noreply.github.com> --------- Co-authored-by: Onkar Khadangale <87750369+OnkarRuikar@users.noreply.github.com> --- .github/workflows/auto-cleanup-bot.yml | 3 +- .github/workflows/pr-check_cspell_lists.yml | 9 +- .github/workflows/spelling-check-bot.yml | 2 +- .lintstagedrc.js | 7 +- .vscode/cspell.json | 101 +- .vscode/dictionaries/code-entities.txt | 817 +++ .vscode/dictionaries/cultural-words.txt | 73 + .vscode/dictionaries/ignore-list.txt | 302 + .vscode/dictionaries/non-english.txt | 84 + .vscode/dictionaries/proper-names.txt | 632 ++ .vscode/dictionaries/terms-abbreviations.txt | 883 +++ .vscode/ignore-list.txt | 5810 ----------------- .vscode/terms-abbreviations.txt | 87 - .../automated_testing/index.md | 2 +- .../writing_style_guide/index.md | 8 + scripts/sort_and_unique_file_lines.js | 80 +- tests/front-matter_test_files/values.md | 1 + 17 files changed, 2942 insertions(+), 5959 deletions(-) create mode 100644 .vscode/dictionaries/code-entities.txt create mode 100644 .vscode/dictionaries/cultural-words.txt create mode 100644 .vscode/dictionaries/ignore-list.txt create mode 100644 .vscode/dictionaries/non-english.txt create mode 100644 .vscode/dictionaries/proper-names.txt create mode 100644 .vscode/dictionaries/terms-abbreviations.txt delete mode 100644 .vscode/ignore-list.txt delete mode 100644 .vscode/terms-abbreviations.txt diff --git a/.github/workflows/auto-cleanup-bot.yml b/.github/workflows/auto-cleanup-bot.yml index 620555d775b641a..c3e23c58576680b 100644 --- a/.github/workflows/auto-cleanup-bot.yml +++ b/.github/workflows/auto-cleanup-bot.yml @@ -31,8 +31,7 @@ jobs: yarn content fix-flaws yarn fix:md yarn fix:fm - node scripts/sort_and_unique_file_lines.js .vscode/ignore-list.txt - node scripts/sort_and_unique_file_lines.js .vscode/terms-abbreviations.txt + node scripts/sort_and_unique_file_lines.js .vscode/dictionaries - name: Create PR with only fixable issues if: success() diff --git a/.github/workflows/pr-check_cspell_lists.yml b/.github/workflows/pr-check_cspell_lists.yml index a40d41797fd9669..eb3139a842d4953 100644 --- a/.github/workflows/pr-check_cspell_lists.yml +++ b/.github/workflows/pr-check_cspell_lists.yml @@ -5,8 +5,7 @@ on: branches: - main paths: - - .vscode/ignore-list.txt - - .vscode/terms-abbreviations.txt + - .vscode/dictionaries/* jobs: docs: @@ -16,8 +15,7 @@ jobs: with: sparse-checkout-cone-mode: false sparse-checkout: | - .vscode/ignore-list.txt - .vscode/terms-abbreviations.txt + .vscode/dictionaries/* .nvmrc package.json scripts/sort_and_unique_file_lines.js @@ -29,5 +27,4 @@ jobs: - name: Check if cSpell word lists are in correct order run: | - node scripts/sort_and_unique_file_lines.js .vscode/ignore-list.txt --check - node scripts/sort_and_unique_file_lines.js .vscode/terms-abbreviations.txt --check + node scripts/sort_and_unique_file_lines.js --check .vscode/dictionaries diff --git a/.github/workflows/spelling-check-bot.yml b/.github/workflows/spelling-check-bot.yml index f8eacc8444f979d..6869ba085cfeee5 100644 --- a/.github/workflows/spelling-check-bot.yml +++ b/.github/workflows/spelling-check-bot.yml @@ -51,4 +51,4 @@ jobs: ${{ env.OUTPUT }} > [!TIP] - > To exclude words from the spellchecker, you can add valid words (web technology terms or abbreviations) to the [terms-abbreviations.txt](https://github.com/mdn/content/blob/main/.vscode/terms-abbreviations.txt) dictionary for IDE autocompletion. To ignore strings that are not words (\`AABBCC\` in code, for instance), you can add them to [ignore-list.txt](https://github.com/mdn/content/blob/main/.vscode/ignore-list.txt). + > If the word is actually valid or it is required to be ignored, consider adding it to one of the dictionaries under [`.vscode/dictionaries`](https://github.com/mdn/content/tree/main/.vscode/dictionaries). diff --git a/.lintstagedrc.js b/.lintstagedrc.js index decfb9664873fbd..838a8de976078ac 100644 --- a/.lintstagedrc.js +++ b/.lintstagedrc.js @@ -17,10 +17,7 @@ export default { `yarn filecheck ${filenames.join(" ")}`, ], "*": (filenames) => [`node scripts/log-url-issues.js`], - ".vscode/ignore-list.txt": (filenames) => [ - `node scripts/sort_and_unique_file_lines.js .vscode/ignore-list.txt`, - ], - ".vscode/terms-abbreviations.txt": (filenames) => [ - `node scripts/sort_and_unique_file_lines.js .vscode/terms-abbreviations.txt`, + ".vscode/dictionaries/*.txt": (filenames) => [ + `node scripts/sort_and_unique_file_lines.js ${filenames.join(" ")}`, ], }; diff --git a/.vscode/cspell.json b/.vscode/cspell.json index 0710edc9c1e1465..16154a9e2b0f183 100644 --- a/.vscode/cspell.json +++ b/.vscode/cspell.json @@ -6,6 +6,10 @@ "useGitignore": true, "dictionaries": [ "terms-abbreviations", + "cultural-words", + "proper-names", + "non-english", + "code-entities", "ignore-list", "bash", "css", @@ -32,16 +36,31 @@ ], "ignoreRegExpList": [ // macros - "{{\\s?\\w*\\(", - "{{EmbedInteractiveExample\\(.*\\)}}", - "{{EmbedLiveSample\\(.*\\)}}", - "{{EmbedYouTube\\(\"[\\w-]*\"\\)}}", - // TODO - add some details what these match - "\\(#\\w*\\)", + "{{\\s?\\w*", + "{{\\s*EmbedInteractiveExample\\(.*\\)\\s*}}", + "{{\\s*EmbedLiveSample\\(.*\\)\\s*}}", + "{{\\s*EmbedYouTube\\(.*\\)\\s*}}", + "{{\\s*EmbedGHLiveSample\\(.*\\)\\s*}}", + // Markdown links "\\]\\(\\S*\\)", - "\\*\\*\\w\\*\\*\\w*", - "\\*\\w\\*\\w*", + // Website references + "[\\w\\-.]+\\.(com|net|org|ac\\.uk)\\b", + // Things like "**J**ava**S**cript" + "\\*\\*\\w+\\*\\*\\w*", + "\\*\\w+\\*\\w*", "#[À-ž\\w-]*", + // Old Firefox interfaces + "nsIDOM\\w+", + // Don't check other scripts + "[\\u0370-\\u03FF]+", // Greek + "[\\u0400-\\u04FF]+", // Cyrillic + "[\\u0590-\\u05FF]+", // Hebrew + "[\\u0600-\\u06FF]+", // Arabic + "(\\uD835[\\uDC00-\\uDFFF])+", // Mathematical Alphanumeric Symbols + "(\\uD83A[\\uDD00-\\uDD5F])+", // Adlam script + // Percent-encoding + "[A-Za-z]*%[A-F0-9]{2}[A-Za-z]*", + // Various HTML attributes that often have non-word values "aria-activedescendant=\"(?:[^\\\"]+|\\.)*\"", "aria-controls=\"(?:[^\\\"]+|\\.)*\"", "aria-describedby=\"(?:[^\\\"]+|\\.)*\"", @@ -50,29 +69,81 @@ "aria-flowto=\"(?:[^\\\"]+|\\.)*\"", "aria-labelledby=\"(?:[^\\\"]+|\\.)*\"", "aria-owns=\"(?:[^\\\"]+|\\.)*\"", - "Base64", "class=\"(?:[^\\\"]+|\\.)*\"", "data-test-id=\"(?:[^\\\"]+|\\.)*\"", "for=\"(?:[^\\\"]+|\\.)*\"", - "HexValues", + "pattern=\"(?:[^\\\"]+|\\.)*\"", "href=\"(?:[^\\\"]+|\\.)*\"", - "(?<=id)=\"(?:[^\\\"]+|\\.)*\"", + "(?.* [--check]", + "Usage:\n\tnode scripts/sort_and_unique_file_lines.js [--check] [inputPaths...]", ); process.exit(1); +} else { + const invalidFiles = inputPaths.filter( + (filePath) => !fs.existsSync(filePath), + ); + if (invalidFiles.length) { + console.error(`Invalid file paths: ${invalidFiles.join(", ")}`); + process.exit(1); + } } -const equalsIgnoreCase = (a, b) => a?.toLowerCase() === b?.toLowerCase(); -const uniq = []; -const content = fs.readFileSync(filePath, "utf-8"); -const lines = content.split("\n").sort((a, b) => { - a = a.toLowerCase(); - b = b.toLowerCase(); - return a < b ? -1 : a > b ? 1 : 0; +const filePaths = inputPaths.flatMap((arg) => { + if (fs.statSync(arg).isDirectory()) { + return fs.readdirSync(arg).map((file) => `${arg}/${file}`); + } + return arg; }); -for (let i = 0; i < lines.length; ) { - const line = lines[i]; - if (line.trim() !== "") { - uniq.push(line); - while (equalsIgnoreCase(line, lines[++i])); - } else { - i++; +const equalsIgnoreCase = (a, b) => a?.toLowerCase() === b?.toLowerCase(); + +for (const filePath of filePaths) { + const uniq = []; + const content = fs.readFileSync(filePath, "utf-8"); + const lines = content.split("\n").sort((a, b) => { + a = a.toLowerCase(); + b = b.toLowerCase(); + return a < b ? -1 : a > b ? 1 : 0; + }); + + for (let i = 0; i < lines.length; ) { + const line = lines[i]; + if (line.trim() !== "") { + uniq.push(line); + while (equalsIgnoreCase(line, lines[++i])); + } else { + i++; + } } -} -const sortedContent = uniq.join("\n") + "\n"; -if (check) { - if (content !== sortedContent) { - console.error( - `The file is not formatted properly. Run 'node scripts/sort_and_unique_file_lines.js ${filePath}' to format the file.`, - ); - process.exit(1); + const sortedContent = uniq.join("\n") + "\n"; + if (check) { + if (content !== sortedContent) { + console.error( + `The file is not formatted properly. Run 'node scripts/sort_and_unique_file_lines.js ${filePath}' to format the file.`, + ); + process.exitCode = 1; + } else { + console.log(`The file ${filePath} looks good.`); + } } else { - console.log("The file looks good."); - process.exit(0); + fs.writeFileSync(filePath, sortedContent); } } - -fs.writeFileSync(filePath, sortedContent); diff --git a/tests/front-matter_test_files/values.md b/tests/front-matter_test_files/values.md index 2e656fb66996295..5e70ce7e8df81c9 100644 --- a/tests/front-matter_test_files/values.md +++ b/tests/front-matter_test_files/values.md @@ -3,6 +3,7 @@ title: some api method() short-title: method() page-type: api-method1 status: + # cSpell:ignore depreceted - depreceted browser-compat: api.method spec-urls: .w3c.org/api/method