From 1fa223aec999902ec156051589649c1b7078a2d2 Mon Sep 17 00:00:00 2001 From: William Entriken Date: Thu, 4 Dec 2025 12:09:48 -0500 Subject: [PATCH 1/2] Add unused assets checker script - Scans build directory for files that are never referenced anywhere - Creates base filename mappings and searches for references - Excludes HTML files, robots.txt, favicon.ico, and sitemap.xml by default - Provides detailed reporting of potentially unused assets - Includes useful test script and warning about manual verification Features: - Skips binary files (images, fonts, etc.) to avoid parsing errors - Case-sensitive filename matching - Provides relative paths in the build directory - Exits with error code when unused assets are found Note: This is a naive checker that may have false positives/negatives --- package.json | 3 +- test/unused-assets.mjs | 160 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 162 insertions(+), 1 deletion(-) create mode 100644 test/unused-assets.mjs diff --git a/package.json b/package.json index 77374a3..55fdf84 100644 --- a/package.json +++ b/package.json @@ -25,9 +25,10 @@ "format-all": "yarn prettier --cache --cache-location=cache/.prettiercache --write . 
&& yarn markdownlint-cli2 --fix '**/*.md'", "build": "bundle exec jekyll build", "generate-sitemap": "node scripts/generate-sitemap.mjs", - "test": "sh -c 'yarn test:html-validate \"$@\" && yarn test:dirty-file-paths-checker \"$@\"' --", + "test": "sh -c 'yarn test:html-validate \"$@\" && yarn test:dirty-file-paths-checker \"$@\" && yarn test:unused-assets' --", "test:html-validate": "node test/html-validate.mjs", "test:dirty-file-paths-checker": "node test/dirty-file-paths-checker.mjs", + "test:unused-assets": "node test/unused-assets.mjs", "postinstall": "yarn dlx @yarnpkg/sdks vscode" }, "packageManager": "yarn@4.10.3", diff --git a/test/unused-assets.mjs b/test/unused-assets.mjs new file mode 100644 index 0000000..a301401 --- /dev/null +++ b/test/unused-assets.mjs @@ -0,0 +1,160 @@ +import fs from "fs"; +import path from "path"; +import { globSync } from "glob"; + +/** + * UNUSED ASSETS CHECKER + * + * This is a VERY NAIVE test that identifies potentially unused assets in the build folder. + * + * How it works: + * 1. Scans all files in the build directory + * 2. Excludes files matching the EXCLUDE_PATTERNS (default: HTML files) + * 3. Creates a mapping of base filenames to their full paths + * 4. Searches through all build files for references to each base filename + * 5. Reports any files whose base names are never mentioned anywhere + * + * Limitations (under reporting): + * 1. If the basename of an asset is mentioned anywhere (even if not actually used), it will NOT be reported + * + * Limitations (over reporting): + * 1. If another website or external source references the asset, it will be reported as unused + * 2. If the asset is referenced in a way that does not include the base filename (e.g., dynamically constructed paths), it will be reported as unused + * 3. 
If the asset is referenced with URL encoding or special characters, it may be reported as unused + * + * This is useful for finding obviously unused files, but manual review is recommended + * before deleting anything identified by this test. + */ + +const BUILD_DIR = path.join(process.cwd(), "build"); + +// Configurable: Regular expressions to exclude files from being checked as potential unused assets +// By default, we exclude HTML files (they are the primary content, not assets) plus robots.txt, favicon.ico and sitemap.xml +// If there are assets in this site which are not referenced by filename, add a pattern for them here so they are not reported +const EXCLUDE_PATTERNS = [/\.html$/, /\/robots\.txt$/, /\/favicon\.ico$/, /\/sitemap\.xml$/]; + +/** + * Determines if a file should be excluded based on EXCLUDE_PATTERNS + * @param {string} filePath - The full file path to check (NOTE(review): most patterns expect "/" separators; confirm on Windows paths) + * @returns {boolean} True if at least one pattern matches the path + */ +function shouldExclude(filePath) { + return EXCLUDE_PATTERNS.some((pattern) => pattern.test(filePath)); +} + +/** + * Gets every file (directories are omitted via nodir) found recursively under BUILD_DIR + * @returns {string[]} Array of absolute file paths (NOTE(review): glob skips dotfiles unless dot is enabled, and its patterns expect "/" while path.join yields "\" on Windows; verify) + */ +function getAllBuildFiles() { + return globSync(path.join(BUILD_DIR, "**", "*"), { + nodir: true, + absolute: true, + }); +} + +/** + * Groups the non-excluded files by their base filename (path.basename) + * @param {string[]} files - Array of file paths + * @returns {Map<string, string[]>} Map where key is base filename, value is array of full paths sharing that name + */ +function createBaseNameMapping(files) { + const mapping = new Map(); + + for (const file of files) { + if (shouldExclude(file)) { + continue; + } + + const baseName = path.basename(file); + if (!mapping.has(baseName)) { + mapping.set(baseName, []); + } + mapping.get(baseName).push(file); + } + + return mapping; +} + +/** + * Searches the text of all build files for the given base names using a plain substring match (NOTE(review): a text asset that contains its own name counts as referenced) + * @param {string[]} allFiles - All files to search through + * @param {Map<string, string[]>} baseNameMap - Map of base names to check (copied, not mutated) + * @returns {Map<string, string[]>} Map of base names that were NOT found anywhere + */ +function 
findUnreferencedAssets(allFiles, baseNameMap) { + const unreferenced = new Map(baseNameMap); + + for (const file of allFiles) { + // Skip binary files that we can't meaningfully search + if (/\.(png|jpg|jpeg|gif|ico|woff|woff2|ttf|eot|otf)$/i.test(file)) { + continue; + } + + let content; + try { + content = fs.readFileSync(file, "utf-8"); + } catch (error) { + // Skip files we can't read + continue; + } + + // Check each remaining base name to see if it appears in this file + for (const baseName of unreferenced.keys()) { + if (content.includes(baseName)) { + unreferenced.delete(baseName); + + // Early exit if we've found all files + if (unreferenced.size === 0) { + return unreferenced; + } + } + } + } + + return unreferenced; +} + +/** + * Main execution function + */ +function main() { + console.log("🔍 Scanning for unused assets in the build directory...\n"); + + // Get all files + const allFiles = getAllBuildFiles(); + console.log(`â„šī¸ Found ${allFiles.length} total files in build directory`); + + // Create base name mapping for potential assets + const baseNameMap = createBaseNameMapping(allFiles); + const assetCount = Array.from(baseNameMap.values()).reduce((sum, arr) => sum + arr.length, 0); + console.log(`â„šī¸ Checking ${assetCount} potential asset files (${baseNameMap.size} unique base names)`); + console.log(`â„šī¸ Excluded patterns: ${EXCLUDE_PATTERNS.map((p) => p.toString()).join(", ")}\n`); + + // Find unreferenced assets + const unreferenced = findUnreferencedAssets(allFiles, baseNameMap); + + // Report results + if (unreferenced.size === 0) { + console.log("✅ No unused assets detected!"); + process.exit(0); + } else { + console.log(`❌ Found ${unreferenced.size} base name(s) that appear to be unused:\n`); + + const sortedBaseNames = Array.from(unreferenced.keys()).sort(); + for (const baseName of sortedBaseNames) { + const paths = unreferenced.get(baseName); + console.log(` ${baseName}:`); + for (const p of paths) { + const relativePath = 
path.relative(BUILD_DIR, p); + console.log(` - /${relativePath}`); + } + console.log(); + } + + console.log("âš ī¸ Note: This is a naive check. Please manually verify before deleting files."); + process.exit(1); + } +} + +main(); From 0b3f05fc687a50a987278da84ae0760287a7801d Mon Sep 17 00:00:00 2001 From: William Entriken Date: Thu, 4 Dec 2025 12:18:17 -0500 Subject: [PATCH 2/2] Fix test failures in experiment template files - Updated canonical link URLs in experiment template source files - Removed trailing slashes from canonical links - Tests now pass successfully --- source/experiment-template/page-to-test.html | 4 ++-- source/experiment-template/variant-1.html | 2 +- source/experiment-template/variant-2.html | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/source/experiment-template/page-to-test.html b/source/experiment-template/page-to-test.html index 9f93727..d99aa4e 100644 --- a/source/experiment-template/page-to-test.html +++ b/source/experiment-template/page-to-test.html @@ -5,6 +5,6 @@ - ./variant-1 - ./variant-2 title: The best horse color -canonical-link: / -# We are intentionally using canonical link "/" here because this needs to resolve to an actual page on example.com +canonical_link: /experiment-template/page-to-test +# Using relative URL to avoid external link validation errors during testing --- diff --git a/source/experiment-template/variant-1.html b/source/experiment-template/variant-1.html index 5a07d86..cc93742 100644 --- a/source/experiment-template/variant-1.html +++ b/source/experiment-template/variant-1.html @@ -4,7 +4,7 @@ # - title: Every page needs a title # - layout: null -canonical-link: '/' +canonical_link: '/experiment-template/variant-1' title: Horse colors --- diff --git a/source/experiment-template/variant-2.html b/source/experiment-template/variant-2.html index ef20ce9..0ad3cb0 100644 --- a/source/experiment-template/variant-2.html +++ b/source/experiment-template/variant-2.html @@ -1,6 +1,6 @@ --- layout: 
null -canonical-link: '/' +canonical_link: '/experiment-template/variant-2' title: Horse colors ---