From 0bca4ec9688e13a1152521c2b4bd85e42b958867 Mon Sep 17 00:00:00 2001 From: gapry Date: Thu, 5 Mar 2026 16:01:52 +0800 Subject: [PATCH] blog: import posts from Jekyll --- build.js | 9 +- bun.lock | 62 +++ package.json | 4 +- public/posts/2025/2025-12-13-Test7.md | 1 - ...f-Compiler-Optimisations-Study-Notes-01.md | 308 ++++++++++++++ public/posts/2026/2026-01-29-Test3.md | 1 - ...f-Compiler-Optimisations-Study-Notes-02.md | 186 ++++++++ ...f-Compiler-Optimisations-Study-Notes-03.md | 274 ++++++++++++ public/posts/2026/2026-01-31-Test123.md | 1 - public/posts/2026/2026-02-03-Test5.md | 1 - ...f-Compiler-Optimisations-Study-Notes-04.md | 399 ++++++++++++++++++ ...f-Compiler-Optimisations-Study-Notes-05.md | 292 +++++++++++++ ...f-Compiler-Optimisations-Study-Notes-06.md | 247 +++++++++++ public/posts/2026/2026-03-05-Test.md | 16 - src/App.css | 88 +++- src/App.jsx | 56 ++- src/Home.css | 59 +++ src/Home.jsx | 35 +- src/NotFound.css | 31 ++ src/NotFound.jsx | 15 +- 20 files changed, 2027 insertions(+), 58 deletions(-) delete mode 100644 public/posts/2025/2025-12-13-Test7.md create mode 100644 public/posts/2026/2026-01-01-Advent-of-Compiler-Optimisations-Study-Notes-01.md delete mode 100644 public/posts/2026/2026-01-29-Test3.md create mode 100644 public/posts/2026/2026-01-31-Advent-of-Compiler-Optimisations-Study-Notes-02.md create mode 100644 public/posts/2026/2026-01-31-Advent-of-Compiler-Optimisations-Study-Notes-03.md delete mode 100644 public/posts/2026/2026-01-31-Test123.md delete mode 100644 public/posts/2026/2026-02-03-Test5.md create mode 100644 public/posts/2026/2026-02-27-Advent-of-Compiler-Optimisations-Study-Notes-04.md create mode 100644 public/posts/2026/2026-02-28-Advent-of-Compiler-Optimisations-Study-Notes-05.md create mode 100644 public/posts/2026/2026-02-28-Advent-of-Compiler-Optimisations-Study-Notes-06.md delete mode 100644 public/posts/2026/2026-03-05-Test.md create mode 100644 src/Home.css create mode 100644 src/NotFound.css diff --git 
a/build.js b/build.js index 0e94bee..c5f605b 100644 --- a/build.js +++ b/build.js @@ -34,7 +34,14 @@ years.forEach(year => { } }); -allPosts.sort((a, b) => b.date.localeCompare(a.date)); +allPosts.sort((a, b) => { + const dateCompare = b.date.localeCompare(a.date); + + if (dateCompare === 0) { + return b.originalName.localeCompare(a.originalName); + } + return dateCompare; +}); const postsData = JSON.stringify(allPosts, null, 2); fs.writeFileSync('./public/posts.json', postsData); diff --git a/bun.lock b/bun.lock index 883ea82..89397c9 100644 --- a/bun.lock +++ b/bun.lock @@ -8,6 +8,8 @@ "react": "^19.2.0", "react-dom": "^19.2.0", "react-markdown": "^10.1.0", + "react-syntax-highlighter": "^16.1.1", + "remark-gfm": "^4.0.1", }, "devDependencies": { "@eslint/js": "^9.39.1", @@ -55,6 +57,8 @@ "@babel/plugin-transform-react-jsx-source": ["@babel/plugin-transform-react-jsx-source@7.27.1", "", { "dependencies": { "@babel/helper-plugin-utils": "^7.27.1" }, "peerDependencies": { "@babel/core": "^7.0.0-0" } }, "sha512-zbwoTsBruTeKB9hSq73ha66iFeJHuaFkUbwvqElnygoNbj/jHRsSeokowZFN3CZ64IvEqcmmkVe89OPXc7ldAw=="], + "@babel/runtime": ["@babel/runtime@7.28.6", "", {}, "sha512-05WQkdpL9COIMz4LjTxGpPNCdlpyimKppYNoJ5Di5EUObifl8t4tuLuUBBZEpoLYOmfvIWrsp9fCl0HoPRVTdA=="], + "@babel/template": ["@babel/template@7.28.6", "", { "dependencies": { "@babel/code-frame": "^7.28.6", "@babel/parser": "^7.28.6", "@babel/types": "^7.28.6" } }, "sha512-YA6Ma2KsCdGb+WC6UpBVFJGXL58MDA6oyONbjyF/+5sBgxY/dwkhLogbMT2GXXyU84/IhRw/2D1Os1B/giz+BQ=="], "@babel/traverse": ["@babel/traverse@7.29.0", "", { "dependencies": { "@babel/code-frame": "^7.29.0", "@babel/generator": "^7.29.0", "@babel/helper-globals": "^7.28.0", "@babel/parser": "^7.29.0", "@babel/template": "^7.28.6", "@babel/types": "^7.29.0", "debug": "^4.3.1" } }, "sha512-4HPiQr0X7+waHfyXPZpWPfWL/J7dcN1mx9gL6WdQVMbPnF3+ZhSMs8tCxN7oHddJE9fhNE7+lxdnlyemKfJRuA=="], @@ -223,6 +227,8 @@ "@types/ms": ["@types/ms@2.1.0", "", {}, 
"sha512-GsCCIZDE/p3i96vtEqx+7dBUGXrc7zeSK3wwPHIaRThS+9OhWIXRqzs4d6k1SVU8g91DrNRWxWUGhp5KXQb2VA=="], + "@types/prismjs": ["@types/prismjs@1.26.6", "", {}, "sha512-vqlvI7qlMvcCBbVe0AKAb4f97//Hy0EBTaiW8AalRnG/xAN5zOiWWyrNqNXeq8+KAuvRewjCVY1+IPxk4RdNYw=="], + "@types/react": ["@types/react@19.2.14", "", { "dependencies": { "csstype": "^3.2.2" } }, "sha512-ilcTH/UniCkMdtexkoCN0bI7pMcJDvmQFPvuPvmEaYA/NSfFTAgdUSLAoVjaRJm7+6PvcM+q1zYOwS4wTYMF9w=="], "@types/react-dom": ["@types/react-dom@19.2.3", "", { "peerDependencies": { "@types/react": "^19.2.0" } }, "sha512-jp2L/eY6fn+KgVVQAOqYItbF0VY/YApe5Mz2F0aykSO8gx31bYCZyvSeYxCHKvzHG5eZjc+zyaS5BrBWya2+kQ=="], @@ -331,6 +337,8 @@ "fast-levenshtein": ["fast-levenshtein@2.0.6", "", {}, "sha512-DCXu6Ifhqcks7TZKY3Hxp3y6qphY5SJZmrWMDrKcERSOXWQdMhU9Ig/PYrzyw/ul9jOIyh0N4M0tbC5hodg8dw=="], + "fault": ["fault@1.0.4", "", { "dependencies": { "format": "^0.2.0" } }, "sha512-CJ0HCB5tL5fYTEA7ToAq5+kTwd++Borf1/bifxd9iT70QcXr4MRrO3Llf8Ifs70q+SJcGHFtnIE/Nw6giCtECA=="], + "fdir": ["fdir@6.5.0", "", { "peerDependencies": { "picomatch": "^3 || ^4" }, "optionalPeers": ["picomatch"] }, "sha512-tIbYtZbucOs0BRGqPJkshJUYdL+SDH7dVM8gjy+ERp3WAUjLEFJE+02kanyHtwjWOnwrKYBiwAmM0p4kLJAnXg=="], "file-entry-cache": ["file-entry-cache@8.0.0", "", { "dependencies": { "flat-cache": "^4.0.0" } }, "sha512-XXTUwCvisa5oacNGRP9SfNtYBNAMi+RPwBFmblZEF7N7swHYQS6/Zfk7SRwx4D5j3CH211YNRco1DEMNVfZCnQ=="], @@ -341,6 +349,8 @@ "flatted": ["flatted@3.3.4", "", {}, "sha512-3+mMldrTAPdta5kjX2G2J7iX4zxtnwpdA8Tr2ZSjkyPSanvbZAcy6flmtnXbEybHrDcU9641lxrMfFuUxVz9vA=="], + "format": ["format@0.2.2", "", {}, "sha512-wzsgA6WOq+09wrU1tsJ09udeR/YZRaeArL9e1wPbFg3GG2yDnC2ldKpxs4xunpFF9DgqCqOIra3bc1HWrJ37Ww=="], + "fsevents": ["fsevents@2.3.3", "", { "os": "darwin" }, "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw=="], "gensync": ["gensync@1.0.0-beta.2", "", {}, 
"sha512-3hN7NaskYvMDLQY55gnW3NQ+mesEAepTqlg+VEbj7zzqEMBVNhzcGYYeqFo/TlYz6eQiFcp1HcsCZO+nGgS8zg=="], @@ -351,14 +361,22 @@ "has-flag": ["has-flag@4.0.0", "", {}, "sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ=="], + "hast-util-parse-selector": ["hast-util-parse-selector@4.0.0", "", { "dependencies": { "@types/hast": "^3.0.0" } }, "sha512-wkQCkSYoOGCRKERFWcxMVMOcYE2K1AaNLU8DXS9arxnLOUEWbOXKXiJUNzEpqZ3JOKpnha3jkFrumEjVliDe7A=="], + "hast-util-to-jsx-runtime": ["hast-util-to-jsx-runtime@2.3.6", "", { "dependencies": { "@types/estree": "^1.0.0", "@types/hast": "^3.0.0", "@types/unist": "^3.0.0", "comma-separated-tokens": "^2.0.0", "devlop": "^1.0.0", "estree-util-is-identifier-name": "^3.0.0", "hast-util-whitespace": "^3.0.0", "mdast-util-mdx-expression": "^2.0.0", "mdast-util-mdx-jsx": "^3.0.0", "mdast-util-mdxjs-esm": "^2.0.0", "property-information": "^7.0.0", "space-separated-tokens": "^2.0.0", "style-to-js": "^1.0.0", "unist-util-position": "^5.0.0", "vfile-message": "^4.0.0" } }, "sha512-zl6s8LwNyo1P9uw+XJGvZtdFF1GdAkOg8ujOw+4Pyb76874fLps4ueHXDhXWdk6YHQ6OgUtinliG7RsYvCbbBg=="], "hast-util-whitespace": ["hast-util-whitespace@3.0.0", "", { "dependencies": { "@types/hast": "^3.0.0" } }, "sha512-88JUN06ipLwsnv+dVn+OIYOvAuvBMy/Qoi6O7mQHxdPXpjy+Cd6xRkWwux7DKO+4sYILtLBRIKgsdpS2gQc7qw=="], + "hastscript": ["hastscript@9.0.1", "", { "dependencies": { "@types/hast": "^3.0.0", "comma-separated-tokens": "^2.0.0", "hast-util-parse-selector": "^4.0.0", "property-information": "^7.0.0", "space-separated-tokens": "^2.0.0" } }, "sha512-g7df9rMFX/SPi34tyGCyUBREQoKkapwdY/T04Qn9TDWfHhAYt4/I0gMVirzK5wEzeUqIjEB+LXC/ypb7Aqno5w=="], + "hermes-estree": ["hermes-estree@0.25.1", "", {}, "sha512-0wUoCcLp+5Ev5pDW2OriHC2MJCbwLwuRx+gAqMTOkGKJJiBCLjtrvy4PWUGn6MIVefecRpzoOZ/UV6iGdOr+Cw=="], "hermes-parser": ["hermes-parser@0.25.1", "", { "dependencies": { "hermes-estree": "0.25.1" } }, 
"sha512-6pEjquH3rqaI6cYAXYPcz9MS4rY6R4ngRgrgfDshRptUZIc3lw0MCIJIGDj9++mfySOuPTHB4nrSW99BCvOPIA=="], + "highlight.js": ["highlight.js@10.7.3", "", {}, "sha512-tzcUFauisWKNHaRkN4Wjl/ZA07gENAjFl3J/c480dprkGTg5EQstgaNFqBfUqCq54kZRIEcreTsAgF/m2quD7A=="], + + "highlightjs-vue": ["highlightjs-vue@1.0.0", "", {}, "sha512-PDEfEF102G23vHmPhLyPboFCD+BkMGu+GuJe2d9/eH4FsCwvgBpnc9n0pGE+ffKdph38s6foEZiEjdgHdzp+IA=="], + "html-url-attributes": ["html-url-attributes@3.0.1", "", {}, "sha512-ol6UPyBWqsrO6EJySPz2O7ZSr856WDrEzM5zMqp+FJJLGMW35cLYmmZnl0vztAZxRUoNZJFTCohfjuIJ8I4QBQ=="], "ignore": ["ignore@5.3.2", "", {}, "sha512-hsBTNUqQTDwkWtcdYI2i06Y/nUBEsNEDJKjWdigLvegy8kDuJAS8uRlpkkcQpyEXL0Z/pjDy5HBmMjRCJ2gq+g=="], @@ -409,10 +427,28 @@ "longest-streak": ["longest-streak@3.1.0", "", {}, "sha512-9Ri+o0JYgehTaVBBDoMqIl8GXtbWg711O3srftcHhZ0dqnETqLaoIK0x17fUw9rFSlK/0NlsKe0Ahhyl5pXE2g=="], + "lowlight": ["lowlight@1.20.0", "", { "dependencies": { "fault": "^1.0.0", "highlight.js": "~10.7.0" } }, "sha512-8Ktj+prEb1RoCPkEOrPMYUN/nCggB7qAWe3a7OpMjWQkh3l2RD5wKRQ+o8Q8YuI9RG/xs95waaI/E6ym/7NsTw=="], + "lru-cache": ["lru-cache@5.1.1", "", { "dependencies": { "yallist": "^3.0.2" } }, "sha512-KpNARQA3Iwv+jTA0utUVVbrh+Jlrr1Fv0e56GGzAFOXN7dk/FviaDW8LHmK52DlcH4WP2n6gI8vN1aesBFgo9w=="], + "markdown-table": ["markdown-table@3.0.4", "", {}, "sha512-wiYz4+JrLyb/DqW2hkFJxP7Vd7JuTDm77fvbM8VfEQdmSMqcImWeeRbHwZjBjIFki/VaMK2BhFi7oUUZeM5bqw=="], + + "mdast-util-find-and-replace": ["mdast-util-find-and-replace@3.0.2", "", { "dependencies": { "@types/mdast": "^4.0.0", "escape-string-regexp": "^5.0.0", "unist-util-is": "^6.0.0", "unist-util-visit-parents": "^6.0.0" } }, "sha512-Tmd1Vg/m3Xz43afeNxDIhWRtFZgM2VLyaf4vSTYwudTyeuTneoL3qtWMA5jeLyz/O1vDJmmV4QuScFCA2tBPwg=="], + "mdast-util-from-markdown": ["mdast-util-from-markdown@2.0.3", "", { "dependencies": { "@types/mdast": "^4.0.0", "@types/unist": "^3.0.0", "decode-named-character-reference": "^1.0.0", "devlop": "^1.0.0", "mdast-util-to-string": "^4.0.0", 
"micromark": "^4.0.0", "micromark-util-decode-numeric-character-reference": "^2.0.0", "micromark-util-decode-string": "^2.0.0", "micromark-util-normalize-identifier": "^2.0.0", "micromark-util-symbol": "^2.0.0", "micromark-util-types": "^2.0.0", "unist-util-stringify-position": "^4.0.0" } }, "sha512-W4mAWTvSlKvf8L6J+VN9yLSqQ9AOAAvHuoDAmPkz4dHf553m5gVj2ejadHJhoJmcmxEnOv6Pa8XJhpxE93kb8Q=="], + "mdast-util-gfm": ["mdast-util-gfm@3.1.0", "", { "dependencies": { "mdast-util-from-markdown": "^2.0.0", "mdast-util-gfm-autolink-literal": "^2.0.0", "mdast-util-gfm-footnote": "^2.0.0", "mdast-util-gfm-strikethrough": "^2.0.0", "mdast-util-gfm-table": "^2.0.0", "mdast-util-gfm-task-list-item": "^2.0.0", "mdast-util-to-markdown": "^2.0.0" } }, "sha512-0ulfdQOM3ysHhCJ1p06l0b0VKlhU0wuQs3thxZQagjcjPrlFRqY215uZGHHJan9GEAXd9MbfPjFJz+qMkVR6zQ=="], + + "mdast-util-gfm-autolink-literal": ["mdast-util-gfm-autolink-literal@2.0.1", "", { "dependencies": { "@types/mdast": "^4.0.0", "ccount": "^2.0.0", "devlop": "^1.0.0", "mdast-util-find-and-replace": "^3.0.0", "micromark-util-character": "^2.0.0" } }, "sha512-5HVP2MKaP6L+G6YaxPNjuL0BPrq9orG3TsrZ9YXbA3vDw/ACI4MEsnoDpn6ZNm7GnZgtAcONJyPhOP8tNJQavQ=="], + + "mdast-util-gfm-footnote": ["mdast-util-gfm-footnote@2.1.0", "", { "dependencies": { "@types/mdast": "^4.0.0", "devlop": "^1.1.0", "mdast-util-from-markdown": "^2.0.0", "mdast-util-to-markdown": "^2.0.0", "micromark-util-normalize-identifier": "^2.0.0" } }, "sha512-sqpDWlsHn7Ac9GNZQMeUzPQSMzR6Wv0WKRNvQRg0KqHh02fpTz69Qc1QSseNX29bhz1ROIyNyxExfawVKTm1GQ=="], + + "mdast-util-gfm-strikethrough": ["mdast-util-gfm-strikethrough@2.0.0", "", { "dependencies": { "@types/mdast": "^4.0.0", "mdast-util-from-markdown": "^2.0.0", "mdast-util-to-markdown": "^2.0.0" } }, "sha512-mKKb915TF+OC5ptj5bJ7WFRPdYtuHv0yTRxK2tJvi+BDqbkiG7h7u/9SI89nRAYcmap2xHQL9D+QG/6wSrTtXg=="], + + "mdast-util-gfm-table": ["mdast-util-gfm-table@2.0.0", "", { "dependencies": { "@types/mdast": "^4.0.0", "devlop": "^1.0.0", 
"markdown-table": "^3.0.0", "mdast-util-from-markdown": "^2.0.0", "mdast-util-to-markdown": "^2.0.0" } }, "sha512-78UEvebzz/rJIxLvE7ZtDd/vIQ0RHv+3Mh5DR96p7cS7HsBhYIICDBCu8csTNWNO6tBWfqXPWekRuj2FNOGOZg=="], + + "mdast-util-gfm-task-list-item": ["mdast-util-gfm-task-list-item@2.0.0", "", { "dependencies": { "@types/mdast": "^4.0.0", "devlop": "^1.0.0", "mdast-util-from-markdown": "^2.0.0", "mdast-util-to-markdown": "^2.0.0" } }, "sha512-IrtvNvjxC1o06taBAVJznEnkiHxLFTzgonUdy8hzFVeDun0uTjxxrRGVaNFqkU1wJR3RBPEfsxmU6jDWPofrTQ=="], + "mdast-util-mdx-expression": ["mdast-util-mdx-expression@2.0.1", "", { "dependencies": { "@types/estree-jsx": "^1.0.0", "@types/hast": "^3.0.0", "@types/mdast": "^4.0.0", "devlop": "^1.0.0", "mdast-util-from-markdown": "^2.0.0", "mdast-util-to-markdown": "^2.0.0" } }, "sha512-J6f+9hUp+ldTZqKRSg7Vw5V6MqjATc+3E4gf3CFNcuZNWD8XdyI6zQ8GqH7f8169MM6P7hMBRDVGnn7oHB9kXQ=="], "mdast-util-mdx-jsx": ["mdast-util-mdx-jsx@3.2.0", "", { "dependencies": { "@types/estree-jsx": "^1.0.0", "@types/hast": "^3.0.0", "@types/mdast": "^4.0.0", "@types/unist": "^3.0.0", "ccount": "^2.0.0", "devlop": "^1.1.0", "mdast-util-from-markdown": "^2.0.0", "mdast-util-to-markdown": "^2.0.0", "parse-entities": "^4.0.0", "stringify-entities": "^4.0.0", "unist-util-stringify-position": "^4.0.0", "vfile-message": "^4.0.0" } }, "sha512-lj/z8v0r6ZtsN/cGNNtemmmfoLAFZnjMbNyLzBafjzikOM+glrjNHPlf6lQDOTccj9n5b0PPihEBbhneMyGs1Q=="], @@ -431,6 +467,20 @@ "micromark-core-commonmark": ["micromark-core-commonmark@2.0.3", "", { "dependencies": { "decode-named-character-reference": "^1.0.0", "devlop": "^1.0.0", "micromark-factory-destination": "^2.0.0", "micromark-factory-label": "^2.0.0", "micromark-factory-space": "^2.0.0", "micromark-factory-title": "^2.0.0", "micromark-factory-whitespace": "^2.0.0", "micromark-util-character": "^2.0.0", "micromark-util-chunked": "^2.0.0", "micromark-util-classify-character": "^2.0.0", "micromark-util-html-tag-name": "^2.0.0", 
"micromark-util-normalize-identifier": "^2.0.0", "micromark-util-resolve-all": "^2.0.0", "micromark-util-subtokenize": "^2.0.0", "micromark-util-symbol": "^2.0.0", "micromark-util-types": "^2.0.0" } }, "sha512-RDBrHEMSxVFLg6xvnXmb1Ayr2WzLAWjeSATAoxwKYJV94TeNavgoIdA0a9ytzDSVzBy2YKFK+emCPOEibLeCrg=="], + "micromark-extension-gfm": ["micromark-extension-gfm@3.0.0", "", { "dependencies": { "micromark-extension-gfm-autolink-literal": "^2.0.0", "micromark-extension-gfm-footnote": "^2.0.0", "micromark-extension-gfm-strikethrough": "^2.0.0", "micromark-extension-gfm-table": "^2.0.0", "micromark-extension-gfm-tagfilter": "^2.0.0", "micromark-extension-gfm-task-list-item": "^2.0.0", "micromark-util-combine-extensions": "^2.0.0", "micromark-util-types": "^2.0.0" } }, "sha512-vsKArQsicm7t0z2GugkCKtZehqUm31oeGBV/KVSorWSy8ZlNAv7ytjFhvaryUiCUJYqs+NoE6AFhpQvBTM6Q4w=="], + + "micromark-extension-gfm-autolink-literal": ["micromark-extension-gfm-autolink-literal@2.1.0", "", { "dependencies": { "micromark-util-character": "^2.0.0", "micromark-util-sanitize-uri": "^2.0.0", "micromark-util-symbol": "^2.0.0", "micromark-util-types": "^2.0.0" } }, "sha512-oOg7knzhicgQ3t4QCjCWgTmfNhvQbDDnJeVu9v81r7NltNCVmhPy1fJRX27pISafdjL+SVc4d3l48Gb6pbRypw=="], + + "micromark-extension-gfm-footnote": ["micromark-extension-gfm-footnote@2.1.0", "", { "dependencies": { "devlop": "^1.0.0", "micromark-core-commonmark": "^2.0.0", "micromark-factory-space": "^2.0.0", "micromark-util-character": "^2.0.0", "micromark-util-normalize-identifier": "^2.0.0", "micromark-util-sanitize-uri": "^2.0.0", "micromark-util-symbol": "^2.0.0", "micromark-util-types": "^2.0.0" } }, "sha512-/yPhxI1ntnDNsiHtzLKYnE3vf9JZ6cAisqVDauhp4CEHxlb4uoOTxOCJ+9s51bIB8U1N1FJ1RXOKTIlD5B/gqw=="], + + "micromark-extension-gfm-strikethrough": ["micromark-extension-gfm-strikethrough@2.1.0", "", { "dependencies": { "devlop": "^1.0.0", "micromark-util-chunked": "^2.0.0", "micromark-util-classify-character": "^2.0.0", "micromark-util-resolve-all": 
"^2.0.0", "micromark-util-symbol": "^2.0.0", "micromark-util-types": "^2.0.0" } }, "sha512-ADVjpOOkjz1hhkZLlBiYA9cR2Anf8F4HqZUO6e5eDcPQd0Txw5fxLzzxnEkSkfnD0wziSGiv7sYhk/ktvbf1uw=="], + + "micromark-extension-gfm-table": ["micromark-extension-gfm-table@2.1.1", "", { "dependencies": { "devlop": "^1.0.0", "micromark-factory-space": "^2.0.0", "micromark-util-character": "^2.0.0", "micromark-util-symbol": "^2.0.0", "micromark-util-types": "^2.0.0" } }, "sha512-t2OU/dXXioARrC6yWfJ4hqB7rct14e8f7m0cbI5hUmDyyIlwv5vEtooptH8INkbLzOatzKuVbQmAYcbWoyz6Dg=="], + + "micromark-extension-gfm-tagfilter": ["micromark-extension-gfm-tagfilter@2.0.0", "", { "dependencies": { "micromark-util-types": "^2.0.0" } }, "sha512-xHlTOmuCSotIA8TW1mDIM6X2O1SiX5P9IuDtqGonFhEK0qgRI4yeC6vMxEV2dgyr2TiD+2PQ10o+cOhdVAcwfg=="], + + "micromark-extension-gfm-task-list-item": ["micromark-extension-gfm-task-list-item@2.1.0", "", { "dependencies": { "devlop": "^1.0.0", "micromark-factory-space": "^2.0.0", "micromark-util-character": "^2.0.0", "micromark-util-symbol": "^2.0.0", "micromark-util-types": "^2.0.0" } }, "sha512-qIBZhqxqI6fjLDYFTBIa4eivDMnP+OZqsNwmQ3xNLE4Cxwc+zfQEfbs6tzAo2Hjq+bh6q5F+Z8/cksrLFYWQQw=="], + "micromark-factory-destination": ["micromark-factory-destination@2.0.1", "", { "dependencies": { "micromark-util-character": "^2.0.0", "micromark-util-symbol": "^2.0.0", "micromark-util-types": "^2.0.0" } }, "sha512-Xe6rDdJlkmbFRExpTOmRj9N3MaWmbAgdpSrBQvCFqhezUn4AHqJHbaEnfbVYYiexVSs//tqOdY/DxhjdCiJnIA=="], "micromark-factory-label": ["micromark-factory-label@2.0.1", "", { "dependencies": { "devlop": "^1.0.0", "micromark-util-character": "^2.0.0", "micromark-util-symbol": "^2.0.0", "micromark-util-types": "^2.0.0" } }, "sha512-VFMekyQExqIW7xIChcXn4ok29YE3rnuyveW3wZQWWqF4Nv9Wk5rgJ99KzPvHjkmPXF93FXIbBp6YdW3t71/7Vg=="], @@ -501,6 +551,8 @@ "prelude-ls": ["prelude-ls@1.2.1", "", {}, "sha512-vkcDPrRZo1QZLbn5RLGPpg/WmIQ65qoWWhcGKf/b5eplkkarX0m9z8ppCat4mlOqUsWpyNuYgO3VRyrYHSzX5g=="], + "prismjs": 
["prismjs@1.30.0", "", {}, "sha512-DEvV2ZF2r2/63V+tK8hQvrR2ZGn10srHbXviTlcv7Kpzw8jWiNTqbVgjO3IY8RxrrOUF8VPMQQFysYYYv0YZxw=="], + "property-information": ["property-information@7.1.0", "", {}, "sha512-TwEZ+X+yCJmYfL7TPUOcvBZ4QfoT5YenQiJuX//0th53DE6w0xxLEtfK3iyryQFddXuvkIk51EEgrJQ0WJkOmQ=="], "punycode": ["punycode@2.3.1", "", {}, "sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg=="], @@ -513,10 +565,18 @@ "react-refresh": ["react-refresh@0.18.0", "", {}, "sha512-QgT5//D3jfjJb6Gsjxv0Slpj23ip+HtOpnNgnb2S5zU3CB26G/IDPGoy4RJB42wzFE46DRsstbW6tKHoKbhAxw=="], + "react-syntax-highlighter": ["react-syntax-highlighter@16.1.1", "", { "dependencies": { "@babel/runtime": "^7.28.4", "highlight.js": "^10.4.1", "highlightjs-vue": "^1.0.0", "lowlight": "^1.17.0", "prismjs": "^1.30.0", "refractor": "^5.0.0" }, "peerDependencies": { "react": ">= 0.14.0" } }, "sha512-PjVawBGy80C6YbC5DDZJeUjBmC7skaoEUdvfFQediQHgCL7aKyVHe57SaJGfQsloGDac+gCpTfRdtxzWWKmCXA=="], + + "refractor": ["refractor@5.0.0", "", { "dependencies": { "@types/hast": "^3.0.0", "@types/prismjs": "^1.0.0", "hastscript": "^9.0.0", "parse-entities": "^4.0.0" } }, "sha512-QXOrHQF5jOpjjLfiNk5GFnWhRXvxjUVnlFxkeDmewR5sXkr3iM46Zo+CnRR8B+MDVqkULW4EcLVcRBNOPXHosw=="], + + "remark-gfm": ["remark-gfm@4.0.1", "", { "dependencies": { "@types/mdast": "^4.0.0", "mdast-util-gfm": "^3.0.0", "micromark-extension-gfm": "^3.0.0", "remark-parse": "^11.0.0", "remark-stringify": "^11.0.0", "unified": "^11.0.0" } }, "sha512-1quofZ2RQ9EWdeN34S79+KExV1764+wCUGop5CPL1WGdD0ocPpu91lzPGbwWMECpEpd42kJGQwzRfyov9j4yNg=="], + "remark-parse": ["remark-parse@11.0.0", "", { "dependencies": { "@types/mdast": "^4.0.0", "mdast-util-from-markdown": "^2.0.0", "micromark-util-types": "^2.0.0", "unified": "^11.0.0" } }, "sha512-FCxlKLNGknS5ba/1lmpYijMUzX2esxW5xQqjWxw2eHFfS2MSdaHVINFmhjo+qN1WhZhNimq0dZATN9pH0IDrpA=="], "remark-rehype": ["remark-rehype@11.1.2", "", { "dependencies": { "@types/hast": "^3.0.0", 
"@types/mdast": "^4.0.0", "mdast-util-to-hast": "^13.0.0", "unified": "^11.0.0", "vfile": "^6.0.0" } }, "sha512-Dh7l57ianaEoIpzbp0PC9UKAdCSVklD8E5Rpw7ETfbTl3FqcOOgq5q2LVDhgGCkaBv7p24JXikPdvhhmHvKMsw=="], + "remark-stringify": ["remark-stringify@11.0.0", "", { "dependencies": { "@types/mdast": "^4.0.0", "mdast-util-to-markdown": "^2.0.0", "unified": "^11.0.0" } }, "sha512-1OSmLd3awB/t8qdoEOMazZkNsfVTeY4fTsgzcQFdXNq8ToTN4ZGwrMnlda4K6smTFKD+GRV6O48i6Z4iKgPPpw=="], + "resolve-from": ["resolve-from@4.0.0", "", {}, "sha512-pb/MYmXstAkysRFx8piNI1tGFNQIFA3vkE3Gq4EuA1dF6gHp/+vgZqsCGJapvy8N3Q+4o7FwvquPJcnZ7RYy4g=="], "rollup": ["rollup@4.59.0", "", { "dependencies": { "@types/estree": "1.0.8" }, "optionalDependencies": { "@rollup/rollup-android-arm-eabi": "4.59.0", "@rollup/rollup-android-arm64": "4.59.0", "@rollup/rollup-darwin-arm64": "4.59.0", "@rollup/rollup-darwin-x64": "4.59.0", "@rollup/rollup-freebsd-arm64": "4.59.0", "@rollup/rollup-freebsd-x64": "4.59.0", "@rollup/rollup-linux-arm-gnueabihf": "4.59.0", "@rollup/rollup-linux-arm-musleabihf": "4.59.0", "@rollup/rollup-linux-arm64-gnu": "4.59.0", "@rollup/rollup-linux-arm64-musl": "4.59.0", "@rollup/rollup-linux-loong64-gnu": "4.59.0", "@rollup/rollup-linux-loong64-musl": "4.59.0", "@rollup/rollup-linux-ppc64-gnu": "4.59.0", "@rollup/rollup-linux-ppc64-musl": "4.59.0", "@rollup/rollup-linux-riscv64-gnu": "4.59.0", "@rollup/rollup-linux-riscv64-musl": "4.59.0", "@rollup/rollup-linux-s390x-gnu": "4.59.0", "@rollup/rollup-linux-x64-gnu": "4.59.0", "@rollup/rollup-linux-x64-musl": "4.59.0", "@rollup/rollup-openbsd-x64": "4.59.0", "@rollup/rollup-openharmony-arm64": "4.59.0", "@rollup/rollup-win32-arm64-msvc": "4.59.0", "@rollup/rollup-win32-ia32-msvc": "4.59.0", "@rollup/rollup-win32-x64-gnu": "4.59.0", "@rollup/rollup-win32-x64-msvc": "4.59.0", "fsevents": "~2.3.2" }, "bin": { "rollup": "dist/bin/rollup" } }, "sha512-2oMpl67a3zCH9H79LeMcbDhXW/UmWG/y2zuqnF2jQq5uq9TbM9TVyXvA4+t+ne2IIkBdrLpAaRQAvo7YI/Yyeg=="], @@ -591,6 
+651,8 @@ "@eslint/eslintrc/globals": ["globals@14.0.0", "", {}, "sha512-oahGvuMGQlPw/ivIYBjVSrWAfWLBeku5tpPE2fOPLi+WHffIWbuh2tCjhyQhTBPMf5E9jDEH4FOmTYgYwbKwtQ=="], + "mdast-util-find-and-replace/escape-string-regexp": ["escape-string-regexp@5.0.0", "", {}, "sha512-/veY75JbMK4j1yjvuUxuVsiS/hr/4iHs9FTT6cgTexxdE0Ly/glccBAkloH/DofkjRbZU3bnoj38mOmhkZ0lHw=="], + "parse-entities/@types/unist": ["@types/unist@2.0.11", "", {}, "sha512-CmBKiL6NNo/OqgmMn95Fk9Whlp2mtvIv+KNpQKN2F4SjvrEesubTRWGYSg+BnWZOnlCaSTU1sMpsBOzgbYhnsA=="], } } diff --git a/package.json b/package.json index 6dfcf32..45e41a2 100644 --- a/package.json +++ b/package.json @@ -14,7 +14,9 @@ "dependencies": { "react": "^19.2.0", "react-dom": "^19.2.0", - "react-markdown": "^10.1.0" + "react-markdown": "^10.1.0", + "react-syntax-highlighter": "^16.1.1", + "remark-gfm": "^4.0.1" }, "devDependencies": { "@eslint/js": "^9.39.1", diff --git a/public/posts/2025/2025-12-13-Test7.md b/public/posts/2025/2025-12-13-Test7.md deleted file mode 100644 index 7b82441..0000000 --- a/public/posts/2025/2025-12-13-Test7.md +++ /dev/null @@ -1 +0,0 @@ -Test7 file \ No newline at end of file diff --git a/public/posts/2026/2026-01-01-Advent-of-Compiler-Optimisations-Study-Notes-01.md b/public/posts/2026/2026-01-01-Advent-of-Compiler-Optimisations-Study-Notes-01.md new file mode 100644 index 0000000..e5b1b0b --- /dev/null +++ b/public/posts/2026/2026-01-01-Advent-of-Compiler-Optimisations-Study-Notes-01.md @@ -0,0 +1,308 @@ +## Study Notes: Why xor eax, eax?, Advent of Compiler Optimisations 2025 + +These notes are based on the post [**Why xor eax, eax?**](https://xania.org/202512/01-xor-eax-eax), which is Day 1 of the [Advent of Compiler Optimisations 2025](https://xania.org/AoCO2025-archive) Series by [Matt Godbolt](https://xania.org/MattGodbolt). 
+ +#### Development Environment + +```bash +$ lsb_release -d +Description: Ubuntu 24.04.3 LTS + +$ gcc -v +gcc version 13.3.0 + +$ clang -v +Ubuntu clang version 18.1.8 + +$ llvm-objdump -v +Ubuntu LLVM version 18.1.8 + +$ lldb -v +lldb version 18.1.8 + +$ nvim --version +NVIM v0.11.5 + +$ echo $SHELL +/usr/bin/fish +``` + +#### The difference between `-O0`, `-O1`, and `-O2` + +Basically, we know the compiler has the following stages. If we use the same code but choose different +optimization levels, the final assembly code will be different. + +```text + [ FRONTEND ] [ MIDDLE-END ] [ BACKEND ] + .----------. .------------. .------------. + | Lexer | | | | Code | +Source Code (*.c) ---> | & | ---> | Optimizer | ---> | Generator | ---> [ Assembly (*.s) ] + | Parser | | | | (e.g. x86) | + '----------' '------------' '------------' +``` + +For now, we use `main.c` as input (shown below), apply different optimization levels, +and use `llvm-objdump` to analyze the corresponding assembly code. + +```bash +$ nvim main.c +``` + +```c +int main() { + return 0; +} +``` + +###### Use `-O0` as optimization level + +```bash +$ rm -f (path filter *.o); gcc -O0 -c main.c; llvm-objdump -d --x86-asm-syntax=att main.o +``` + +```bash +main.o: file format elf64-x86-64 + +Disassembly of section .text: + +0000000000000000
<main>: + 0: f3 0f 1e fa endbr64 + 4: 55 pushq %rbp + 5: 48 89 e5 movq %rsp, %rbp + 8: b8 00 00 00 00 movl $0x0, %eax + d: 5d popq %rbp + e: c3 retq +``` + +```bash +$ size main.o +``` + +```bash +text data bss dec hex filename + 103 0 0 103 67 main.o +``` + +At `-O0`, the compiler generates a stack frame, leading to unnecessary instruction overhead. + +###### Use `-O1` as optimization level +```bash +$ rm -f (path filter *.o); gcc -O1 -c main.c; llvm-objdump -d --x86-asm-syntax=att main.o +``` + +```bash +main.o: file format elf64-x86-64 + +Disassembly of section .text: + +0000000000000000
<main>: + 0: f3 0f 1e fa endbr64 + 4: b8 00 00 00 00 movl $0x0, %eax + 9: c3 retq +``` + +```bash +$ size main.o +``` + +```bash +text data bss dec hex filename + 90 0 0 90 5a main.o +``` + +`-O1` reduces the output from six instructions to three by removing the stack frame setup. + +###### Use `-O2` as optimization level +```bash +$ rm -f (path filter *.o); gcc -O2 -c main.c; llvm-objdump -d --x86-asm-syntax=att main.o +``` + +```bash +main.o: file format elf64-x86-64 + +Disassembly of section .text: + +0000000000000000
<main>: + 0: f3 0f 1e fa endbr64 + 4: 31 c0 xorl %eax, %eax + 6: c3 retq +``` + +```bash +$ size main.o +``` + +```bash +text data bss dec hex filename + 87 0 0 87 57 main.o +``` + +As you can see, `-O2` and `-O1` both produce three instructions. +The only difference is that `-O2` changes `movl` to `xorl`. +The reason is the instruction size. `xorl %eax, %eax` only uses two bytes, +making it smaller than the five-byte `movl $0x0, %eax`. +Hence, you can see the total `.text` size is reduced from 90 bytes to 87 bytes. + +#### How about we change `gcc` to `clang`? +```bash +$ rm -f (path filter *.o); clang -O1 -c main.c; llvm-objdump -d --x86-asm-syntax=att main.o +``` + +```bash +rm -f (path filter *.o); clang -O1 -c main.c; llvm-objdump -d --x86-asm-syntax=att main.o + +main.o: file format elf64-x86-64 + +Disassembly of section .text: + +0000000000000000
<main>: + 0: 31 c0 xorl %eax, %eax + 2: c3 retq +``` + +You will find that Clang's `-O1` output already uses `xorl`, making it similar to GCC's `-O2`. +Additionally, it consists of only two instructions because Clang does not generate the `endbr64` instruction. + +#### Why `eax`, not `rax`? + +```bash +$ nvim get_val.c +``` + +```c +long get_val() { + return 0; +} +``` + +```bash +$ rm -f (path filter *.o); clang -O2 -c get_val.c; llvm-objdump -d --x86-asm-syntax=att get_val.o +``` + +```bash +get_val.o: file format elf64-x86-64 + +Disassembly of section .text: + +0000000000000000 <get_val>: + 0: 31 c0 xorl %eax, %eax + 2: c3 retq +``` + +As we know, the x86-64 calling convention requires the return value to be stored in the +64-bit `rax` register. However, we see that the compiler uses the 32-bit `eax` register for the `xorl` instruction. +The reason is that in x86-64, any operation that writes to a 32-bit register automatically zero-extends the result +to 64 bits, clearing the upper 32 bits of the corresponding 64-bit register. + +Let's verify this zero-extension with an LLDB session: + +```bash +$ nvim main.c +``` + +```c +int main() { + return 0; +} +``` + +```bash +$ rm -f (path filter *.out); clang -g -O2 -o app.out main.c; lldb app.out +``` + +```bash +(lldb) target create "app.out" +Current executable set to '/home/gapry/Workspaces/test/app.out' (x86_64). 
+(lldb) breakpoint set -n main +Breakpoint 1: where = app.out`main at main.c:2:3, address = 0x0000000000001130 +(lldb) r +Process 409020 launched: '/home/gapry/Workspaces/test/app.out' (x86_64) +Process 409020 stopped +* thread #1, name = 'app.out', stop reason = breakpoint 1.1 + frame #0: 0x0000555555555130 app.out`main at main.c:2:3 + 1 int main() { +-> 2 return 0; + 3 } +(lldb) register write rax 0xffffffffffffffff +(lldb) register read rax + rax = 0xffffffffffffffff +(lldb) disassemble --pc +app.out`main: +-> 0x555555555130 <+0>: xorl %eax, %eax + 0x555555555132 <+2>: retq + 0x555555555133: addb %dh, %bl +(lldb) thread step-inst +Process 409020 stopped +* thread #1, name = 'app.out', stop reason = instruction step into + frame #0: 0x0000555555555132 app.out`main at main.c:2:3 + 1 int main() { +-> 2 return 0; + 3 } +(lldb) disassemble --pc +app.out`main: +-> 0x555555555132 <+2>: retq + 0x555555555133: addb %dh, %bl +(lldb) register read rax + rax = 0x0000000000000000 +(lldb) +``` + +The LLDB output confirms that even though `xorl` only targets the 32-bit `%eax` register, +the hardware automatically clears the entire 64-bit `%rax` register. + +#### Caller/Callee Arguments +```bash +$ nvim main.c +``` + +```c +extern void g(long arg1, + long arg2, + long arg3, + long arg4, + long arg5, + long arg6); + +void f() { + g(0, 0, 0, 0, 0, 0); +} +``` + +```bash +$ rm -f (path filter *.o); clang -O2 -c main.c; llvm-objdump -d --disassemble-symbols=f --x86-asm-syntax=att main.o +``` + +```bash +main.o: file format elf64-x86-64 + +Disassembly of section .text: + +0000000000000000 <f>: + 0: 31 ff xorl %edi, %edi + 2: 31 f6 xorl %esi, %esi + 4: 31 d2 xorl %edx, %edx + 6: 31 c9 xorl %ecx, %ecx + 8: 45 31 c0 xorl %r8d, %r8d + b: 45 31 c9 xorl %r9d, %r9d + e: e9 00 00 00 00 jmp 0x13 +``` + +According to the x86-64 System V ABI, the first six integer or pointer arguments are passed in +specific registers. 
+To pass `0` to all of them, the compiler again uses the `xorl` optimization to +zero out each one: + +| Argument | 64-bit Register | 32-bit Register | +| :--- | :--- | :--- | +| 1st | `%rdi` | `%edi` | +| 2nd | `%rsi` | `%esi` | +| 3rd | `%rdx` | `%edx` | +| 4th | `%rcx` | `%ecx` | +| 5th | `%r8` | `%r8d` | +| 6th | `%r9` | `%r9d` | + +The `xorl` optimization does not only appear for return values; you will also see it frequently +when a caller prepares arguments for a callee. As with the previous example, zeroing the 32-bit +version of these registers automatically zero-extends to the full 64-bit register. + +## References +- [x64 architecture](https://learn.microsoft.com/en-us/windows-hardware/drivers/debugger/x64-architecture) \ No newline at end of file diff --git a/public/posts/2026/2026-01-29-Test3.md b/public/posts/2026/2026-01-29-Test3.md deleted file mode 100644 index 0c1d4e5..0000000 --- a/public/posts/2026/2026-01-29-Test3.md +++ /dev/null @@ -1 +0,0 @@ -test3 file \ No newline at end of file diff --git a/public/posts/2026/2026-01-31-Advent-of-Compiler-Optimisations-Study-Notes-02.md b/public/posts/2026/2026-01-31-Advent-of-Compiler-Optimisations-Study-Notes-02.md new file mode 100644 index 0000000..510f88a --- /dev/null +++ b/public/posts/2026/2026-01-31-Advent-of-Compiler-Optimisations-Study-Notes-02.md @@ -0,0 +1,186 @@ +## Study Notes: Addressing the adding situation, Advent of Compiler Optimisations 2025 + +These notes are based on the post [**Addressing the adding situation**](https://xania.org/202512/02-adding-integers) and the YouTube video [**[AoCO 2/25] Adding Integers on x86 - just an ADD, right?**](https://www.youtube.com/watch?v=BOvg0sGJnes&list=PL2HVqYf7If8cY4wLk7JUQ2f0JXY_xMQm2&index=3) which are Day 2 of the [Advent of Compiler Optimisations 2025](https://xania.org/AoCO2025-archive) Series by [Matt Godbolt](https://xania.org/MattGodbolt). 
+ +My notes focus on reproducing and verifying [Matt Godbolt](https://xania.org/MattGodbolt)'s teaching within a local development environment using `LLVM` toolchain on `Ubuntu`. + +Additionally, I have extended the discussion by implementing a manual Proof of Concept in assembly to demonstrate the equivalence of `add` vs `lea` instructions. + +Selected technical insights from the YouTube comment section are reproduced at the end of these notes to provide additional context. + +Written by me and assisted by AI, proofread by me and assisted by AI. + +#### Development Environment +```bash +$ lsb_release -d +Description: Ubuntu 24.04.3 LTS + +$ clang -v +Ubuntu clang version 18.1.8 + +$ llvm-objdump -v +Ubuntu LLVM version 18.1.8 + +$ nvim --version +NVIM v0.11.5 + +$ echo $SHELL +/usr/bin/fish +``` + +## Integer Addition + +To understand how `clang` translates `C` addition into `x86-64` machine instructions, we use the following implementation + +```bash +$ nvim add.c +``` + +```c +int add(int x, int y) { + return x + y; +} +``` + +## Unoptimized Analysis + +```bash +$ rm -f (path filter *.o); clang -O0 -c add.c; llvm-objdump -d --x86-asm-syntax=att add.o +``` + +```bash +add.o: file format elf64-x86-64 + +Disassembly of section .text: + +0000000000000000 : + 0: 55 pushq %rbp + 1: 48 89 e5 movq %rsp, %rbp + 4: 89 7d fc movl %edi, -0x4(%rbp) + 7: 89 75 f8 movl %esi, -0x8(%rbp) + a: 8b 45 fc movl -0x4(%rbp), %eax + d: 03 45 f8 addl -0x8(%rbp), %eax + 10: 5d popq %rbp + 11: c3 retq +``` + +In C, the expression `a = b + c` allows for three distinct variables to execute the addition. +However, the x86-64 ISA does not support a three-operand format for standard addition. +The format for the `add` instruction is `add source, destination`, which executes the operation +`destination = destination + source`. 
+ +Because the hardware logic requires the destination register to overlap with one of the source operands, +the compiler cannot translate `a = b + c` directly to a single `add` instruction. To prevent overwriting the original +value of `b` or `c` before the operation is executed, the compiler needs to use `mov` instruction to +initialize the destination with one of the operands first: + +```bash +movl -0x4(%rbp), %eax +addl -0x8(%rbp), %eax +``` + +Hence, the compiler needs to use two instructions to execute the addition at the -O0 level. + +## Optimized Analysis +```bash +rm -f (path filter *.o); clang -O2 -c add.c; llvm-objdump -d --x86-asm-syntax=att add.o +``` + +```bash +add.o: file format elf64-x86-64 + +Disassembly of section .text: + +0000000000000000 : + 0: 8d 04 37 leal (%rdi,%rsi), %eax + 3: c3 retq +``` + +At the `-O2` level, the compiler translates the `C` statement `return x + y;` directly into a single `lea` instruction. +Because lea supports two source registers, the compiler can take two independent inputs (`%rdi` and `%rsi`) and +store the result in an independent destination (`%eax`) without overwriting the original operands. +This allows the `a = b + c` logic to be executed in one step, +eliminating the need for the extra mov instruction required at the `-O0` level. + +## Proof of Concept +The following assembly code demonstrates these two approaches: +one utilizing the `mov` + `add` instruction sequence, +and the other employing a single `lea` instruction. 
+ +```bash +$ nvim add.s +``` + +```text +.section .note.GNU-stack, "", @progbits + +.section .rodata + fmt: .string "Result: %d\n" + +.section .text + .globl main + .extern printf + +main: + pushq %rbp + movq %rsp, %rbp + + # --- Case A: Using mov + add --- + movl $1, %edx + movl $2, %ecx + movl %edx, %eax + addl %ecx, %eax + + # Print Result + movq fmt@GOTPCREL(%rip), %rdi + movslq %eax, %rsi + movl $0, %eax + call printf + + # --- Case B: Using lea --- + movl $1, %edx + movl $2, %ecx + leal (%edx, %ecx), %eax + + # Print Result + movq fmt@GOTPCREL(%rip), %rdi + movslq %eax, %rsi + movl $0, %eax + call printf + + movl $0, %eax + popq %rbp + retq +``` + +```bash +$ rm -f (path filter *.out); clang -o add.out add.s; ./add.out +Result: 3 +Result: 3 +``` + +As demonstrated, both approaches produce identical results, confirming that the single `lea` instruction is +logically equivalent to the `a = b + c` mathematical operation. + +## YouTube Comment Insights + +Since YouTube does not currently support generating direct permanent links to individual comments, +I have reproduced the relevant technical insight below in its entirety to ensure both accuracy and proper attribution. + +```text +@sulix314 +LEA doesn't affect flags. While this is sometimes annoying (when you need to carry with ADC), +it is often extremely useful because you can perform arithmetic without destroying the flag state +needed for a subsequent conditional jump or another calculation. + +@incubus3827 +In addition, LEA could run on the V-pipeline in the original Pentium, which often allowed performing some arithmetics + reshuffling registers +for no additional cycles. A true gamechanger for software rasterizers. + +@mytech6779 +I recall the LEA instruction also uses a dedicated module on the CPU with an independent execution pipeline, +so the LEA operation can be concurrent with an ALU operation. 
+I can't say the address module addition [in isolation] is faster or the same cycle count as the ALU, +but being specialized I imagine the address module is somewhat simpler with fewer transistors +(reducing area and heat some small amount). +``` diff --git a/public/posts/2026/2026-01-31-Advent-of-Compiler-Optimisations-Study-Notes-03.md b/public/posts/2026/2026-01-31-Advent-of-Compiler-Optimisations-Study-Notes-03.md new file mode 100644 index 0000000..962e7f6 --- /dev/null +++ b/public/posts/2026/2026-01-31-Advent-of-Compiler-Optimisations-Study-Notes-03.md @@ -0,0 +1,274 @@ +## Study Notes: You can't fool the optimiser, Advent of Compiler Optimisations 2025 + +These notes are based on the post [**You can't fool the optimiser**](https://xania.org/202512/03-more-adding-integers) and the YouTube video [**[AoCO 3/25] More Adding**](https://www.youtube.com/watch?v=wHg9lYPMvvE&list=PL2HVqYf7If8cY4wLk7JUQ2f0JXY_xMQm2&index=4) which are Day 3 of the [Advent of Compiler Optimisations 2025](https://xania.org/AoCO2025-archive) Series by [Matt Godbolt](https://xania.org/MattGodbolt). + +My notes focus on reproducing and verifying [Matt Godbolt](https://xania.org/MattGodbolt)'s teaching within a local development environment using `LLVM` toolchain on `Ubuntu`. + +Written by me and assisted by AI, proofread by me and assisted by AI. 
+ +## Development Environment +```bash +$ lsb_release -d +Description: Ubuntu 24.04.3 LTS + +$ clang -v +Ubuntu clang version 18.1.8 + +$ sudo apt install gcc-aarch64-linux-gnu libc6-dev-arm64-cross + +$ aarch64-linux-gnu-gcc -v +COLLECT_GCC=aarch64-linux-gnu-gcc +gcc version 13.3.0 (Ubuntu 13.3.0-6ubuntu2~24.04) + +$ qemu-aarch64 -version +qemu-aarch64 version 8.2.2 (Debian 1:8.2.2+ds-0ubuntu1.11) + +$ llvm-objdump -v +Ubuntu LLVM version 18.1.8 + +$ nvim --version +NVIM v0.11.5 + +$ echo $SHELL +/usr/bin/fish +``` + +## Recursion Integer Addition + +We can observe the compiler's optimization by implementing addition through recursion, as shown in the following code: + +```bash +$ nvim add.c +``` + +```c +#include <stdio.h> + +unsigned add(unsigned x, unsigned y) { + return y <= 0 ? x : add(x + 1, y - 1); +} + +int main(void) { + unsigned a = 1; + unsigned b = 10; + unsigned r = add(a, b); + printf("%u = %u\n", r, a + b); + return 0; +} +``` + +## Unoptimized Analysis + +The following analysis examines the unoptimized assembly code generated with the `-O0` flag. 
+ +```bash +$ rm -f (path filter *.out); clang -O0 -target aarch64-linux-gnu --sysroot=/usr/aarch64-linux-gnu -static add.c -o app.out; qemu-aarch64 ./app.out +11 = 11 +``` + +```bash +$ llvm-objdump -d --disassemble-symbols=add app.out +``` + +```text +app.out: file format elf64-littleaarch64 + +Disassembly of section .text: + +00000000004007f0 : + 4007f0: d10083ff sub sp, sp, #0x20 // Allocate 32 bytes on stack + 4007f4: a9017bfd stp x29, x30, [sp, #0x10] // Save Frame Pointer (x29) and Link Register (x30) + 4007f8: 910043fd add x29, sp, #0x10 // Set up new Frame Pointer + 4007fc: b81fc3a0 stur w0, [x29, #-0x4] // Store 'x' (w0) into stack + 400800: b9000be1 str w1, [sp, #0x8] // Store 'y' (w1) into stack + 400804: b9400be8 ldr w8, [sp, #0x8] // Load 'y' from stack into w8 + 400808: 71000108 subs w8, w8, #0x0 // Compare w8 (y) with 0 + 40080c: 540000a8 b.hi 0x400820 // If y > 0, jump to recursive case (400820) + 400810: 14000001 b 0x400814 // Else, branch to base case logic + 400814: b85fc3a0 ldur w0, [x29, #-0x4] // [Base Case] Load 'x' into w0 + 400818: b90007e0 str w0, [sp, #0x4] // Store 'x' as the potential return value + 40081c: 14000008 b 0x40083c // Jump to epilogue (return) + 400820: b85fc3a8 ldur w8, [x29, #-0x4] // [Recursive Case] Load 'x' into w8 + 400824: 11000500 add w0, w8, #0x1 // w0 = x + 1 (Preparing 1st argument) + 400828: b9400be8 ldr w8, [sp, #0x8] // Load 'y' into w8 + 40082c: 71000501 subs w1, w8, #0x1 // w1 = y - 1 (Preparing 2nd argument) + 400830: 97fffff0 bl 0x4007f0 // Recursive call: add(x + 1, y - 1) + 400834: b90007e0 str w0, [sp, #0x4] // Store the recursive result onto stack + 400838: 14000001 b 0x40083c // Jump to epilogue (return) + 40083c: b94007e0 ldr w0, [sp, #0x4] // Load the result from stack into w0 + 400840: a9417bfd ldp x29, x30, [sp, #0x10] // Restore Frame Pointer and Link Register + 400844: 910083ff add sp, sp, #0x20 // Deallocate stack space + 400848: d65f03c0 ret // Return to caller +``` + +#### Part 01: Function 
Prologue +```text +4007f0: d10083ff sub sp, sp, #0x20 // Allocate 32 bytes on stack +4007f4: a9017bfd stp x29, x30, [sp, #0x10] // Save Frame Pointer (x29) and Link Register (x30) +4007f8: 910043fd add x29, sp, #0x10 // Set up new Frame Pointer +``` + +Each recursive invocation initiates a Function Prologue to establish the execution context. +The instruction `sub sp, sp, #0x20` performs Stack Allocation by decrementing the Stack Pointer (`SP`), reserving 32 bytes for the current Stack Frame. +The subsequent `stp` (Store Pair) instruction implements Context Saving, pushing the Frame Pointer (`X29`) and Link Register (`X30`) onto the stack to +facilitate the Stack Unwinding process during the Function Epilogue. + +```bash +| Higher Address | +| | ++----------------+ <--- Previous SP +| Unused / Local | (16 bytes) ++----------------+ +| Link Register | (X30) ++----------------+ <--- X29 (New Frame Pointer) +| Frame Pointer | (X29) ++----------------+ <--- SP (Current Stack Pointer) +| | +| Lower Address | +``` + +#### Part 02: Parameter Storage +```text +4007fc: b81fc3a0 stur w0, [x29, #-0x4] // Store 'x' (w0) into stack +400800: b9000be1 str w1, [sp, #0x8] // Store 'y' (w1) into stack +400804: b9400be8 ldr w8, [sp, #0x8] // Load 'y' from stack into w8 +``` + +The input parameters `x` and `y` are stored from registers (`w0` and `w1`) into stack memory. +Since it is at the `-O0` optimization level, +an additional instruction is used to load `y` from stack memory back into a register (`w8`) for subsequent conditional evaluation. + +#### Part 03: Branching +```text +400808: 71000108 subs w8, w8, #0x0 // Compare w8 (y) with 0 +40080c: 540000a8 b.hi 0x400820 // If y > 0, jump to recursive case (400820) +400810: 14000001 b 0x400814 // Else, branch to base case logic +``` + +The subs instruction performs an arithmetic subtraction to compare `y` (in `w8`) with `0`, +updating the Condition Flags in the processor's state register. 
+The `b.hi` (Branch if Higher) instruction then evaluates these flags: +if `y > 0`, the Program Counter (`PC`) jumps to the Recursive Case; +otherwise, it jumps to the Base Case. + +#### Part 04: The Base Case: `y == 0` +```text +400814: b85fc3a0 ldur w0, [x29, #-0x4] // [Base Case] Load 'x' into w0 +400818: b90007e0 str w0, [sp, #0x4] // Store 'x' as the potential return value +40081c: 14000008 b 0x40083c // Jump to epilogue (return) (Part 06) +``` + +When the base case is met, the value of `x` is loaded into register `W0`. +The compiler then executes a `store` operation from register `W0` to stack memory to preserve the +return value. + +#### Part 05: The Recursive Step: `add(x + 1, y - 1)` +```text +400820: b85fc3a8 ldur w8, [x29, #-0x4] // [Recursive Case] Load 'x' into w8 +400824: 11000500 add w0, w8, #0x1 // w0 = x + 1 (Preparing 1st argument) +400828: b9400be8 ldr w8, [sp, #0x8] // Load 'y' into w8 +40082c: 71000501 subs w1, w8, #0x1 // w1 = y - 1 (Preparing 2nd argument) +400830: 97fffff0 bl 0x4007f0 // Recursive call: add(x + 1, y - 1) +400834: b90007e0 str w0, [sp, #0x4] // Store the recursive result onto stack +400838: 14000001 b 0x40083c // Jump to epilogue (return) +``` + +The compiler prepares the arguments for the recursive call by loading values from the stack into registers `w0` (`x + 1`) and `w1` (`y - 1`) according to the Procedure Call Standard. +The `bl` (Branch with Link) instruction then executes the recursive call, redirecting the Control Flow back to the function start. +Once the recursive call returns, the resulting value in `w0` is stored into stack memory before jumping to the epilogue. 
+ +#### Part 06: Function Epilogue +```text +40083c: b94007e0 ldr w0, [sp, #0x4] // Load the result from stack into w0 +400840: a9417bfd ldp x29, x30, [sp, #0x10] // Restore Frame Pointer and Link Register +400844: 910083ff add sp, sp, #0x20 // Deallocate stack space +400848: d65f03c0 ret // Return to caller +``` + +The function epilogue restores the caller's execution environment. +The return value is loaded from stack memory into register `w0`. +The `ldp` instruction performs a pair load to restore the frame pointer (`x29`) and link register (`x30`). +Finally, the stack pointer (`sp`) is incremented by 32 bytes to perform stack deallocation before the ret instruction +redirects the Control Flow back to the address stored in the link register. + +## Optimized Analysis + +The following analysis examines the optimized assembly code generated with the `-O2` flag. + +```bash +$ rm -f (path filter *.out); clang -O2 -target aarch64-linux-gnu --sysroot=/usr/aarch64-linux-gnu -static add.c -o app.out; qemu-aarch64 ./app.out +11 = 11 +``` + +```bash +$ llvm-objdump -d --disassemble-symbols=add app.out +``` + +```text +app.out: file format elf64-littleaarch64 + +Disassembly of section .text: + +00000000004007f0 : + 4007f0: 0b000020 add w0, w1, w0 + 4007f4: d65f03c0 ret +``` + +#### Recursion Call + +In the unoptimized execution, each recursive call of `add(x + 1, y - 1)` triggers the function prologue, +resulting in the allocation of a new activation record. +This manifests as `O(n)` space complexity, as the stack grows linearly with the input value of `y`. 
+ +```bash +| Higher Address | ++--------------------+ +| add(1, 10) Frame | (Initial Call: a = 1, b = 10) ++--------------------+ +| add(2, 9) Frame | ++--------------------+ +| add(3, 8) Frame | ++--------------------+ +| add(4, 7) Frame | ++--------------------+ +| add(5, 6) Frame | ++--------------------+ +| add(6, 5) Frame | ++--------------------+ +| add(7, 4) Frame | ++--------------------+ +| add(8, 3) Frame | ++--------------------+ +| add(9, 2) Frame | ++--------------------+ +| add(10, 1) Frame | ++--------------------+ +| add(11, 0) Frame | (Base Case: x = 11, returns x) ++--------------------+ <--- SP (Current Stack Pointer) +| Lower Address | +``` + +#### Tail Recursion Call + +The `-O2` optimization level identifies the recursive call as a Tail Call. +The compiler recognizes that the current stack frame can be reused, +as no further operations are required after the callee returns. +This reduces the space complexity from `O(n)` to `O(1)` and eliminates the overhead associated +with stack frame allocation and deallocation. + +```bash +| Higher Address | | Higher Address | | Higher Address | ++--------------------+ +-------------------+ +-------------------+ <--- Previous SP +| add(1, 10) Frame | -> | add(2, 9) Frame | -> ... -> | add(11, 0) Frame | (Reused for all steps) ++--------------------+ +-------------------+ +-------------------+ <--- SP (Static: Never moves) +| Lower Address | | Lower Address | | Lower Address | +``` + +#### Arithmetic Folding +While Tail Call Optimization handles the stack efficiency, Arithmetic Folding is an optimization technique +where the compiler analyzes the symbolic behavior of a loop or recursion to replace it with a simplified mathematical expression. 
+ +#### Conclusion +Through Tail Call Optimization and Arithmetic Folding, +the compiler optimizes the recursive logic by collapsing the stack-related and control-flow instructions (totaling 21 instructions) into a single `add` instruction. 
+It effectively reduces the space complexity from `O(n)` to `O(1)` and minimizes the execution latency to a single instruction cycle. \ No newline at end of file diff --git a/public/posts/2026/2026-01-31-Test123.md b/public/posts/2026/2026-01-31-Test123.md deleted file mode 100644 index a208335..0000000 --- a/public/posts/2026/2026-01-31-Test123.md +++ /dev/null @@ -1 +0,0 @@ -test 123 file \ No newline at end of file diff --git a/public/posts/2026/2026-02-03-Test5.md b/public/posts/2026/2026-02-03-Test5.md deleted file mode 100644 index 4f1bd33..0000000 --- a/public/posts/2026/2026-02-03-Test5.md +++ /dev/null @@ -1 +0,0 @@ -test5 \ No newline at end of file diff --git a/public/posts/2026/2026-02-27-Advent-of-Compiler-Optimisations-Study-Notes-04.md b/public/posts/2026/2026-02-27-Advent-of-Compiler-Optimisations-Study-Notes-04.md new file mode 100644 index 0000000..5ba8b80 --- /dev/null +++ b/public/posts/2026/2026-02-27-Advent-of-Compiler-Optimisations-Study-Notes-04.md @@ -0,0 +1,399 @@ +## Study Notes: Multiplying with a constant, Advent of Compiler Optimisations 2025 + +These notes are based on the post [**Multiplying with a constant**](https://xania.org/202512/04-multiplying-integers) and the YouTube video [**[AoCO 4/25] Multiplying with a Constant**](https://www.youtube.com/watch?v=1X88od0miHs&list=PL2HVqYf7If8cY4wLk7JUQ2f0JXY_xMQm2&index=5) which are Day 4 of the [Advent of Compiler Optimisations 2025](https://xania.org/AoCO2025-archive) Series by [Matt Godbolt](https://xania.org/MattGodbolt). + +My notes focus on reproducing and verifying [Matt Godbolt](https://xania.org/MattGodbolt)'s teaching within a local development environment using `LLVM` toolchain on `Ubuntu`. + +Selected technical insights from the YouTube comment section are reproduced at the end of these notes to provide additional context. + +Written by me and assisted by AI, proofread by me and assisted by AI. 
+ +#### Development Environment +```bash +$ lsb_release -d +Description: Ubuntu 24.04.3 LTS + +$ clang -v +Ubuntu clang version 18.1.8 + +$ llvm-objdump -v +Ubuntu LLVM version 18.1.8 + +$ nvim --version +NVIM v0.11.5 + +$ echo $SHELL +/usr/bin/fish +``` + +## Introduction + +After studying the Day 04 YouTube video and blog post, I conducted a series of sequential tests by multiplying `x` by every constant from `2` to `20`. + +By analyzing the assembly output for each case, I identified and selected 10 distinct compiler optimization strategies. + +These notes document specific patterns and filter out redundant results, highlighting unique compiler optimization strategies. + +## Case 01 : `x * 2` + +```bash +$ nvim mul.c +``` + +```c +int mul(int x) { + return x * 2; +} +``` + +```bash +$ rm -f (path filter *.o); clang -O2 -c mul.c; llvm-objdump -d --x86-asm-syntax=att mul.o +``` + +```text +mul.o: file format elf64-x86-64 + +Disassembly of section .text: + +0000000000000000 : + 0: 8d 04 3f leal (%rdi,%rdi), %eax + 3: c3 retq +``` + +The compiler avoids the `imul` instruction in favor of the `shift`, `add`, and `lea` instructions for the following reasons: + +- Constant multiplication can often be expressed using free address generation +- `lea` can compute `x + x * scale` without using ALU ports +- It may reduce dependency chains + +Here, it uses the leal instruction to perform `x + x` in a single cycle. 
+ +## Case 02: `x * 3` + +```bash +$ nvim mul.c +``` + +```c +int mul(int x) { + return x * 3; +} +``` + +```bash +$ rm -f (path filter *.o); clang -O2 -c mul.c; llvm-objdump -d --x86-asm-syntax=att mul.o +``` + +```text +mul.o: file format elf64-x86-64 + +Disassembly of section .text: + +0000000000000000 : + 0: 8d 04 7f leal (%rdi,%rdi,2), %eax + 3: c3 retq +``` + +It utilizes the x86 `lea` instruction with **Base + (Index * Scale)** addressing to calculate `x + x * 2`. + +## Case 03: `x * 4` + +```bash +$ nvim mul.c +``` + +```c +int mul(int x) { + return x * 4; +} +``` + +```bash +$ rm -f (path filter *.o); clang -O2 -c mul.c; llvm-objdump -d --x86-asm-syntax=att mul.o +``` + +```text +mul.o: file format elf64-x86-64 + +Disassembly of section .text: + +0000000000000000 : + 0: 8d 04 bd 00 00 00 00 leal (,%rdi,4), %eax + 7: c3 retq +``` + +Using the `lea` instruction with a scale factor enables multiplication to be performed in a single instruction, +eliminating the need for separate shift and multiply instructions. + +## Case 04 : `x * 6` + +```bash +$ nvim mul.c +``` + +```c +int mul(int x) { + return x * 6; +} +``` + +```bash +$ rm -f (path filter *.o); clang -O2 -c mul.c; llvm-objdump -d --x86-asm-syntax=att mul.o +``` + +```text +mul.o: file format elf64-x86-64 + +Disassembly of section .text: + +0000000000000000 : + 0: 01 ff addl %edi, %edi + 2: 8d 04 7f leal (%rdi,%rdi,2), %eax + 5: c3 retq +``` + +The compiler splits the multiplication `x * 6` into two distinct steps: first `x + x`, followed by `2x + (2x * 2)`. + +You might wonder why the compiler doesn't simply use a single instruction like `leal (%rdi,%rdi,6), %eax`. + +The reason lies in the instruction encoding. + +The `leal` instruction calculates addresses using the formula **Base + (Index * Scale)**, where the scale factor is encoded at the bit level. 
+ +If we assume the scale field uses **2 bits** within the instruction encoding, there are only four possible binary combinations: `00`, `01`, `10`, and `11`. 
+ +These bits correspond to the multipliers **1, 2, 4, and 8**. Because of this limitation, it is not possible for the hardware to represent a scale of `6`. + +Consequently, the compiler must break the operation into multiple valid steps to achieve the desired result. + +## Case 05 : `x * 7` + +```bash +$ nvim mul.c +``` + +```c +int mul(int x) { + return x * 7; +} +``` + +```bash +$ rm -f (path filter *.o); clang -O2 -c mul.c; llvm-objdump -d --x86-asm-syntax=att mul.o +``` + +```text +mul.o: file format elf64-x86-64 + +Disassembly of section .text: + +0000000000000000 : + 0: 8d 04 fd 00 00 00 00 leal (,%rdi,8), %eax + 7: 29 f8 subl %edi, %eax + 9: c3 retq +``` + +The compiler uses **multiply and subtract** logic: It first scales `x` by 8 using leal and then subtracts the original `x`. + +## Case 06 : `x * 11` + +```bash +$ nvim mul.c +``` + +```c +int mul(int x) { + return x * 11; +} +``` + +```bash +$ rm -f (path filter *.o); clang -O2 -c mul.c; llvm-objdump -d --x86-asm-syntax=att mul.o +``` + +```text +mul.o: file format elf64-x86-64 + +Disassembly of section .text: + +0000000000000000 : + 0: 8d 04 bf leal (%rdi,%rdi,4), %eax + 3: 8d 04 47 leal (%rdi,%rax,2), %eax + 6: c3 retq +``` + +The compiler splits the multiplication `x * 11` into two distinct steps: +first `x + x * 4`, followed by `x + (5x * 2)`. + +## Case 07 : `x * 12` + +```bash +$ nvim mul.c +``` + +```c +int mul(int x) { + return x * 12; +} +``` + +```bash +$ rm -f (path filter *.o); clang -O2 -c mul.c; llvm-objdump -d --x86-asm-syntax=att mul.o +``` + +```text +mul.o: file format elf64-x86-64 + +Disassembly of section .text: + +0000000000000000 : + 0: c1 e7 02 shll $0x2, %edi + 3: 8d 04 7f leal (%rdi,%rdi,2), %eax + 6: c3 retq +``` + +The compiler splits the multiplication `x * 12` into two distinct steps: +first `x << 2`, followed by `4x + (4x * 2)`. 
+ +## Case 08: `x * 14` + +```bash +$ nvim mul.c +``` + +```c +int mul(int x) { + return x * 14; +} +``` + +```bash +$ rm -f (path filter *.o); clang -O2 -c mul.c; llvm-objdump -d --x86-asm-syntax=att mul.o +``` + +```text +mul.o: file format elf64-x86-64 + +Disassembly of section .text: + +0000000000000000 : + 0: 89 f8 movl %edi, %eax + 2: 8d 0c 00 leal (%rax,%rax), %ecx + 5: c1 e0 04 shll $0x4, %eax + 8: 29 c8 subl %ecx, %eax + a: c3 retq +``` + +The compiler splits the multiplication `x * 14` into three distinct steps: +1. calculates 2x and stores it in ecx (`ecx = x + x`) +2. calculates 16x and stores it in eax (`eax = 16 * x`) +3. calculates 14x by subtracting the two results (`eax - ecx = 16x - 2x = 14x`) + +## Case 09: `x * 16` + +```bash +$ nvim mul.c +``` + +```c +int mul(int x) { + return x * 16; +} +``` + +```bash +$ rm -f (path filter *.o); clang -O2 -c mul.c; llvm-objdump -d --x86-asm-syntax=att mul.o +``` + +```text +mul.o: file format elf64-x86-64 + +Disassembly of section .text: + +0000000000000000 : + 0: 89 f8 movl %edi, %eax + 2: c1 e0 04 shll $0x4, %eax + 5: c3 retq +``` + +According to the x86-64 calling convention, the result must be returned in `%eax`. +The compiler cannot simply generate a single `shll $0x4, %edi` instruction. +It needs to generate an extra instruction to move the input value from `%edi` to `%eax` before the shift operation. 
+ +## Case 10: `x * 17` + +```bash +$ nvim mul.c +``` + +```c +int mul(int x) { + return x * 17; +} +``` + +```bash +$ rm -f (path filter *.o); clang -O2 -c mul.c; llvm-objdump -d --x86-asm-syntax=att mul.o +``` + +```text +mul.o: file format elf64-x86-64 + +Disassembly of section .text: + +0000000000000000 : + 0: 89 f8 movl %edi, %eax + 2: c1 e0 04 shll $0x4, %eax + 5: 01 f8 addl %edi, %eax + 7: c3 retq +``` + +Similar to case 09, we first need to move the input value from `%edi` to `%eax` to satisfy the calling convention. +Then we get `16 * x`. Finally, we get `x + 16x`. 
+ +## YouTube Comment Insights + +Since YouTube does not currently support generating direct permanent links to individual comments, +I have reproduced the relevant technical insight below in its entirety to ensure both accuracy and proper attribution. + +```text +@moregirl4585 +Fact: on some architecture x*-3 is better expressed as x-(x<<2) and some better as -(x+x+x). Seems compilers don't work well for both case + +@SLiV9 +Do you know why for multiplying by 6, it uses an ADD for the second x2? Why not another LEA like for multiplying by 2? +| +|--> @nurmr +| My suspicion is that address adder is "cheaper" to use than an ALU. +| Especially with more complex code which might have other operations pipelined and running on an ALU. +| +|--> @HenryLoenwind + lea is faster than mov+add, but not faster than a "naked" add. + And even if the cpu cycles are the same in the end, + add reg,reg is a 2-byte instruction (like the xor we had on day 1). + +@lpprogrammingllc +It's worth noting the shifts-and-adds version might still be faster on modern CPUs due to instruction pipelining. +Yes, the total work done is more than a single imul, but it can do each part in parallel, until the final add. +However, it also requires more code, and more "slots" in the decoding/execution pipeline. +Lots of modern machines have a 4-wide instruction frontend. +So with the imul, one of them handles the imul, and 3 keep going on any other calculations they can. +With the shifts-and-adds version, 3 get held up waiting on the shifts, to then issue the adds. +Only one gets to work on anything else. +The compiler assumes there will usually be other work to do, +so aims for maximum throughput rather than prioritizing finishing the mult as soon as possible. + +@lpprogrammingllc +The CPU execution backend may well execute shifts and adds. 
However between your code in memory and that backend is the instruction decoder, +which uses the microcode to turn your single imul into whatever real micro instructions are required for the execution backend. + +If I understand the implicit part of your comment correctly, +you think there is no value in giving the instruction decoder the split-apart shift-and-add instructions because what the backend executes may be the same in either case. +This is incorrect. The cyclic latency (the number of cycles from beginning decode to commit) in the imul instruction is the number of cycles required for the instruction decoder +to issue microcode to the backend and for the backend to execute that microcode. Often the limiting factor is the frontend decode speed. +If you can decrease the number of cycles spent by the frontend doing the decode, you can decrease the total cycles required, +at the expense of more power use or less other work happening at the same time. +``` + +## References +[1] https://cs61.seas.harvard.edu/site/2018/Asm1/ \ No newline at end of file diff --git a/public/posts/2026/2026-02-28-Advent-of-Compiler-Optimisations-Study-Notes-05.md b/public/posts/2026/2026-02-28-Advent-of-Compiler-Optimisations-Study-Notes-05.md new file mode 100644 index 0000000..53a3ecb --- /dev/null +++ b/public/posts/2026/2026-02-28-Advent-of-Compiler-Optimisations-Study-Notes-05.md @@ -0,0 +1,292 @@ +## Study Notes: ARM's barrel shifter tricks, Advent of Compiler Optimisations 2025 + +These notes are based on the post [**ARM's barrel shifter tricks**](https://xania.org/202512/05-barrel-shifting-with-arm) and the YouTube video [**[AoCO 5/25] Multiplying with a Constant**](https://www.youtube.com/watch?v=TZubUyr2UEY&list=PL2HVqYf7If8cY4wLk7JUQ2f0JXY_xMQm2&index=6) which are Day 5 of the [Advent of Compiler Optimisations 2025](https://xania.org/AoCO2025-archive) Series by [Matt Godbolt](https://xania.org/MattGodbolt). 
+ +My notes focus on reproducing and verifying [Matt Godbolt](https://xania.org/MattGodbolt)'s teaching within a local development environment using `LLVM` toolchain on `Ubuntu`. + +Selected technical insights from the YouTube comment section are reproduced at the end of these notes to provide additional context. + +Written by me and assisted by AI, proofread by me and assisted by AI. + +#### Development Environment +```bash +$ lsb_release -d +Description: Ubuntu 24.04.3 LTS + +$ clang -v +Ubuntu clang version 18.1.8 + +$ llvm-objdump -v +Ubuntu LLVM version 18.1.8 + +$ nvim --version +NVIM v0.11.5 + +$ echo $SHELL +/usr/bin/fish +``` + +## Introduction + +Following the Day 05 technical materials, I performed sequential tests for constant + +multiplication ranging from x multiplied by two to x multiplied by twenty on the AArch64 target. + +After evaluating the assembly output, + +I identified six distinct compiler optimization strategies that I would like to share with you. + +## Case 01 : `x * 2` + +```bash +$ nvim mul.c +``` + +```c +int mul(int x) { + return x * 2; +} +``` + +```bash +$ rm -f (path filter *.o); clang -O2 -target aarch64-linux-gnu -c mul.c; llvm-objdump -d mul.o +``` + +```text +mul.o: file format elf64-littleaarch64 + +Disassembly of section .text: + +0000000000000000 : + 0: 531f7800 lsl w0, w0, #1 + 4: d65f03c0 ret +``` + +ARM Instruction: `lsl , , #` + +The compiler utilizes a Logical Shift Left (`lsl`) to perform multiplication by powers of two. +Here, w0 is the destination (`Rd`), the original `w0` is the source (`Rn`), and `#1` is the immediate shift value. +Shifting a register left by 1 bit is equivalent to multiplying by 2. 
+ +## Case 02: `x * 3` + +```bash +$ nvim mul.c +``` + +```c +int mul(int x) { + return x * 3; +} +``` + +```bash +$ rm -f (path filter *.o); clang -O2 -target aarch64-linux-gnu -c mul.c; llvm-objdump -d mul.o +``` + +```text +mul.o: file format elf64-littleaarch64 + +Disassembly of section .text: + +0000000000000000 : + 0: 0b000400 add w0, w0, w0, lsl #1 + 4: d65f03c0 ret +``` + +ARM Instruction: `add , , , lsl #` + +AArch64 supports shifted-register operands within arithmetic instructions. +This add instruction performs a left shift of 1 bit on the second source register (`Rm`) before addition. +The operation represents the formula `w0 = w0 + (w0 << 1)`, which computes `x = x + x * 2`. + +## Case 03 : `x * 6` + +```bash +$ nvim mul.c +``` + +```c +int mul(int x) { + return x * 6; +} +``` + +```bash +$ rm -f (path filter *.o); clang -O2 -target aarch64-linux-gnu -c mul.c; llvm-objdump -d mul.o +``` + +```text +mul.o: file format elf64-littleaarch64 + +Disassembly of section .text: + +0000000000000000 : + 0: 0b000408 add w8, w0, w0, lsl #1 + 4: 531f7900 lsl w0, w8, #1 + 8: d65f03c0 ret +``` + +ARM Instructions: +- `add , , , lsl #` +- `lsl , , #` + +The multiplication of 6x is decomposed into two discrete stages. +First, the compiler calculates `w8 = w0 + (w0 << 1) = w0 + 2 * w0 = 3 * w0`. +Second, it calculates `w0 = (w8 << 1) = 2 * w8 = 2 * (3 * w0) = 6 * w0` + +## Case 04 : `x * 7` + +```bash +$ nvim mul.c +``` + +```c +int mul(int x) { + return x * 7; +} +``` + +```bash +$ rm -f (path filter *.o); clang -O2 -target aarch64-linux-gnu -c mul.c; llvm-objdump -d mul.o +``` + +```text +mul.o: file format elf64-littleaarch64 + +Disassembly of section .text: + +0000000000000000 : + 0: 531d7008 lsl w8, w0, #3 + 4: 4b000100 sub w0, w8, w0 + 8: d65f03c0 ret +``` + +ARM Instructions: +- `lsl , , #` +- `sub , , ` + +The compiler implements a shift-and-subtract strategy for constants near powers of two. 
+To compute `7x`, it first executes `w8 = w0 << 3 = 8 * w0`. +It then performs `w0 = w8 - w0 = 8 * w0 - w0 = 7 * w0`. + +## Case 05 : `x * 11` + +```bash +$ nvim mul.c +``` + +```c +int mul(int x) { + return x * 11; +} +``` + +```bash +$ rm -f (path filter *.o); clang -O2 -target aarch64-linux-gnu -c mul.c; llvm-objdump -d mul.o +``` + +```text +mul.o: file format elf64-littleaarch64 + +Disassembly of section .text: + +0000000000000000 <mul>: + 0: 52800168 mov w8, #0xb // =11 + 4: 1b087c00 mul w0, w0, w8 + 8: d65f03c0 ret +``` + +ARM Instructions: +- `mov <Rd>, #<imm>` +- `mul <Rd>, <Rn>, <Rm>` + +The compiler falls back to the `mul` instruction here: clang 18.1.8 does not select a shift-and-add decomposition for the constant `11`. + +If the compiler were to use a shift-and-subtract decomposition such as the one below, +the code generator would need to output three instructions: +```text +add w8, w0, w0, lsl #1 // w8 = x + 2x = 3x +lsl w8, w8, #2 // w8 = w8 << 2 = 3x << 2 = 3x * 4 = 12x +sub w0, w8, w0 // w0 = 12x - x = 11x +``` +This sequence requires 3 instructions. By using `mov` followed by `mul`, +the compiler achieves the same result in only 2 instructions. + +## Case 06 : `x * 14` + +```bash +$ nvim mul.c +``` + +```c +int mul(int x) { + return x * 14; +} +``` + +```bash +$ rm -f (path filter *.o); clang -O2 -target aarch64-linux-gnu -c mul.c; llvm-objdump -d mul.o +``` + +```text +mul.o: file format elf64-littleaarch64 + +Disassembly of section .text: + +0000000000000000 <mul>: + 0: 531c6c08 lsl w8, w0, #4 + 4: 4b000500 sub w0, w8, w0, lsl #1 + 8: d65f03c0 ret +``` + +ARM Instructions: +- `lsl <Rd>, <Rn>, #<imm>` +- `sub <Rd>, <Rn>, <Rm>, lsl #<imm>` + +The computation of `14x` demonstrates the flexibility of the `sub` instruction with shifted operands. +The compiler first calculates `w8 = w0 << 4 = 16 * w0`. +Subsequently, it performs `w0 = w8 - (w0 << 1) = w8 - w0 * 2 = 16 * w0 - 2 * w0 = 14 * w0`.
+ +## YouTube Comment Insights + +Since YouTube does not currently support generating direct permanent links to individual comments, +I have reproduced the relevant technical insight below in its entirety to ensure both accuracy and proper attribution. + +```text +@kruador +@ciberman Yes, it means 'ARMv8'. That's not quite right because ARM Ltd enhanced the 32-bit instruction set (which they now call A32) +as well as adding the 64-bit instruction set (A64) in version 8. + +They also refer to 'AArch32' and 'AArch64' for extra confusion. I think 'AArch32' means 'the architectural state of a 32-bit ARM processor' +because you can use the alternative "Thumb" instruction set (which ARM Ltd renamed to T32 with ARMv8, in their documentation at least) instead of A32. +The embedded ARM Cortex-M only support T32, not A32. + +There is no equivalent of Thumb for AArch64 (no 'T64'), at least not as of yet (probably not ever), so 'AArch64' and 'A64' are virtually interchangeable. +And most people just say 'arm64' because 'AArch64' is unpronounceable while 'A64' is too ambiguous. + +@tlhIngan +ARMv8 was designed to be more streamlined for modern superscalar architectures so it jettisoned a lot of ARM stuff that was responsible +for causing pipeline stalls and dependencies in favor of simpler instructions that can run faster. +When AArch64 was being introduced I remember seeing the ARM presentations on why the instruction set dumped a lot of it. +It's why an ARMv8 core only beats an ARMv7 core by about 10% in AArch32 mode but running the same code in AArch64 mode you can achieve a 50+% speedup. +Losing RSB for a two instruction LSB/SUB combination was deemed far superior in simplifying ALU operations. + +@kruador +I think RSB was only really useful for this kind of operation. If you're not doing a shift on one of the operands, you can just swap which register is which. 
+But the 32-bit ARM architecture only supports the shift on operand 2, +so you have to have an instruction that does say Rdest := operand2 - Rn instead of Rdest := Rm - operand2. + +ARM1 didn't even have a multiply instruction. Adds, shifts and subtracts were the only options out there. +No room for a multiplier in only 25,000 transistors! So RSB was really helpful there. However, these days there an abundance of transistors available: +even the lowly ARM Cortex-M0 (a 32-bit ARMv6 architecture core that only supports the Thumb instruction set, and not all of that) can be configured with a single-cycle multiplier. + +The main issue wasn't simplifying the ALU operations, I don't think, but simply releasing bits to be able to encode more different operations and more registers. +AArch64 needs three bits more per instruction for register mapping - one for the destination register and one for each source - because it has twice as many registers as AArch32 (32 vs 16). +``` + +## References +1. https://developer.arm.com/documentation/dui0473/m/overview-of-the-arm-architecture/access-to-the-inline-barrel-shifter +2. https://www.davespace.co.uk/arm/introduction-to-arm/barrel-shifter.html +3. https://www.d.umn.edu/~gshute/logic/barrel-shifter.html +4. 
https://community.element14.com/technologies/fpga-group/b/blog/posts/systemverilog-study-notes-barrel-shifter-rtl-combinational-circuit diff --git a/public/posts/2026/2026-02-28-Advent-of-Compiler-Optimisations-Study-Notes-06.md b/public/posts/2026/2026-02-28-Advent-of-Compiler-Optimisations-Study-Notes-06.md new file mode 100644 index 0000000..448e083 --- /dev/null +++ b/public/posts/2026/2026-02-28-Advent-of-Compiler-Optimisations-Study-Notes-06.md @@ -0,0 +1,247 @@ +## Study Notes: Division, Advent of Compiler Optimisations 2025 + +These notes are based on the post [**Division**](https://xania.org/202512/06-dividing-to-conquer) and the YouTube video [**[AoCO 6/25] Integer Division**](https://www.youtube.com/watch?v=7Rtk0qOX9zs&list=PL2HVqYf7If8cY4wLk7JUQ2f0JXY_xMQm2&index=7) which are Day 6 of the [Advent of Compiler Optimisations 2025](https://xania.org/AoCO2025-archive) Series by [Matt Godbolt](https://xania.org/MattGodbolt). + +My notes focus on reproducing and verifying [Matt Godbolt](https://xania.org/MattGodbolt)'s teaching within a local development environment using `LLVM` toolchain on `Ubuntu`. + +Written by me and assisted by AI, proofread by me and assisted by AI. + +#### Development Environment +```bash +$ lsb_release -d +Description: Ubuntu 24.04.3 LTS + +$ clang -v +Ubuntu clang version 18.1.8 + +$ llvm-objdump -v +Ubuntu LLVM version 18.1.8 + +$ nvim --version +NVIM v0.11.5 + +$ echo $SHELL +/usr/bin/fish +``` + +## x86 Signed Integer Division + +```bash +$ nvim div.c +``` + +```c +int div(int x) { + return x / 512; +} +``` + +```bash +$ rm -f (path filter *.o); clang -O2 -c div.c; llvm-objdump -d --x86-asm-syntax=att div.o +``` + +```text +div.o: file format elf64-x86-64 + +Disassembly of section .text: + +0000000000000000
: + 0: 8d 87 ff 01 00 00 leal 0x1ff(%rdi), %eax + 6: 85 ff testl %edi, %edi + 8: 0f 49 c7 cmovnsl %edi, %eax + b: c1 f8 09 sarl $0x9, %eax + e: c3 retq +``` + +Instructions: +```text +- leal <offset>(<base>), <dest> ; <dest> = offset + base +- cmovnsl <src>, <dest> ; cmov = conditional move + ; ns = Not Sign (sign flag SF = 0) + ; cmovns = Conditional Move if Not Sign +- sarl <imm>, <dest> ; sar = Shift Arithmetic Right +``` + +In C, signed integer division truncates toward zero. For example: +```c +#include <stdio.h> + +int main(void) { + printf("%d %d\n", 1 / 512, -1 / 512); + return 0; +} +``` + +```bash +$ clang -o app.out main.c +$ ./app.out +0 0 +``` + +But replacing division with an arithmetic right shift does not produce the same result for negative numbers. + +```c +#include <stdio.h> + +int main(void) { + printf("%d %d\n", 1 >> 9, -1 >> 9); + return 0; +} +``` + +```bash +$ clang -o app.out main.c +$ ./app.out +0 -1 +``` + +To resolve this problem, the compiler adds `2^n - 1` to the negative number. +In this case, n = 9, hence it is `2^9 - 1 = 512 - 1 = 511 = 0x1FF`. + +Why `2^n - 1`? Let's consider `n = 9`. + +In binary, `2^9 - 1` is exactly nine ones: + +```text + 2^9 | 1 0 0 0 0 0 0 0 0 0 +- 1 | 0 0 0 0 0 0 0 0 0 1 +--------------------------- + 0 1 1 1 1 1 1 1 1 1 +``` + +Adding it to a negative `x` biases the value so that the shift rounds toward zero instead of toward negative infinity. For example, `x = -1`: +```text +Position | 32 (Sign Bit) 10 1 + | v v v + Carry | 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 + -1 | 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 (0xFFFFFFFF) + + 511 | 0 0 0 0 0 ... 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 (0x000001FF) + ----- |----------------------------------------------- + 510 | 0 0 0 0 0 ... 0 0 0 0 0 1 1 1 1 1 1 1 1 1 0 (0x000001FE) +``` + +Hence, the compiler does `(-1 + 511) / 512 = 510 / 512 = 510 >> 9 = 0`, and we get the correct result. + +In summary, the compiler uses the `test` and `cmovns` instructions to detect whether `x` is non-negative. +If `x` is non-negative, it is shifted as is. Otherwise, the bias `2^n - 1` (a mask of `n` ones) is added before the shift.
+Then the arithmetic right shift gives the same result as the division operator. + +## x86 Unsigned Integer Division +```c +unsigned div(unsigned x) { + return x / 512; +} +``` + +```bash +$ rm -f (path filter *.o); clang -O2 -c div.c; llvm-objdump -d --x86-asm-syntax=att div.o +``` + +```text +div.o: file format elf64-x86-64 + +Disassembly of section .text: + +0000000000000000
: + 0: 89 f8 movl %edi, %eax + 2: c1 e8 09 shrl $0x9, %eax + 5: c3 retq +``` + +Instruction: +```text +shrl <imm>, <dest> ; shr := Shift Right Logical, that is <dest> = <dest> >> <imm> +``` + +This case is easier than the previous one. It only requires knowing what `shr` is. + +You may also want to know the difference between `shr` and `sar`. + +Here is an example, using 4-bit values for brevity. + +| Original Dec | Original Binary | Operation | Result Binary | +| :----------- | :-------------- | :-------- | :------------ | +| 3 | 0b0011 | `shrl $2` | 0b0000 | +| 3 | 0b0011 | `sarl $2` | 0b0000 | +| -3 | 0b1101 | `shrl $2` | 0b0011 | +| -3 | 0b1101 | `sarl $2` | 0b1111 | + +## Arm Signed Division (AArch64) +```bash +$ nvim div.c +``` + +```c +int div(int x) { + return x / 512; +} +``` + +```bash +$ rm -f (path filter *.o); clang -O2 -target aarch64-linux-gnu -c div.c; llvm-objdump -d div.o +``` + +```text +div.o: file format elf64-littleaarch64 + +Disassembly of section .text: + +0000000000000000
: + 0: 1107fc08 add w8, w0, #0x1ff + 4: 7100001f cmp w0, #0x0 + 8: 1a80b108 csel w8, w8, w0, lt + c: 13097d00 asr w0, w8, #9 + 10: d65f03c0 ret +``` + +The reasoning is the same as in the x86 signed case: `0x1FF` (`2^9 - 1`) is added to negative inputs so that the shift truncates toward zero. + +Instructions: +```text +- add <Rd>, <Rn>, #imm ; w8 = w0 + 0x1ff +- cmp <Rn>, #imm ; Compares w0 with #0x0, and updates the processor flags NZCV +- csel <Rd>, <Rn>, <Rm>, <cond> ; Conditional Select. + ; If the condition lt (Less Than) is true, it selects w8; + ; otherwise, it selects w0. +- asr <Rd>, <Rn>, #imm ; Arithmetic Shift Right, w0 = w8 >> 9 +``` + +| Flag | Name | Bit | Description (when set to 1) | +| :--- | :--- | :---| :--- | +| **N** | Negative | 31 | The result of the operation was negative (MSB = 1). | +| **Z** | Zero | 30 | The result of the operation was exactly zero. | +| **C** | Carry | 29 | An unsigned overflow occurred in an addition, or no borrow occurred in a subtraction. | +| **V** | oVerflow | 28 | A signed overflow occurred (result exceeded signed range). | + +## Arm Unsigned Division (AArch64) +```bash +$ nvim div.c +``` + +```c +unsigned div(unsigned x) { + return x / 512; +} +``` + +```bash +$ rm -f (path filter *.o); clang -O2 -target aarch64-linux-gnu -c div.c; llvm-objdump -d div.o +``` + +```text +div.o: file format elf64-littleaarch64 + +Disassembly of section .text: + +0000000000000000
: + 0: 53097c00 lsr w0, w0, #9 + 4: d65f03c0 ret +``` + +Instruction: +```text +lsr , , #imm ; Logical Shift Right, that is w0 = w0 >> 9. +``` + +## References +- https://developer.arm.com/documentation/ddi0601/latest/AArch64-Registers/NZCV--Condition-Flags diff --git a/public/posts/2026/2026-03-05-Test.md b/public/posts/2026/2026-03-05-Test.md deleted file mode 100644 index 49b808c..0000000 --- a/public/posts/2026/2026-03-05-Test.md +++ /dev/null @@ -1,16 +0,0 @@ -# Hello React 19 - -This is a post read from `public/posts/2026/Test.md`. - -## Testing Functionality - -* **Automatic Path**: Accessed via `/2026/Test.html` - -* **GA Tracing**: `Analytics.jsx` is enabled - -* **Environment**: Driven by **Bun** - -### Code Example -```javascript -console.log("Hello from 2026!"); -``` \ No newline at end of file diff --git a/src/App.css b/src/App.css index e749d87..a2e5e12 100644 --- a/src/App.css +++ b/src/App.css @@ -1,12 +1,84 @@ -#root { +body { + margin: 0; + background-color: #1a1a1a; + color: #e0e0e0; + font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif; + -webkit-font-smoothing: antialiased; +} + +.app-shell { + padding: 40px; max-width: 800px; margin: 0 auto; - padding: 2rem; - text-align: center; - line-height: 1.6; + line-height: 1.7; +} + +a { + color: #1890ff; + text-decoration: none; + transition: color 0.2s; } -.main-title { - text-align: center; - margin-top: 100px; -} \ No newline at end of file +a:hover { + text-decoration: underline; + color: #40a9ff; +} + +.markdown-body { + font-size: 1.1rem; +} + +.markdown-body h1, .markdown-body h2, .markdown-body h3 { + color: #ffffff; + margin-top: 1.5em; + margin-bottom: 0.5em; +} + +.markdown-body hr { + margin: 40px 0; + border: 0; + border-top: 1px solid #333; +} + +.markdown-body table { + width: 100%; + border-collapse: collapse; + margin: 20px 0; + font-size: 0.95rem; +} + +.markdown-body th, .markdown-body td { + border: 1px solid #333; + padding: 12px; + 
text-align: left; +} + +.markdown-body th { + background-color: #252525; + color: #ffffff; +} + +.markdown-body tr:nth-child(even) { + background-color: #212121; +} + +.markdown-body code { + font-family: 'Fira Code', 'Cascadia Code', monospace; + background-color: #2d2d2d; + padding: 0.2em 0.4em; + border-radius: 4px; + font-size: 85%; + color: #ff7875; +} + +.markdown-body pre { + background-color: transparent !important; + border: none !important; + padding: 0 !important; +} + +.markdown-body pre code { + background-color: transparent; + padding: 0; + color: #e0e0e0; +} diff --git a/src/App.jsx b/src/App.jsx index 676336b..4317260 100644 --- a/src/App.jsx +++ b/src/App.jsx @@ -1,17 +1,22 @@ import { useState, useEffect } from 'react'; import ReactMarkdown from 'react-markdown'; +import remarkGfm from 'remark-gfm'; +import { Prism as SyntaxHighlighter } from 'react-syntax-highlighter'; +import { vscDarkPlus } from 'react-syntax-highlighter/dist/esm/styles/prism'; import Analytics from './Analytics'; import NotFound from './NotFound'; import Home from './Home'; +import './App.css'; export default function App() { const [content, setContent] = useState(''); - const [posts, setPosts] = useState([]); - const [status, setStatus] = useState('loading'); + const [posts , setPosts] = useState([]); + const [status , setStatus] = useState('loading'); useEffect(() => { - const params = new URLSearchParams(window.location.search); + const params = new URLSearchParams(window.location.search); const redirectedPath = params.get('p'); + let currentPath = redirectedPath || window.location.pathname; if (redirectedPath) { @@ -24,7 +29,7 @@ export default function App() { setPosts(data); const pathClean = currentPath.replace(/\.html$/, ''); - const parts = pathClean.split('/').filter(Boolean); + const parts = pathClean.split('/').filter(Boolean); if (parts.length === 0 || (parts.length === 1 && parts[0] === 'index')) { setStatus('home'); @@ -35,10 +40,10 @@ export default function 
App() { const [year, month, day, slug] = parts; const found = data.find(p => - p.year === year && + p.year === year && p.month === month && - p.day === day && - p.slug === slug + p.day === day && + p.slug === slug ); if (found) { @@ -59,24 +64,49 @@ export default function App() { .catch(() => setStatus('404')); }, []); - if (status === 'loading') return
Loading...
; + if (status === 'loading') { + return
Loading...
; + } return ( <> -
+
{status === '404' ? ( ) : status === 'home' ? ( ) : ( -
); -} \ No newline at end of file +} diff --git a/src/Home.css b/src/Home.css new file mode 100644 index 0000000..c298db0 --- /dev/null +++ b/src/Home.css @@ -0,0 +1,59 @@ +.home-container { + max-width: 800px; + margin: 0 auto; + padding: 20px; +} + +.home-title { + font-size: 2rem; + margin-bottom: 30px; + color: #ffffff; +} + +.post-list { + list-style: none; + padding: 0; +} + +.post-item { + margin-bottom: 12px; + font-size: 1.1rem; +} + +.post-date { + font-family: 'SFMono-Regular', Consolas, 'Liberation Mono', Menlo, monospace; + color: #999; + margin-right: 10px; +} + +.post-link { + color: #1890ff; + text-decoration: none; + transition: color 0.2s, text-decoration 0.2s; +} + +.post-link:hover { + color: #40a9ff; + text-decoration: underline; +} + +.pagination { + display: flex; + gap: 20px; + margin-top: 40px; +} + +.pagination-btn { + background: none; + border: none; + color: #1890ff; + cursor: pointer; + font-size: 1rem; + padding: 0; + transition: color 0.2s; +} + +.pagination-btn:hover { + color: #40a9ff; + text-decoration: underline; +} \ No newline at end of file diff --git a/src/Home.jsx b/src/Home.jsx index 29fc441..05fb30d 100644 --- a/src/Home.jsx +++ b/src/Home.jsx @@ -1,35 +1,52 @@ import { useState } from 'react'; +import './Home.css'; const POSTS_PER_PAGE = 10; export default function Home({ posts }) { const [currentPage, setCurrentPage] = useState(0); - const startIndex = currentPage * POSTS_PER_PAGE; + const startIndex = currentPage * POSTS_PER_PAGE; const currentPosts = posts.slice(startIndex, startIndex + POSTS_PER_PAGE); - const hasNext = startIndex + POSTS_PER_PAGE < posts.length; - const hasPrev = currentPage > 0; + const hasNext = startIndex + POSTS_PER_PAGE < posts.length; + const hasPrev = currentPage > 0; return (
-

Recent Posts

+

Recent Posts

+ -
diff --git a/src/NotFound.css b/src/NotFound.css new file mode 100644 index 0000000..dbab7b4 --- /dev/null +++ b/src/NotFound.css @@ -0,0 +1,31 @@ +.not-found-container { + text-align: center; + padding: 10vh 20px; +} + +.not-found-code { + font-size: 3rem; + color: #ff4d4f; + margin-bottom: 0.5em; +} + +.not-found-title { + color: #ffffff; + margin-bottom: 1em; +} + +.not-found-text { + color: #999; + margin-bottom: 2em; +} + +.not-found-link { + color: #1890ff; + text-decoration: none; + transition: color 0.2s; +} + +.not-found-link:hover { + color: #40a9ff; + text-decoration: underline; +} \ No newline at end of file diff --git a/src/NotFound.jsx b/src/NotFound.jsx index b9b7ac5..a569155 100644 --- a/src/NotFound.jsx +++ b/src/NotFound.jsx @@ -1,17 +1,20 @@ import Analytics from './Analytics'; +import './NotFound.css'; export default function NotFound() { return ( <> -
-

404

-

Page Not Found

-

Sorry, the article or page you are looking for seems to have moved or no longer exists.

- + ); -} \ No newline at end of file +}