@@ -18,10 +18,14 @@ def main():
1818 date_str = datetime .now ().strftime ('%Y-%m-%d' )
1919 sitemap_path = os .path .join (BASE_DIR , 'sitemap.xml' )
2020
21- # HERALD Standard: High-compatibility XML with strict UTF-8
21+ # HERALD Standard: Ultra-Compatible Schema for Google 2026
2222 xml_header = '<?xml version="1.0" encoding="UTF-8"?>\n '
23- # Optional XML stylesheet can help in some cases, but for GSC, the structure is most critical
24- xml_urlset_open = '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n '
23+ xml_urlset_open = (
24+ '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" '
25+ 'xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" '
26+ 'xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 '
27+ 'http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">\n '
28+ )
2529 xml_content = xml_header + xml_urlset_open
2630
2731 html_files = sorted (get_html_files ())
@@ -31,19 +35,10 @@ def main():
3135
3236 for f in html_files :
3337 rel_path = os .path .relpath (f , BASE_DIR ).replace ('\\ ' , '/' )
34-
35- # Skip utility and low-value pages
3638 if rel_path in ['404.html' , 'thank-you.html' , 'index.html' ] or 'google' in rel_path :
3739 continue
3840
39- # GSD Strategy: Strict canonical URLs
40- # Remove .html extension for cleaner indexing if server supports it,
41- # but for GH Pages, we keep the .html but ensure consistency.
42- # We also want to assign priority based on depth.
43-
4441 url = f"{ BASE_URL } /{ rel_path } "
45-
46- # Priority Logic: Higher priority for core clusters
4742 priority = "0.8"
4843 if "posts/" in rel_path :
4944 priority = "0.7"
@@ -54,11 +49,15 @@ def main():
5449
5550 xml_content += '</urlset>'
5651
57- # Write with explicit UTF-8 and ensure NO BOM to prevent GSC parsing errors
5852 with open (sitemap_path , 'wb' ) as f :
5953 f .write (xml_content .encode ('utf-8' ))
6054
61- print (f"GSD Optimized sitemap generated at { sitemap_path } " )
55+ # Also ensure .nojekyll exists
56+ nojekyll_path = os .path .join (BASE_DIR , '.nojekyll' )
57+ with open (nojekyll_path , 'w' ) as f :
58+ pass
59+
60+ print (f"GSD Advanced Sitemap and .nojekyll refreshed." )
6261
6362if __name__ == "__main__" :
6463 main ()
0 commit comments