diff --git a/Makefile b/Makefile index 6788c8850..0ae1d38e6 100644 --- a/Makefile +++ b/Makefile @@ -16,7 +16,7 @@ test-all: uv-sync @uv run pytest -W error packages/ test: uv-sync - @uv run pytest -W error packages/ -x + @uv run pytest -W error packages/ -x -q --tb=short coverage: uv-sync @uv run pytest packages/ --cov overture.schema --cov-report=term --cov-report=html && open htmlcov/index.html diff --git a/packages/overture-schema-addresses-theme/pyproject.toml b/packages/overture-schema-addresses-theme/pyproject.toml index 485f21a8b..ad840f40d 100644 --- a/packages/overture-schema-addresses-theme/pyproject.toml +++ b/packages/overture-schema-addresses-theme/pyproject.toml @@ -38,3 +38,36 @@ testpaths = ["tests"] [project.entry-points."overture.models"] "overture:addresses:address" = "overture.schema.addresses:Address" + +[[examples.Address]] +id = "416ab01c-d836-4c4f-aedc-2f30941ce94d" +geometry = "POINT (-176.5637854 -43.9471955)" +country = "NZ" +postcode = "null" +street = "Tikitiki Hill Road" +number = "54" +unit = "null" +postal_city = "null" +version = 1 +theme = "addresses" +type = "address" + +[examples.Address.bbox] +xmin = -176.56381225585938 +xmax = -176.56378173828125 +ymin = -43.94719696044922 +ymax = -43.94718933105469 + +[[examples.Address.address_levels]] +value = "Chatham Islands" + +[[examples.Address.address_levels]] +value = "Chatham Island" + +[[examples.Address.sources]] +property = "" +dataset = "OpenAddresses/LINZ" +record_id = "null" +update_time = "null" +confidence = "null" +between = "null" diff --git a/packages/overture-schema-base-theme/pyproject.toml b/packages/overture-schema-base-theme/pyproject.toml index 09a46f613..dedd9e0db 100644 --- a/packages/overture-schema-base-theme/pyproject.toml +++ b/packages/overture-schema-base-theme/pyproject.toml @@ -41,3 +41,210 @@ packages = ["src/overture"] "overture:base:land_cover" = "overture.schema.base:LandCover" "overture:base:land_use" = "overture.schema.base:LandUse" 
"overture:base:water" = "overture.schema.base:Water" + +[[examples.Bathymetry]] +id = "5d40bd6c-db14-5492-b29f-5e25a59032bc" +geometry = "MULTIPOLYGON (((-170.71296928 -76.744313428, -170.719841483 -76.757076376, -170.731061124 -76.761566192, -170.775652756 -76.76338726, -170.853616381 -76.76253958, -170.918562293 -76.755380155, -170.970490492 -76.741908984, -170.998699301 -76.729180777, -171.003188718 -76.717195533, -170.990421551 -76.703765214, -170.960397802 -76.68888982, -170.940748072 -76.674697941, -170.931472364 -76.661189576, -170.927114414 -76.637296658, -170.927674224 -76.603019188, -170.939335393 -76.574637428, -170.962097922 -76.552151379, -170.999015387 -76.535715361, -171.050087788 -76.525329373, -171.079133298 -76.50751024, -171.086151917 -76.482257963, -171.098653755 -76.462747286, -171.11663881 -76.448978211, -171.146691397 -76.437601179, -171.188811514 -76.428616191, -171.296181785 -76.4228609, -171.468802209 -76.420335306, -171.566055241 -76.41501101, -171.587940879 -76.406888013, -171.59004284 -76.387987744, -171.572361122 -76.358310204, -171.549343725 -76.334488281, -171.520990649 -76.316521976, -171.453759127 -76.301763636, -171.347649159 -76.290213262, -171.30597166 -76.267707269, -171.328726628 -76.234245658, -171.36676019 -76.195627518, -171.420072345 -76.151852851, -171.444766298 -76.12494912, -171.44084205 -76.114916326, -171.378107286 -76.099627787, -171.256562007 -76.079083503, -171.228218647 -76.058825682, -171.293077208 -76.038854322, -171.421365419 -76.023534207, -171.613083278 -76.012865337, -171.76411833 -75.99938969, -171.874470572 -75.983107266, -172.121928361 -75.958403596, -172.506491695 -75.925278679, -172.744527804 -75.899736153, -172.836036689 -75.88177602, -172.904681746 -75.862406785, -172.950462974 -75.841628448, -173.000855857 -75.830396498, -173.055860393 -75.828710933, -173.177561398 -75.810743709, -173.365958872 -75.776494827, -173.493573084 -75.759370386, -173.560404033 -75.759370386, -173.620925776 -75.77158365, 
-173.675138312 -75.796010178, -173.733786206 -75.808642966, -173.796869456 -75.809482015, -173.847216433 -75.805553449, -173.884827135 -75.79685727, -173.90475244 -75.789177124, -173.906992347 -75.782513013, -173.881736947 -75.76894365, -173.828986239 -75.748469035, -173.797974615 -75.732298475, -173.788702075 -75.72043197, -173.82491541 -75.701013882, -173.90661462 -75.674044211, -173.977087913 -75.656066882, -174.03633529 -75.647081894, -174.150190099 -75.643010485, -174.31865234 -75.643852655, -174.444433211 -75.652836726, -174.527532713 -75.669962696, -174.581709229 -75.687086831, -174.606962758 -75.704209131, -174.631095834 -75.708279163, -174.654108458 -75.699296928, -174.688637451 -75.699296928, -174.734682816 -75.708279163, -174.797846917 -75.708699866, -174.878129754 -75.700559037, -174.939903816 -75.70870181, -174.9831691 -75.733128185, -175.025841122 -75.746602837, -175.06791988 -75.749125768, -175.09922327 -75.755318987, -175.119751293 -75.765182495, -175.127900229 -75.775197415, -175.123670077 -75.785363749, -175.111718372 -75.791289392, -175.092045112 -75.792974345, -175.049907399 -75.780622976, -174.985305232 -75.754235285, -174.935355308 -75.74552996, -174.900057628 -75.754507001, -174.886060973 -75.766815613, -174.893365345 -75.782455795, -174.907537393 -75.791536245, -174.928577117 -75.794056963, -174.971105378 -75.818213107, -175.035122174 -75.864004677, -175.060941949 -75.892403254, -175.048564703 -75.903408839, -175.020469049 -75.909193043, -174.976654988 -75.909755867, -174.944760829 -75.90482541, -174.924786572 -75.894401673, -174.92111336 -75.881479168, -174.933741192 -75.866057897, -174.900484967 -75.857513625, -174.821344686 -75.855846351, -174.752433709 -75.839289534, -174.693752038 -75.807843172, -174.652894268 -75.780747792, -174.629860399 -75.758003392, -174.571227588 -75.745793709, -174.476995837 -75.744118743, -174.398722205 -75.751841803, -174.336406693 -75.768962888, -174.300477946 -75.783262828, -174.290935964 -75.794741623, 
-174.28812912 -75.812412878, -174.292057414 -75.836276591, -174.289237223 -75.852155302, -174.279668547 -75.860049012, -174.205113931 -75.879998026, -174.065573375 -75.912002343, -173.957779122 -75.924071248, -173.881731171 -75.916204739, -173.846521251 -75.926706189, -173.852149361 -75.955575598, -173.845408416 -75.979439305, -173.826298414 -75.99829731, -173.76424232 -76.018956172, -173.659240133 -76.041415889, -173.560434089 -76.057698465, -173.467824188 -76.067803901, -173.404678836 -76.077625909, -173.370998032 -76.087164489, -173.332530272 -76.106814524, -173.289275555 -76.136576014, -173.231864101 -76.154545405, -173.160295911 -76.1607227, -173.093917454 -76.17278471, -173.032728732 -76.190731436, -173.009710709 -76.205560908, -173.024863387 -76.217273124, -173.048718935 -76.225374126, -173.081277354 -76.229863912, -173.219658797 -76.237442552, -173.463863265 -76.248110046, -173.60352174 -76.25793895, -173.638634223 -76.266929265, -173.658723482 -76.274676093, -173.663789516 -76.281179435, -173.661403366 -76.289363255, -173.651565032 -76.299227554, -173.627282775 -76.313843189, -173.588556596 -76.33321016, -173.575369172 -76.355231445, -173.587720504 -76.379907046, -173.573965869 -76.402499893, -173.53410527 -76.423009985, -173.518376226 -76.437156259, -173.526778738 -76.444938715, -173.559015515 -76.446303683, -173.615086557 -76.441251162, -173.686785609 -76.421600788, -173.774112673 -76.387352563, -173.854573513 -76.372333877, -173.928168128 -76.37654473, -173.968906731 -76.383732772, -173.97678932 -76.393898005, -173.979325549 -76.410884215, -173.976515417 -76.434691403, -174.000646474 -76.454452818, -174.051718722 -76.470168462, -174.08231827 -76.482963711, -174.092445119 -76.492838563, -174.075053216 -76.514344245, -174.030142562 -76.547480757, -174.016669929 -76.575274601, -174.034635317 -76.597725777, -174.037021169 -76.62030279, -174.023827484 -76.64300564, -174.034634583 -76.661942018, -174.069442464 -76.677111923, -174.086843964 -76.690616859, 
-174.086839082 -76.702456825, -174.080513222 -76.712456309, -174.067866385 -76.72061531, -174.036259441 -76.725116584, -173.98569239 -76.725960131, -173.93723318 -76.720486558, -173.89088181 -76.708695864, -173.780274695 -76.695221211, -173.605411835 -76.6800626, -173.487930602 -76.662096294, -173.427830996 -76.641322294, -173.370307559 -76.630935294, -173.315360292 -76.630935294, -173.249406002 -76.637251344, -173.17244469 -76.649883444, -173.110795196 -76.653532162, -173.06445752 -76.648197497, -173.029349452 -76.637355272, -173.005470993 -76.621005486, -173.01753216 -76.605236858, -173.065532955 -76.590049388, -173.096548505 -76.576599032, -173.11057881 -76.564885791, -173.108053605 -76.552301955, -173.08897289 -76.538847523, -173.051362225 -76.527628807, -172.99522161 -76.518645807, -172.891534181 -76.516119525, -172.740299938 -76.52004996, -172.648684331 -76.524540794, -172.61668736 -76.529592027, -172.584268588 -76.541098757, -172.551428016 -76.559060982, -172.533042741 -76.576141146, -172.529112765 -76.592339249, -172.540195073 -76.604524646, -172.566289666 -76.612697339, -172.576243291 -76.621303431, -172.570055947 -76.630342924, -172.555183534 -76.636123529, -172.531626051 -76.638645245, -172.517040304 -76.643518276, -172.511426292 -76.650742621, -172.551848294 -76.672312544, -172.63830631 -76.708228042, -172.701431121 -76.728711408, -172.741222726 -76.733762641, -172.81460886 -76.72534004, -172.921589524 -76.703443605, -173.006960733 -76.697273314, -173.070722487 -76.706829166, -173.101615682 -76.719791531, -173.099640316 -76.736160408, -173.033958817 -76.759064999, -172.904571183 -76.788505304, -172.847033841 -76.810916113, -172.861346791 -76.826297424, -172.924787296 -76.856444925, -173.037355356 -76.901358615, -173.149640378 -76.935043659, -173.26164236 -76.957500057, -173.354942309 -76.968728255, -173.429540223 -76.968728255, -173.487771718 -76.964657535, -173.529636796 -76.956516094, -173.572768938 -76.955559014, -173.617168145 -76.961786296, 
-173.614655836 -76.97446809, -173.565232013 -76.993604396, -173.461502424 -77.006682128, -173.303467069 -77.013701287, -173.163373388 -77.02787859, -173.041221382 -77.049214037, -172.918094542 -77.059179951, -172.793992869 -77.057776334, -172.720418717 -77.044861043, -172.697372088 -77.020434079, -172.675885915 -77.003730799, -172.655960197 -76.994751205, -172.60882792 -76.987594764, -172.534489083 -76.982261476, -172.480072837 -76.983094424, -172.445579184 -76.990093609, -172.428332542 -76.998610734, -172.428332911 -77.008645799, -172.435068344 -77.018150822, -172.448538839 -77.027125803, -172.490777829 -77.039613708, -172.561785312 -77.055614535, -172.628175119 -77.080598263, -172.68994725 -77.114564892, -172.751818039 -77.133793765, -172.813787485 -77.138284883, -172.900229764 -77.131828165, -173.011144875 -77.114423613, -173.119679588 -77.128474884, -173.2258339 -77.17398198, -173.273849553 -77.202664633, -173.263726547 -77.214522842, -173.165895559 -77.239681117, -172.980356589 -77.278139457, -172.880291531 -77.312658914, -172.865700386 -77.343239487, -172.867667457 -77.371126102, -172.886192744 -77.39631876, -172.999732531 -77.429966955, -173.208286817 -77.472070689, -173.335454668 -77.509278677, -173.381236082 -77.541590921, -173.403703936 -77.570407724, -173.40285823 -77.595729086, -173.378288408 -77.634921, -173.329994472 -77.687983467, -173.241287742 -77.735563094, -173.112168219 -77.777659882, -173.054064387 -77.81089869, -173.066976248 -77.835279519, -173.063736051 -77.854657976, -173.044343797 -77.869034061, -172.890349983 -77.896435115, -172.60175461 -77.936861139, -172.376181212 -77.961986812, -172.213629791 -77.971812135, -172.023427102 -77.967320559, -171.805573145 -77.948512083, -171.581263004 -77.918894833, -171.350496677 -77.87846881, -171.217147208 -77.851799157, -171.181214596 -77.838885875, -171.160572341 -77.826074082, -171.155220441 -77.813363779, -171.178789134 -77.790158543, -171.231278422 -77.756458375, -171.27338337 -77.70988804, 
-171.305103978 -77.65044754, -171.293875473 -77.602346602, -171.239697854 -77.565585227, -171.168401509 -77.532887375, -171.079986438 -77.504253044, -171.028614514 -77.483042244, -171.014285737 -77.469254974, -171.016677114 -77.456576914, -171.035788644 -77.445008064, -171.086879845 -77.431646501, -171.169950715 -77.416492226, -171.216537864 -77.403175691, -171.226641293 -77.391696895, -171.228607057 -77.378968685, -171.222435157 -77.364991059, -171.168824693 -77.334840949, -171.067775664 -77.288518355, -171.000402018 -77.24121644, -170.966703754 -77.192935206, -170.894838531 -77.157002595, -170.784806349 -77.133418606, -170.725150821 -77.11627156, -170.715871945 -77.105561456, -170.710674146 -77.077210652, -170.709557424 -77.031219147, -170.697909144 -76.992502178, -170.675729304 -76.961059744, -170.654536164 -76.940848729, -170.634329723 -76.931869135, -170.581564681 -76.922044903, -170.496241038 -76.911376032, -170.429709562 -76.893409727, -170.381970254 -76.868145986, -170.285260999 -76.838950739, -170.139581798 -76.805823986, -170.061542334 -76.78431495, -170.051142608 -76.77442363, -170.076677284 -76.763148845, -170.138146365 -76.750490597, -170.192753568 -76.731526593, -170.240498896 -76.706256833, -170.315896371 -76.686462585, -170.418945993 -76.67214385, -170.498267121 -76.665405567, -170.553859754 -76.666247738, -170.609039198 -76.673409769, -170.663805452 -76.68689166, -170.695686968 -76.698414281, -170.704683743 -76.70797763, -170.710444514 -76.723277346, -170.71296928 -76.744313428), (-172.46185717 -77.485683162, -172.491725041 -77.49003391, -172.535448064 -77.490594163, -172.566986057 -77.488349711, -172.586339021 -77.483300552, -172.598540475 -77.476173053, -172.60359042 -77.466967216, -172.601627836 -77.458872071, -172.592652724 -77.451887618, -172.556765055 -77.448396429, -172.49396483 -77.448398503, -172.453726685 -77.452881992, -172.436050621 -77.461846897, -172.429868964 -77.468114837, -172.435181715 -77.47168581, -172.44584445 -77.477541919, 
-172.46185717 -77.485683162), (-172.812798475 -76.363628771, -172.855573928 -76.365453015, -172.885037626 -76.36040045, -172.90720433 -76.351027386, -172.92207404 -76.337333821, -172.9168827 -76.324750727, -172.89163031 -76.313278104, -172.862193885 -76.307261221, -172.828573425 -76.30670008, -172.792121028 -76.311189877, -172.752836694 -76.320730613, -172.732062811 -76.331770033, -172.729799379 -76.344308139, -172.756711267 -76.354927718, -172.812798475 -76.363628771), (-171.932998671 -76.183124002, -172.010021088 -76.180457336, -172.070931389 -76.166984091, -172.113033554 -76.150312062, -172.136327583 -76.130441248, -172.133522137 -76.111120124, -172.104617217 -76.092348689, -172.06028165 -76.080296327, -172.000515436 -76.074963039, -171.918725408 -76.076928027, -171.814911566 -76.086191292, -171.745182124 -76.097695899, -171.709537083 -76.111441849, -171.696346087 -76.126554541, -171.705609136 -76.143033974, -171.731004713 -76.156183802, -171.77253282 -76.166004024, -171.83986414 -76.174984091, -171.932998671 -76.183124002), (-173.16885937 -76.066345013, -173.199147981 -76.070696107, -173.23950163 -76.071257052, -173.269213382 -76.065813298, -173.288283234 -76.054364845, -173.2799961 -76.038973879, -173.244351978 -76.0196404, -173.207608446 -76.007588038, -173.169765504 -76.002816794, -173.139490241 -76.003094691, -173.116782658 -76.008421729, -173.104589039 -76.016938854, -173.102909386 -76.028646065, -173.111183172 -76.03940804, -173.129410398 -76.049224779, -173.148635798 -76.05820377, -173.16885937 -76.066345013)))" +version = 0 +depth = 500 +theme = "base" +type = "bathymetry" + +[examples.Bathymetry.bbox] +xmin = -175.12791442871094 +xmax = -170.05111694335938 +ymin = -77.9718246459961 +ymax = -75.64299774169922 + +[[examples.Bathymetry.sources]] +property = "" +dataset = "ETOPO/GLOBathy" +record_id = "2024-12-09T00:00:00.000Z" +update_time = "null" +confidence = "null" +between = "null" + +[examples.Bathymetry.cartography] +prominence = "null" +min_zoom = 
"null" +max_zoom = "null" +sort_key = 12 + +[[examples.Infrastructure]] +id = "e9e3d506-89c0-3473-8cee-5e5ac6596d6c" +geometry = "POINT (-179.9999994 -82.42408)" +version = 0 +level = "null" +subtype = "pedestrian" +class = "information" +height = "null" +surface = "null" +wikidata = "Q800558" +theme = "base" +type = "infrastructure" + +[examples.Infrastructure.bbox] +xmin = -180.0 +xmax = -179.99998474121094 +ymin = -82.42408752441406 +ymax = -82.42407989501953 + +[[examples.Infrastructure.sources]] +property = "" +dataset = "OpenStreetMap" +record_id = "n7674174803@2" +update_time = "2023-04-07T17:37:48.000Z" +confidence = "null" +between = "null" + +[examples.Infrastructure.names] +primary = "1306 km to South Pole" +common = "null" +rules = "null" + +[examples.Infrastructure.source_tags] +description = "1036 km to South Pole." +information = "route_marker" +note = "The road continue in west side of the map" +start_date = "2007" +tourism = "information" +wikipedia = "en:South Pole Traverse" + +[[examples.Land]] +id = "70fc3596-a987-3fea-820c-c016c0a2f0da" +geometry = "POINT (-178.7 -85.45)" +version = 0 +level = "null" +subtype = "physical" +class = "cliff" +surface = "null" +wikidata = "Q5282342" +elevation = "null" +theme = "base" +type = "land" + +[examples.Land.bbox] +xmin = -178.7000274658203 +xmax = -178.6999969482422 +ymin = -85.45001220703125 +ymax = -85.44999694824219 + +[[examples.Land.sources]] +property = "" +dataset = "OpenStreetMap" +record_id = "n11693475112@1" +update_time = "2024-03-05T09:23:39.000Z" +confidence = "null" +between = "null" + +[examples.Land.names] +primary = "Dismal Buttress" +common = "null" +rules = "null" + +[examples.Land.source_tags] +natural = "cliff" +"ref:linz:place_id" = "12318" +wikipedia = "en:Dismal Buttress" + +[[examples.LandCover]] +id = "c347312d-012b-5e73-8bd3-a10d04b2981d" +geometry = "POLYGON ((-179.99877531181616 65.95172539425603, -179.99740705536922 65.95265577758867, -179.99751722434937 65.9532545912543, 
-179.9974078443441 65.9541507615366, -179.9965398649702 65.95451215813897, -179.99644396804533 65.95493010632842, -179.99602533095998 65.95502533095993, -179.99468737767813 65.95677071067811, -179.9933586639601 65.9576086639598, -179.99313729490444 65.95812767174695, -179.99314866836227 65.95857649030111, -179.99370507548738 65.95907781410224, -179.99372539425596 65.95947468818369, -179.99395850935272 65.95967260714353, -179.99410866395988 65.96030800303998, -179.99534017576838 65.96101799736452, -179.99575621846904 65.96104928900519, -179.9959057775888 65.96150961146397, -179.9965950523775 65.96161426988128, -179.99663895270027 65.96216619349144, -179.99807649030126 65.96218466463768, -179.99819891654494 65.96189707483568, -179.99799883949768 65.96160842248709, -179.99825961146388 65.96142755541139, -179.99830761159433 65.9610635173197, -179.99936104612706 65.9609995273612, -179.9993797906372 65.96051410937864, -179.99964133604004 65.96039133604008, -179.9997 65.96016912258357, -179.99936104729989 65.95958380650865, -179.99900447103303 65.95954329910117, -179.9987608894112 65.95924038853603, -179.99806463264497 65.95902716440592, -179.99798856507215 65.95838313921075, -179.99834294463088 65.95801088941111, -179.9983374593203 65.9575910941953, -179.99855761159426 65.95723018431977, -179.99921013502978 65.95698784186104, -179.99931463264488 65.95663950159415, -179.99990450886096 65.95637680202988, -179.99997427859432 65.9560635173197, -180.00019127274402 65.9558913550169, -180.00019127274405 65.95544197881631, -180.0000389948438 65.9553039610106, -179.9996246090062 65.9553159274193, -179.99935793918766 65.95327531026125, -179.9988434361254 65.95288259953995, -179.99885243016726 65.95244253241113, -179.9991661934914 65.95227771429981, -179.9991960389287 65.95187767174694, -179.99877531181616 65.95172539425603))" +version = 0 +subtype = "barren" +theme = "base" +type = "land_cover" + +[examples.LandCover.bbox] +xmin = -180.0001983642578 +xmax = -179.99313354492188 
+ymin = 65.95172119140625 +ymax = 65.96218872070312 + +[[examples.LandCover.sources]] +property = "" +dataset = "ESA WorldCover" +record_id = "null" +update_time = "2024-11-07T00:00:00.000Z" +confidence = "null" +between = "null" + +[examples.LandCover.cartography] +prominence = "null" +min_zoom = 8 +max_zoom = 15 +sort_key = 3 + +[[examples.LandUse]] +id = "1e1f6095-5bd2-3fdb-a422-41351b848e9d" +geometry = "POLYGON ((-176.5623454 -43.9567812, -176.5627644 -43.9561272, -176.5626898 -43.9557432, -176.5624297 -43.9553592, -176.562679 -43.9551603, -176.5629058 -43.9552064, -176.5631441 -43.9551769, -176.5632428 -43.9550676, -176.5633066 -43.9548702, -176.5634402 -43.9548071, -176.5639052 -43.9546682, -176.5642479 -43.9544118, -176.5647302 -43.9542142, -176.5651547 -43.954277, -176.5658293 -43.9545243, -176.5659454 -43.9543521, -176.566934 -43.9547987, -176.5669179 -43.955018, -176.5682465 -43.9553205, -176.5671004 -43.9579593, -176.5662034 -43.9600044, -176.5655366 -43.9597247, -176.5646109 -43.9595326, -176.564467 -43.9592563, -176.5639885 -43.9589226, -176.5637013 -43.9586925, -176.563223 -43.9586237, -176.5623454 -43.9567812))" +version = 0 +level = "null" +subtype = "golf" +class = "golf_course" +surface = "null" +wikidata = "null" +elevation = "null" +theme = "base" +type = "land_use" + +[examples.LandUse.bbox] +xmin = -176.56825256347656 +xmax = -176.56231689453125 +ymin = -43.96001052856445 +ymax = -43.95420837402344 + +[[examples.LandUse.sources]] +property = "" +dataset = "OpenStreetMap" +record_id = "w56117029@3" +update_time = "2010-04-24T22:35:13.000Z" +confidence = "null" +between = "null" + +[examples.LandUse.names] +primary = "Chatham Islands Golf Club" +common = "null" +rules = "null" + +[examples.LandUse.source_tags] +"LINZ:source_version" = "V16" +attribution = "http://wiki.osm.org/wiki/Attribution#LINZ" +leisure = "golf_course" +source_ref = "http://www.linz.govt.nz/topography/topo-maps/" + +[[examples.Water]] +id = 
"6bbb5fe5-bf26-3efa-b120-0a7079b60840" +geometry = "POINT (-177.031799 -84.934793)" +version = 0 +level = "null" +subtype = "physical" +class = "cape" +wikidata = "Q33140589" +is_salt = "null" +is_intermittent = "null" +theme = "base" +type = "water" + +[examples.Water.bbox] +xmin = -177.03179931640625 +xmax = -177.0317840576172 +ymin = -84.93480682373047 +ymax = -84.9347915649414 + +[[examples.Water.sources]] +property = "" +dataset = "OpenStreetMap" +record_id = "n11109190647@2" +update_time = "2024-02-11T05:52:05.000Z" +confidence = "null" +between = "null" + +[examples.Water.names] +primary = "Thanksgiving Point" +common = "null" +rules = "null" + +[examples.Water.source_tags] +natural = "cape" +"ref:linz:place_id" = "13433" diff --git a/packages/overture-schema-buildings-theme/pyproject.toml b/packages/overture-schema-buildings-theme/pyproject.toml index c0ca9297c..8d169f4f4 100644 --- a/packages/overture-schema-buildings-theme/pyproject.toml +++ b/packages/overture-schema-buildings-theme/pyproject.toml @@ -37,3 +37,81 @@ packages = ["src/overture"] [project.entry-points."overture.models"] "overture:buildings:building" = "overture.schema.buildings:Building" "overture:buildings:building_part" = "overture.schema.buildings:BuildingPart" + +[[examples.Building]] +id = "148f35b1-7bc1-4180-9280-10d39b13883b" +geometry = "POLYGON ((-176.6435004 -43.9938042, -176.6435738 -43.9937107, -176.6437726 -43.9937913, -176.6436992 -43.9938849, -176.6435004 -43.9938042))" +version = 1 +level = "null" +subtype = "null" +class = "null" +height = "null" +names = "null" +has_parts = false +is_underground = false +num_floors = "null" +num_floors_underground = "null" +min_height = "null" +min_floor = "null" +facade_color = "null" +facade_material = "null" +roof_material = "null" +roof_shape = "null" +roof_direction = "null" +roof_orientation = "null" +roof_color = "null" +roof_height = "null" +theme = "buildings" +type = "building" + +[examples.Building.bbox] +xmin = 
-176.643798828125 +xmax = -176.64349365234375 +ymin = -43.9938850402832 +ymax = -43.993709564208984 + +[[examples.Building.sources]] +property = "" +dataset = "OpenStreetMap" +record_id = "w519166507@1" +update_time = "2017-08-27T21:39:50.000Z" +confidence = "null" +between = "null" + +[[examples.BuildingPart]] +id = "19412d64-51ac-3d6a-ac2f-8a8c8b91bb60" +geometry = "POLYGON ((-73.2462509 -39.8108937, -73.2462755 -39.8109047, -73.246291 -39.8109182, -73.2463022 -39.8109382, -73.2463039 -39.810959, -73.2462962 -39.81098, -73.2462796 -39.8109977, -73.2462674 -39.8110052, -73.2462281 -39.8110153, -73.2461998 -39.811013, -73.2461743 -39.8110034, -73.2461566 -39.8109898, -73.246144 -39.8109702, -73.2461418 -39.8109427, -73.2461511 -39.8109221, -73.2461669 -39.8109066, -73.2461908 -39.8108947, -73.2462184 -39.8108898, -73.2462509 -39.8108937))" +version = 0 +level = 3 +height = "null" +names = "null" +is_underground = false +num_floors = "null" +num_floors_underground = "null" +min_height = "null" +min_floor = "null" +facade_color = "null" +facade_material = "null" +roof_material = "null" +roof_shape = "null" +roof_direction = "null" +roof_orientation = "null" +roof_color = "null" +roof_height = "null" +building_id = "bd663bd4-1844-4d7d-a400-114de051cf49" +theme = "buildings" +type = "building_part" + +[examples.BuildingPart.bbox] +xmin = -73.24630737304688 +xmax = -73.24613952636719 +ymin = -39.81101608276367 +ymax = -39.81088638305664 + +[[examples.BuildingPart.sources]] +property = "" +dataset = "OpenStreetMap" +record_id = "w223076787@2" +update_time = "2014-10-31T22:55:36.000Z" +confidence = "null" +between = "null" diff --git a/packages/overture-schema-cli/src/overture/schema/cli/commands.py b/packages/overture-schema-cli/src/overture/schema/cli/commands.py index 8fdd8bdf4..a75b16cd4 100644 --- a/packages/overture-schema-cli/src/overture/schema/cli/commands.py +++ b/packages/overture-schema-cli/src/overture/schema/cli/commands.py @@ -798,7 +798,7 @@ def 
dump_namespace( sorted_types = sorted(theme_types[theme], key=lambda x: x[0].type) for key, model_class in sorted_types: stdout.print( - f" [bright_black]→[/bright_black] [bold cyan]{key.type}[/bold cyan] [dim magenta]({key.class_name})[/dim magenta]" + f" [bright_black]→[/bright_black] [bold cyan]{key.type}[/bold cyan] [dim magenta]({key.entry_point})[/dim magenta]" ) docstring = get_model_docstring(model_class) if docstring: diff --git a/packages/overture-schema-cli/src/overture/schema/cli/type_analysis.py b/packages/overture-schema-cli/src/overture/schema/cli/type_analysis.py index 9316c80c0..4362d7f76 100644 --- a/packages/overture-schema-cli/src/overture/schema/cli/type_analysis.py +++ b/packages/overture-schema-cli/src/overture/schema/cli/type_analysis.py @@ -8,6 +8,8 @@ from pydantic import BaseModel from pydantic.fields import FieldInfo +from overture.schema.system.feature import resolve_discriminator_field_name + from .types import ErrorLocation, ValidationErrorDict # Type aliases for structural tuple elements @@ -29,11 +31,23 @@ class UnionMetadata: nested_unions: dict[str, "UnionMetadata"] +def _extract_literal_value(model: type[BaseModel], field_name: str) -> str | None: + """Extract the single Literal value from a model field as a string, if present.""" + field_info = model.model_fields.get(field_name) + if field_info is None or field_info.annotation is None: + return None + if get_origin(field_info.annotation) is Literal: + args = get_args(field_info.annotation) + return str(args[0]) if args else None + return None + + def _process_union_member( member: Any, # noqa: ANN401 discriminator_to_model: dict[str, type[BaseModel]], model_name_to_model: dict[str, type[BaseModel]], nested_unions: dict[str, UnionMetadata], + discriminator_field: str | None = None, ) -> None: """Process a single union member, handling nesting recursively. 
@@ -43,6 +57,7 @@ def _process_union_member( discriminator_to_model: Dict to populate with discriminator value mappings model_name_to_model: Dict to populate with model name mappings nested_unions: Dict to populate with nested union metadata + discriminator_field: The discriminator field name from the parent union annotation """ member_origin = get_origin(member) @@ -63,12 +78,24 @@ def _process_union_member( nested_metadata = introspect_union(member) nested_unions[str(member)] = nested_metadata discriminator_to_model.update(nested_metadata.discriminator_to_model) + # The nested union's discriminator_to_model uses the nested discriminator + # field (e.g. "subtype"). Re-extract using the parent discriminator field + # (e.g. "type") so leaf models are also reachable by the parent's values. + if discriminator_field is not None: + for model in nested_metadata.model_name_to_model.values(): + value = _extract_literal_value(model, discriminator_field) + if value is not None: + discriminator_to_model[value] = model return # Unwrap Annotated to get the actual type (e.g., Annotated[Building, Tag('building')]) # and process it recursively _process_union_member( - member_args[0], discriminator_to_model, model_name_to_model, nested_unions + member_args[0], + discriminator_to_model, + model_name_to_model, + nested_unions, + discriminator_field, ) return @@ -76,17 +103,10 @@ def _process_union_member( if inspect.isclass(member) and issubclass(member, BaseModel): model_name_to_model[member.__name__] = member - # Extract discriminator values from known discriminator fields only - # Restrict to known discriminator names to avoid false positives from other Literal fields - discriminator_fields = ("type", "theme", "subtype") - for field_name, field_info in member.model_fields.items(): - if field_name not in discriminator_fields: - continue - annotation = field_info.annotation - if get_origin(annotation) is Literal: - literal_args = get_args(annotation) - if literal_args: - 
discriminator_to_model[literal_args[0]] = member + if discriminator_field is not None: + value = _extract_literal_value(member, discriminator_field) + if value is not None: + discriminator_to_model[value] = member def introspect_union(union_type: Any) -> UnionMetadata: # noqa: ANN401 @@ -163,9 +183,9 @@ def introspect_union(union_type: Any) -> UnionMetadata: # noqa: ANN401 if isinstance(metadata, FieldInfo) and hasattr( metadata, "discriminator" ): - disc = metadata.discriminator - # discriminator can be a string or Discriminator object - discriminator_field = str(disc) if disc is not None else None + discriminator_field = resolve_discriminator_field_name( + metadata.discriminator + ) break # Get union members @@ -183,7 +203,11 @@ def introspect_union(union_type: Any) -> UnionMetadata: # noqa: ANN401 # Process each union member for member in union_members: _process_union_member( - member, discriminator_to_model, model_name_to_model, nested_unions + member, + discriminator_to_model, + model_name_to_model, + nested_unions, + discriminator_field, ) return UnionMetadata( diff --git a/packages/overture-schema-cli/tests/test_type_analysis.py b/packages/overture-schema-cli/tests/test_type_analysis.py index 12fb10e75..0a21e168a 100644 --- a/packages/overture-schema-cli/tests/test_type_analysis.py +++ b/packages/overture-schema-cli/tests/test_type_analysis.py @@ -29,14 +29,9 @@ class ModelB(BaseModel): UnionType = Annotated[ModelA | ModelB, Field(discriminator="type")] - # Test simple discriminated union error path loc = ("a", "required_a") metadata = introspect_union(UnionType) structural = create_structural_tuple(loc, metadata) - print(f"\nloc: {loc}") - print(f"structural: {structural}") - assert len(structural) == len(loc) - # First element should be discriminator, second should be field assert structural == ("discriminator", "field") def test_mixed_union_structural_tuple(self) -> None: @@ -56,17 +51,11 @@ class Sources(BaseModel): # Test discriminated side loc1 = 
("tagged-union[ModelA]", "a", "required_a") structural1 = create_structural_tuple(loc1, metadata) - print("\nDiscriminated side:") - print(f"loc: {loc1}") - print(f"structural: {structural1}") assert structural1 == ("union", "discriminator", "field") # Test non-discriminated side loc2 = ("Sources", "datasets") structural2 = create_structural_tuple(loc2, metadata) - print("\nNon-discriminated side:") - print(f"loc: {loc2}") - print(f"structural: {structural2}") assert structural2 == ("model", "field") def test_list_context_structural_tuple(self) -> None: @@ -78,13 +67,9 @@ class ModelA(BaseModel): UnionType = Annotated[ModelA, Field(discriminator="type")] - # Test list context loc = (1, "a", "required_a") metadata = introspect_union(list[UnionType]) structural = create_structural_tuple(loc, metadata) - print("\nList context:") - print(f"loc: {loc}") - print(f"structural: {structural}") assert structural == ("list_index", "discriminator", "field") def test_nested_discriminated_structural_tuple(self) -> None: @@ -114,13 +99,9 @@ class Sources(BaseModel): FeatureUnion = Annotated[Building | SegmentUnion, Field(discriminator="type")] MixedUnion = FeatureUnion | Sources - # Test nested discriminator path (type=segment, subtype=road) loc = ("tagged-union[SegmentUnion]", "segment", "road", "road_class") metadata = introspect_union(MixedUnion) structural = create_structural_tuple(loc, metadata) - print("\nNested discriminated:") - print(f"loc: {loc}") - print(f"structural: {structural}") assert structural == ("union", "discriminator", "discriminator", "field") @@ -253,34 +234,71 @@ class ModelA(BaseModel): assert metadata.discriminator_field == "type" assert "a" in metadata.discriminator_to_model - @pytest.mark.parametrize( - "literal_value,expected_in_mapping", - [ - pytest.param("building", True, id="literal_building"), - pytest.param("place", True, id="literal_place"), - pytest.param("nonexistent", False, id="not_present"), - ], - ) - def 
test_introspect_extracts_all_literals( - self, literal_value: str, expected_in_mapping: bool - ) -> None: - """Test that introspect_union extracts all Literal field values.""" + +class TestDiscriminatorDiscovery: + """Tests for runtime discriminator field discovery (not hardcoded).""" + + def test_nonstandard_discriminator_field_name(self) -> None: + """Discriminator field not named type/theme/subtype is discovered at runtime.""" + + class Cat(BaseModel): + kind: Literal["cat"] + indoor: bool + + class Dog(BaseModel): + kind: Literal["dog"] + breed: str + + UnionType = Annotated[Cat | Dog, Field(discriminator="kind")] + metadata = introspect_union(UnionType) + + assert metadata.is_discriminated is True + assert metadata.discriminator_field == "kind" + assert metadata.discriminator_to_model["cat"] == Cat + assert metadata.discriminator_to_model["dog"] == Dog + + def test_non_discriminator_literal_fields_excluded(self) -> None: + """Literal fields that aren't the discriminator are not in the mapping.""" class Building(BaseModel): type: Literal["building"] - subtype: Literal["residential"] + status: Literal["active"] class Place(BaseModel): type: Literal["place"] - category: Literal["restaurant"] + status: Literal["active"] UnionType = Annotated[Building | Place, Field(discriminator="type")] metadata = introspect_union(UnionType) - if expected_in_mapping: - assert literal_value in metadata.discriminator_to_model - else: - assert literal_value not in metadata.discriminator_to_model + assert "building" in metadata.discriminator_to_model + assert "place" in metadata.discriminator_to_model + assert "active" not in metadata.discriminator_to_model + + def test_callable_discriminator_extracts_field_name(self) -> None: + """Callable discriminators (Feature.field_discriminator) are supported.""" + from pydantic import Discriminator + + class ModelA(BaseModel): + kind: Literal["a"] + + class ModelB(BaseModel): + kind: Literal["b"] + + def get_kind(data: object) -> str | None: + 
return data.get("kind") if isinstance(data, dict) else None + + get_kind._field_name = "kind" # type: ignore[attr-defined] + + UnionType = Annotated[ + ModelA | ModelB, Field(discriminator=Discriminator(get_kind)) + ] + metadata = introspect_union(UnionType) + + assert metadata.is_discriminated is True + assert metadata.discriminator_field == "kind" + assert metadata.discriminator_to_model["a"] == ModelA + assert metadata.discriminator_to_model["b"] == ModelB class TestStructuralTupleCaching: diff --git a/packages/overture-schema-codegen/README.md b/packages/overture-schema-codegen/README.md new file mode 100644 index 000000000..f09467f77 --- /dev/null +++ b/packages/overture-schema-codegen/README.md @@ -0,0 +1,118 @@ +# Overture Schema Codegen + +Generates documentation from Overture Maps Pydantic schema definitions. + +Pydantic's `model_json_schema()` flattens the schema's domain vocabulary into JSON +Schema primitives. NewType names disappear, constraint provenance is lost (which NewType +contributed which bound), custom constraint classes lose their identity (a +`GeometryTypeConstraint` becomes an anonymous `enum` array), and discriminated union +structure collapses into `anyOf` arrays with duplicated fields. + +Navigating Python's type annotation machinery -- NewType chains, nested `Annotated` +wrappers, union filtering, generic resolution -- is complex. The codegen does it once. +`analyze_type()` unwraps annotations into `TypeInfo`, a flat target-independent +representation. Extractors build specs from `TypeInfo`. Renderers consume specs without +touching the type system. New output targets (Arrow schemas, PySpark expressions) add +renderers, not extraction logic. 
+ +## Usage + +```bash +# Generate markdown documentation for all themes +overture-codegen generate --format markdown --output-dir docs/schema/reference + +# Generate for a single theme +overture-codegen generate --format markdown --theme buildings --output-dir out/ + +# List discovered models +overture-codegen list +``` + +The generator discovers models via `overture.models` entry points (provided by theme +packages like `overture-schema-buildings-theme`), extracts type information, and renders +output pages with cross-page links, constraint descriptions, and validated examples. + +## Architecture + +Four layers with strict downward imports -- no layer references the one above it: + +```text +Rendering Output formatting, all presentation decisions + ^ +Output Layout What to generate, where it goes, how outputs link + ^ +Extraction TypeInfo, FieldSpec, ModelSpec, UnionSpec + ^ +Discovery discover_models() from overture-schema-core +``` + +**Discovery** loads registered Pydantic models via entry points. The return dict +includes both concrete `BaseModel` subclasses (like `Building`) and discriminated union +type aliases (like `Segment`). Both satisfy the `FeatureSpec` protocol and flow through +the same pipeline. + +**Extraction** unwraps type annotations into specs. `analyze_type()` is the central +function -- a single iterative loop that peels NewType, Annotated, Union, and container +wrappers, accumulating constraints tagged with the NewType that contributed them. +Domain-specific extractors (`model_extraction`, `union_extraction`, `enum_extraction`, +`newtype_extraction`, `primitive_extraction`) call `analyze_type()` for field types and +produce spec dataclasses. + +**Output Layout** determines what artifacts to generate and where they go. Supplementary +type collection walks expanded feature trees to find referenced enums, NewTypes, and +sub-models. Path assignment maps every type to an output file path mirroring the Python +module structure. 
Link computation and reverse references enable cross-page navigation.
+
+**Rendering** consumes specs and owns all presentation decisions. Markdown output uses
+Jinja2 templates for feature pages (with field tables, constraint sections, and
+examples), enum pages, NewType pages, and aggregate primitive/geometry reference pages.
+
+`markdown/pipeline.py` orchestrates the full pipeline without I/O, returning
+`list[RenderedPage]`. The CLI writes files to disk with Docusaurus frontmatter.
+
+## Programmatic use
+
+```python
+from overture.schema.codegen.extraction.type_analyzer import analyze_type, TypeKind
+
+info = analyze_type(some_annotation)
+assert info.kind == TypeKind.PRIMITIVE
+assert info.base_type == "int32"
+assert info.newtype_name == "FeatureVersion"
+# Constraints carry provenance:
+for cs in info.constraints:
+    print(f"{cs.constraint} from {cs.source}")
+```
+
+## Fetching sample data
+
+Theme packages include example records in their `pyproject.toml` files under
+`[[examples.<ModelName>]]` sections (e.g. `[[examples.Address]]`). The codegen
+validates these against Pydantic models and renders them in feature pages.
+
+To fetch a fresh sample from the latest Overture release using DuckDB:
+
+```bash
+duckdb -json \
+    -c "load spatial" \
+    -c "attach 'http://labs.overturemaps.org/data/latest.ddb' as overture" \
+    -c "select to_json(columns(*))
+        from (
+            select * REPLACE ST_AsText(geometry) as geometry
+            from overture.place
+            USING SAMPLE 1
+        )" \
+    | jq .
+```
+
+The `latest.ddb` database always points to the current release. Tables use
+the type name directly (`overture.place`, `overture.segment`,
+`overture.building`, etc.). Convert the JSON output to TOML for inclusion in
+the theme's `pyproject.toml`.
+ +## Further reading + +- [Design document](docs/design.md) -- architecture, extension points, data flow + diagrams +- [Walkthrough](docs/walkthrough.md) -- module-by-module narrative tracing Segment + through the full pipeline diff --git a/packages/overture-schema-codegen/docs/design.md b/packages/overture-schema-codegen/docs/design.md new file mode 100644 index 000000000..f541fb359 --- /dev/null +++ b/packages/overture-schema-codegen/docs/design.md @@ -0,0 +1,262 @@ +# Code Generator Design + +Code generator that produces documentation and code from Overture Maps Pydantic schema +definitions. + +## Problem + +Overture Maps schema definitions live in Pydantic models across theme packages. Each +model carries type annotations, field constraints, docstrings, and relationships +(inheritance, composition, discriminated unions). Generating documentation or code from +these models requires introspecting all of that structure and rendering it into output +formats. + +Pydantic's internal representation is JSON-schema-oriented and discards the vocabulary +the code generator needs to preserve. `model_json_schema()` flattens `FeatureVersion` (a +NewType wrapping `int32` wrapping `Annotated[int, Field(ge=0, le=2^31-1)]`) to `{"type": +"integer", "minimum": 0}` -- the NewType names `FeatureVersion` and `int32` are gone, +custom constraint classes (`GeometryTypeConstraint`, `UniqueItemsConstraint`) are gone, +Python class references are gone, and constraint provenance (which NewType contributed +which bound) is gone. `FieldInfo.annotation` gives the raw annotation, but Pydantic does +not unwrap NewType chains or track multi-depth constraint provenance. + +The schema's domain language -- custom primitives (`int32`, `float64`), semantic +NewTypes (`FeatureVersion`, `Sources`), and custom constraint classes -- needs to +survive extraction intact. 
A single field annotation like `NewType("Foo", +Annotated[list[SomeModel] | None, Field(ge=0)])` encodes optionality, collection type, +element type, constraints, and semantic naming in nested Python typing constructs. Type +definitions regularly nest `Annotated` inside `NewType` inside `Annotated` -- +`FeatureVersion = NewType("FeatureVersion", int32)` where `int32 = NewType("int32", +Annotated[int, Field(ge=...)])` -- and constraints at each depth need to be tagged with +the NewType that contributed them. + +The code generator solves this by extracting type information once into a flat, +navigable representation (`TypeInfo`), then passing that to renderers that produce +output without touching Python's type system. + +## Inputs and Outputs + +**Inputs**: Pydantic `BaseModel` subclasses discovered via `overture.models` entry +points, plus example data from theme `pyproject.toml` files. Examples serve two +purposes: rendered examples in documentation pages, and a starting point for generating +tests that verify behavior of generated code. + +**Current Outputs**: Markdown documentation pages with field tables, cross-page links, +constraint descriptions, and examples. + +**Planned outputs**: Arrow schemas, PySpark expressions. + +## Architecture + +Four layers with strict downward imports -- no layer references the one above it: + +```text +Rendering Output formatting, all presentation decisions + ^ +Output Layout What to generate, where it goes, how outputs link + ^ +Extraction TypeInfo, FieldSpec, ModelSpec, EnumSpec, ... + ^ +Discovery discover_models() from overture-schema-core +``` + +`markdown/pipeline.py` orchestrates the pipeline without I/O: it expands feature trees, +collects supplementary types, builds placement registries, computes reverse references, +and calls renderers -- returning `RenderedPage` objects. The CLI (`cli.py`) is a thin +Click wrapper that calls `generate_markdown_pages()` and writes files to disk. 
+ +```mermaid +graph TD + subgraph Discovery + DM["discover_models()"] + end + + DM -->|"dict[ModelKey, type]"| EX + + subgraph Extraction + EX["extraction/type_analyzer / extractors"] + EX -->|"ModelSpec, UnionSpec"| TREE["expand_model_tree()"] + end + + TREE -->|"FeatureSpec[]"| OL + + subgraph "Output Layout" + OL["layout/type_collection"] + OL -->|"SupplementarySpec{}"| PA["markdown/path_assignment"] + PA -->|"dict[str, Path]"| LC["markdown/link_computation"] + RR["markdown/reverse_references"] + end + + subgraph Rendering + R["markdown/renderer"] + TR["extraction/type_registry"] -.->|"type name resolution"| R + end + + subgraph Orchestration + MP["markdown/pipeline"] + end + + OL --> MP + LC --> MP + RR --> MP + MP --> R + R -->|"RenderedPage[]"| MP + MP -->|"list[RenderedPage]"| CLI["cli.py → disk"] +``` + +## Extraction + +### `analyze_type` -- iterative type unwrapping + +`analyze_type(annotation)` is a single iterative function that peels type annotation +layers in a fixed order, accumulating information into an `_UnwrapState`: + +1. **NewType**: Records the outermost name (user-facing semantic identity, e.g. + `FeatureVersion`) and updates the "current" name (used for constraint provenance and + as `base_type` at terminal) +2. **Annotated**: Collects constraints from metadata, each tagged with whichever NewType + was most recently entered. Extracts `Field.description` when present +3. **Union**: Filters out `None` (marks optional), `Sentinel`, and `Literal` sentinel + arms. If multiple concrete `BaseModel` arms remain, classifies as `UNION`; otherwise + continues with the single remaining arm +4. **list / dict**: Increments `list_depth` for each `list[...]` layer, sets dict flags, + continues into element types +5. 
**Terminal**: Classifies as `PRIMITIVE`, `LITERAL`, `ENUM`, `MODEL`, or `UNION` + +The result is `TypeInfo` -- a flat dataclass that fully describes the unwrapped type: +classification (`TypeKind`), optional/dict flags, `list_depth` (count of `list[...]` +layers), `newtype_outer_list_depth` (list layers outside the outermost NewType boundary), +accumulated constraints with provenance, NewType names, source type, literal values, and +(for UNION kind) the tuple of concrete `BaseModel` member types. Dict types carry +recursively analyzed `TypeInfo` for their key and value types. + +Multi-depth `Annotated` layers (common in practice, since NewTypes wrap `Annotated` +types that wrap further NewTypes) are handled naturally by the loop -- each iteration +processes the next wrapper. Constraints from each `Annotated` layer are tagged with the +NewType active at that depth. + +### Extractors by domain + +Extraction is split by entity kind: + +- `extraction/model_extraction.py`: Pydantic model -> `ModelSpec` (fields in MRO-aware + documentation order, alias-resolved names, model-level constraints) +- `extraction/enum_extraction.py`: Enum class -> `EnumSpec` +- `extraction/newtype_extraction.py`: NewType -> `NewTypeSpec` +- `extraction/union_extraction.py`: Discriminated union alias -> `UnionSpec` +- `extraction/primitive_extraction.py`: Numeric primitives -> `PrimitiveSpec` + +Each calls `analyze_type()` for field types. Tree expansion (`expand_model_tree()`) +walks MODEL-kind fields to populate nested model references, with a shared cache and +cycle detection (`starts_cycle=True`). + +### Unions and the FeatureSpec protocol + +Discriminated unions (e.g. `Segment = Annotated[Union[RoadSegment, ...], +Discriminator(...)]`) are type aliases, not classes. `UnionSpec` captures the union +structure: member types, discriminator field and value mapping, and a merged field list. 
+Fields shared across all variants appear once; fields present in some variants are +wrapped in `AnnotatedField` with `variant_sources` indicating which members contribute +them. The common base class is identified so shared fields can be deduplicated. + +`FeatureSpec` is a `Protocol` satisfied by both `ModelSpec` and `UnionSpec`. Code that +operates on "any top-level feature" -- tree expansion, supplementary type collection, +rendering dispatch -- uses `FeatureSpec` rather than a concrete type, so union and model +features flow through the same pipeline. + +### Constraints + +Field-level constraints come from `Annotated` metadata -- `Ge`, `Le`, `Interval`, custom +constraint classes. Each is tagged with the NewType that contributed it via +`ConstraintSource`. + +Model-level constraints come from decorators (`@require_any_of`, `@require_if`, +`@forbid_if`) and are extracted via `ModelConstraint.get_model_constraints()`. + +## Output Layout + +Determines the full set of artifacts to generate, where each lives on disk, and how they +reference each other. + +### Supplementary type collection + +`collect_all_supplementary_types()` walks the expanded field trees of all feature specs, +extracting enums, semantic NewTypes, and sub-models that need their own output. Returns +`dict[str, SupplementarySpec]`. + +### Module-mirrored output paths + +Output paths derive from the source Python module path relative to a computed schema +root (`compute_schema_root()` finds the longest common prefix of all entry point module +paths). `compute_output_dir()` maps a Python module to an output directory. Feature +models land in their module-derived directory. Supplementary types land at their own +module-derived path, with a `types/` segment inserted when they fall under a feature +directory. + +### Link computation + +`LinkContext` carries the current output's path and the full type-to-path registry. 
When +a renderer formats a type reference, it looks up the target in the registry and computes +a relative path. Links exist only for types with registry entries, avoiding broken +references to ungenerated outputs. + +### Reverse references + +`compute_reverse_references()` walks feature specs to build `dict[type_name, +list[UsedByEntry]]` for "Used By" sections. + +## Rendering + +Renderers consume specs and own all presentation decisions -- formatting, casing, link +syntax. Extraction and the type registry carry no presentation logic. + +### Type registry + +`extraction/type_registry.py` maps type names to per-target string representations via +`TypeMapping`. `format_type_string()` wraps the resolved name with list/optional +qualifiers. `is_semantic_newtype()` distinguishes NewTypes that deserve their own +identity (like `FeatureVersion` wrapping `int32`) from pass-through aliases to +registered primitives. + +### Markdown renderer + +Jinja2 templates for feature, enum, NewType, primitives, and geometry pages. +`render_feature()` expands MODEL-kind fields inline with dot-notation (e.g., +`sources[].dataset`), stopping at cycle boundaries. `format_type()` in +`markdown/type_format.py` converts `TypeInfo` into link-aware display strings using +`LinkContext`. + +### Constraint prose + +`extraction/field_constraints.py` and `extraction/model_constraints.py` convert +constraint objects into human-readable descriptions. Field constraints produce inline +text. Model constraints produce section-level descriptions and per-field notes, with +consolidation for related conditional constraints (`require_if` / `forbid_if` grouped by +trigger). + +### Example loader + +Loads example data from theme `pyproject.toml` files, validates against Pydantic models, +and flattens to dot-notation rows for display in feature pages. Also provides a starting +point for generated test data. 
+ +`collect_dict_paths` walks the `FieldSpec` tree to identify dict-typed fields (like +`tags: dict[str, str]`), returning their dot-paths as a `frozenset`. `flatten_example` +checks this set before recursing into dicts -- paths in the set are kept as leaf values +rather than being split into dot-notation rows. The pipeline computes `dict_paths` from +`spec.fields` and threads it through `load_examples`. + +## Extension Points + +**Adding a new output target** (Arrow schemas next, PySpark expressions after): Add a +column to `TypeMapping` in `extraction/type_registry.py` for type-name resolution. Write +a new renderer module that consumes specs and the type registry. The extraction layer and +output layout are target-independent. + +**Adding a new type kind**: Add a variant to `TypeKind` in `extraction/type_analyzer.py`. +Handle it in the terminal classification of `analyze_type()`. Add an extraction function +and spec dataclass if needed. Update renderers to handle the new kind. + +**Adding a new constraint type**: The iterative unwrapper collects it automatically (any +`Annotated` metadata becomes a `ConstraintSource`). Add a case to +`describe_field_constraint()` for the prose representation. diff --git a/packages/overture-schema-codegen/docs/walkthrough.md b/packages/overture-schema-codegen/docs/walkthrough.md new file mode 100644 index 000000000..b51e3f0a4 --- /dev/null +++ b/packages/overture-schema-codegen/docs/walkthrough.md @@ -0,0 +1,757 @@ +# Walkthrough: overture-schema-codegen + +Pydantic's serialization machinery destroys the vocabulary that documentation needs. The +codegen recovers it. + +Consider the transportation theme's `Segment` type -- a discriminated union of +`RoadSegment`, `RailSegment`, and `WaterSegment`. All three share fields inherited from +`TransportationSegment`. Each adds variant-specific fields. The discriminator field +`subtype` carries a `Literal` value (`"road"`, `"rail"`, `"water"`) that selects the +arm. 
Call `model_json_schema()` and the union collapses into an `anyOf` array with +duplicated field definitions, the discriminator mapping disappears, and the common-base +relationship between variants is unrecoverable. + +The same loss happens at the field level. `FeatureVersion = NewType("FeatureVersion", +int32)` where `int32 = NewType("int32", Annotated[int, Field(ge=0, le=2147483647)])` +becomes `{"type": "integer", "minimum": 0, "maximum": 2147483647}`. Three things +vanished: the name "FeatureVersion," the name "int32," and the fact that `ge=0` came +from the `int32` layer rather than `FeatureVersion`. Custom constraint classes like +`GeometryTypeConstraint` lose their identity -- the class name, its docstring, and its +relationship to a specific NewType dissolve into anonymous JSON Schema keywords. + +Documentation needs all of this. The codegen exists to preserve it. + +Navigating Python's type annotation machinery -- NewType chains, nested `Annotated` +wrappers, union filtering, generic resolution -- is complex. The codegen does it once. +`analyze_type()` unwraps annotations into `TypeInfo`, a flat target-independent +representation. Extractors build specs from `TypeInfo`. Renderers consume specs without +re-entering the type system. New output targets add renderers, not extraction logic. + +The solution decomposes into four layers. Discovery finds models. Extraction unwraps +them into flat specifications. Output Layout decides what to generate and where it goes. +Rendering formats the output. Imports flow strictly downward -- no layer references the +one above it. + +Sixteen sections follow, ordered by dependency: each module appears before anything that +imports it. The final section inverts this and traces the full pipeline top-down. 
+Segment threads through as the primary example, since its path through the system -- +union classification, common base discovery, variant field partitioning, discriminator +extraction, tagged rendering -- exercises more of the pipeline than any model feature +does. + +--- + +## 1. Discovery + +The pipeline starts in `overture-schema-core`, not in the codegen package itself. +`discover_models()` calls `importlib.metadata.entry_points(group="overture.models")` and +loads every registered model. Each entry point name encodes identity as a +colon-delimited triple (`overture:buildings:building`); each value encodes the Python +location (`overture.schema.buildings:Building`). The function parses both formats -- +three-part names carry a theme component, two-part names set theme to `None` -- and +returns `dict[ModelKey, type[BaseModel]]`. + +`ModelKey` is a frozen dataclass with four fields: `namespace`, `theme`, `type`, and +`entry_point`. The `entry_point` field preserves the raw `module:Class` string that +downstream modules split to determine output directory structure. + +The return dict includes both concrete `BaseModel` subclasses and type aliases. +`Building` is a concrete class -- `isinstance(Building, type)` returns true. `Segment` +is not. It is an `Annotated` alias wrapping `Union[RoadSegment, RailSegment, +WaterSegment]` with a discriminator field. `isinstance` and `issubclass` cannot inspect +it. The entry point `overture:transportation:segment` maps to +`overture.schema.transportation:Segment`, which loads the alias itself. + +The codegen classifies these at the CLI boundary: `is_model_class` identifies concrete +`BaseModel` subclasses, `is_union_alias` calls `analyze_type` to identify discriminated +unions. From that point forward both model features and union features satisfy the +`FeatureSpec` protocol and flow through the same pipeline. + +## 2. Leaf utilities + +Two modules with no internal dependencies. Both serve multiple layers. 
+ +### extraction/case_conversion.py + +Converts PascalCase to snake_case with two compiled regexes. `_ACRONYM_BOUNDARY` inserts +an underscore between an uppercase run and a capitalized word start: `HTMLParser` +becomes `HTML_Parser` becomes `html_parser`. `_CAMEL_BOUNDARY` inserts between +lowercase-or-digit and uppercase: `buildingPart` becomes `building_part`. +`to_snake_case` applies them in sequence and lowercases. + +`slug_filename` composes the conversion with a file extension. Every output file path in +the system passes through this function. + +```python +>>> slug_filename("HexColor") +'hex_color.md' +``` + +### extraction/docstring.py + +Distinguishes author-written docstrings from auto-generated ones. Both `Enum` and +`NewType` produce default docstrings that vary across Python versions. Rather than +hardcoding version-specific strings, the module creates temporary instances at import +time, captures their `__doc__` attributes, then deletes the instances: + +```python +class _DocstringProbeEnum(Enum): + pass + +_ENUM_DEFAULT_DOCSTRING = _DocstringProbeEnum.__doc__ +del _DocstringProbeEnum +``` + +`is_custom_docstring` compares a given docstring against these captured defaults and an +optional inherited docstring. The enum extractor uses this both at class level and +per-member, since `DocumentedEnum` members carry individual `__doc__` attributes. + +`clean_docstring` delegates to `inspect.cleandoc` and returns `None` for empty results. +`first_docstring_line` takes the first line only -- used by renderers that show +summaries. + +## 3. Type analysis + +This is the module the entire package exists to house. `analyze_type` takes a raw type +annotation and returns `TypeInfo` -- a flat dataclass that fully describes the unwrapped +type without any reference to Python's typing machinery. + +### The loop + +The function runs a single `while True` loop that peels layers in fixed order. 
Each
+iteration handles one wrapper:
+
+**NewType** records names at two levels. The first NewType encountered becomes
+`outermost_newtype_name` (the user-facing identity, e.g. "FeatureVersion") and snapshots
+the current `list_depth` into `newtype_outer_list_depth` -- capturing how many list
+layers appeared before the NewType boundary. Subsequent NewTypes update
+`last_newtype_name` (the innermost, used for constraint provenance and as the terminal
+`base_type`). The loop unwraps via `__supertype__` and continues.
+
+**Annotated** collects every metadata object as a `ConstraintSource`, tagging each with
+whichever NewType was most recently entered. This is how constraint provenance survives:
+when `int32`'s `Annotated` layer contributes `Field(ge=0)`, the constraint records
+`source="int32"`. If a `FieldInfo` carries a description, the function captures it --
+first description wins, so the outermost NewType's documentation takes precedence.
+
+**Union** filters out `NoneType` (marks optional), `Sentinel` instances (Pydantic's
+`PydanticUndefined` marker for undeclared defaults), and `Literal` sentinel arms (like
+`Literal[""]` used alongside `HttpUrl`). If multiple concrete `BaseModel` subclasses
+remain after filtering, the function classifies the type as `UNION` and returns
+immediately with the member tuple. Non-BaseModel multi-type unions raise
+`UnsupportedUnionError`. A single remaining arm continues the loop.
+
+The `Literal` filtering has a guard: when a union contains *only* Literal arms (like
+`Optional[Literal["x"]]`), the function keeps them rather than filtering everything out.
+
+**list/dict** increments `list_depth` for each `list[...]` layer (so `list[list[str]]`
+records depth 2), sets dict flags, and continues into element types. Dict is the one
+case where `analyze_type` recurses -- it calls itself for key and value types, storing
+the results as nested `TypeInfo` objects.
+
+**Terminal** classification in `_classify_terminal` handles what remains after all
+wrappers are peeled: `Any` becomes a PRIMITIVE, `Literal` returns with the literal value
+(single-value only -- multi-value Literals get `literal_value=None`), `Enum` subclasses
+become ENUM, `BaseModel` subclasses become MODEL, everything else becomes PRIMITIVE.
+
+### Concrete walkthroughs
+
+**Segment (union path).** `analyze_type` receives the `Annotated` alias. Iteration 1
+sees `Annotated` -- collects the `FieldInfo` with discriminator metadata as a
+constraint, unwraps to `Union[RoadSegment, RailSegment, WaterSegment]`. Iteration 2 sees
+the union. No `None` arm, no sentinels. Three concrete `BaseModel` subclasses remain --
+the function classifies the type as `UNION` and returns immediately: `kind=UNION`,
+`union_members=(RoadSegment, RailSegment, WaterSegment)`, `base_type="RoadSegment"` (the
+first member). Two iterations, done. The union members are raw type objects, not
+recursively analyzed -- callers that need field details call `extract_model` on each
+member separately.
+
+**FeatureVersion (NewType chain path).** `FeatureVersion = NewType("FeatureVersion",
+int32)` where `int32 = NewType("int32", Annotated[int, Field(ge=0, le=2147483647)])`.
+
+Iteration 1 sees `FeatureVersion`. It's a NewType -- record
+`outermost_newtype_name="FeatureVersion"`, snapshot `newtype_outer_list_depth=0` (no list
+layers yet), unwrap to `int32`, continue. Iteration 2 sees
+`int32`. Also a NewType -- update `last_newtype_name="int32"`, unwrap to `Annotated[int,
+Field(ge=0, ...)]`, continue. Iteration 3 sees `Annotated`. Collect
+`ConstraintSource(source="int32", constraint=Field(ge=0, ...))`, unwrap to `int`. The
+loop breaks on `int` (not a NewType, not Annotated, not a union, not a container).
+`_classify_terminal` returns a `TypeInfo` with `base_type="int32"`,
+`newtype_name="FeatureVersion"`, `kind=PRIMITIVE`, and a constraint tuple recording the
+provenance chain.
+ +The two paths demonstrate the function's range. Segment exits early on the union branch +with member types for downstream extraction. FeatureVersion runs the full loop through +NewType and Annotated layers, accumulating constraint provenance that survives to +rendering. + +### _UnwrapState + +The accumulator dataclass carries state across iterations: optional/dict flags, +`list_depth` (incremented per `list[...]` layer), `newtype_outer_list_depth` (snapshotted +from `list_depth` when the first NewType is entered), the constraint list, both NewType +name slots, and the captured description. Its `build_type_info` method assembles the +final `TypeInfo` from accumulated state, freezing the constraint list into a tuple. + +### walk_type_info + +A shared visitor that recurses into dict key/value `TypeInfo` children. Both type +collection and reverse reference computation use it rather than duplicating the descent +pattern. Union members are raw `type` objects (not `TypeInfo` instances), so callers +handle them directly. + +## 4. Data structures + +`extraction/specs.py` defines the vocabulary shared between extraction and rendering. Every spec is +a dataclass with no methods beyond field access and, in `UnionSpec`'s case, one cached +property. + +**FieldSpec** represents one model field: alias-resolved name, `TypeInfo`, description, +required flag. Two fields populated later by tree expansion: `model` (a reference to the +nested `ModelSpec` for MODEL-kind fields) and `starts_cycle` (true when following this +field's model would create a cycle in the ancestor chain). + +**ModelSpec** represents one Pydantic model: class name, cleaned docstring, fields in +documentation order, source class reference, the entry point string that located it, and +model-level constraints from decorators like `@require_any_of`. + +**UnionSpec** represents a discriminated union type alias. 
Segment's `UnionSpec` carries +`members=[RoadSegment, RailSegment, WaterSegment]`, `discriminator_field="subtype"`, and +`common_base=TransportationSegment`. Its `annotated_fields` list pairs each `FieldSpec` +with `variant_sources` -- a tuple of class names indicating which union members +contribute that field, or `None` for fields from `TransportationSegment` shared across +all members. The `fields` cached property unwraps this for code that doesn't need +provenance. `UnionSpec` uses `eq=False` because it contains mutable lists and a +`cached_property` -- dataclass-generated `__eq__` would be unreliable. + +**FeatureSpec** is a `Protocol` satisfied by both `ModelSpec` and `UnionSpec`. This is +the pipeline's unifying abstraction. Tree expansion, type collection, rendering +dispatch, and example loading all operate on `FeatureSpec` without knowing which +concrete type they hold. + +**EnumSpec** and **EnumMemberSpec** serve enums. **NewTypeSpec** serves NewTypes. +**PrimitiveSpec** serves numeric primitives with an `Interval` for bounds and optional +`float_bits`. + +**SupplementarySpec** is the union type alias `EnumSpec | NewTypeSpec | ModelSpec` -- +the set of non-feature types that need their own output pages. `PrimitiveSpec` and +geometry types are excluded because they render on aggregate pages rather than +individual ones. + +### Classification functions + +Three functions at the bottom of `extraction/specs.py` classify discovery results. `is_model_class` +is a `TypeGuard` that checks `isinstance(obj, type) and issubclass(obj, BaseModel)`. +`is_union_alias` calls `analyze_type` and checks for `UNION` kind -- the only place +outside the type analyzer that touches Python type annotations. `filter_model_classes` +applies the model guard across the discovery dict's values. + +## 5. Type registry + +Maps type names to per-target display strings. 
`PRIMITIVE_TYPES` contains 15 entries: +four signed integer widths, three unsigned, two floats, `str`/`bool`, two Python builtin +aliases (`int` maps to `int64`, `float` maps to `float64`), and two geometry types +(`Geometry`, `BBox`). Each maps to a `TypeMapping` with a `markdown` field. + +`is_semantic_newtype` answers a question: does this NewType deserve its own +documentation page? The function returns true when the outermost name differs from the +base type (`FeatureVersion` wrapping `int32`) or when the base type has no registry +entry (`HexColor` wrapping `str` via constraints). It returns false for registered +primitives (`int32` wrapping `int`) -- those are the type system's building blocks, not +user-facing concepts. + +`resolve_type_name` looks up the registry by `base_type`, tries `source_type.__name__` +when the first lookup fails, and falls back to `base_type` as a last resort. Semantic +NewTypes wrapping unregistered classes (like `Sources` wrapping `SourceItem`) use the +underlying class name rather than the NewType alias -- `source_type.__name__` takes +precedence. + +## 6. Model extraction + +`extract_model` converts a Pydantic `BaseModel` subclass into a `ModelSpec`. + +### Field ordering + +Documentation order differs from Python declaration order. `_class_order` produces the +MRO-aware sequence: for single inheritance, reversed MRO puts base class fields first +and derived fields last. For multiple inheritance, the primary chain (first base) comes +first, then the class's own fields, then mixin fields. This matches how a reader +encounters the model -- shared structure before specialization. + +`_field_order` walks the class hierarchy produced by `_class_order` and collects +`__annotations__` keys, deduplicating as it goes. 
+ +### Field extraction + +For each field, the extractor resolves the alias chain (`validation_alias` > `alias` > +Python name via `resolve_field_alias`), calls `analyze_type` on `field_info.annotation`, +and builds a `FieldSpec`. The extractor uses `field_info.annotation` rather than +`get_type_hints()` because the latter returns unresolved TypeVars for generic base +classes. + +One subtlety: Pydantic strips the `Annotated` wrapper from some fields and moves the +metadata to `field_info.metadata`. When this happens, `analyze_type` sees a bare type +and misses the constraints. `_merge_field_metadata` patches them back in, tagging them +with `source=None` since they came from the field's own annotation rather than a NewType +chain. + +Model-level constraints come from `ModelConstraint.get_model_constraints(model_class)`, +which inspects decorators like `@require_any_of` and `@require_if`. + +### Tree expansion + +`expand_model_tree` is the recursive step that populates `FieldSpec.model` references. +It maintains a shared cache keyed by Python class and an ancestor set for cycle +detection. + +The cache insert happens *before* recursion. Without this ordering, a back-edge +encounter would find no cached entry and infinite-loop instead of marking +`starts_cycle=True`. The sequence: extract the sub-model, insert it into the cache, then +recurse into its fields. Shared references (the same sub-model used in multiple fields) +reuse the cached `ModelSpec` without marking cycles. + +Union-kind fields skip inline expansion -- they appear as a single row in the output, +linking to their members, rather than expanding inline. + +## 7. Other extractors + +### Enum extraction + +`extract_enum` iterates members, checking `is_custom_docstring` for both class-level and +per-member descriptions. `DocumentedEnum` members carry `__doc__` attributes that the +extractor preserves. 
The class-level docstring is passed as `inherited_doc` to the +per-member check, so members that inherit the class docstring verbatim get +`description=None`. + +### NewType extraction + +`extract_newtype` calls `analyze_type` on the NewType callable and extracts the custom +docstring. When the NewType has no explicit docstring, it falls back to +`TypeInfo.description` -- the first `Field.description` found in the `Annotated` +metadata chain. + +### Union extraction + +The most involved extractor. Walk through `Segment` concretely. + +`extract_union("Segment", annotation)` calls `analyze_type` on the +`Annotated[Union[RoadSegment, RailSegment, WaterSegment], ...]` alias. The analyzer +returns `kind=UNION` with the three member types. + +Next, `_find_common_base` intersects each member's filtered MRO (BaseModel subclasses +only, excluding `BaseModel` itself). All three share `TransportationSegment` in their +MRO. The function picks the most-derived class in the intersection -- the one whose +worst-case MRO distance is smallest. `TransportationSegment` wins: it is the direct +parent of all three members. + +The extractor calls `extract_model(TransportationSegment)` to get the shared field set. +Fields like `id`, `geometry`, `version`, `sources`, and `subtype` appear in the common +base. These become shared `AnnotatedField` entries with `variant_sources=None`. + +Then it extracts each member: `RoadSegment`, `RailSegment`, `WaterSegment`. Fields not +in the shared set are variant-specific, deduplicated by `(name, type_identity)` where +`type_identity` captures `base_type`, `kind`, `is_optional`, and `list_depth`. If +`RoadSegment` and `WaterSegment` both define a `width` field with the same type +identity, the `AnnotatedField` accumulates both class names: +`variant_sources=("RoadSegment", "WaterSegment")`. Fields unique to one member get a +single-element tuple. 
+ +`extract_discriminator` inspects the `Annotated` metadata for a `FieldInfo` with a +discriminator attribute. For Segment, it finds `subtype` and builds the mapping: +`{"road": RoadSegment, "rail": RailSegment, "water": WaterSegment}` by checking each +member for single-value `Literal` fields on the discriminator. + +### Primitive extraction + +`partition_primitive_and_geometry_names` reads a module's `__all__` exports. NewType +exports are numeric primitives; non-constraint class exports are geometry types. + +`extract_primitives` builds `PrimitiveSpec` objects. For each primitive name it resolves +the object from the module, calls `extract_newtype` for the type analysis, then extracts +numeric bounds from constraints. `extract_numeric_bounds` scans constraint objects for +`ge`/`gt`/`le`/`lt` attributes and packs them into an `Interval`. + +## 8. Constraint prose + +Two modules convert constraint objects into human-readable text. + +### Field constraints + +`extraction/field_constraints.py` pattern-matches constraint types. `Interval` renders +as `lower <= x <= upper` using Unicode comparison operators. Single-bound constraints +(`Ge`, `Gt`, `Le`, `Lt`) render as `>= value` or `< value`. Length constraints +(`MinLen`, `MaxLen`) render as plain prose (e.g. "Minimum length: 1"). `GeometryTypeConstraint` lists +allowed geometry types by name, converting snake_case values to PascalCase. `Reference` +describes the relationship and target model, using an optional `link_fn` to produce +markdown links. + +Opaque constraints -- classes that inherit `object.__repr__` without customization -- +render as their class name plus docstring. When a regex pattern attribute exists, the +prose includes it. + +`constraint_display_text` is the top-level entry point. It checks whether the constraint +is opaque and has a docstring, and if so, produces a composite description combining the +docstring, class name, and pattern. Otherwise it delegates to +`describe_field_constraint`. 
+ +### Model constraints + +`extraction/model_constraints.py` handles model-level constraints from decorators. +`analyze_model_constraints` returns two things in one pass: a list of section-level +descriptions and a dict mapping field names to the constraint descriptions that +reference them. + +The module consolidates related conditionals. Three `require_if` constraints with the +same target fields but different trigger values merge into "when X is one of: a, b, c" +instead of three separate bullets. `_consolidation_key` groups constraints by `(type, +field_names, condition_field_name)`. Groups with one member render normally; groups with +multiple members produce consolidated prose. + +`NoExtraFieldsConstraint` is silently skipped -- it is a structural validation rule, not +something a documentation reader acts on. + +## 9. Module layout + +Translates Python module paths into output directory paths. `compute_schema_root` finds +the longest common dotted prefix across all entry point module paths. Given paths like +`overture.schema.buildings`, `overture.schema.places`, and +`overture.schema.transportation`, the root is `overture.schema`. For a single unique +path, it drops the last component. + +`compute_output_dir` mirrors the remaining package structure after stripping the root. +Packages (directories with `__path__` per PEP 302) keep all components. File modules +drop their last component, since the `.py` filename adds no useful structure. +`is_package_module` checks `sys.modules` for `__path__` to make this distinction. + +The entry point string `overture.schema.buildings:Building` encodes both module and +class. `entry_point_module` extracts the module path, `entry_point_class` extracts the +class name. `output_dir_for_entry_point` composes these to produce the output directory +for a feature. + +## 10. 
Supplementary type collection + +`collect_all_supplementary_types` walks the expanded field trees of all feature specs to +discover every referenced type that needs its own output page: enums, semantic NewTypes, +and sub-models. + +The walk maintains a visited set for models and a feature name set for skip detection. +Types that are themselves top-level features get skipped. For UNION-kind fields, the +function extracts and walks each member's fields. For semantic NewTypes, it walks the +`__supertype__` chain to collect intermediate NewTypes -- `Id` wraps +`NoWhitespaceString` wraps `str`, and both `Id` and `NoWhitespaceString` get their own +pages. The `walk_type_info` visitor handles dict key/value recursion. + +MODEL-kind fields follow `field_spec.model` references that were populated by +`expand_model_tree`. The function raises `RuntimeError` if it encounters a MODEL-kind +field with `model=None` -- a guard against calling collection before tree expansion. + +A single field matches multiple conditions independently. A semantic NewType wrapping a +MODEL-kind type triggers both NewType extraction and model collection. The checks use +independent `if` statements, not `elif`. + +## 11. Path assignment + +`build_placement_registry` builds the complete mapping from type names to output file +paths. Three tiers: + +Aggregate pages come first. All numeric primitives point to +`system/primitive/primitives.md`. All geometry types point to +`system/primitive/geometry.md`. These are hardcoded paths since the types share a single +reference page. + +Feature specs get individual pages. Output directories derive from +`output_dir_for_entry_point`. Filenames use `slug_filename`. + +Supplementary specs get module-derived paths from `source_type.__module__`. When a +supplementary type's output directory falls under a feature directory, +`_nest_under_types` inserts a `types/` segment. 
Without this insertion, an enum defined +in `overture.schema.buildings` would land alongside the Building feature page. With it, +the enum lands in `buildings/types/` -- preventing supplementary type pages from +cluttering feature directories. + +`_nest_under_types` sorts feature directories by path length (descending) before +checking containment, so the most specific match wins. + +## 12. Links and reverse references + +### Link computation + +`LinkContext` carries the current page's output path and the full type-to-path registry. +When a renderer formats a type reference, it calls `resolve_link` to compute a relative +path from the current page to the target. Types without registry entries return `None`, +telling renderers to show inline code instead of a broken link. `resolve_link_or_slug` +provides a fallback when a link is required regardless. + +`relative_link` computes `../` navigation between any two paths in the output tree. It +finds the common prefix of directory components, counts the levels up from the source +directory, and descends into the target. Both paths must be normalized -- the function +rejects `..` components to prevent path traversal surprises. + +### Reverse references + +`compute_reverse_references` walks all feature fields and supplementary specs to build +`dict[str, list[UsedByEntry]]`. Each entry maps a type name to the list of types that +reference it. Entries sort models before NewTypes, alphabetical within each group. + +The function tracks references with sets for deduplication, then sorts into lists at the +end. It skips self-references and references to types not in the supplementary spec dict +(features don't need "used by" sections since they are the entry points). + +NewType specs register additional references from their constraint sources. 
If `Id`
+inherits a constraint from `NoWhitespaceString`, the reverse reference captures that
+`Id` uses `NoWhitespaceString` -- even though the relationship is through constraint
+provenance rather than direct field reference.
+
+## 13. Markdown type formatting
+
+`markdown/type_format.py` converts `TypeInfo` into display strings for markdown output.
+
+`format_type` handles the full range of field types. Single-value Literals render as
+`"value"` in backticks. Semantic NewTypes and enums/models get markdown links via
+`_resolve_type_link`, which checks the `LinkContext` registry and falls back to plain
+code spans. For types with a linked identity (semantic NewTypes, enums, models), list
+rendering depends on where the list layers sit relative to the NewType boundary.
+`newtype_outer_list_depth > 0` means the list wraps the NewType (`list[PhoneNumber]`) and
+renders as `list<PhoneNumber>`. `is_list` with `newtype_name` set means the NewType
+wraps a list internally (`Sources` wrapping `list[SourceItem]`) and renders with a
+`(list)` qualifier. Non-NewType identities (enums, models) use `list<...>` syntax. Linked
+inner types use broken-backtick syntax (`` `list<` `` ... `` `>` ``) built as a single
+wrapper to avoid adjacent backticks that CommonMark would interpret as multi-backtick
+code span delimiters. Dict types render as `` `map` ``. Qualifiers (optional, list,
+map) append in parentheses.
+
+Union members format independently -- each gets its own link resolution, joined with
+pipe separators escaped for table-cell safety.
+
+`format_underlying_type` handles NewType page headers. It links enums and models that
+have their own pages but skips the outermost NewType name to avoid self-referencing. The
+function uses `source_type.__name__` rather than `base_type` for link resolution, since
+`base_type` may carry the outermost NewType name when only one NewType wraps a class.
+
+## 14. Markdown rendering
+
+`markdown/renderer.py` is the template driver.
+ +### Templates + +Six Jinja2 templates in `markdown/templates/`. `feature.md.jinja2` renders a field table +with Name, Type, and Description columns, an optional Constraints section, an optional +Examples section, and a "Used By" partial. `enum.md.jinja2` renders a bullet list of +values. `newtype.md.jinja2` shows underlying type and constraints with provenance links. +`primitives.md.jinja2` and `geometry.md.jinja2` render aggregate reference pages. +`_used_by.md.jinja2` is an included partial. + +The Jinja2 environment registers `linkify_urls` as a filter, which wraps bare URLs in +markdown link syntax. The filter uses a two-pass approach: extract code spans first (to +avoid modifying URLs inside backticks), linkify the remaining text, then restore code +spans. + +### Field expansion + +`render_feature` dispatches on spec type. `ModelSpec` gets `_expand_model_fields`, which +walks the pre-populated `FieldSpec.model` tree and produces dot-notation rows. +`sources[0].dataset` appears as a single row in the flat field table, with `[]` +appended per nesting level to list-of-model fields (so a doubly-nested list gets +`[][]`). Expansion stops at fields marked with +`starts_cycle`. + +`UnionSpec` gets `_expand_union_fields`, which adds italic variant tags to +variant-specific fields. For Segment, shared fields from `TransportationSegment` (like +`id`, `geometry`, `sources`) render as plain rows. Variant-specific fields get tagged: +`_short_variant_name` strips the union name suffix, so `RoadSegment` becomes `Road`, +`WaterSegment` becomes `Water`. A field present in two of three members renders as `` +`width` *(Road, Water)* ``. Shared fields render without tags. + +### Constraint annotation + +Field-level constraints from the field's own annotation (not inherited from NewType +chains) annotate the field's description cell as italic text. 
The distinction matters: +constraints with `source=None` came from the field itself, while constraints with a +named source live on the NewType's own page. + +Model-level constraints annotate top-level field rows (those without dot-notation +prefixes) using the `field_notes` dict from `analyze_model_constraints`. + +### Example formatting + +Example values render in backticks for monospace consistency. Booleans use +`true`/`false` (not Python's `True`/`False`). `None` renders as `null`. Long values +truncate at 100 characters. Lists and dicts use compact bracket/brace notation. + +### Aggregate pages + +`render_primitives_from_specs` sorts primitives by bit-width key (prefix then numeric +width), groups into signed integers, unsigned integers, and floats, and formats ranges. +Integer ranges show both bounds as a compact "lower to upper" form; `int64`-scale bounds +use `2^63` notation for readability. `render_geometry_from_values` produces a +comma-separated backtick list. + +## 15. Example loader + +Loads example data from theme `pyproject.toml` files and validates it against the +schema. + +`resolve_pyproject_path` walks up from a model's module file to find `pyproject.toml`. +`load_examples_from_toml` reads the `[examples.ModelName]` TOML section. + +Validation requires three preprocessing steps that handle TOML's limitations and +flat-schema conventions. + +TOML has no null literal, so examples use the string `"null"` as a stand-in. `_denull` +replaces these recursively, walking nested dicts and lists. + +Literal fields (like `theme="buildings"`) are omitted from examples since they carry +constant values. `_inject_literal_fields` adds them back before validation by scanning +`model_fields` for single-value `Literal` annotations via `single_literal_value`. + +Discriminated union examples from flat parquet schemas include null fields from +non-selected variant arms. 
`_strip_null_unknown_fields` removes null-valued fields not +in the common base's field set, so the selected arm's validator accepts the data without +choking on fields that belong to sibling variants. + +`collect_dict_paths` walks the `FieldSpec` tree to identify dict-typed fields (like +`tags: dict[str, str]`), returning their dot-paths as a `frozenset`. Schema-notation +paths use empty brackets (`items[].tags`) while runtime paths carry indices +(`items[0].tags`); `_normalize_path` strips indices before membership checks. + +`flatten_example` converts nested dicts to dot-notation. Nested dicts become +`parent.child`, lists of dicts become `parent[0].child`. Dicts at paths in `dict_paths` +are kept as leaf values -- a `tags` field typed as `dict[str, str]` renders as a whole +map rather than being split into `tags.color`, `tags.size`. `order_example_rows` sorts by +field position in the documentation's field order using a stable sort, so sub-fields +maintain their original relative order. + +`load_examples` orchestrates the full flow: find the pyproject.toml, load the TOML +section, validate each example, flatten, and order. Invalid examples log a warning and +skip rather than failing the pipeline. + +## 16. Orchestration and CLI + +### The pipeline + +`generate_markdown_pages` in `markdown/pipeline.py` is the "main" function. It takes +feature specs and a schema root, returns rendered pages without touching the filesystem. +Eight steps: + +1. **Expand model trees** with a shared cache across all features, so sub-models + referenced by multiple features extract once. + +2. **Partition primitive and geometry names** from the system primitive module's + `__all__` exports. + +3. **Collect supplementary types** by walking expanded feature trees. + +4. **Build the placement registry** mapping every type to its output file path. + +5. **Compute reverse references** across all features and supplements. + +6. 
**Render each feature** with its `LinkContext`, loaded examples, and used-by entries. + +7. **Render each supplementary type** -- dispatching to `render_enum`, `render_newtype`, + or `render_feature` (for sub-models) based on spec type. + +8. **Render aggregate pages** for primitives and geometry. + +The return value is `list[RenderedPage]` -- frozen dataclasses carrying content, output +path, and a boolean `is_feature` flag. The caller decides what to do with them. + +### The CLI + +`cli.py` is a thin Click wrapper. The `generate` command discovers models, computes +schema root from *all* entry points (before any theme filtering), classifies each entry +as model or union via `is_model_class` and `is_union_alias`, extracts specs, calls the +pipeline, and writes output. + +Schema root computation uses all entry points deliberately. Theme filtering narrows +which features appear in the output, but the directory structure must remain stable +regardless of which themes are selected. Computing the root from filtered paths would +shift output directories when themes change. + +Feature pages get Docusaurus frontmatter (`sidebar_position: 1`) prepended. The CLI +generates `_category_.json` files for sidebar navigation, assigning positions +alphabetically with feature directories first. + +The `list` command prints sorted model names -- a diagnostic tool for verifying which +models the entry point system discovers. + +--- + +## Top-down trace: Segment through the pipeline + +A reader who reached this point has seen every module in isolation. This section follows +`Segment` from discovery to rendered markdown, showing how the pieces compose. + +**Discovery.** The CLI calls `discover_models()`. The entry point +`overture:transportation:segment` loads `overture.schema.transportation:Segment` -- the +`Annotated[Union[...]]` alias. 
`Segment` lands in the return dict keyed by +`ModelKey(namespace="overture", theme="transportation", type="segment", +entry_point="overture.schema.transportation:Segment")`. + +**Classification.** The CLI tests each entry. `is_model_class(Segment)` returns false -- +`Segment` is not a class. `is_union_alias(Segment)` calls `analyze_type`, which peels +the `Annotated` wrapper and finds three `BaseModel` subclasses in the union. The +analyzer returns `kind=UNION`. The CLI routes Segment to `extract_union`. + +**Extraction.** `extract_union("Segment", annotation)` calls `analyze_type` again (cheap +-- the same two-iteration path), gets the three member types, and finds +`TransportationSegment` as the common base via `_find_common_base`. It extracts the +common base's fields as shared, then extracts each member's fields and partitions the +non-shared ones into `AnnotatedField` entries with variant provenance. +`extract_discriminator` finds `subtype` and builds `{"road": RoadSegment, "rail": +RailSegment, "water": WaterSegment}`. The result is a `UnionSpec` satisfying +`FeatureSpec`. + +Meanwhile, concrete models like `Building` go through `extract_model`, which calls +`analyze_type` on each field annotation. A field typed `FeatureVersion` unwraps through +two NewType layers and an `Annotated` layer, producing a `TypeInfo` with +`base_type="int32"`, `newtype_name="FeatureVersion"`, and constraint provenance linking +`ge=0` back to the `int32` NewType. Both extraction paths produce specs satisfying +`FeatureSpec`. + +**Pipeline entry.** The feature specs enter `generate_markdown_pages`. +`expand_model_tree` walks MODEL-kind fields on Segment's `UnionSpec` and populates +`FieldSpec.model` references. The shared cache ensures sub-models referenced by multiple +features (like `Sources`) extract once. Union-kind fields skip inline expansion. + +**Layout.** `partition_primitive_and_geometry_names` reads the system module's exports. 
+`collect_all_supplementary_types` walks Segment's expanded fields and discovers +referenced enums (like `Subtype`), semantic NewTypes (like `Id`, `Sources`), and +sub-models. The walk follows `FieldSpec.model` references down the tree, and for +UNION-kind fields, extracts and walks each member's fields separately. + +`build_placement_registry` assigns Segment's output path from its entry point: +`entry_point_module` extracts `overture.schema.transportation`, `compute_output_dir` +strips the schema root and mirrors the remaining structure. Supplementary types get +module-derived paths with `types/` inserted under feature directories. + +**Reverse references.** `compute_reverse_references` walks Segment's fields and records +that Segment references `Subtype`, `Id`, `Sources`, and other types. These references +populate "Used By" sections: the `Subtype` enum page shows that Segment uses it. + +**Rendering.** The pipeline builds a `LinkContext` from Segment's output path and the +full registry. `render_feature` dispatches to `_expand_union_fields` because the spec is +a `UnionSpec`. Shared fields from `TransportationSegment` render as plain rows. +Variant-specific fields get italic tags: `` `road_class` *(Road)* ``. The renderer +formats each field's type via `format_type`, which resolves links through the +`LinkContext` -- `Subtype` gets a relative link to its enum page, `Id` links to its +NewType page. Constraints with `source=None` annotate field rows; constraints with named +sources appear on the source NewType's page instead. + +The example loader finds `pyproject.toml` in the transportation theme package, reads +`[examples.Segment]`, validates each example against the union alias (injecting literal +fields, stripping null fields from non-selected arms), computes `dict_paths` from +`spec.fields` to identify dict-typed fields, flattens to dot-notation (keeping dict-typed +fields as leaf values), and orders by field position. 
+ +The Jinja2 template assembles the field table, optional constraints section, examples, +and "Used By" partial into markdown. + +**Output.** The pipeline returns a `RenderedPage` with Segment's content, its output +path, and `is_feature=True`. The CLI prepends Docusaurus frontmatter and writes the +file. `_category_.json` files get generated for sidebar navigation. + +**The layering principle.** At every stage, the modules that do the work never reach +back up the dependency chain. Renderers consume specs and registries but never import +extractors. Extractors consume `analyze_type` but never import renderers. The type +analyzer imports nothing from the codegen package except `clean_docstring`. Any module +can be understood, tested, and modified by reading only the modules below it. diff --git a/packages/overture-schema-codegen/pyproject.toml b/packages/overture-schema-codegen/pyproject.toml new file mode 100644 index 000000000..de42c5fb9 --- /dev/null +++ b/packages/overture-schema-codegen/pyproject.toml @@ -0,0 +1,29 @@ +[build-system] +build-backend = "hatchling.build" +requires = ["hatchling"] + +[project] +dependencies = [ + "click>=8.0", + "jinja2>=3.0", + "overture-schema-core", + "overture-schema-system", + "tomli>=2.0; python_version < '3.11'", +] +description = "Code generator that produces documentation and code from Pydantic models" +dynamic = ["version"] +license = "MIT" +name = "overture-schema-codegen" + +[project.scripts] +overture-codegen = "overture.schema.codegen.cli:main" + +[tool.uv.sources] +overture-schema-core = { workspace = true } +overture-schema-system = { workspace = true } + +[tool.hatch.version] +path = "src/overture/schema/codegen/__about__.py" + +[tool.hatch.build.targets.wheel] +packages = ["src/overture"] diff --git a/packages/overture-schema-codegen/src/overture/__init__.py b/packages/overture-schema-codegen/src/overture/__init__.py new file mode 100644 index 000000000..8db66d3d0 --- /dev/null +++ 
b/packages/overture-schema-codegen/src/overture/__init__.py @@ -0,0 +1 @@ +__path__ = __import__("pkgutil").extend_path(__path__, __name__) diff --git a/packages/overture-schema-codegen/src/overture/schema/__init__.py b/packages/overture-schema-codegen/src/overture/schema/__init__.py new file mode 100644 index 000000000..8db66d3d0 --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/__init__.py @@ -0,0 +1 @@ +__path__ = __import__("pkgutil").extend_path(__path__, __name__) diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/__about__.py b/packages/overture-schema-codegen/src/overture/schema/codegen/__about__.py new file mode 100644 index 000000000..3dc1f76bc --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/__about__.py @@ -0,0 +1 @@ +__version__ = "0.1.0" diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/__init__.py b/packages/overture-schema-codegen/src/overture/schema/codegen/__init__.py new file mode 100644 index 000000000..2de7d7120 --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/__init__.py @@ -0,0 +1 @@ +"""Code generator for Overture Schema Pydantic models.""" diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/cli.py b/packages/overture-schema-codegen/src/overture/schema/codegen/cli.py new file mode 100644 index 000000000..0a24c7348 --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/cli.py @@ -0,0 +1,195 @@ +"""CLI entrypoint for schema code generation.""" + +import json +import logging +from pathlib import Path, PurePosixPath + +import click + +from overture.schema.core.discovery import discover_models + +from .extraction.model_extraction import extract_model +from .extraction.specs import ( + FeatureSpec, + is_model_class, + is_union_alias, +) +from .extraction.union_extraction import extract_union +from .layout.module_layout import ( + OUTPUT_ROOT, + compute_schema_root, + 
entry_point_class, + entry_point_module, +) +from .markdown.pipeline import generate_markdown_pages + +log = logging.getLogger(__name__) + +__all__ = ["cli"] + +_OUTPUT_FORMATS = ("markdown",) + +_FEATURE_FRONTMATTER = "---\nsidebar_position: 1\n---\n\n" + + +def _write_output( + content: str, + output_dir: Path | None, + output_path: PurePosixPath, +) -> None: + """Write content to a file under output_dir, or stdout.""" + if output_dir: + file_path = output_dir / output_path + file_path.parent.mkdir(parents=True, exist_ok=True) + file_path.write_text(content) + else: + click.echo(content) + click.echo() # separate entries with a blank line in stdout mode + + +@click.group() +def cli() -> None: + """Overture Schema code generator. + + Generate documentation and code from Pydantic schema models. + """ + + +@cli.command("list") +def list_models() -> None: + """List all discovered models.""" + models = discover_models() + names = sorted( + model.__name__ if isinstance(model, type) else str(model) + for model in models.values() + ) + for name in names: + click.echo(name) + + +@cli.command() +@click.option( + "--format", + "output_format", + required=True, + type=click.Choice(_OUTPUT_FORMATS), + help="Output format", +) +@click.option( + "--theme", + multiple=True, + help="Filter to specific theme(s); repeatable (e.g., --theme buildings --theme places)", +) +@click.option( + "--output-dir", + type=click.Path(path_type=Path), + default=None, + help="Write output to directory (default: stdout)", +) +def generate( + output_format: str, + theme: tuple[str, ...], + output_dir: Path | None, +) -> None: + """Generate code/docs from discovered models.""" + all_models = discover_models() + + # Schema root from ALL entry points (before theme filter). 
+ module_paths = [entry_point_module(k.entry_point) for k in all_models] + schema_root = compute_schema_root(module_paths) + + models = ( + {k: v for k, v in all_models.items() if k.theme in theme} + if theme + else all_models + ) + + if output_dir: + output_dir.mkdir(parents=True, exist_ok=True) + + feature_specs: list[FeatureSpec] = [] + for key, entry in models.items(): + if is_model_class(entry): + feature_specs.append(extract_model(entry, entry_point=key.entry_point)) + elif is_union_alias(entry): + feature_specs.append( + extract_union( + entry_point_class(key.entry_point), + entry, + entry_point=key.entry_point, + ) + ) + + _generate_markdown(feature_specs, schema_root, output_dir) + + +def _generate_markdown( + feature_specs: list[FeatureSpec], + schema_root: str, + output_dir: Path | None, +) -> None: + """Generate markdown with directory layout and placement-aware links.""" + pages = generate_markdown_pages(feature_specs, schema_root) + + for page in pages: + content = ( + f"{_FEATURE_FRONTMATTER}{page.content}" if page.is_feature else page.content + ) + _write_output(content, output_dir, page.path) + + if output_dir: + feature_paths = {page.path for page in pages if page.is_feature} + all_paths = {page.path for page in pages} + _write_category_files(output_dir, all_paths, feature_paths) + + +def _ancestor_dirs(paths: set[PurePosixPath]) -> set[PurePosixPath]: + """Collect all ancestor directories for a set of file paths.""" + dirs: set[PurePosixPath] = set() + for path in paths: + parent = path.parent + while parent != OUTPUT_ROOT: + dirs.add(parent) + parent = parent.parent + return dirs + + +def _top_level_positions( + dirs: set[PurePosixPath], + feature_paths: set[PurePosixPath], +) -> dict[PurePosixPath, int]: + """Assign sidebar positions: feature dirs first, then non-feature, both alphabetical.""" + feature_dir_names = {p.parts[0] for p in feature_paths} + top_level = sorted(d for d in dirs if d.parent == OUTPUT_ROOT) + feature_dirs = [d for d in 
top_level if d.name in feature_dir_names] + non_feature_dirs = [d for d in top_level if d.name not in feature_dir_names] + return {d: i for i, d in enumerate(feature_dirs + non_feature_dirs, start=1)} + + +def _write_category_files( + output_dir: Path, + all_paths: set[PurePosixPath], + feature_paths: set[PurePosixPath], +) -> None: + """Write _category_.json files for Docusaurus sidebar navigation.""" + dirs = _ancestor_dirs(all_paths) + positions = _top_level_positions(dirs, feature_paths) + + for dir_path in sorted(dirs): + label = dir_path.name.replace("_", " ").title() + category: dict[str, object] = {"label": label} + if dir_path in positions: + category["position"] = positions[dir_path] + + file_path = output_dir / dir_path / "_category_.json" + file_path.parent.mkdir(parents=True, exist_ok=True) + file_path.write_text(json.dumps(category, indent=2) + "\n") + + +def main() -> None: + """Run the CLI entry point.""" + cli() + + +if __name__ == "__main__": + main() diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/__init__.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/case_conversion.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/case_conversion.py new file mode 100644 index 000000000..9d06341fb --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/case_conversion.py @@ -0,0 +1,41 @@ +"""PascalCase to snake_case conversion for code generation.""" + +import re + +__all__ = ["slug_filename", "to_snake_case"] + +# Insert _ between an acronym run and a capitalized word start (HTML|Parser) +_ACRONYM_BOUNDARY = re.compile(r"([A-Z]+)([A-Z][a-z])") +# Insert _ between a lowercase/digit and an uppercase letter (building|Part) +_CAMEL_BOUNDARY = re.compile(r"([a-z0-9])([A-Z])") + + +def 
to_snake_case(name: str) -> str: + """Convert PascalCase to snake_case. + + Handles acronym runs correctly: "HTMLParser" becomes "html_parser", + not "h_t_m_l_parser". + + >>> to_snake_case("HTMLParser") + 'html_parser' + >>> to_snake_case("BuildingPart") + 'building_part' + >>> to_snake_case("simple") + 'simple' + """ + name = _ACRONYM_BOUNDARY.sub(r"\1_\2", name) + name = _CAMEL_BOUNDARY.sub(r"\1_\2", name) + return name.lower() + + +def slug_filename(name: str, ext: str = ".md") -> str: + """Convert a PascalCase type name to a snake_case filename. + + >>> slug_filename("HexColor") + 'hex_color.md' + >>> slug_filename("BuildingPart") + 'building_part.md' + >>> slug_filename("BuildingPart", ext=".json") + 'building_part.json' + """ + return f"{to_snake_case(name)}{ext}" diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/docstring.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/docstring.py new file mode 100644 index 000000000..7dc2e112f --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/docstring.py @@ -0,0 +1,46 @@ +"""Docstring extraction and cleaning utilities.""" + +import inspect +from enum import Enum +from typing import NewType + +__all__ = ["clean_docstring", "first_docstring_line", "is_custom_docstring"] + + +# Probe auto-generated docstrings so we can distinguish them from explicit ones. +# Both Enum and NewType generate default docstrings that vary by Python version; +# capturing at import time adapts automatically if the format changes. 
+class _DocstringProbeEnum(Enum): + pass + + +_ENUM_DEFAULT_DOCSTRING = _DocstringProbeEnum.__doc__ +del _DocstringProbeEnum +_NewtypeProbe = NewType("_NewtypeProbe", int) +_NEWTYPE_DEFAULT_DOCSTRING = _NewtypeProbe.__doc__ +del _NewtypeProbe + + +def clean_docstring(doc: str | None) -> str | None: + """Return cleaned docstring, or None if absent or whitespace-only.""" + if not doc: + return None + cleaned = inspect.cleandoc(doc) + return cleaned or None + + +def first_docstring_line(doc: str | None) -> str | None: + """Return the first line of a docstring, or None if absent.""" + cleaned = clean_docstring(doc) + if not cleaned: + return None + return cleaned.split("\n")[0] + + +def is_custom_docstring(doc: str | None, inherited_doc: str | None = None) -> bool: + """Check if a docstring was explicitly written, not auto-generated or inherited.""" + return bool(doc) and doc not in ( + _ENUM_DEFAULT_DOCSTRING, + _NEWTYPE_DEFAULT_DOCSTRING, + inherited_doc, + ) diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/enum_extraction.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/enum_extraction.py new file mode 100644 index 000000000..545979d66 --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/enum_extraction.py @@ -0,0 +1,40 @@ +"""Enum extraction.""" + +from enum import Enum + +from .docstring import clean_docstring, is_custom_docstring +from .specs import EnumMemberSpec, EnumSpec + +__all__ = ["extract_enum"] + + +def extract_enum(enum_class: type[Enum]) -> EnumSpec: + """Extract enum specification from an Enum class. + + Handles both simple str Enums and DocumentedEnums where members + have per-value descriptions via the __doc__ attribute. 
+ """ + class_doc = enum_class.__doc__ + description = clean_docstring(class_doc) if is_custom_docstring(class_doc) else None + + members: list[EnumMemberSpec] = [] + for member in enum_class: + member_doc = getattr(member, "__doc__", None) + member_description = ( + member_doc if is_custom_docstring(member_doc, class_doc) else None + ) + + members.append( + EnumMemberSpec( + name=member.name, + value=str(member.value), + description=member_description, + ) + ) + + return EnumSpec( + name=enum_class.__name__, + description=description, + members=members, + source_type=enum_class, + ) diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/examples.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/examples.py new file mode 100644 index 000000000..18d71ea8b --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/examples.py @@ -0,0 +1,370 @@ +"""Load and process example data from theme pyproject.toml files.""" + +import logging +import re +import sys +from dataclasses import dataclass +from pathlib import Path +from typing import Any + +from pydantic import BaseModel, TypeAdapter, ValidationError +from pydantic.fields import FieldInfo + +from .model_extraction import resolve_field_alias +from .specs import FieldSpec +from .type_analyzer import single_literal_value + +log = logging.getLogger(__name__) + +__all__ = ["ExampleRecord", "collect_dict_paths", "load_examples", "validate_example"] + +# tomllib is stdlib from 3.11+; tomli is the backport for 3.10. 
+try: + import tomllib # type: ignore[import-not-found] +except ModuleNotFoundError: + import tomli as tomllib # type: ignore[import-not-found] + + +@dataclass +class ExampleRecord: + """A flattened example with field-value pairs in documentation order.""" + + rows: list[tuple[str, Any]] + + +def _inject_literal_fields( + model_fields_dict: dict[str, FieldInfo], data: dict[str, Any] +) -> dict[str, Any]: + """Inject single-value Literal field defaults missing from *data*. + + Inspects *model_fields_dict* for fields with single-value ``Literal`` + annotations. For each field missing from *data*, injects the literal + value using the field's ``validation_alias`` (if set), falling back + to ``alias``, then to the field name. + + Returns a new dict; the original is not mutated. + """ + result = data.copy() + + for field_name, field_info in model_fields_dict.items(): + key = resolve_field_alias(field_name, field_info) + if key in result: + continue + + literal_value = single_literal_value(field_info.annotation) + if literal_value is not None: + result[key] = literal_value + + return result + + +def _denull_value(value: object) -> object: + """Convert a single value, replacing ``"null"`` strings with ``None``.""" + if value == "null": + return None + if isinstance(value, dict): + return _denull(value) + if isinstance(value, list): + return [_denull_value(item) for item in value] + return value + + +def _denull(data: dict[str, Any]) -> dict[str, Any]: + """Convert ``"null"`` sentinel strings to ``None``. + + TOML has no null literal, so example data uses the string ``"null"`` + as a stand-in. This recursively walks *data* (including nested dicts, + lists of dicts, and plain lists) and replaces every ``"null"`` value + with ``None``. + + Returns a new dict; the original is not mutated. 
+ """ + return {key: _denull_value(value) for key, value in data.items()} + + +def _known_field_keys(model_fields_dict: dict[str, FieldInfo]) -> frozenset[str]: + """Alias-resolved field keys from a model_fields dict.""" + return frozenset( + resolve_field_alias(name, info) for name, info in model_fields_dict.items() + ) + + +def _strip_null_unknown_fields( + data: dict[str, Any], known_keys: frozenset[str] +) -> dict[str, Any]: + """Drop null-valued fields not in *known_keys*. + + For discriminated unions, *known_keys* contains only common base + fields. Variant-specific null fields from other arms (present in + flat parquet schemas) are stripped so the selected arm's validator + doesn't reject them as unknown extras. + + Non-null fields are always kept so the arm's own validator can + accept or reject them normally. + """ + return {k: v for k, v in data.items() if v is not None or k in known_keys} + + +def validate_example( + validation_type: object, + raw: dict[str, Any], + *, + model_fields: dict[str, FieldInfo] | None = None, +) -> dict[str, Any]: + """Validate example data against a model or union type. + + Uses TypeAdapter for validation, supporting both concrete models + and discriminated union aliases. + + Preprocesses *raw* data by: + 1. Converting "null" strings to None + 2. Injecting missing Literal fields for validation (if model_fields provided) + 3. Stripping null-valued fields not in *model_fields* (handles + flat-schema examples from discriminated unions where fields from + non-selected arms appear as nulls) + + Returns the denulled dict (not the preprocessed one with injected + literals). Lets ValidationError propagate on validation failure. 
+ """ + denulled = _denull(raw) + + if model_fields is None: + if isinstance(validation_type, type) and issubclass(validation_type, BaseModel): + model_fields = validation_type.model_fields + else: + model_fields = {} + + known_keys = _known_field_keys(model_fields) + preprocessed = _inject_literal_fields(model_fields, denulled) + preprocessed = _strip_null_unknown_fields(preprocessed, known_keys) + TypeAdapter(validation_type).validate_python(preprocessed) + return denulled + + +_DEFAULT_SKIP_KEYS: frozenset[str] = frozenset() +_DEFAULT_DICT_PATHS: frozenset[str] = frozenset() + +_INDEXED_BRACKET = re.compile(r"\[\d+\]") + + +def _normalize_path(path: str) -> str: + """Replace indexed brackets with empty brackets for dict_paths matching. + + ``collect_dict_paths`` produces schema-notation paths like + ``items[].tags``, while ``_flatten_value`` builds runtime paths like + ``items[0].tags``. Normalizing before membership testing makes them + comparable. + """ + return _INDEXED_BRACKET.sub("[]", path) + + +def collect_dict_paths(fields: list[FieldSpec], prefix: str = "") -> frozenset[str]: + """Collect dot-paths of dict-typed fields from a FieldSpec tree. + + Walks the ``FieldSpec.model`` tree (same structure the renderer walks + for inline expansion) and returns paths where ``type_info.is_dict`` + is True. These paths tell ``flatten_example`` which dicts are maps + (keep as leaf) vs. models (recurse into). + + Parameters + ---------- + fields : list[FieldSpec] + Fields to walk. + prefix : str + Dot-notation prefix accumulated from parent fields. 
+ """ + paths: set[str] = set() + for f in fields: + path = f"{prefix}{f.name}" if prefix else f.name + if f.type_info.is_dict: + paths.add(path) + elif f.model and not f.starts_cycle: + suffix = "[]" * f.type_info.list_depth if f.type_info.is_list else "" + paths |= collect_dict_paths(f.model.fields, f"{path}{suffix}.") + return frozenset(paths) + + +def _flatten_value( + prefix: str, value: object, dict_paths: frozenset[str] +) -> list[tuple[str, Any]]: + """Recursively flatten a value into dot/bracket-notation rows.""" + if isinstance(value, dict): + if _normalize_path(prefix) in dict_paths: + return [(prefix, value)] + result: list[tuple[str, Any]] = [] + for k, v in value.items(): + result.extend(_flatten_value(f"{prefix}.{k}", v, dict_paths)) + return result + if isinstance(value, list) and value and isinstance(value[0], (dict, list)): + result = [] + for i, item in enumerate(value): + result.extend(_flatten_value(f"{prefix}[{i}]", item, dict_paths)) + return result + return [(prefix, value)] + + +def flatten_example( + raw: dict[str, Any], + *, + skip_keys: frozenset[str] = _DEFAULT_SKIP_KEYS, + dict_paths: frozenset[str] = _DEFAULT_DICT_PATHS, +) -> list[tuple[str, Any]]: + """Flatten nested example dict to dot-notation key-value pairs. + + Nested dicts become ``"parent.child"``; lists of dicts become + ``"parent[0].child"``; lists of lists of dicts use double-index + notation ``"parent[0][1].child"``. Keys in *skip_keys* are dropped + at the top level only. Plain lists are kept as values. + + Dicts at paths in *dict_paths* are kept as leaf values instead of + being recursed into. Use ``collect_dict_paths`` to compute this set + from a FieldSpec tree. + """ + result: list[tuple[str, Any]] = [] + for key, value in raw.items(): + if key in skip_keys: + continue + result.extend(_flatten_value(key, value, dict_paths)) + return result + + +def extract_base_field(key: str) -> str: + """Extract the top-level field name from a flattened key. 
+ + >>> extract_base_field("sources[0].dataset") + 'sources' + >>> extract_base_field("names.primary") + 'names' + >>> extract_base_field("id") + 'id' + """ + if "[" in key: + return key.split("[")[0] + if "." in key: + return key.split(".")[0] + return key + + +def order_example_rows( + flat_rows: list[tuple[str, Any]], + field_names: list[str], +) -> list[tuple[str, Any]]: + """Order flattened rows by field position in documentation. + + Sorts by position of base field name in *field_names*. + Fields with the same base maintain their original order (stable sort). + Unknown fields sort to end. + """ + position = {name: i for i, name in enumerate(field_names)} + sentinel = len(field_names) + + def sort_key(row: tuple[str, Any]) -> int: + return position.get(extract_base_field(row[0]), sentinel) + + return sorted(flat_rows, key=sort_key) + + +def load_examples_from_toml( + pyproject_path: Path, + model_name: str, +) -> list[dict[str, Any]]: + """Load ``[examples.]`` from a pyproject.toml file.""" + with pyproject_path.open("rb") as f: + data = tomllib.load(f) + + examples: dict[str, list[dict[str, Any]]] = data.get("examples", {}) + return examples.get(model_name, []) + + +def resolve_pyproject_path(model_class: type) -> Path | None: + """Find pyproject.toml by walking up from the model's module location.""" + module_name = getattr(model_class, "__module__", None) + if not module_name: + return None + + module = sys.modules.get(module_name) + if not module: + return None + + module_file = getattr(module, "__file__", None) + if not module_file: + return None + + # Walk up from module directory + current = Path(module_file).parent + while current != current.parent: # Stop at filesystem root + pyproject = current / "pyproject.toml" + if pyproject.exists(): + return pyproject + current = current.parent + + return None + + +def load_examples( + validation_type: object, + model_name: str, + field_names: list[str], + *, + pyproject_source: type | None = None, + 
model_fields: dict[str, FieldInfo] | None = None, + dict_paths: frozenset[str] = _DEFAULT_DICT_PATHS, +) -> list[ExampleRecord]: + """Load examples for a model, flattened and ordered by *field_names*. + + Validates each example against the validation type. Invalid examples + are skipped with a warning logged. Returns an empty list on any failure + (missing file, missing section, parse error). + + Parameters + ---------- + validation_type : type[BaseModel] | object + Model class or union alias to validate against. + model_name : str + Name of the model to load examples for. + field_names : list[str] + List of field names for ordering output. + pyproject_source : type or None + Type to use for finding pyproject.toml. If None, + uses validation_type if it's a class. + model_fields : dict[str, FieldInfo] or None + Field info dict for Literal injection. If None, infers + from validation_type if it's a BaseModel class. + dict_paths : frozenset[str] + Dot-paths of dict-typed fields to keep as leaf values. + Use ``collect_dict_paths`` to compute from a FieldSpec tree. 
+ """ + source_type = pyproject_source if pyproject_source is not None else validation_type + if not isinstance(source_type, type): + return [] + + pyproject_path = resolve_pyproject_path(source_type) + if not pyproject_path: + return [] + + try: + raw_examples = load_examples_from_toml(pyproject_path, model_name) + except (OSError, tomllib.TOMLDecodeError): + log.debug("Failed to load examples for %s", model_name, exc_info=True) + return [] + + if not raw_examples: + return [] + + records = [] + for raw in raw_examples: + try: + denulled = validate_example(validation_type, raw, model_fields=model_fields) + except ValidationError as e: + log.warning( + "Skipping invalid example for %s in %s: %s", + model_name, + pyproject_path, + e, + ) + continue + flat_rows = flatten_example(denulled, dict_paths=dict_paths) + ordered_rows = order_example_rows(flat_rows, field_names) + records.append(ExampleRecord(rows=ordered_rows)) + + return records diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/field_constraints.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/field_constraints.py new file mode 100644 index 000000000..5981528d1 --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/field_constraints.py @@ -0,0 +1,153 @@ +"""Convert field-level constraints to display text. + +Handles constraints from Annotated metadata and NewType wrappers: +Ge, Gt, Interval, Le, Lt, MaxLen, MinLen, GeometryTypeConstraint, +Reference, and custom constraint classes. 
+""" + +from __future__ import annotations + +from collections.abc import Callable + +from annotated_types import Ge, Gt, Interval, Le, Lt, MaxLen, MinLen + +from overture.schema.system.primitive import GeometryTypeConstraint +from overture.schema.system.ref import Reference + +from .docstring import first_docstring_line +from .specs import TypeIdentity +from .type_analyzer import ConstraintSource + +__all__ = [ + "constraint_display_text", + "constraint_pattern", + "describe_field_constraint", +] + +# Bound attribute names paired with display operators. Each entry maps an +# annotated_types constraint attribute (Ge, Gt, Le, Lt, Interval) to its +# mathematical symbol for prose rendering. +# +# primitive_extraction.py has its own _BOUND_ATTRS for numeric extraction. The +# duplication is deliberate: these modules use the same attribute names for +# unrelated purposes (display formatting vs. numeric bound extraction), and +# coupling them for four string literals adds a dependency without value. +_BOUND_OPS: tuple[tuple[str, str], ...] 
= ( + ("ge", "≥"), + ("gt", ">"), + ("le", "≤"), + ("lt", "<"), +) + + +def _first_bound(obj: object) -> str | None: + """Return backticked notation for the first set bound, or None.""" + for attr, op in _BOUND_OPS: + val = getattr(obj, attr, None) + if val is not None: + return f"`{op} {val}`" + return None + + +def _describe_interval(iv: Interval) -> str: + """Format an Interval as readable bound notation.""" + lower_val = iv.ge if iv.ge is not None else iv.gt + lower_op = "≤" if iv.ge is not None else "<" + upper_val = iv.le if iv.le is not None else iv.lt + upper_op = "≤" if iv.le is not None else "<" + + if lower_val is not None and upper_val is not None: + return f"`{lower_val} {lower_op} x {upper_op} {upper_val}`" + + return _first_bound(iv) or "" + + +def _is_opaque_constraint(constraint: object) -> bool: + """Check whether the constraint has no custom __repr__ (renders as just its class name).""" + return type(constraint).__repr__ is object.__repr__ + + +def _geometry_type_label(value: str) -> str: + """Convert a GeometryType value to PascalCase display name. + + >>> _geometry_type_label("line_string") + 'LineString' + """ + return "".join(part.title() for part in value.split("_")) + + +def describe_field_constraint( + constraint: object, + link_fn: Callable[[TypeIdentity], str] | None = None, +) -> str: + """Return a display string for a field-level constraint object. + + *link_fn* resolves a TypeIdentity to a markdown link string (e.g. + `` [`Name`](path) ``). When None, names render as inline code. 
+ """ + if isinstance(constraint, GeometryTypeConstraint): + labels = ", ".join( + _geometry_type_label(gt.value) for gt in constraint.allowed_types + ) + return f"Allowed geometry types: {labels}" + if isinstance(constraint, Reference): + rel_value: str = constraint.relationship.value # type: ignore[assignment] + rel_label = rel_value.replace("_", " ") + target = constraint.relatee + target_id = TypeIdentity.of(target) + target_str = link_fn(target_id) if link_fn else f"`{target.__name__}`" + return f"References {target_str} ({rel_label})" + if isinstance(constraint, Interval): + desc = _describe_interval(constraint) + if desc: + return desc + elif isinstance(constraint, (Ge, Gt, Le, Lt)): + result = _first_bound(constraint) + if result is not None: + return result + if isinstance(constraint, MinLen): + return f"Minimum length: {constraint.min_length}" + if isinstance(constraint, MaxLen): + return f"Maximum length: {constraint.max_length}" + + if _is_opaque_constraint(constraint): + return f"`{type(constraint).__name__}`" + return f"`{constraint}`" + + +def _constraint_class_description(constraint: object) -> str | None: + """Extract the first docstring line from a custom constraint class. + + Returns None for builtins and classes without docstrings. + """ + constraint_type = type(constraint) + if constraint_type.__module__ == "builtins": + return None + line = first_docstring_line(constraint_type.__doc__) + return line or None + + +def constraint_pattern(constraint: object) -> str | None: + """Extract the regex pattern string from a constraint, if present. + + Traverses two levels: constraint.pattern is a compiled re.Pattern + object, and re.Pattern.pattern is the raw string. 
+ """ + compiled = getattr(constraint, "pattern", None) + return getattr(compiled, "pattern", None) + + +def constraint_display_text( + cs: ConstraintSource, + link_fn: Callable[[TypeIdentity], str] | None = None, +) -> str: + """Build display text for a constraint, combining description/pattern when available.""" + description = _constraint_class_description(cs.constraint) + if _is_opaque_constraint(cs.constraint) and description: + cls_name = type(cs.constraint).__name__ + pattern = constraint_pattern(cs.constraint) + if pattern: + return f"{description} (`{cls_name}`, pattern: `{pattern}`)" + return f"{description} (`{cls_name}`)" + + return describe_field_constraint(cs.constraint, link_fn=link_fn) diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/model_constraints.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/model_constraints.py new file mode 100644 index 000000000..76f2934fc --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/model_constraints.py @@ -0,0 +1,228 @@ +"""Convert model-level constraints to human-readable prose. + +Handles RequireAnyOf, RadioGroup, ForbidIf, RequireIf, and other +ModelConstraint types. Produces descriptions and per-field notes for +documentation rendering. 
+""" + +from __future__ import annotations + +from dataclasses import dataclass + +from overture.schema.system.model_constraint import ( + FieldEqCondition, + ForbidIfConstraint, + MinFieldsSetConstraint, + ModelConstraint, + NoExtraFieldsConstraint, + Not, + RadioGroupConstraint, + RequireAnyOfConstraint, + RequireIfConstraint, +) + +__all__ = ["analyze_model_constraints"] + +_ConditionalConstraint = RequireIfConstraint | ForbidIfConstraint + + +@dataclass(frozen=True) +class _ConstraintEntry: + """A constraint description paired with the field names it affects.""" + + description: str + field_names: frozenset[str] + + +def _format_field_list(names: tuple[str, ...]) -> str: + """Format field names as backtick-quoted, comma-separated list.""" + return ", ".join(f"`{n}`" for n in names) + + +def _conditional_verb(constraint: _ConditionalConstraint) -> str: + """Return 'required' or 'forbidden' based on constraint type.""" + return "required" if isinstance(constraint, RequireIfConstraint) else "forbidden" + + +def _plural_verb(names: tuple[str, ...]) -> str: + """Return 'is' or 'are' based on field count.""" + return "are" if len(names) > 1 else "is" + + +def _unwrap_field_eq(condition: object) -> tuple[FieldEqCondition, bool] | None: + """Extract the FieldEqCondition from a condition, with negation flag. + + Returns (field_eq, is_negated) or None for unrecognized conditions. 
+ """ + if isinstance(condition, Not) and isinstance(condition.inner, FieldEqCondition): + return condition.inner, True + if isinstance(condition, FieldEqCondition): + return condition, False + return None + + +def _describe_condition(condition: object) -> str: + """Render a Condition as human-readable text.""" + unwrapped = _unwrap_field_eq(condition) + if unwrapped is not None: + field_eq, negated = unwrapped + op = "≠" if negated else "=" + return f"`{field_eq.field_name}` {op} `{field_eq.value}`" + return str(condition) + + +def _describe_conditional(constraint: _ConditionalConstraint) -> str: + """Describe a require_if or forbid_if constraint.""" + fields = _format_field_list(constraint.field_names) + verb = _conditional_verb(constraint) + cond = _describe_condition(constraint.condition) + return f"{fields} {_plural_verb(constraint.field_names)} {verb} when {cond}" + + +def _consolidation_key( + constraint: _ConditionalConstraint, +) -> tuple[type, tuple[str, ...], str] | None: + """Return a grouping key if the constraint is consolidatable, else None. + + Consolidatable: same type, same field_names, plain FieldEqCondition + (not negated) on the same condition field. + """ + cond = constraint.condition + if not isinstance(cond, FieldEqCondition): + return None + return (type(constraint), constraint.field_names, cond.field_name) + + +def _as_field_eq(constraint: _ConditionalConstraint) -> FieldEqCondition: + """Narrow a conditional constraint's condition to FieldEqCondition. + + Only called on constraints that passed _consolidation_key, which + rejects non-FieldEqCondition conditions. 
+ """ + cond = constraint.condition + if not isinstance(cond, FieldEqCondition): + raise TypeError(f"Expected FieldEqCondition, got {type(cond).__name__}") + return cond + + +def _describe_consolidated( + constraints: list[_ConditionalConstraint], +) -> str: + """Describe a group of consolidated conditional constraints.""" + first = constraints[0] + fields = _format_field_list(first.field_names) + verb = _conditional_verb(first) + cond_field = _as_field_eq(first).field_name + values = ", ".join(f"`{_as_field_eq(c).value}`" for c in constraints) + return ( + f"{fields} {_plural_verb(first.field_names)} {verb} " + f"when `{cond_field}` is one of: {values}" + ) + + +def _condition_field_names(condition: object) -> frozenset[str]: + """Extract field names referenced by a condition.""" + unwrapped = _unwrap_field_eq(condition) + if unwrapped is not None: + return frozenset({unwrapped[0].field_name}) + return frozenset() + + +def _affected_field_names(constraint: ModelConstraint) -> frozenset[str]: + """Return all field names referenced by a constraint. + + Includes both constrained field_names and condition trigger fields. + Returns empty set for constraints that don't reference specific fields + (NoExtraFieldsConstraint, MinFieldsSetConstraint). 
+ """ + if isinstance(constraint, (NoExtraFieldsConstraint, MinFieldsSetConstraint)): + return frozenset() + if isinstance(constraint, (RequireIfConstraint, ForbidIfConstraint)): + return frozenset(constraint.field_names) | _condition_field_names( + constraint.condition + ) + if isinstance(constraint, (RequireAnyOfConstraint, RadioGroupConstraint)): + return frozenset(constraint.field_names) + return frozenset() + + +def _describe_one(constraint: ModelConstraint) -> str | None: + """Describe a single constraint, or None to skip it.""" + if isinstance(constraint, NoExtraFieldsConstraint): + return None + if isinstance(constraint, RequireAnyOfConstraint): + return ( + f"At least one of {_format_field_list(constraint.field_names)} must be set" + ) + if isinstance(constraint, RadioGroupConstraint): + return f"Exactly one of {_format_field_list(constraint.field_names)} must be `true`" + if isinstance(constraint, MinFieldsSetConstraint): + return f"At least {constraint.count} fields must be set" + if isinstance(constraint, (RequireIfConstraint, ForbidIfConstraint)): + return _describe_conditional(constraint) + return f"`{constraint.name}`" + + +def _analyze_constraints( + constraints: tuple[ModelConstraint, ...], +) -> list[_ConstraintEntry]: + """Analyze constraints into descriptions paired with affected fields. + + Handles consolidation and filtering, preserving original declaration order. 
+ """ + groups: dict[ + tuple[type, tuple[str, ...], str], list[tuple[int, _ConditionalConstraint]] + ] = {} + standalone: list[tuple[int, ModelConstraint]] = [] + + for i, c in enumerate(constraints): + if isinstance(c, (RequireIfConstraint, ForbidIfConstraint)): + key = _consolidation_key(c) + if key is not None: + groups.setdefault(key, []).append((i, c)) + continue + standalone.append((i, c)) + + entries: list[tuple[int, _ConstraintEntry]] = [] + + for group_items in groups.values(): + first_idx = group_items[0][0] + group_constraints = [c for _, c in group_items] + all_fields: frozenset[str] = frozenset().union( + *(_affected_field_names(c) for c in group_constraints) + ) + if len(group_constraints) == 1: + desc = _describe_one(group_constraints[0]) + else: + desc = _describe_consolidated(group_constraints) + if desc is not None: + entries.append((first_idx, _ConstraintEntry(desc, all_fields))) + + for idx, c in standalone: + desc = _describe_one(c) + if desc is not None: + entries.append((idx, _ConstraintEntry(desc, _affected_field_names(c)))) + + entries.sort(key=lambda e: e[0]) + return [entry for _, entry in entries] + + +def analyze_model_constraints( + constraints: tuple[ModelConstraint, ...], +) -> tuple[list[str], dict[str, list[str]]]: + """Analyze constraints into descriptions and per-field notes in one pass. + + Returns (descriptions, field_notes) where descriptions is the list of + human-readable constraint strings and field_notes maps field names to + constraint descriptions that reference them. 
+ """ + entries = _analyze_constraints(constraints) + + descriptions = [entry.description for entry in entries] + + field_notes: dict[str, list[str]] = {} + for entry in entries: + for name in entry.field_names: + field_notes.setdefault(name, []).append(entry.description) + + return descriptions, field_notes diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/model_extraction.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/model_extraction.py new file mode 100644 index 000000000..81c360538 --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/model_extraction.py @@ -0,0 +1,204 @@ +"""Model extraction and tree expansion.""" + +from __future__ import annotations + +import dataclasses + +from pydantic import BaseModel +from pydantic.fields import FieldInfo +from pydantic_core import PydanticUndefined + +from overture.schema.system.model_constraint import ModelConstraint + +from .docstring import clean_docstring +from .specs import FeatureSpec, FieldSpec, ModelSpec, is_model_class +from .type_analyzer import ConstraintSource, TypeInfo, TypeKind, analyze_type + +__all__ = [ + "expand_model_tree", + "extract_model", + "resolve_field_alias", +] + + +def resolve_field_alias(field_name: str, field_info: FieldInfo) -> str: + """Return the data-dict key for a Pydantic field. + + Prefers ``validation_alias``, falls back to ``alias``, then the + Python field name. Only string aliases are supported; AliasPath + and AliasChoices are ignored. + """ + validation_alias = field_info.validation_alias + if isinstance(validation_alias, str): + return validation_alias + alias = field_info.alias + if isinstance(alias, str): + return alias + return field_name + + +def _merge_field_metadata(type_info: TypeInfo, field_info: FieldInfo) -> TypeInfo: + """Merge constraints from field_info.metadata into TypeInfo. 
+ + Pydantic strips the Annotated wrapper from some fields (non-optional, + non-union) and moves the metadata to field_info.metadata. When this + happens, analyze_type sees a bare type and misses the constraints. + The two sets never overlap: field_info.metadata is empty when the + Annotated wrapper survives in the annotation. + """ + if not field_info.metadata: + return type_info + extra = tuple(ConstraintSource(None, None, m) for m in field_info.metadata) + return dataclasses.replace(type_info, constraints=type_info.constraints + extra) + + +def _is_field_required(field_info: FieldInfo, type_info: TypeInfo) -> bool: + """Determine whether a field is required (no default and not Optional).""" + has_default = ( + field_info.default is not PydanticUndefined + or field_info.default_factory is not None + ) + return not has_default and not type_info.is_optional + + +def _basemodel_bases(cls: type) -> list[type[BaseModel]]: + """Return direct BaseModel bases, excluding BaseModel itself.""" + return [b for b in cls.__bases__ if is_model_class(b) and b is not BaseModel] + + +def _class_order(model_class: type[BaseModel]) -> list[type]: + """Return MRO classes in documentation order, recursively. + + For single-inheritance: reversed MRO (base first, derived last). + For multiple-inheritance: primary chain → self → mixins, where + primary chain and each mixin are themselves recursively ordered. + """ + bases = _basemodel_bases(model_class) + + if len(bases) <= 1: + return [ + cls + for cls in reversed(model_class.__mro__) + if issubclass(cls, BaseModel) and cls is not BaseModel + ] + + primary = _class_order(bases[0]) + mixins = [cls for base in bases[1:] for cls in _class_order(base)] + return primary + [model_class] + mixins + + +def _field_order(model_class: type[BaseModel]) -> list[str]: + """Return model_fields keys in documentation order. + + Walks the class hierarchy recursively. 
At each level of multiple + inheritance, the first base is the "primary chain" and the rest + are "mixins." Primary chain and own fields come first, then mixin + fields in declaration order. Single-inheritance levels use + Pydantic's default reversed-MRO order. + """ + valid_names = set(model_class.model_fields.keys()) + result: list[str] = [] + seen: set[str] = set() + for cls in _class_order(model_class): + for name in getattr(cls, "__annotations__", {}): + if name not in seen and name in valid_names: + result.append(name) + seen.add(name) + return result + + +def extract_model( + model_class: type[BaseModel], + *, + entry_point: str | None = None, +) -> ModelSpec: + """Extract model specification from a Pydantic model class.""" + field_info_map = model_class.model_fields + ordered_keys = _field_order(model_class) + + fields: list[FieldSpec] = [] + for field_name in ordered_keys: + field_info = field_info_map[field_name] + output_name = resolve_field_alias(field_name, field_info) + + # Use field_info.annotation (resolved TypeVars) not get_type_hints + annotation = field_info.annotation + if annotation is None: + continue + + type_info = _merge_field_metadata(analyze_type(annotation), field_info) + + fields.append( + FieldSpec( + name=output_name, + type_info=type_info, + description=field_info.description or type_info.description, + is_required=_is_field_required(field_info, type_info), + ) + ) + + return ModelSpec( + name=model_class.__name__, + description=clean_docstring(model_class.__doc__), + fields=fields, + source_type=model_class, + entry_point=entry_point, + constraints=ModelConstraint.get_model_constraints(model_class), + ) + + +def expand_model_tree( + spec: FeatureSpec, + cache: dict[type, ModelSpec] | None = None, +) -> FeatureSpec: + """Populate model references on MODEL-kind fields, recursively. + + Walks *spec*'s fields and sets ``field.model`` for fields whose type + is a Pydantic model. 
Uses *cache* to reuse already-extracted ModelSpecs + and detect shared references. Marks fields whose model creates a cycle + in the ancestor chain with ``starts_cycle=True``. + + Mutates *spec* in place and returns it. + """ + if cache is None: + cache = {} + if isinstance(spec, ModelSpec) and spec.source_type is not None: + cache[spec.source_type] = spec + ancestors = frozenset({spec.source_type}) if spec.source_type else frozenset() + _expand_fields(spec.fields, cache, ancestors) + return spec + + +def _expand_fields( + fields: list[FieldSpec], + cache: dict[type, ModelSpec], + ancestors: frozenset[type], +) -> None: + """Recursive helper for expand_model_tree. + + Cache insertion happens before recursion — cycle detection depends + on the ancestor's ModelSpec being in the cache when the back-edge + is encountered. + """ + for field_spec in fields: + ti = field_spec.type_info + source = ti.source_type + if ti.kind == TypeKind.UNION: + # Union fields have no single model to recurse into. + # The field row appears in the output; skip inline expansion. 
+ continue + if ti.kind != TypeKind.MODEL or source is None: + continue + + if source in ancestors: + # Cycle: reuse existing spec, mark the edge + field_spec.model = cache.get(source) + field_spec.starts_cycle = True + elif source in cache: + # Shared reference: reuse, not a cycle + field_spec.model = cache[source] + else: + sub_spec = extract_model(source) + cache[source] = sub_spec # insert BEFORE recursing + field_spec.model = sub_spec + _expand_fields(sub_spec.fields, cache, ancestors | {source}) diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/newtype_extraction.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/newtype_extraction.py new file mode 100644 index 000000000..ff11c770a --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/newtype_extraction.py @@ -0,0 +1,26 @@ +"""NewType extraction.""" + +from .docstring import clean_docstring, is_custom_docstring +from .specs import NewTypeSpec +from .type_analyzer import analyze_type + +__all__ = ["extract_newtype"] + + +def extract_newtype(newtype_callable: object) -> NewTypeSpec: + """Extract NewType specification from a NewType callable.""" + type_info = analyze_type(newtype_callable) + doc = getattr(newtype_callable, "__doc__", None) + name = type_info.newtype_name or getattr(newtype_callable, "__name__", None) + if name is None: + msg = f"Cannot determine name for NewType: {newtype_callable!r}" + raise ValueError(msg) + description = ( + clean_docstring(doc) if is_custom_docstring(doc) else type_info.description + ) + return NewTypeSpec( + name=name, + description=description, + type_info=type_info, + source_type=newtype_callable, + ) diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/primitive_extraction.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/primitive_extraction.py new file mode 100644 index 000000000..ceb8ff7cd --- /dev/null +++ 
b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/primitive_extraction.py @@ -0,0 +1,95 @@ +"""Primitive extraction and partitioning.""" + +from annotated_types import Interval + +from .docstring import first_docstring_line +from .newtype_extraction import extract_newtype +from .specs import PrimitiveSpec, TypeIdentity +from .type_analyzer import TypeInfo, is_newtype + +__all__ = [ + "extract_numeric_bounds", + "extract_primitives", + "partition_primitive_and_geometry_names", +] + + +# Bound attribute names on annotated_types constraint objects (Ge, Gt, Le, +# Lt, Interval) used for numeric bound extraction. +# +# field_constraint_description.py has its own _BOUND_OPS for display formatting. +# The duplication is deliberate: these modules use the same attribute names +# for unrelated purposes (numeric extraction vs. prose rendering), and +# coupling them for four string literals adds a dependency without value. +_BOUND_ATTRS = ("ge", "gt", "le", "lt") + + +def extract_numeric_bounds(type_info: TypeInfo) -> Interval: + """Extract numeric bounds from a TypeInfo's constraints. + + Checks for ge, gt, le, and lt attributes on constraint objects. + Stops at the first constraint defining each bound. 
+ """ + found: dict[str, int | float] = {} + for cs in type_info.constraints: + c = cs.constraint + for attr in _BOUND_ATTRS: + if attr not in found: + val = getattr(c, attr, None) + if val is not None: + found[attr] = val + return Interval(**found) + + +def extract_primitives( + primitive_ids: list[TypeIdentity], +) -> list[PrimitiveSpec]: + """Extract specifications for numeric primitive types.""" + specs: list[PrimitiveSpec] = [] + for tid in primitive_ids: + newtype_spec = extract_newtype(tid.obj) + bounds = extract_numeric_bounds(newtype_spec.type_info) + description = first_docstring_line(getattr(tid.obj, "__doc__", None)) + float_bits = _extract_float_bits(tid.name) + specs.append( + PrimitiveSpec( + name=tid.name, + description=description, + bounds=bounds, + float_bits=float_bits, + ) + ) + return specs + + +_FLOAT_BITS: dict[str, int] = { + "float32": 32, + "float64": 64, +} + + +def _extract_float_bits(name: str) -> int | None: + """Extract bit width from a float type name like 'float32'.""" + return _FLOAT_BITS.get(name) + + +def partition_primitive_and_geometry_names( + primitive_module: object, +) -> tuple[list[TypeIdentity], list[TypeIdentity]]: + """Discover primitive and geometry types from a module's exports. + + NewType exports are numeric primitives. + Non-constraint class/enum exports are geometry types. 
+ """ + module_all: list[str] = getattr(primitive_module, "__all__", []) + primitives: list[TypeIdentity] = [] + geometries: list[TypeIdentity] = [] + + for name in module_all: + obj = getattr(primitive_module, name) + if is_newtype(obj): + primitives.append(TypeIdentity(obj, name)) + elif isinstance(obj, type) and not name.endswith("Constraint"): + geometries.append(TypeIdentity(obj, name)) + + return primitives, geometries diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/pydantic_extraction.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/pydantic_extraction.py new file mode 100644 index 000000000..120f4760d --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/pydantic_extraction.py @@ -0,0 +1,33 @@ +"""Pydantic built-in type extraction.""" + +import re + +from .docstring import first_docstring_line +from .specs import PydanticTypeSpec + +__all__ = ["extract_pydantic_type"] + +# Matches bare admonition labels like "Info:" or "Note:" with no following text. 
+_ADMONITION_LABEL = re.compile(r"^\w+:\s*$") + + +def _usable_description(doc: str | None) -> str | None: + """Return the first docstring line, or None if it's an admonition label.""" + line = first_docstring_line(doc) + if line is None or _ADMONITION_LABEL.match(line): + return None + return line + + +def extract_pydantic_type(cls: type) -> PydanticTypeSpec: + """Extract a PydanticTypeSpec from a Pydantic built-in type class.""" + module = getattr(cls, "__module__", "") + if not module.startswith("pydantic"): + msg = f"Expected a pydantic type, got {cls!r} from {module!r}" + raise ValueError(msg) + return PydanticTypeSpec( + name=cls.__name__, + description=_usable_description(cls.__doc__), + source_type=cls, + source_module=cls.__module__.removeprefix("pydantic."), + ) diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/specs.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/specs.py new file mode 100644 index 000000000..801e69c23 --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/specs.py @@ -0,0 +1,260 @@ +"""Data types for extracted specifications.""" + +from __future__ import annotations + +import functools +from dataclasses import dataclass, field +from typing import Any, Protocol, TypeGuard, runtime_checkable + +from annotated_types import Interval +from pydantic import BaseModel + +from overture.schema.system.model_constraint import ModelConstraint + +from .type_analyzer import TypeInfo, TypeKind, UnsupportedUnionError, analyze_type + +__all__ = [ + "AnnotatedField", + "EnumMemberSpec", + "EnumSpec", + "FeatureSpec", + "FieldSpec", + "ModelSpec", + "NewTypeSpec", + "PrimitiveSpec", + "PydanticTypeSpec", + "SupplementarySpec", + "TypeIdentity", + "filter_model_classes", + "is_model_class", + "is_pydantic_sourced", + "is_pydantic_type", + "is_union_alias", +] + + +@dataclass(frozen=True, eq=False) +class TypeIdentity: + """Unique identity for a type in the 
codegen system. + + Pairs a unique Python object (class, NewType callable, or union + annotation) with its display name. Equality and hashing delegate + to ``obj`` identity so registry lookups work regardless of how + the display name was derived. + """ + + obj: object + name: str + + @classmethod + def of(cls, obj: object) -> TypeIdentity: + """Derive a TypeIdentity from a named object (class, NewType, etc.).""" + assert obj is not None + return cls(obj, obj.__name__) # type: ignore[attr-defined] + + def __eq__(self, other: object) -> bool: + return isinstance(other, TypeIdentity) and self.obj is other.obj + + def __hash__(self) -> int: + return id(self.obj) + + @property + def module(self) -> str: + """Source module of the underlying object, or empty string.""" + return getattr(self.obj, "__module__", "") + + +class _SourceTypeIdentityMixin: + """Mixin providing ``identity`` from ``source_type`` and ``name``. + + Shared by EnumSpec, ModelSpec, NewTypeSpec, and PydanticTypeSpec -- + each has a ``source_type`` (the Python class/callable) and a ``name``. + UnionSpec uses ``source_annotation`` instead, so it defines its + own ``identity``. 
+ """ + + source_type: object | None + name: str + + @property + def identity(self) -> TypeIdentity: + assert self.source_type is not None + return TypeIdentity(self.source_type, self.name) + + +@dataclass +class EnumMemberSpec: + """Specification for an enum member.""" + + name: str + value: str + description: str | None + + +@dataclass +class EnumSpec(_SourceTypeIdentityMixin): + """Specification for an Enum class.""" + + name: str + description: str | None + members: list[EnumMemberSpec] = field(default_factory=list) + source_type: type | None = None + + +@dataclass +class FieldSpec: + """Specification for a model field.""" + + name: str + type_info: TypeInfo + description: str | None + is_required: bool + model: ModelSpec | None = None + starts_cycle: bool = False + + +@runtime_checkable +class FeatureSpec(Protocol): + """Shared interface for feature-level specs (ModelSpec, UnionSpec).""" + + name: str + description: str | None + source_type: type[BaseModel] | None + entry_point: str | None + constraints: tuple[ModelConstraint, ...] + + @property + def fields(self) -> list[FieldSpec]: ... + + @property + def identity(self) -> TypeIdentity: ... + + +@dataclass +class ModelSpec(_SourceTypeIdentityMixin): + """Specification for a Pydantic model.""" + + name: str + description: str | None + fields: list[FieldSpec] = field(default_factory=list) + source_type: type[BaseModel] | None = None + entry_point: str | None = None + constraints: tuple[ModelConstraint, ...] = () + + +@dataclass +class AnnotatedField: + """A FieldSpec paired with union variant provenance.""" + + field_spec: FieldSpec + variant_sources: tuple[str, ...] | None + + +# eq=False: contains mutable lists and a cached_property, so +# dataclass-generated __eq__ would be unreliable. 
+@dataclass(eq=False) +class UnionSpec: + """Specification for a discriminated union type alias.""" + + name: str + description: str | None + annotated_fields: list[AnnotatedField] + members: list[type[BaseModel]] + discriminator_field: str | None + discriminator_mapping: dict[str, type[BaseModel]] | None + source_annotation: object + common_base: type[BaseModel] + source_type: type[BaseModel] | None = field(default=None, init=False) + entry_point: str | None = None + constraints: tuple[ModelConstraint, ...] = () + + @functools.cached_property + def fields(self) -> list[FieldSpec]: + """Plain field list for tree expansion and supplementary collection.""" + return [af.field_spec for af in self.annotated_fields] + + @property + def identity(self) -> TypeIdentity: + return TypeIdentity(self.source_annotation, self.name) + + +@dataclass +class NewTypeSpec(_SourceTypeIdentityMixin): + """Specification for a NewType.""" + + name: str + description: str | None + type_info: TypeInfo + source_type: object | None = None + + +@dataclass +class PrimitiveSpec: + """Extracted specification for a numeric primitive type.""" + + name: str + description: str | None + bounds: Interval = field(default_factory=Interval) + float_bits: int | None = None + + +@dataclass +class PydanticTypeSpec(_SourceTypeIdentityMixin): + """Specification for a Pydantic built-in type (HttpUrl, EmailStr, etc.).""" + + name: str + description: str | None + source_type: type + source_module: str + + @property + def docs_url(self) -> str: + """Pydantic documentation URL for this type.""" + return ( + f"https://docs.pydantic.dev/latest/api/{self.source_module}" + f"/#pydantic.{self.source_module}.{self.name}" + ) + + +SupplementarySpec = EnumSpec | NewTypeSpec | ModelSpec | PydanticTypeSpec +"""Non-feature types referenced by feature models. + +Excludes PrimitiveSpec and geometry types, which are extracted +separately via dedicated functions. 
+""" + + +def is_pydantic_sourced(source_type: type | None) -> bool: + """Check whether *source_type* originates from the ``pydantic`` package.""" + return getattr(source_type, "__module__", "").startswith("pydantic") + + +def is_pydantic_type(ti: TypeInfo) -> bool: + """Check whether a TypeInfo represents a Pydantic built-in type.""" + return ( + ti.kind == TypeKind.PRIMITIVE + and ti.source_type is not None + and is_pydantic_sourced(ti.source_type) + ) + + +def is_model_class(obj: object) -> TypeGuard[type[BaseModel]]: + """Check whether *obj* is a concrete BaseModel subclass (not a type alias).""" + return isinstance(obj, type) and issubclass(obj, BaseModel) + + +def is_union_alias(obj: object) -> bool: + """Check whether *obj* is a discriminated union type alias of BaseModel subclasses.""" + try: + ti = analyze_type(obj) + except (TypeError, UnsupportedUnionError): + return False + return ti.kind == TypeKind.UNION + + +def filter_model_classes(models: dict[Any, Any]) -> list[type[BaseModel]]: + """Filter discovered models to concrete BaseModel subclasses. + + Excludes type aliases (like discriminated unions) and non-class entries. 
+ """ + return [v for v in models.values() if is_model_class(v)] diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/type_analyzer.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/type_analyzer.py new file mode 100644 index 000000000..fd4c9a06b --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/type_analyzer.py @@ -0,0 +1,344 @@ +"""Iterative type unwrapping for Pydantic model annotations.""" + +from __future__ import annotations + +import types +from collections.abc import Callable +from dataclasses import dataclass, field +from enum import Enum, auto +from typing import Annotated, Any, Literal, Union, get_args, get_origin + +from pydantic import BaseModel +from pydantic.fields import FieldInfo +from typing_extensions import Sentinel + +from .docstring import clean_docstring + +__all__ = [ + "ConstraintSource", + "TypeKind", + "TypeInfo", + "UnsupportedUnionError", + "analyze_type", + "is_newtype", + "single_literal_value", + "walk_type_info", +] + + +class UnsupportedUnionError(TypeError): + """Raised when analyze_type encounters a multi-type union it cannot represent.""" + + +class TypeKind(Enum): + """Classification of type kinds.""" + + PRIMITIVE = auto() + LITERAL = auto() + ENUM = auto() + MODEL = auto() + UNION = auto() + + +@dataclass(slots=True) +class ConstraintSource: + """A constraint paired with the NewType that contributed it.""" + + source_ref: object | None + source_name: str | None + constraint: object + + +@dataclass(slots=True) +class TypeInfo: + """Information about a type annotation.""" + + base_type: str + kind: TypeKind + is_optional: bool = False + list_depth: int = 0 + newtype_outer_list_depth: int = 0 + is_dict: bool = False + dict_key_type: TypeInfo | None = None + dict_value_type: TypeInfo | None = None + constraints: tuple[ConstraintSource, ...] = () + literal_values: tuple[object, ...] 
| None = None + source_type: type | None = None + newtype_name: str | None = None + newtype_ref: object | None = None + union_members: tuple[type[BaseModel], ...] | None = None + description: str | None = None + + @property + def is_list(self) -> bool: + """Whether this type has any list wrapping.""" + return self.list_depth > 0 + + +def walk_type_info(ti: TypeInfo, visitor: Callable[[TypeInfo], None]) -> None: + """Call *visitor* on *ti*, then recurse into dict key/value types. + + Captures the shared recursive descent pattern used by type collection + and reverse reference computation. Union members are ``type`` objects + (not ``TypeInfo``), so callers handle them directly. + """ + visitor(ti) + if ti.dict_key_type is not None: + walk_type_info(ti.dict_key_type, visitor) + if ti.dict_value_type is not None: + walk_type_info(ti.dict_value_type, visitor) + + +def is_newtype(annotation: object) -> bool: + """Check if annotation is a typing.NewType. + + NewType creates a callable with a __supertype__ attribute pointing + to the wrapped type. No public API exists for this check. + """ + return callable(annotation) and hasattr(annotation, "__supertype__") + + +def _is_union(origin: object) -> bool: + """Check if an origin represents a union type (X | Y or Union[X, Y]).""" + return origin in (types.UnionType, Union) + + +@dataclass(slots=True) +class _UnwrapState: + """Accumulated state from iterative type unwrapping. + + Tracks NewType names and refs during unwrapping: + - ``outermost_newtype_name`` / ``outermost_newtype_ref``: the first + NewType encountered, exposed as ``TypeInfo.newtype_name`` / ``newtype_ref``. + - ``last_newtype_name``: the most recently entered NewType name, used + as the resolved ``base_type`` for the terminal type. + - ``last_newtype_ref``: the most recently entered NewType callable, + used as constraint provenance (which NewType contributed each constraint). 
@dataclass(slots=True)
class _UnwrapState:
    """Accumulated state from iterative type unwrapping.

    Tracks NewType names and refs during unwrapping:
    - ``outermost_newtype_name`` / ``outermost_newtype_ref``: the first
      NewType encountered, exposed as ``TypeInfo.newtype_name`` / ``newtype_ref``.
    - ``last_newtype_name``: the most recently entered NewType name, used
      as the resolved ``base_type`` for the terminal type.
    - ``last_newtype_ref``: the most recently entered NewType callable,
      used as constraint provenance (which NewType contributed each constraint).
    - ``newtype_outer_list_depth``: list layers accumulated before entering
      the outermost NewType boundary.
    """

    is_optional: bool = False
    list_depth: int = 0
    newtype_outer_list_depth: int = 0
    is_dict: bool = False
    dict_key_type: TypeInfo | None = None
    dict_value_type: TypeInfo | None = None
    constraints: list[ConstraintSource] = field(default_factory=list)
    outermost_newtype_name: str | None = None
    outermost_newtype_ref: object | None = None
    last_newtype_name: str | None = None
    last_newtype_ref: object | None = None
    description: str | None = None

    def add_constraint(self, constraint: object) -> None:
        # Attribute the constraint to the innermost NewType entered so far;
        # both provenance fields are None for constraints found outside any
        # NewType boundary.
        self.constraints.append(
            ConstraintSource(self.last_newtype_ref, self.last_newtype_name, constraint)
        )

    def build_type_info(
        self,
        *,
        base_type: str,
        kind: TypeKind,
        literal_values: tuple[object, ...] | None = None,
        source_type: type | None = None,
        union_members: tuple[type[BaseModel], ...] | None = None,
    ) -> TypeInfo:
        # Snapshot the accumulated wrapper state into an immutable TypeInfo.
        return TypeInfo(
            base_type=base_type,
            kind=kind,
            is_optional=self.is_optional,
            list_depth=self.list_depth,
            newtype_outer_list_depth=self.newtype_outer_list_depth,
            is_dict=self.is_dict,
            dict_key_type=self.dict_key_type,
            dict_value_type=self.dict_value_type,
            constraints=tuple(self.constraints),
            literal_values=literal_values,
            source_type=source_type,
            newtype_name=self.outermost_newtype_name,
            newtype_ref=self.outermost_newtype_ref,
            union_members=union_members,
            description=self.description,
        )


def analyze_type(annotation: object) -> TypeInfo:
    """Analyze a type annotation and return TypeInfo.

    Iteratively unwraps type wrappers (Annotated, Optional, list, NewType) until
    reaching a terminal type.

    Raises
    ------
    UnsupportedUnionError
        For multi-type unions whose members are not all BaseModel
        subclasses, or unions with no concrete (non-None) member.
    TypeError
        For bare ``list``/``dict`` without type arguments (raised from
        this loop or from ``_classify_terminal``).
    """
    state = _UnwrapState()

    while True:
        origin = get_origin(annotation)

        # Handle NewType (e.g., int32 = NewType("int32", Annotated[int, ...]))
        if is_newtype(annotation):
            name = annotation.__name__  # type: ignore[attr-defined]
            state.last_newtype_name = name
            state.last_newtype_ref = annotation
            if state.outermost_newtype_name is None:
                # Record how many list wrappers surrounded the outermost
                # NewType before descending into it.
                state.newtype_outer_list_depth = state.list_depth
                state.outermost_newtype_name = name
                state.outermost_newtype_ref = annotation
            annotation = annotation.__supertype__  # type: ignore[attr-defined]
            continue

        # Handle Annotated types (Annotated[X, metadata...])
        if origin is Annotated:
            args = get_args(annotation)
            annotation = args[0]
            for c in args[1:]:
                if isinstance(c, FieldInfo):
                    # First (outermost) description wins.
                    if c.description is not None and state.description is None:
                        state.description = clean_docstring(c.description)
                    for m in c.metadata:
                        state.add_constraint(m)
                else:
                    state.add_constraint(c)
            continue

        # Handle union types (X | None or Optional[X])
        if _is_union(origin):
            args = get_args(annotation)
            # Filter out None, Sentinel instances (unset-style marker values),
            # and Literal alternatives (e.g., HttpUrl | Literal[""] where the
            # Literal is a special-value sentinel, not the primary type).
            if any(a is types.NoneType for a in args):
                state.is_optional = True

            non_none_args = [
                a
                for a in args
                if a is not types.NoneType and not isinstance(a, Sentinel)
            ]

            # Only filter out Literal arms when a concrete (non-Literal) type
            # exists. Without this guard, Optional[Literal["x"]] would lose
            # all args because the Literal *is* the primary type.
            concrete_args = [a for a in non_none_args if get_origin(a) is not Literal]
            real_args = concrete_args if concrete_args else non_none_args

            if len(real_args) > 1:
                # Check if all real args are BaseModel subclasses
                # (unwrap Annotated wrappers to get the actual class)
                members: list[type[BaseModel]] = []
                for arg in real_args:
                    inner = arg
                    if get_origin(inner) is Annotated:
                        inner = get_args(inner)[0]
                    if isinstance(inner, type) and issubclass(inner, BaseModel):
                        members.append(inner)
                    else:
                        raise UnsupportedUnionError(
                            f"Multi-type unions not supported: {annotation}"
                        )
                return state.build_type_info(
                    base_type=members[0].__name__,
                    kind=TypeKind.UNION,
                    union_members=tuple(members),
                )

            if not real_args:
                raise UnsupportedUnionError(
                    f"Union with no concrete types: {annotation}"
                )

            annotation = real_args[0]
            continue

        # Handle list types (list[X])
        if origin is list:
            args = get_args(annotation)
            if not args:
                raise TypeError("Bare list without type argument is not supported")
            state.list_depth += 1
            annotation = args[0]
            continue

        # Handle dict types (dict[K, V])
        if origin is dict:
            args = get_args(annotation)
            if not args:
                raise TypeError("Bare dict without type arguments is not supported")
            state.is_dict = True
            # Key/value types get their own independent analyses.
            state.dict_key_type = analyze_type(args[0])
            state.dict_value_type = analyze_type(args[1])
            base_type = state.last_newtype_name or "dict"
            return state.build_type_info(
                base_type=base_type,
                kind=TypeKind.PRIMITIVE,
                source_type=dict,
            )

        # No wrapper recognized: *annotation* is terminal.
        break

    return _classify_terminal(annotation, state)
Literal: + args = get_args(annotation) + return state.build_type_info( + base_type="Literal", + kind=TypeKind.LITERAL, + literal_values=tuple(args), + ) + + if not isinstance(annotation, type): + raise TypeError(f"Unsupported annotation type: {type(annotation)}") + + if issubclass(annotation, list): + raise TypeError("Bare list without type argument is not supported") + + if issubclass(annotation, dict): + raise TypeError("Bare dict without type arguments is not supported") + + # Determine kind from type hierarchy + if issubclass(annotation, Enum): + kind = TypeKind.ENUM + elif issubclass(annotation, BaseModel): + kind = TypeKind.MODEL + else: + kind = TypeKind.PRIMITIVE + + base_type = state.last_newtype_name or annotation.__name__ + + return state.build_type_info( + base_type=base_type, + kind=kind, + source_type=annotation, + ) + + +def single_literal_value(annotation: object) -> object | None: + """Extract a single literal value from a type annotation, or None. + + Delegates to analyze_type for all unwrapping, then checks + whether the result is a single-value Literal. Multi-value + Literals return None — callers needing all values should use + ``analyze_type`` and read ``literal_values`` directly. 
+ """ + try: + ti = analyze_type(annotation) + except (TypeError, UnsupportedUnionError): + return None + if ( + ti.kind == TypeKind.LITERAL + and ti.literal_values + and len(ti.literal_values) == 1 + ): + return ti.literal_values[0] + return None diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/type_registry.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/type_registry.py new file mode 100644 index 000000000..505657866 --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/type_registry.py @@ -0,0 +1,113 @@ +"""Type registry mapping Python types to target representations.""" + +from dataclasses import dataclass + +from .type_analyzer import TypeInfo + +__all__ = [ + "TypeMapping", + "PRIMITIVE_TYPES", + "get_type_mapping", + "is_semantic_newtype", + "resolve_type_name", +] + + +@dataclass(frozen=True) +class TypeMapping: + """Maps a type to its representation in different targets.""" + + markdown: str + + def for_target(self, target: str) -> str: + """Get the type representation for a named target.""" + if target != "markdown": + raise ValueError(f"Unknown target {target!r}, expected 'markdown'") + return self.markdown + + +PRIMITIVE_TYPES: dict[str, TypeMapping] = { + # Signed integers + "int8": TypeMapping(markdown="int8"), + "int16": TypeMapping(markdown="int16"), + "int32": TypeMapping(markdown="int32"), + "int64": TypeMapping(markdown="int64"), + # Unsigned integers + "uint8": TypeMapping(markdown="uint8"), + "uint16": TypeMapping(markdown="uint16"), + "uint32": TypeMapping(markdown="uint32"), + # Floating point + "float32": TypeMapping(markdown="float32"), + "float64": TypeMapping(markdown="float64"), + # Basic types + "str": TypeMapping(markdown="string"), + "bool": TypeMapping(markdown="boolean"), + # Python builtins (aliases to their portable equivalents) + "int": TypeMapping(markdown="int64"), + "float": TypeMapping(markdown="float64"), + # Geometry 
types + "Geometry": TypeMapping(markdown="geometry"), + "BBox": TypeMapping(markdown="bbox"), +} + + +def is_semantic_newtype(type_info: TypeInfo) -> bool: + """Whether a type represents a semantic NewType that should be displayed by name. + + Returns True for unregistered NewTypes (HexColor, Sources) and NewTypes + that wrap a different base type (FeatureVersion wrapping int32, Id wrapping + NoWhitespaceString). Returns False for registered primitives (int32, Geometry). + """ + if type_info.newtype_name is None: + return False + if type_info.newtype_name != type_info.base_type: + return True + return get_type_mapping(type_info.base_type) is None + + +def get_type_mapping(type_name: str) -> TypeMapping | None: + """Look up a type mapping by name. + + Parameters + ---------- + type_name : str + The type name to look up (e.g., "int32", "str", "Geometry"). + Also accepts Python builtin names ("int" -> int64, "float" -> float64). + + Returns + ------- + TypeMapping or None + The TypeMapping for the type, or None if not found. + """ + return PRIMITIVE_TYPES.get(type_name) + + +def resolve_type_name(type_info: TypeInfo, target: str) -> str: + """Resolve a TypeInfo to the base type string for a given target. + + Looks up the type in the registry first (trying source_type if base_type + has no mapping). Falls back to the base_type name as-is. + + Parameters + ---------- + type_info : TypeInfo + The analyzed type information. + target : str + The output target ("markdown"). + + Returns + ------- + str + The resolved base type name string for the target. + """ + mapping = get_type_mapping(type_info.base_type) + if mapping is None and type_info.source_type is not None: + mapping = get_type_mapping(type_info.source_type.__name__) + if mapping is not None: + return mapping.for_target(target) + + # Semantic NewType wrapping an unregistered type (e.g., Sources wrapping + # SourceItem): use the underlying class name rather than the NewType alias. 
"""Union extraction and discriminator handling."""

from __future__ import annotations

from typing import Annotated, get_args, get_origin

from pydantic import BaseModel
from pydantic.fields import FieldInfo

from overture.schema.system.feature import resolve_discriminator_field_name

from .model_extraction import extract_model, resolve_field_alias
from .specs import AnnotatedField, UnionSpec, is_model_class
from .type_analyzer import TypeInfo, TypeKind, analyze_type, single_literal_value

__all__ = ["extract_discriminator", "extract_union"]


def _find_common_base(members: list[type[BaseModel]]) -> type[BaseModel]:
    """Find the most-derived common BaseModel ancestor of all members."""
    # For each member keep only model classes in its MRO (BaseModel itself
    # excluded), preserving most-derived-first order.
    filtered_mros = [
        [c for c in cls.__mro__ if is_model_class(c) and c is not BaseModel]
        for cls in members
    ]
    common = set(filtered_mros[0])
    for mro in filtered_mros[1:]:
        common &= set(mro)
    if not common:
        raise ValueError(
            f"No common BaseModel ancestor for {[m.__name__ for m in members]}"
        )

    def max_mro_index(cls: type) -> int:
        # Worst-case (least-derived) position of *cls* across all MROs.
        return max(mro.index(cls) for mro in filtered_mros)

    # The shared ancestor with the smallest worst-case MRO index is the
    # most-derived class common to every member.
    return min(common, key=max_mro_index)


def _find_field_by_alias(model: type[BaseModel], alias: str) -> FieldInfo | None:
    """Find a field in model_fields by alias-resolved name."""
    # Fast path: the Python attribute name matches directly.
    direct = model.model_fields.get(alias)
    if direct is not None:
        return direct
    # Slow path: compare against each field's resolved (aliased) name.
    for py_name, fi in model.model_fields.items():
        if resolve_field_alias(py_name, fi) == alias:
            return fi
    return None


def extract_discriminator(
    annotation: object,
    members: list[type[BaseModel]],
) -> tuple[str | None, dict[str, type[BaseModel]] | None]:
    """Extract discriminator field name and value-to-type mapping.

    Returns ``(None, None)`` when *annotation* is not Annotated or carries
    no discriminator; returns ``(name, None)`` when a discriminator exists
    but no member exposes a single-value Literal for it.
    """
    if get_origin(annotation) is not Annotated:
        return None, None

    # The discriminator rides on a FieldInfo inside the Annotated metadata.
    disc_field_name: str | None = None
    for metadata in get_args(annotation)[1:]:
        if isinstance(metadata, FieldInfo):
            disc_field_name = resolve_discriminator_field_name(metadata.discriminator)
            if disc_field_name is not None:
                break

    if disc_field_name is None:
        return None, None

    # Map each member's literal discriminator value (stringified) to the
    # member class; members without a single-value Literal are skipped.
    mapping: dict[str, type[BaseModel]] = {}
    for member in members:
        field_info = _find_field_by_alias(member, disc_field_name)
        if field_info and field_info.annotation is not None:
            lit_val = single_literal_value(field_info.annotation)
            if lit_val is not None:
                mapping[str(lit_val)] = member

    return disc_field_name, mapping or None


_TypeShape = tuple[str, TypeKind, bool, int]
_FieldKey = tuple[str, _TypeShape]


def _type_shape(ti: TypeInfo) -> _TypeShape:
    """Structural shape for dedup -- excludes source_type which varies across members."""
    return (ti.base_type, ti.kind, ti.is_optional, ti.list_depth)


def extract_union(
    name: str,
    annotation: object,
    *,
    entry_point: str | None = None,
) -> UnionSpec:
    """Extract a UnionSpec from a discriminated union type alias.

    Raises TypeError when *annotation* does not analyze to a UNION kind.
    """
    ti = analyze_type(annotation)
    if ti.kind != TypeKind.UNION or ti.union_members is None:
        raise TypeError(f"{name} is not a union type alias")

    members = list(ti.union_members)
    common_base = _find_common_base(members)

    base_spec = extract_model(common_base)
    shared_field_names = {f.name for f in base_spec.fields}

    member_specs = [(m, extract_model(m)) for m in members]

    annotated_fields: list[AnnotatedField] = []

    # Shared fields first (from common base); variant_sources=None marks
    # a field as common to all members.
    for fs in base_spec.fields:
        annotated_fields.append(AnnotatedField(field_spec=fs, variant_sources=None))

    # Variant-specific fields: collect by (name, type identity) for dedup
    seen: dict[_FieldKey, AnnotatedField] = {}

    for member_cls, member_spec in member_specs:
        for fs in member_spec.fields:
            if fs.name in shared_field_names:
                continue
            key = (fs.name, _type_shape(fs.type_info))
            # Same-shaped field seen in an earlier member: extend its
            # source list (keeping the latest FieldSpec).
            existing = seen.get(key)
            prior_sources = existing.variant_sources or () if existing else ()
            seen[key] = AnnotatedField(
                field_spec=fs,
                variant_sources=(*prior_sources, member_cls.__name__),
            )

    annotated_fields.extend(seen.values())

    disc_field, disc_mapping = extract_discriminator(annotation, members)

    return UnionSpec(
        name=name,
        description=ti.description,
        annotated_fields=annotated_fields,
        members=members,
        discriminator_field=disc_field,
        discriminator_mapping=disc_mapping,
        source_annotation=annotation,
        common_base=common_base,
        entry_point=entry_point,
    )
+""" + +from __future__ import annotations + +import sys +from collections.abc import Iterable, Mapping +from pathlib import PurePosixPath + +__all__ = [ + "OUTPUT_ROOT", + "compute_output_dir", + "compute_schema_root", + "entry_point_class", + "entry_point_module", + "is_package_module", + "module_relpath", + "output_dir_for_entry_point", +] + +OUTPUT_ROOT = PurePosixPath(".") + + +def _split_entry_point(entry_point_path: str) -> tuple[str, str]: + """Split ``"module.path:ClassName"`` into its two parts. + + >>> _split_entry_point("overture.schema.buildings:Building") + ('overture.schema.buildings', 'Building') + """ + if ":" not in entry_point_path: + msg = f"Expected 'module:Class' format, got {entry_point_path!r}" + raise ValueError(msg) + module, cls = entry_point_path.split(":", 1) + return module, cls + + +def entry_point_module(entry_point_path: str) -> str: + """Extract module path from entry-point-style path. + + >>> entry_point_module("overture.schema.buildings:Building") + 'overture.schema.buildings' + """ + return _split_entry_point(entry_point_path)[0] + + +def entry_point_class(entry_point_path: str) -> str: + """Extract class name from entry-point-style path. + + >>> entry_point_class("overture.schema.buildings:Building") + 'Building' + """ + return _split_entry_point(entry_point_path)[1] + + +def compute_schema_root(module_paths: Iterable[str]) -> str: + """Find the longest common dotted prefix of module paths. + + Deduplicates inputs first. For a single unique path, drops the + last component (the module itself). 
+ """ + paths = sorted(set(module_paths)) + if not paths: + msg = "No module paths provided" + raise ValueError(msg) + + segments = [p.split(".") for p in paths] + if len(segments) == 1: + return ".".join(segments[0][:-1]) + + common: list[str] = [] + for parts in zip(*segments, strict=False): + if len(set(parts)) == 1: + common.append(parts[0]) + else: + break + return ".".join(common) + + +def module_relpath(module: str, root: str) -> str: + """Strip the schema root prefix from a dotted module path.""" + if not root: + return module + if module == root: + return "" + prefix = root + "." + if not module.startswith(prefix): + msg = f"Module {module!r} does not start with root {root!r}" + raise ValueError(msg) + return module[len(prefix) :] + + +def is_package_module( + module: str, + module_registry: Mapping[str, object] | None = None, +) -> bool: + """Check whether a module is a package (directory) or a file module. + + Packages have ``__path__``; file modules do not (PEP 302). + """ + registry: Mapping[str, object] = ( + module_registry if module_registry is not None else sys.modules + ) + mod = registry.get(module) + if mod is None: + msg = f"Module {module!r} not found in registry" + raise ValueError(msg) + return hasattr(mod, "__path__") + + +def output_dir_for_entry_point( + entry_point_path: str | None, + schema_root: str, + module_registry: Mapping[str, object] | None = None, +) -> PurePosixPath: + """Compute output directory from an entry-point-style path. + + Raises ValueError if *entry_point_path* is None. + """ + if entry_point_path is None: + msg = "entry_point_path must not be None" + raise ValueError(msg) + module = entry_point_module(entry_point_path) + return compute_output_dir(module, schema_root, module_registry) + + +def compute_output_dir( + module: str, + schema_root: str, + module_registry: Mapping[str, object] | None = None, +) -> PurePosixPath: + """Compute output directory for a module, mirroring package structure. 
"""Supplementary type discovery by walking expanded feature trees.

Walks FieldSpec.model references for sub-models (already extracted),
and extracts enums and NewTypes on first encounter.
"""

from collections.abc import Sequence
from typing import Annotated, get_args, get_origin

from ..extraction.enum_extraction import extract_enum
from ..extraction.model_extraction import expand_model_tree, extract_model
from ..extraction.newtype_extraction import extract_newtype
from ..extraction.pydantic_extraction import extract_pydantic_type
from ..extraction.specs import (
    FeatureSpec,
    FieldSpec,
    ModelSpec,
    SupplementarySpec,
    TypeIdentity,
    is_pydantic_type,
)
from ..extraction.type_analyzer import (
    TypeInfo,
    TypeKind,
    analyze_type,
    is_newtype,
    walk_type_info,
)
from ..extraction.type_registry import is_semantic_newtype

__all__ = ["collect_all_supplementary_types"]


def collect_all_supplementary_types(
    feature_specs: Sequence[FeatureSpec],
) -> dict[TypeIdentity, SupplementarySpec]:
    """Collect supplementary types by walking expanded feature trees.

    Requires that expand_model_tree has been called on all feature specs
    first. Walks FieldSpec.model references for sub-models (already
    extracted), and extracts enums and NewTypes on first encounter.

    Returns a dict mapping TypeIdentity to extracted specs. Two types
    with the same class name from different modules are keyed separately.
    """
    # Top-level feature objects are excluded from the supplementary set;
    # they get their own pages elsewhere.
    feature_objs: set[object] = {spec.identity.obj for spec in feature_specs}
    all_specs: dict[TypeIdentity, SupplementarySpec] = {}
    # Guards against re-walking (and infinite recursion on) shared models.
    visited_models: set[object] = set()

    def _register_newtype(newtype_ref: object, name: str) -> bool:
        """Register a NewType if not already present. Returns True if registered."""
        nt_id = TypeIdentity(newtype_ref, name)
        if nt_id in all_specs:
            return False
        all_specs[nt_id] = extract_newtype(newtype_ref)
        return True

    def _collect_from_model(model_spec: ModelSpec) -> None:
        # Skip models already walked, and models that are themselves
        # top-level features.
        if (
            model_spec.source_type in visited_models
            or model_spec.source_type in feature_objs
        ):
            return
        visited_models.add(model_spec.source_type)
        all_specs[model_spec.identity] = model_spec
        _collect_from_fields(model_spec.fields)

    def _collect_inner_newtypes(newtype_ref: object) -> None:
        """Walk a NewType's __supertype__ chain for intermediate semantic NewTypes."""
        annotation = getattr(newtype_ref, "__supertype__", None)
        while annotation is not None:
            # Annotated layers are transparent for this walk.
            if get_origin(annotation) is Annotated:
                annotation = get_args(annotation)[0]
                continue
            if is_newtype(annotation):
                inner_ti = analyze_type(annotation)
                if (
                    inner_ti.newtype_ref is not None
                    and inner_ti.newtype_name is not None
                    and is_semantic_newtype(inner_ti)
                ):
                    _register_newtype(inner_ti.newtype_ref, inner_ti.newtype_name)
                annotation = getattr(annotation, "__supertype__", None)
                continue
            # Reached a non-NewType, non-Annotated terminal: stop.
            break

    def _collect_from_type_info(ti: TypeInfo) -> None:
        """Collect supplementary types from a single TypeInfo.

        Uses walk_type_info for dict key/value recursion. Handles all
        TypeKind variants without early returns so newtype extraction
        and dict recursion apply regardless of kind.
        """

        def _visit(node: TypeInfo) -> None:
            # UNION, ENUM, and pydantic (PRIMITIVE) are mutually exclusive
            # by TypeKind. NewType extraction is orthogonal -- a node can be
            # a NewType-wrapped ENUM, for instance.
            if node.kind == TypeKind.UNION and node.union_members:
                # Walk each member's fields for supplementary types.
                # Members that are also top-level feature specs are skipped
                # by the feature_objs guard in _collect_from_model.
                for member_cls in node.union_members:
                    member_spec = extract_model(member_cls)
                    expand_model_tree(member_spec)
                    _collect_from_model(member_spec)
            elif node.kind == TypeKind.ENUM and node.source_type is not None:
                enum_id = TypeIdentity.of(node.source_type)
                if enum_id not in all_specs:
                    all_specs[enum_id] = extract_enum(node.source_type)
            elif is_pydantic_type(node):
                assert node.source_type is not None  # guaranteed by is_pydantic_type
                pid = TypeIdentity.of(node.source_type)
                if pid not in all_specs:
                    all_specs[pid] = extract_pydantic_type(node.source_type)

            # Semantic NewTypes always get extracted, including intermediate
            # NewTypes in the wrapping chain (e.g., Id wraps NoWhitespaceString
            # wraps str -- both Id and NoWhitespaceString get pages).
            if (
                node.newtype_ref is not None
                and node.newtype_name is not None
                and is_semantic_newtype(node)
            ):
                newly_registered = _register_newtype(
                    node.newtype_ref, node.newtype_name
                )
                if newly_registered:
                    _collect_inner_newtypes(node.newtype_ref)

        walk_type_info(ti, _visit)

    def _collect_from_fields(fields: list[FieldSpec]) -> None:
        # A single field can match multiple conditions (e.g., Sources is both
        # a semantic NewType and wraps a MODEL-kind type), so checks are
        # independent `if` statements, not `elif`.
        for field_spec in fields:
            ti = field_spec.type_info
            _collect_from_type_info(ti)

            # MODEL-kind fields (whether direct or via NewType wrapper) get expanded
            if ti.kind == TypeKind.MODEL and ti.source_type is not None:
                if field_spec.model is None:
                    msg = (
                        f"MODEL-kind field {field_spec.name!r} has source_type "
                        f"but model=None — call expand_model_tree first"
                    )
                    raise RuntimeError(msg)
                # Cycle-starting fields are not descended into again.
                if not field_spec.starts_cycle:
                    _collect_from_model(field_spec.model)

    for spec in feature_specs:
        _collect_from_fields(spec.fields)

    return all_specs
falling back to a slug filename. + + Always returns a usable link string. Use when the caller needs a + link regardless of whether the type has a registered page. + """ + return self.resolve_link(identity) or slug_filename(identity.name) + + +def _is_normalized(path: PurePosixPath) -> bool: + """Check whether the path contains no '..' or '.' components (except root '.').""" + return ".." not in path.parts and path.parts.count(".") <= 1 + + +def relative_link(source: PurePosixPath, target: PurePosixPath) -> str: + """Compute a relative path from source file to target file. + + Both paths must be normalized (no ``..`` components) and relative + to the same output root. + """ + if not _is_normalized(source): + msg = f"Source path not normalized: {source}" + raise ValueError(msg) + if not _is_normalized(target): + msg = f"Target path not normalized: {target}" + raise ValueError(msg) + source_dir = source.parent + # Count how many levels up from source_dir to common ancestor, + # then descend to target. PurePosixPath doesn't have os.path.relpath, + # so compute manually. + source_parts = source_dir.parts + target_parts = target.parts + + # Find common prefix length + common = 0 + for s, t in zip(source_parts, target_parts, strict=False): + if s != t: + break + common += 1 + + ups = len(source_parts) - common + downs = target_parts[common:] + + parts = [".."] * ups + list(downs) + return "/".join(parts) if parts else "." diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/path_assignment.py b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/path_assignment.py new file mode 100644 index 000000000..2700d5a9e --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/path_assignment.py @@ -0,0 +1,115 @@ +"""Map types to markdown output file paths. + +Uses module-mirrored output directories: output paths derive from +the source Python module path relative to schema_root. 
+""" + +from collections.abc import Sequence +from pathlib import PurePosixPath + +from ..extraction.case_conversion import slug_filename +from ..extraction.specs import ( + FeatureSpec, + PydanticTypeSpec, + SupplementarySpec, + TypeIdentity, +) +from ..layout.module_layout import compute_output_dir, output_dir_for_entry_point + +__all__ = [ + "GEOMETRY_PAGE", + "PRIMITIVES_PAGE", + "build_placement_registry", + "resolve_output_path", +] + +# Aggregate page paths. +PRIMITIVES_PAGE = PurePosixPath("system/primitive/primitives.md") +GEOMETRY_PAGE = PurePosixPath("system/primitive/geometry.md") + + +def build_placement_registry( + feature_specs: Sequence[FeatureSpec], + all_specs: dict[TypeIdentity, SupplementarySpec], + primitive_names: list[TypeIdentity], + geometry_names: list[TypeIdentity], + schema_root: str, +) -> dict[TypeIdentity, PurePosixPath]: + """Build a mapping from TypeIdentity to output file paths. + + Uses module-mirrored output directories: output paths derive from + the source Python module path relative to schema_root. 
+ """ + registry: dict[TypeIdentity, PurePosixPath] = _aggregate_page_entries( + primitive_names, geometry_names + ) + + feature_dirs: set[PurePosixPath] = set() + for spec in feature_specs: + spec_dir = output_dir_for_entry_point(spec.entry_point, schema_root) + registry[spec.identity] = _md_path(spec_dir, spec.name) + feature_dirs.add(spec_dir) + + for tid, supp_spec in all_specs.items(): + if tid in registry: + continue + if isinstance(supp_spec, PydanticTypeSpec): + registry[tid] = ( + PurePosixPath("pydantic") + / supp_spec.source_module + / slug_filename(tid.name) + ) + continue + source_module = getattr(supp_spec.source_type, "__module__", None) + if source_module is None: + continue + output_dir = compute_output_dir(source_module, schema_root) + output_dir = _nest_under_types(output_dir, feature_dirs) + registry[tid] = _md_path(output_dir, tid.name) + + return registry + + +def resolve_output_path( + identity: TypeIdentity, + registry: dict[TypeIdentity, PurePosixPath] | None, +) -> PurePosixPath: + """Look up a type's output path from the registry, with flat-file fallback.""" + if registry is not None and identity in registry: + return registry[identity] + return PurePosixPath(slug_filename(identity.name)) + + +def _aggregate_page_entries( + primitive_names: list[TypeIdentity], + geometry_names: list[TypeIdentity], +) -> dict[TypeIdentity, PurePosixPath]: + """Pre-populate registry entries for types documented on aggregate pages.""" + entries: dict[TypeIdentity, PurePosixPath] = dict.fromkeys( + primitive_names, PRIMITIVES_PAGE + ) + entries.update(dict.fromkeys(geometry_names, GEOMETRY_PAGE)) + return entries + + +def _nest_under_types( + output_dir: PurePosixPath, feature_dirs: set[PurePosixPath] +) -> PurePosixPath: + """Insert ``types/`` after the feature directory portion. + + If *output_dir* equals or is a subdirectory of a feature directory, + returns a path with ``types/`` inserted after the feature directory. 
+ Otherwise returns *output_dir* unchanged. + """ + for fd in sorted(feature_dirs, key=lambda p: len(p.parts), reverse=True): + try: + relative = output_dir.relative_to(fd) + except ValueError: + continue + return fd / "types" / relative + return output_dir + + +def _md_path(directory: PurePosixPath, name: str) -> PurePosixPath: + """Build a .md file path from a directory and a PascalCase type name.""" + return directory / slug_filename(name) diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/pipeline.py b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/pipeline.py new file mode 100644 index 000000000..0bd143c56 --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/pipeline.py @@ -0,0 +1,165 @@ +"""Markdown generation pipeline: render pages without I/O. + +Orchestrates tree expansion, type collection, placement, reverse +references, and rendering into a list of RenderedPage objects. The +caller decides what to do with them (write to disk, add frontmatter, +stream to stdout, etc.). 
+""" + +from collections.abc import Sequence +from dataclasses import dataclass +from pathlib import PurePosixPath + +import overture.schema.system.primitive as _system_primitive +from overture.schema.system.primitive import GeometryType + +from ..extraction.examples import ExampleRecord, collect_dict_paths, load_examples +from ..extraction.model_extraction import expand_model_tree +from ..extraction.primitive_extraction import ( + extract_primitives, + partition_primitive_and_geometry_names, +) +from ..extraction.specs import ( + EnumSpec, + FeatureSpec, + ModelSpec, + NewTypeSpec, + PydanticTypeSpec, + SupplementarySpec, + TypeIdentity, + UnionSpec, +) +from ..layout.type_collection import collect_all_supplementary_types +from .link_computation import LinkContext +from .path_assignment import ( + GEOMETRY_PAGE, + PRIMITIVES_PAGE, + build_placement_registry, + resolve_output_path, +) +from .renderer import ( + render_enum, + render_feature, + render_geometry_from_values, + render_newtype, + render_primitives_from_specs, + render_pydantic_type, +) +from .reverse_references import UsedByEntry, compute_reverse_references + +__all__ = ["RenderedPage", "generate_markdown_pages"] + + +@dataclass(frozen=True, slots=True) +class RenderedPage: + """A rendered page with its content and output path.""" + + content: str + path: PurePosixPath + is_feature: bool = False + + +def _load_model_examples( + spec: FeatureSpec, +) -> list[ExampleRecord] | None: + """Load examples for a feature spec, returning None when absent.""" + if isinstance(spec, UnionSpec): + pyproject_source = spec.members[0] if spec.members else None + validation_type = spec.source_annotation + model_fields = spec.common_base.model_fields + else: + pyproject_source = spec.source_type + validation_type = spec.source_type + model_fields = spec.source_type.model_fields if spec.source_type else {} + if not pyproject_source: + return None + field_names = [f.name for f in spec.fields] + dict_paths = 
collect_dict_paths(spec.fields) + examples = load_examples( + validation_type, + spec.name, + field_names, + pyproject_source=pyproject_source, + model_fields=model_fields, + dict_paths=dict_paths, + ) + return examples or None + + +def _render_supplement( + tid: TypeIdentity, + spec: SupplementarySpec, + registry: dict[TypeIdentity, PurePosixPath], + reverse_refs: dict[TypeIdentity, list[UsedByEntry]], +) -> RenderedPage: + """Render a single supplementary type page.""" + output_path = resolve_output_path(tid, registry) + ctx = LinkContext(output_path, registry) + used_by = reverse_refs.get(tid) + + if isinstance(spec, EnumSpec): + content = render_enum(spec, link_ctx=ctx, used_by=used_by) + elif isinstance(spec, NewTypeSpec): + content = render_newtype(spec, ctx, used_by=used_by) + elif isinstance(spec, ModelSpec): + content = render_feature(spec, ctx, used_by=used_by) + elif isinstance(spec, PydanticTypeSpec): + content = render_pydantic_type(spec, link_ctx=ctx, used_by=used_by) + else: + raise TypeError(f"Unhandled SupplementarySpec variant: {type(spec).__name__}") + + return RenderedPage(content=content, path=output_path) + + +def generate_markdown_pages( + feature_specs: Sequence[FeatureSpec], + schema_root: str, +) -> list[RenderedPage]: + """Generate all markdown pages from feature specs. + + Returns rendered pages without writing to disk. The caller handles + I/O, frontmatter injection, and any output-format-specific concerns + (like Docusaurus category files). 
+ """ + cache: dict[type, ModelSpec] = {} + for spec in feature_specs: + expand_model_tree(spec, cache) + + primitive_names, geometry_names = partition_primitive_and_geometry_names( + _system_primitive + ) + all_specs = collect_all_supplementary_types(feature_specs) + registry = build_placement_registry( + feature_specs, all_specs, primitive_names, geometry_names, schema_root + ) + + reverse_refs = compute_reverse_references(feature_specs, all_specs) + + pages: list[RenderedPage] = [] + + for spec in feature_specs: + output_path = registry[spec.identity] + ctx = LinkContext(output_path, registry) + examples = _load_model_examples(spec) + used_by = reverse_refs.get(spec.identity) + content = render_feature(spec, link_ctx=ctx, examples=examples, used_by=used_by) + pages.append(RenderedPage(content=content, path=output_path, is_feature=True)) + + for tid, supp_spec in all_specs.items(): + pages.append(_render_supplement(tid, supp_spec, registry, reverse_refs)) + + pages.append( + RenderedPage( + content=render_primitives_from_specs(extract_primitives(primitive_names)), + path=PRIMITIVES_PAGE, + ) + ) + + pages.append( + RenderedPage( + content=render_geometry_from_values([m.value for m in GeometryType]), + path=GEOMETRY_PAGE, + ) + ) + + return pages diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/renderer.py b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/renderer.py new file mode 100644 index 000000000..6ef448eb5 --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/renderer.py @@ -0,0 +1,621 @@ +"""Markdown renderer for Pydantic model documentation.""" + +import functools +import json +import re +from collections.abc import Callable +from dataclasses import dataclass +from pathlib import Path +from typing import TypedDict, cast + +from annotated_types import Interval +from jinja2 import Environment, FileSystemLoader +from typing_extensions import NotRequired + +from 
..extraction.examples import ExampleRecord +from ..extraction.field_constraints import constraint_display_text +from ..extraction.model_constraints import analyze_model_constraints +from ..extraction.specs import ( + AnnotatedField, + EnumSpec, + FeatureSpec, + FieldSpec, + ModelSpec, + NewTypeSpec, + PrimitiveSpec, + PydanticTypeSpec, + TypeIdentity, + UnionSpec, +) +from ..extraction.type_analyzer import ( + ConstraintSource, +) +from .link_computation import LinkContext +from .reverse_references import UsedByEntry +from .type_format import ( + format_type, + format_underlying_type, + resolve_type_link, +) + +__all__ = [ + "render_enum", + "render_feature", + "render_geometry_from_values", + "render_newtype", + "render_primitives_from_specs", + "render_pydantic_type", +] + + +_LinkFn = Callable[[TypeIdentity], str] + +_TEMPLATES_DIR = Path(__file__).parent / "templates" + +_BARE_URL_RE = re.compile( + r"(?)]+|www\.[^\s<>)]+)" +) +_TRAILING_PUNCT_RE = re.compile(r"[.,;:!?]+$") +# (.+?) deliberately does not match newlines -- CommonMark code spans are inline. +_CODE_SPAN_RE = re.compile(r"(`+)(.+?)\1") + + +def _linkify_bare_urls(text: str) -> str: + """Wrap bare URLs in Markdown link syntax. + + Turns ``www.example.com`` into ``[www.example.com](https://www.example.com)`` + and ``https://example.com`` into ``[https://example.com](https://example.com)``. + URLs already inside ``[text](url)`` or backtick code spans are left + untouched. Trailing sentence punctuation (``.``, ``,``, etc.) is excluded + from the link. + + Two-pass approach: extract code spans first, linkify the remaining + text, then restore code spans. 
+ """ + # Extract code spans, replacing with placeholders + spans: list[str] = [] + + def _stash_span(m: re.Match[str]) -> str: + spans.append(m.group(0)) + return f"\x00CODESPAN{len(spans) - 1}\x00" + + text = _CODE_SPAN_RE.sub(_stash_span, text) + + # Linkify bare URLs in non-code text + def _to_link(m: re.Match[str]) -> str: + raw = m.group(0) + url = _TRAILING_PUNCT_RE.sub("", raw) + trailing = raw[len(url) :] + href = url if url.startswith("http") else f"https://{url}" + return f"[{url}]({href}){trailing}" + + text = _BARE_URL_RE.sub(_to_link, text) + + # Restore code spans + for i, span in enumerate(spans): + text = text.replace(f"\x00CODESPAN{i}\x00", span) + + return text + + +@functools.lru_cache(maxsize=1) +def _get_jinja_env() -> Environment: + """Return the Jinja2 environment, creating it on first use.""" + env = Environment( + loader=FileSystemLoader(_TEMPLATES_DIR), + trim_blocks=True, + lstrip_blocks=True, + ) + env.filters["linkify_urls"] = _linkify_bare_urls + return env + + +_EXAMPLE_TRUNCATION_LIMIT = 100 + + +class _FieldRow(TypedDict): + """Template context for a single field table row. + + ``pre_formatted`` indicates the ``name`` already contains backticks + and variant tags, so the template should render it verbatim. + """ + + name: str + type_str: str + description: str | None + pre_formatted: NotRequired[bool] + + +_PARAGRAPH_BREAK_RE = re.compile(r"\n(?:[ \t]*\n)+") + + +def _unwrap_paragraphs(text: str) -> str: + r"""Unwrap hard-wrapped lines within paragraphs, preserving paragraph breaks. + + Splits on blank lines (paragraph boundaries), replaces single newlines + within each paragraph with spaces, then rejoins with ``\n\n``. + Matches markdown's treatment of newlines within paragraphs. + """ + paragraphs = _PARAGRAPH_BREAK_RE.split(text) + return "\n\n".join(p.replace("\n", " ") for p in paragraphs) + + +def _sanitize_for_table_cell(text: str) -> str: + """Sanitize text for embedding in a markdown table cell. 
+ + Unwraps within-paragraph newlines to spaces, then converts paragraph + breaks to ``

``. Escapes pipe characters for table safety. + Uses ``
`` (not ``
``) for MDX/Docusaurus compatibility. + """ + text = text.strip() + text = _unwrap_paragraphs(text) + text = text.replace("\n\n", "

") + return text.replace("|", "\\|") + + +def _truncate(text: str) -> str: + """Truncate text to ``_EXAMPLE_TRUNCATION_LIMIT`` chars, adding ellipsis.""" + if len(text) > _EXAMPLE_TRUNCATION_LIMIT: + return text[: _EXAMPLE_TRUNCATION_LIMIT - 3] + "..." + return text + + +def _format_example_value(value: object) -> str: + """Format an example value for display in a markdown Column | Value table. + + All non-empty values render in backticks for consistent monospace + formatting. Long representations are truncated before wrapping. + """ + if value is None: + return "`null`" + + if isinstance(value, bool): + return "`true`" if value else "`false`" + + if isinstance(value, str): + if value == "": + return "" + return f"`{_truncate(value)}`" + + if isinstance(value, list): + items = ", ".join(json.dumps(item) for item in value) + return f"`{_truncate(f'[{items}]')}`" + + if isinstance(value, dict): + pairs = ", ".join(f"{json.dumps(k)}: {json.dumps(v)}" for k, v in value.items()) + return f"`{_truncate(f'{{{pairs}}}')}`" + + return f"`{value}`" + + +def _field_template_context( + field: FieldSpec, + ctx: LinkContext | None = None, +) -> _FieldRow: + """Build template context dict for a field.""" + description = ( + _sanitize_for_table_cell(field.description) if field.description else None + ) + return _FieldRow( + name=field.name, + type_str=format_type(field, ctx), + description=description, + ) + + +def _annotate_constraint_notes( + row: _FieldRow, + notes: list[str], +) -> None: + """Append italic constraint descriptions to a field's description cell.""" + formatted = "
".join(f"*{note}*" for note in notes) + if row["description"]: + row["description"] = f"{row['description']}

{formatted}" + else: + row["description"] = formatted + + +def _link_fn_from_ctx(ctx: LinkContext | None) -> _LinkFn: + r"""Build a TypeIdentity-to-markdown-link resolver from a LinkContext. + + Returns a function that resolves a TypeIdentity to ``[`Name`](href)`` + when the identity has a page in the registry, or plain ``\`Name\``` otherwise. + """ + return functools.partial(resolve_type_link, ctx=ctx) + + +def _annotate_field_constraints( + row: _FieldRow, field: FieldSpec, ctx: LinkContext | None +) -> None: + """Annotate a field row with constraints from the field's own annotation. + + Shows constraints where source is None — those applied directly to + the field, not inherited from NewType chains. NewType-inherited + constraints appear on the NewType's own page instead. + """ + link_fn = _link_fn_from_ctx(ctx) + notes = [ + constraint_display_text(cs, link_fn=link_fn) + for cs in field.type_info.constraints + if cs.source_ref is None + ] + if notes: + _annotate_constraint_notes(row, notes) + + +def _expandable_list_suffix(field_spec: FieldSpec) -> str: + """Return ``"[]"`` per nesting level for list-of-model fields expanded inline.""" + if ( + field_spec.type_info.is_list + and field_spec.model + and not field_spec.starts_cycle + ): + return "[]" * field_spec.type_info.list_depth + return "" + + +def _expand_sub_model( + field_spec: FieldSpec, + name: str, + ctx: LinkContext | None, + result: list[_FieldRow], +) -> None: + """Expand sub-model fields inline, appending child rows to *result*.""" + sub = field_spec.model if not field_spec.starts_cycle else None + if sub is not None: + child_prefix = f"{name}{_expandable_list_suffix(field_spec)}." + result.extend(_expand_model_fields(sub.fields, ctx, prefix=child_prefix)) + + +def _annotate_top_level_constraints( + rows: list[_FieldRow], + constraint_notes: dict[str, list[str]] | None, +) -> None: + """Annotate top-level field rows with model-constraint notes. 
+ + Top-level rows are those without dot-notation prefixes. + """ + if not constraint_notes: + return + for row in rows: + name = row["name"] + if "." in name: + continue + field_name = name.split("[")[0] + if field_name in constraint_notes: + _annotate_constraint_notes(row, constraint_notes[field_name]) + + +def _expand_model_fields( + fields: list[FieldSpec], + ctx: LinkContext | None, + prefix: str = "", +) -> list[_FieldRow]: + """Flatten nested model fields into dot-notation rows for display. + + Walks the pre-populated FieldSpec.model tree. Stops recursion at + fields marked with starts_cycle. + """ + result: list[_FieldRow] = [] + for field_spec in fields: + row = _field_template_context(field_spec, ctx) + name = f"{prefix}{field_spec.name}" if prefix else field_spec.name + row["name"] = f"{name}{_expandable_list_suffix(field_spec)}" + if not prefix: + _annotate_field_constraints(row, field_spec, ctx) + result.append(row) + + _expand_sub_model(field_spec, name, ctx, result) + return result + + +def _short_variant_name(class_name: str, union_name: str) -> str: + """Strip common suffix to produce short variant name. 
+ + Examples + -------- + >>> _short_variant_name("RoadSegment", "Segment") + 'Road' + >>> _short_variant_name("WaterSegment", "Segment") + 'Water' + >>> _short_variant_name("Building", "Building") + 'Building' + """ + if class_name.endswith(union_name): + short = class_name[: -len(union_name)] + if short: + return short + return class_name + + +def _variant_tag(annotated: AnnotatedField, union_name: str) -> str | None: + """Return an italic variant tag like ``*(Road, Water)*``, or None for shared fields.""" + if annotated.variant_sources is None: + return None + short_names = [ + _short_variant_name(v, union_name) for v in annotated.variant_sources + ] + return f" *({', '.join(short_names)})*" + + +def _expand_union_fields( + spec: UnionSpec, + ctx: LinkContext | None, + constraint_notes: dict[str, list[str]] | None = None, +) -> list[_FieldRow]: + """Expand UnionSpec fields with inline variant tags. + + Shared fields (variant_sources=None) render normally. Variant-specific + fields get *(ShortName)* tag after the field name. + """ + result: list[_FieldRow] = [] + for annotated in spec.annotated_fields: + field_spec = annotated.field_spec + row = _field_template_context(field_spec, ctx) + name = field_spec.name + suffix = _expandable_list_suffix(field_spec) + + _annotate_field_constraints(row, field_spec, ctx) + if constraint_notes and field_spec.name in constraint_notes: + _annotate_constraint_notes(row, constraint_notes[field_spec.name]) + + tag = _variant_tag(annotated, spec.name) + if tag is not None: + row["name"] = f"`{name}{suffix}`{tag}" + row["pre_formatted"] = True + else: + row["name"] = f"{name}{suffix}" + + result.append(row) + _expand_sub_model(field_spec, name, ctx, result) + return result + + +def render_feature( + spec: FeatureSpec, + link_ctx: LinkContext | None = None, + examples: list[ExampleRecord] | None = None, + used_by: list[UsedByEntry] | None = None, +) -> str: + """Render a FeatureSpec (ModelSpec or UnionSpec) as Markdown documentation. 
+ + For ModelSpec, requires expand_model_tree to have been called first. + For UnionSpec, adds inline variant tags to variant-specific fields. + """ + template = _get_jinja_env().get_template("feature.md.jinja2") + + constraint_descriptions, field_notes = analyze_model_constraints(spec.constraints) + + if isinstance(spec, UnionSpec): + fields = _expand_union_fields(spec, link_ctx, constraint_notes=field_notes) + elif isinstance(spec, ModelSpec): + fields = _expand_model_fields(spec.fields, link_ctx) + _annotate_top_level_constraints(fields, field_notes) + else: + raise TypeError(f"Unsupported spec type: {type(spec).__name__}") + + formatted_examples: list[list[dict[str, str]]] | None = None + if examples: + formatted_examples = [ + [ + {"column": key, "value": _format_example_value(val)} + for key, val in record.rows + ] + for record in examples + ] + + return template.render( + model=spec, + fields=fields, + constraints=constraint_descriptions, + examples=formatted_examples, + used_by=_build_used_by_context(used_by, link_ctx), + ) + + +def render_enum( + enum_spec: EnumSpec, + link_ctx: LinkContext | None = None, + used_by: list[UsedByEntry] | None = None, +) -> str: + """Render an EnumSpec as Markdown documentation.""" + template = _get_jinja_env().get_template("enum.md.jinja2") + return template.render( + enum=enum_spec, used_by=_build_used_by_context(used_by, link_ctx) + ) + + +@dataclass +class _NewTypeConstraintRow: + """Rendered constraint for template.""" + + display: str + source: str | None = None + source_link: str | None = None + + +def _format_constraint( + cs: ConstraintSource, + newtype_ref: object, + ctx: LinkContext | None = None, +) -> _NewTypeConstraintRow: + """Format a ConstraintSource for display in a NewType page.""" + display = constraint_display_text(cs) + + if cs.source_ref is None or cs.source_ref is newtype_ref: + return _NewTypeConstraintRow(display=display) + + assert cs.source_name is not None # source_ref and source_name are set 
together + source_identity = TypeIdentity(cs.source_ref, cs.source_name) + source_link = ctx.resolve_link(source_identity) if ctx else None + return _NewTypeConstraintRow( + display=display, source=cs.source_name, source_link=source_link + ) + + +class _UsedByContext(TypedDict): + """Template context for a used-by entry.""" + + name: str + link: str | None + + +def _build_used_by_context( + used_by: list[UsedByEntry] | None, + link_ctx: LinkContext | None, +) -> list[_UsedByContext] | None: + """Build template context for used-by entries.""" + if not used_by: + return None + return [ + { + "name": entry.identity.name, + "link": link_ctx.resolve_link(entry.identity) if link_ctx else None, + } + for entry in used_by + ] + + +def render_newtype( + newtype_spec: NewTypeSpec, + link_ctx: LinkContext | None = None, + used_by: list[UsedByEntry] | None = None, +) -> str: + """Render a NewTypeSpec as Markdown documentation.""" + template = _get_jinja_env().get_template("newtype.md.jinja2") + ti = newtype_spec.type_info + underlying = format_underlying_type(ti, link_ctx) + constraints = [ + _format_constraint(cs, newtype_spec.source_type, link_ctx) + for cs in ti.constraints + ] + + return template.render( + newtype=newtype_spec, + underlying_type=underlying, + constraints=constraints, + used_by=_build_used_by_context(used_by, link_ctx), + ) + + +def render_pydantic_type( + spec: PydanticTypeSpec, + link_ctx: LinkContext | None = None, + used_by: list[UsedByEntry] | None = None, +) -> str: + """Render a PydanticTypeSpec as Markdown documentation.""" + template = _get_jinja_env().get_template("pydantic_type.md.jinja2") + return template.render( + pydantic_type=spec, + used_by=_build_used_by_context(used_by, link_ctx), + ) + + +# Matches the ge/le bounds of the int64 NewType in overture.schema.system.primitive. +_INT64_MIN = -(2**63) +_INT64_MAX = 2**63 - 1 + +_NumericBound = int | float | None + +# IEEE 754 precision by bit width — formatting knowledge, not schema data. 
+_FLOAT_PRECISION: dict[int, str] = {32: "~7 decimal digits", 64: "~15 decimal digits"} + + +def _format_bound(value: int | float) -> str: + """Format a numeric bound for display. + + Uses ``2^63`` notation for int64-scale values to avoid unreadable + numbers; otherwise formats with thousands separators for ints. + """ + if value == _INT64_MIN: + return "-2^63" + if value == _INT64_MAX: + return "2^63-1" + if isinstance(value, float): + return str(value) + return f"{value:,}" + + +def _format_interval(bounds: Interval) -> str: + """Format an Interval as a range string, or empty if unconstrained. + + Two inclusive bounds render as ``lower to upper``. All other + combinations use explicit comparison operators so the + inclusivity/exclusivity is unambiguous. + """ + # Interval fields are typed as Supports* protocols; narrow to numeric + # since we only encounter int/float constraints from the schema. + ge = cast(_NumericBound, bounds.ge) + gt = cast(_NumericBound, bounds.gt) + le = cast(_NumericBound, bounds.le) + lt = cast(_NumericBound, bounds.lt) + + # Both bounds inclusive: compact "lower to upper" form + if ge is not None and le is not None: + return f"{_format_bound(ge)} to {_format_bound(le)}" + + # Any other two-bound combination: use explicit operators + parts: list[str] = [] + if ge is not None: + parts.append(f">= {_format_bound(ge)}") + elif gt is not None: + parts.append(f"> {_format_bound(gt)}") + + if le is not None: + parts.append(f"<= {_format_bound(le)}") + elif lt is not None: + parts.append(f"< {_format_bound(lt)}") + + return ", ".join(parts) + + +def _bit_width_key(name: str) -> tuple[str, int]: + """Sort key: prefix then numeric bit width.""" + prefix = name.rstrip("0123456789") + digits = name[len(prefix) :] + return (prefix, int(digits) if digits else 0) + + +def render_primitives_from_specs(specs: list[PrimitiveSpec]) -> str: + """Render the primitives.md page from pre-extracted PrimitiveSpecs.""" + template = 
_get_jinja_env().get_template("primitives.md.jinja2") + + signed_ints: list[dict[str, str | None]] = [] + unsigned_ints: list[dict[str, str | None]] = [] + floats: list[dict[str, str | None]] = [] + + for spec in sorted(specs, key=lambda s: _bit_width_key(s.name)): + if spec.name.startswith(("int", "uint")): + target = signed_ints if spec.name.startswith("int") else unsigned_ints + target.append( + { + "name": spec.name, + "range": _format_interval(spec.bounds), + "description": _sanitize_for_table_cell(spec.description or ""), + } + ) + elif spec.name.startswith("float"): + precision = ( + _FLOAT_PRECISION.get(spec.float_bits, "") if spec.float_bits else "" + ) + floats.append( + { + "name": spec.name, + "precision": precision, + "description": _sanitize_for_table_cell(spec.description or ""), + } + ) + + return template.render( + signed_ints=signed_ints, + unsigned_ints=unsigned_ints, + floats=floats, + ) + + +def render_geometry_from_values(geometry_type_values: list[str]) -> str: + """Render the geometry.md page from pre-extracted geometry type values.""" + template = _get_jinja_env().get_template("geometry.md.jinja2") + geometry_types = ", ".join(f"`{v}`" for v in geometry_type_values) + return template.render(geometry_types=geometry_types) diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/reverse_references.py b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/reverse_references.py new file mode 100644 index 000000000..2ad471fc1 --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/reverse_references.py @@ -0,0 +1,173 @@ +"""Compute reverse references from types to their referrers.""" + +from __future__ import annotations + +from collections.abc import Mapping, Sequence +from dataclasses import dataclass +from enum import Enum + +from ..extraction.specs import ( + FeatureSpec, + FieldSpec, + ModelSpec, + NewTypeSpec, + SupplementarySpec, + TypeIdentity, + UnionSpec, + 
is_pydantic_type, +) +from ..extraction.type_analyzer import TypeInfo, TypeKind, walk_type_info + +__all__ = [ + "UsedByEntry", + "UsedByKind", + "compute_reverse_references", +] + + +class UsedByKind(Enum): + """Kind of referrer in a 'used by' entry.""" + + MODEL = 0 + NEWTYPE = 1 + + +@dataclass(frozen=True, slots=True) +class UsedByEntry: + """A single 'used by' entry pointing to a referrer.""" + + identity: TypeIdentity + kind: UsedByKind + + +def compute_reverse_references( + feature_specs: Sequence[FeatureSpec], + all_specs: Mapping[TypeIdentity, SupplementarySpec], +) -> dict[TypeIdentity, list[UsedByEntry]]: + """Compute reverse references from types to their referrers. + + Returns a dict mapping TypeIdentity to lists of UsedByEntry, sorted with + models before NewTypes, alphabetical within each group. + + Parameters + ---------- + feature_specs : Sequence[FeatureSpec] + Feature-level specs (ModelSpec or UnionSpec). + all_specs : Mapping[TypeIdentity, SupplementarySpec] + Supplementary types (enums, newtypes, sub-models). + + Returns + ------- + dict[TypeIdentity, list[UsedByEntry]] + Dict mapping TypeIdentity to sorted lists of UsedByEntry. 
+ """ + # Track references with sets to deduplicate + references: dict[TypeIdentity, set[UsedByEntry]] = {} + + def add_reference( + target: TypeIdentity, referrer: TypeIdentity, kind: UsedByKind + ) -> None: + """Add a reference from referrer to target, with deduplication.""" + if target == referrer or target not in all_specs: + return + references.setdefault(target, set()).add(UsedByEntry(referrer, kind)) + + def collect_from_type_info( + ti: TypeInfo, referrer: TypeIdentity, referrer_kind: UsedByKind + ) -> None: + """Collect references from a TypeInfo.""" + + def _visit(node: TypeInfo) -> None: + if node.newtype_ref is not None and node.newtype_name is not None: + add_reference( + TypeIdentity(node.newtype_ref, node.newtype_name), + referrer, + referrer_kind, + ) + + # ENUM, MODEL, pydantic (PRIMITIVE), and UNION are mutually + # exclusive by TypeKind. + if ( + node.kind in (TypeKind.ENUM, TypeKind.MODEL) + and node.source_type is not None + ): + add_reference( + TypeIdentity.of(node.source_type), + referrer, + referrer_kind, + ) + elif is_pydantic_type(node): + add_reference( + TypeIdentity.of(node.source_type), referrer, referrer_kind + ) + elif node.union_members is not None: + for member_cls in node.union_members: + add_reference( + TypeIdentity.of(member_cls), + referrer, + referrer_kind, + ) + + walk_type_info(ti, _visit) + + def collect_from_fields( + fields: list[FieldSpec], referrer: TypeIdentity, referrer_kind: UsedByKind + ) -> None: + """Collect references from model fields.""" + for field_spec in fields: + collect_from_type_info(field_spec.type_info, referrer, referrer_kind) + + def collect_from_model_spec(spec: ModelSpec, referrer: TypeIdentity) -> None: + """Collect references from a ModelSpec.""" + collect_from_fields(spec.fields, referrer, UsedByKind.MODEL) + + def collect_from_union_spec(spec: UnionSpec) -> None: + """Collect references from a UnionSpec.""" + referrer = spec.identity + # Union features reference their members + for member_cls 
in spec.members: + add_reference( + TypeIdentity.of(member_cls), + referrer, + UsedByKind.MODEL, + ) + # Also walk fields for other supplementary types + collect_from_fields(spec.fields, referrer, UsedByKind.MODEL) + + def collect_from_newtype_spec(spec: NewTypeSpec, referrer: TypeIdentity) -> None: + """Collect references from a NewTypeSpec.""" + collect_from_type_info(spec.type_info, referrer, UsedByKind.NEWTYPE) + + # Collect inherited NewTypes from constraint sources + for cs in spec.type_info.constraints: + if cs.source_ref is not None and cs.source_name is not None: + ref_id = TypeIdentity(cs.source_ref, cs.source_name) + add_reference(ref_id, referrer, UsedByKind.NEWTYPE) + + # Collect from features + for spec in feature_specs: + if isinstance(spec, ModelSpec): + collect_from_model_spec(spec, spec.identity) + elif isinstance(spec, UnionSpec): + collect_from_union_spec(spec) + + # Collect from supplementary specs (NewTypes and sub-models reference + # other types; enums do not, so they need no processing here) + for tid, supp_spec in all_specs.items(): + if isinstance(supp_spec, NewTypeSpec): + collect_from_newtype_spec(supp_spec, tid) + elif isinstance(supp_spec, ModelSpec): + collect_from_model_spec(supp_spec, tid) + + # Sort into deterministic lists. (kind, name) handles the common case; + # module breaks ties when two referrers share the same display name + # (e.g. identically-named types from different themes/modules). 
+ result: dict[TypeIdentity, list[UsedByEntry]] = {} + for target, ref_set in references.items(): + entries = sorted( + ref_set, + key=lambda e: (e.kind.value, e.identity.name, e.identity.module), + ) + result[target] = entries + + return result diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/templates/_used_by.md.jinja2 b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/templates/_used_by.md.jinja2 new file mode 100644 index 000000000..fcbd9e82b --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/templates/_used_by.md.jinja2 @@ -0,0 +1,10 @@ +{% if used_by %} + +## Used By + +{% for entry in used_by -%} +{% if entry.link %}- [`{{ entry.name }}`]({{ entry.link }}) +{% else %}- `{{ entry.name }}` +{% endif %} +{% endfor %} +{% endif %} diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/templates/enum.md.jinja2 b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/templates/enum.md.jinja2 new file mode 100644 index 000000000..b5b71c254 --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/templates/enum.md.jinja2 @@ -0,0 +1,13 @@ +# {{ enum.name }} +{% if enum.description %} + +{{ enum.description | linkify_urls }} +{% endif %} + +## Values + +{% for member in enum.members -%} +- `{{ member.value }}`{% if member.description %} - {{ member.description }}{% endif %} + +{% endfor %} +{% include '_used_by.md.jinja2' %} diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/templates/feature.md.jinja2 b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/templates/feature.md.jinja2 new file mode 100644 index 000000000..78a183c5e --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/templates/feature.md.jinja2 @@ -0,0 +1,45 @@ +# {{ model.name }} +{% if model.description %} + +{{ model.description | linkify_urls }} 
+{% endif %} + +## Fields + +| Name | Type | Description | +| -----: | :----: | ------------- | +{% for field in fields -%} +| {% if field.pre_formatted %}{{ field.name }}{% else %}`{{ field.name }}`{% endif %} | {{ field.type_str }} | {% if field.description %}{{ field.description }} {% endif %}| +{% endfor %} +{% if constraints %} + +## Constraints + +{% for c in constraints %} +- {{ c }} +{% endfor %} +{% endif %} +{% if examples %} + +## Examples +{% if examples|length == 1 %} + +| Column | Value | +| -------: | ------- | +{% for row in examples[0] -%} +| `{{ row.column }}` | {{ row.value }} | +{% endfor %} +{% else %} +{% for example in examples %} + +### Example {{ loop.index }} + +| Column | Value | +| -------: | ------- | +{% for row in example -%} +| `{{ row.column }}` | {{ row.value }} | +{% endfor %} +{% endfor %} +{% endif %} +{% endif %} +{% include '_used_by.md.jinja2' %} diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/templates/geometry.md.jinja2 b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/templates/geometry.md.jinja2 new file mode 100644 index 000000000..cd6b200de --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/templates/geometry.md.jinja2 @@ -0,0 +1,11 @@ +# Geometry Types + +Spatial types for representing geographic features. + +## Types + +| Type | Description | +| -----: | ------------- | +| `Geometry` | GeoJSON geometry value (Point, LineString, Polygon, etc.) 
| +| `BBox` | Bounding box as 4 or 6 coordinate values: [west, south, east, north] or [west, south, min-altitude, east, north, max-altitude] | +| `GeometryType` | Enumeration of geometry types: {{ geometry_types }} | diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/templates/newtype.md.jinja2 b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/templates/newtype.md.jinja2 new file mode 100644 index 000000000..3d2c58f3a --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/templates/newtype.md.jinja2 @@ -0,0 +1,17 @@ +# {{ newtype.name }} +{% if newtype.description %} + +{{ newtype.description | linkify_urls }} +{% endif %} + +Underlying type: {{ underlying_type }} +{% if constraints %} + +## Constraints + +{% for c in constraints -%} +- {{ c.display }}{% if c.source_link %} (from [`{{ c.source }}`]({{ c.source_link }})){% endif %} + +{% endfor %} +{% endif %} +{% include '_used_by.md.jinja2' %} diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/templates/primitives.md.jinja2 b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/templates/primitives.md.jinja2 new file mode 100644 index 000000000..fd87a1ec0 --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/templates/primitives.md.jinja2 @@ -0,0 +1,27 @@ +# Primitive Types + +Numeric types used for schema field definitions. 
+ +## Integer Types + +| Type | Range | Description | +| -----: | :-----: | ------------- | +{% for t in signed_ints -%} +| `{{ t.name }}` | {{ t.range }} | {{ t.description }} | +{% endfor %} + +## Unsigned Integer Types + +| Type | Range | Description | +| -----: | :-----: | ------------- | +{% for t in unsigned_ints -%} +| `{{ t.name }}` | {{ t.range }} | {{ t.description }} | +{% endfor %} + +## Floating Point Types + +| Type | Precision | Description | +| -----: | :---------: | ------------- | +{% for t in floats -%} +| `{{ t.name }}` | {{ t.precision }} | {{ t.description }} | +{% endfor %} diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/templates/pydantic_type.md.jinja2 b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/templates/pydantic_type.md.jinja2 new file mode 100644 index 000000000..3185acf56 --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/templates/pydantic_type.md.jinja2 @@ -0,0 +1,8 @@ +# {{ pydantic_type.name }} +{% if pydantic_type.description %} + +{{ pydantic_type.description | linkify_urls }} +{% endif %} + +See: [Pydantic docs]({{ pydantic_type.docs_url }}) +{% include '_used_by.md.jinja2' %} diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/type_format.py b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/type_format.py new file mode 100644 index 000000000..0cc047e6e --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/type_format.py @@ -0,0 +1,238 @@ +"""Format TypeInfo as markdown type strings with cross-page links.""" + +from __future__ import annotations + +from pydantic import BaseModel + +from ..extraction.specs import FieldSpec, TypeIdentity +from ..extraction.type_analyzer import TypeInfo, TypeKind +from ..extraction.type_registry import is_semantic_newtype, resolve_type_name +from .link_computation import LinkContext + +__all__ = [ + 
"format_dict_type", + "format_type", + "format_underlying_type", + "resolve_type_link", +] + + +def _code_link(name: str, href: str) -> str: + """Format a markdown link with inline-code text: [``name``](href).""" + return f"[`{name}`]({href})" + + +def resolve_type_link(identity: TypeIdentity, ctx: LinkContext | None = None) -> str: + """Resolve a TypeIdentity to a linked code span or plain code span. + + When *ctx* is provided, links only to types in the registry (types + without pages render as inline code). Without context, renders as + inline code -- producing a link requires a placement registry to + compute correct relative paths. + """ + if ctx: + href = ctx.resolve_link(identity) + if href: + return _code_link(identity.name, href) + return f"`{identity.name}`" + + +def _wrap_list_n(inner: str, depth: int) -> str: + """Wrap an inner type string in ``list<...>`` markdown syntax *depth* times. + + Builds a single broken-backtick wrapper rather than nesting iteratively. + Iterative nesting creates adjacent backticks (`````) that CommonMark + interprets as multi-backtick code span delimiters. 
+ """ + return f"`{'list<' * depth}`{inner}`{'>' * depth}`" + + +def _plain_list_type(base: str, depth: int) -> str: + """Format a plain (unlinked) list type string for *depth* nesting levels.""" + return f"`{'list<' * depth}{base}{'>' * depth}`" + + +def _linked_type_identity(ti: TypeInfo) -> TypeIdentity | None: + """Return the TypeIdentity to use for a markdown link, or None for non-linked types.""" + if is_semantic_newtype(ti) and ti.newtype_ref is not None: + assert ti.newtype_name is not None # guaranteed by is_semantic_newtype + return TypeIdentity(ti.newtype_ref, ti.newtype_name) + if ti.kind in (TypeKind.ENUM, TypeKind.MODEL) and ti.source_type is not None: + return TypeIdentity(ti.source_type, ti.base_type) + return None + + +def _try_primitive_link( + ti: TypeInfo, display_name: str, ctx: LinkContext | None +) -> str | None: + """Try to link a PRIMITIVE type to its page via registry lookup. + + Registered primitives (int32, Geometry) and Pydantic types (HttpUrl) + can have pages in the registry. Uses the type registry display name + (e.g. ``geometry`` not ``Geometry``) for the link text. + """ + if ti.kind != TypeKind.PRIMITIVE or not ctx: + return None + candidate = ti.newtype_ref or ti.source_type + if candidate is None: + return None + href = ctx.resolve_link(TypeIdentity(candidate, display_name)) + if href: + return _code_link(display_name, href) + return None + + +def _markdown_type_name(ti: TypeInfo) -> str: + """Return the markdown display name for a type. + + Uses the semantic NewType name when present (e.g. ``LanguageTag``), + otherwise falls back to the resolved markdown type (e.g. ``string``). 
+ """ + name = ti.newtype_name if is_semantic_newtype(ti) else None + return name or resolve_type_name(ti, "markdown") + + +def format_dict_type(ti: TypeInfo) -> str: + """Format a dict TypeInfo as bare ``map`` using resolved markdown names.""" + if ti.dict_key_type is None or ti.dict_value_type is None: + msg = f"format_dict_type requires dict key/value types, got {ti}" + raise ValueError(msg) + key = _markdown_type_name(ti.dict_key_type) + value = _markdown_type_name(ti.dict_value_type) + return f"map<{key}, {value}>" + + +def _format_union_members( + members: tuple[type[BaseModel], ...], + ctx: LinkContext | None, + separator: str = r" \| ", +) -> str: + """Format union members as individually linked/backticked names. + + Each member is resolved independently so members with pages get linked + while others render as plain code spans. *separator* is inserted between + members (default is ``\\|`` for table-cell safety). + """ + return separator.join(resolve_type_link(TypeIdentity.of(m), ctx) for m in members) + + +def format_type( + field: FieldSpec, + ctx: LinkContext | None = None, +) -> str: + """Format a field's type for markdown display, with links and qualifiers.""" + ti = field.type_info + qualifiers: list[str] = [] + + if ti.kind == TypeKind.LITERAL and ti.literal_values: + if len(ti.literal_values) == 1: + return f'`"{ti.literal_values[0]}"`' + return r" \| ".join(f'`"{v}"`' for v in ti.literal_values) + + identity = _linked_type_identity(ti) + + if ti.kind == TypeKind.UNION and ti.union_members: + display = _format_union_members(ti.union_members, ctx) + if ti.is_list: + qualifiers.append("list") + elif ti.is_dict: + if identity: + display = resolve_type_link(identity, ctx) + qualifiers.append("map") + else: + display = f"`{format_dict_type(ti)}`" + elif identity: + display = resolve_type_link(identity, ctx) + # List layers outside a NewType wrap with list<> syntax (e.g., list[PhoneNumber] + # renders as list). 
List layers inside a NewType use a (list) + # qualifier instead (e.g., Sources wrapping list[SourceItem] renders as + # Sources (list)), since the list-ness is an implementation detail of the type. + if ti.newtype_outer_list_depth > 0: + assert ti.is_list # outer list layers are a subset of total list layers + display = _wrap_list_n(display, ti.newtype_outer_list_depth) + elif ti.is_list and ti.newtype_name is not None: # list is inside the NewType + qualifiers.append("list") + elif ti.is_list: + display = _wrap_list_n(display, ti.list_depth) + else: + # Fallback: types without a linked identity. Registered primitives (int32, + # Geometry) and Pydantic types (HttpUrl) may still link to aggregate pages + # via the placement registry. Unregistered primitives render as plain code. + base = resolve_type_name(ti, "markdown") + link = _try_primitive_link(ti, base, ctx) + if link and ti.is_list: + display = _wrap_list_n(link, ti.list_depth) + elif link: + display = link + elif ti.is_list: + display = _plain_list_type(base, ti.list_depth) + else: + display = f"`{base}`" + + if not field.is_required: + qualifiers.append("optional") + + if qualifiers: + return f"{display} ({', '.join(qualifiers)})" + return display + + +def _linked_or_backticked(ti: TypeInfo, ctx: LinkContext | None) -> tuple[str, bool]: + """Return (formatted_string, has_link) for a TypeInfo component. + + Used by format_underlying_type to decide whether container types + need broken-backtick formatting (interleaving backtick runs with + linked text). + + When ``has_link`` is True, ``formatted_string`` is a markdown link + ready for broken-backtick container syntax. When False, it is a raw + name that the caller embeds inside backticks. 
+ """ + identity = _linked_type_identity(ti) + if identity and ctx: + href = ctx.resolve_link(identity) + if href: + return _code_link(identity.name, href), True + return _markdown_type_name(ti), False + + +def format_underlying_type(ti: TypeInfo, ctx: LinkContext | None = None) -> str: + """Format a NewType's underlying type for the page header, with links. + + Links enums and models that have their own pages. Does not link the + outermost NewType (which would self-reference). Dict key/value types + use full link resolution since they reference other types. + """ + if ti.kind == TypeKind.UNION and ti.union_members: + return _format_union_members(ti.union_members, ctx, separator=" | ") + + if ti.is_dict and ti.dict_key_type and ti.dict_value_type: + key_str, key_linked = _linked_or_backticked(ti.dict_key_type, ctx) + val_str, val_linked = _linked_or_backticked(ti.dict_value_type, ctx) + if key_linked or val_linked: + if not key_linked: + key_str = f"`{key_str}`" + if not val_linked: + val_str = f"`{val_str}`" + return f"`map<`{key_str}`,`{val_str}`>`" + return f"`map<{key_str}, {val_str}>`" + + # Only link enums and models -- skip is_semantic_newtype to avoid + # self-linking (this TypeInfo belongs to the NewType being rendered). 
+ identity = ( + TypeIdentity.of(ti.source_type) + if ti.kind in (TypeKind.ENUM, TypeKind.MODEL) and ti.source_type + else None + ) + if identity and ctx: + href = ctx.resolve_link(identity) + if href: + linked = _code_link(identity.name, href) + if ti.is_list: + return _wrap_list_n(linked, ti.list_depth) + return linked + + base = identity.name if identity else resolve_type_name(ti, "markdown") + if ti.is_list: + return _plain_list_type(base, ti.list_depth) + return f"`{base}`" diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/py.typed b/packages/overture-schema-codegen/src/overture/schema/codegen/py.typed new file mode 100644 index 000000000..e69de29bb diff --git a/packages/overture-schema-codegen/tests/codegen_test_support.py b/packages/overture-schema-codegen/tests/codegen_test_support.py new file mode 100644 index 000000000..64facf5a9 --- /dev/null +++ b/packages/overture-schema-codegen/tests/codegen_test_support.py @@ -0,0 +1,365 @@ +"""Shared test support for overture-schema-codegen tests. + +Provides reusable model fixtures and helpers. Pytest fixtures are in conftest.py. 
+""" + +from __future__ import annotations + +from collections.abc import Mapping +from difflib import unified_diff +from enum import Enum +from pathlib import Path +from typing import Annotated, Generic, Literal, NewType, TypeVar + +import pytest +from overture.schema.codegen.extraction.model_extraction import extract_model +from overture.schema.codegen.extraction.pydantic_extraction import extract_pydantic_type +from overture.schema.codegen.extraction.specs import ( + AnnotatedField, + EnumMemberSpec, + EnumSpec, + FieldSpec, + ModelSpec, + TypeIdentity, + UnionSpec, + is_model_class, +) +from overture.schema.codegen.extraction.type_analyzer import TypeInfo, TypeKind +from overture.schema.core.discovery import discover_models +from overture.schema.system.doc import DocumentedEnum +from overture.schema.system.field_constraint import UniqueItemsConstraint +from overture.schema.system.model_constraint import require_any_of +from overture.schema.system.primitive import ( + Geometry, + GeometryType, + GeometryTypeConstraint, + float64, + int32, +) +from overture.schema.system.ref import Id, Identified, Reference, Relationship +from overture.schema.system.string import HexColor, LanguageTag, StrippedString +from pydantic import BaseModel, EmailStr, Field, HttpUrl + +STR_TYPE = TypeInfo(base_type="str", kind=TypeKind.PRIMITIVE) + +ThemeT = TypeVar("ThemeT") +TypeT = TypeVar("TypeT") + + +class SimpleModel(BaseModel): + """A simple model.""" + + name: str + + +class FeatureBase(BaseModel, Generic[ThemeT, TypeT]): + """Base class mimicking OvertureFeature pattern for tests.""" + + theme: ThemeT + type: TypeT + + +# Separate TypeVars from ThemeT/TypeT: IdentifiedFeature models a +# non-Overture user building on Identified with their own nomenclature. 
+CategoryT = TypeVar("CategoryT") +KindT = TypeVar("KindT") + + +class IdentifiedFeature(Identified, Generic[CategoryT, KindT]): + """Feature with identity and typed category/kind.""" + + category: CategoryT + kind: KindT + + +class InstrumentFamily(str, DocumentedEnum): + """Classification by sound production method.""" + + STRING = "string", "Sound from vibrating strings" + WIND = "wind", "Sound from vibrating air column" + PERCUSSION = "percussion" + + +class SimpleKind(str, Enum): + SMALL = "small" + LARGE = "large" + + +class Instrument( + IdentifiedFeature[Literal["music"], Literal["instrument"]], +): + """A musical instrument. + + Instruments produce sound through vibration. They are classified + by how sound is produced. + """ + + name: str = Field(description="Common name") + tuning: float64 | None = Field( + None, + description=("Concert pitch in Hz.\n\nStandard tuning is 440 Hz."), + ) + num_strings: int32 | None = Field(None) + family: InstrumentFamily | None = None + color: HexColor | None = Field(None, description="Body color") + tags: Annotated[list[str], UniqueItemsConstraint()] | None = None + + +@require_any_of("name", "description") +class Venue( + IdentifiedFeature[Literal["music"], Literal["venue"]], +): + """A concert venue. + + A location where musical performances take place. 
+ """ + + name: str | None = Field(None, description="Venue name") + description: str | None = None + geometry: Annotated[ + Geometry, + GeometryTypeConstraint(GeometryType.POINT, GeometryType.POLYGON), + ] + capacity: Annotated[int, Field(ge=1)] | None = None + resident_ensemble: ( + Annotated[Id, Reference(Relationship.BELONGS_TO, Instrument)] | None + ) = None + + +class SourceItem(BaseModel): + """A source data reference.""" + + dataset: str = Field(description="Source dataset name") + + +Sources = NewType( + "Sources", + Annotated[ + list[SourceItem], + Field(min_length=1, description="Source data references"), + UniqueItemsConstraint(), + ], +) + + +class FeatureWithSources( + FeatureBase[Literal["test"], Literal["sourced"]], +): + """A feature with a Sources field.""" + + name: str = Field(description="Feature name") + sources: Sources | None = None + + +class Address(BaseModel): + """A mailing address.""" + + street: str = Field(description="Street name") + city: str = Field(description="City name") + zip_code: str | None = Field(None, description="Postal code") + + +class FeatureWithAddress( + FeatureBase[Literal["test"], Literal["addressed"]], +): + """A feature with an address field.""" + + title: str = Field(description="Feature title") + address: Address + + +class TreeNode(BaseModel): + """A recursive tree node.""" + + label: str = Field(description="Node label") + parent: TreeNode | None = None + + +class Widget(BaseModel): + active: bool + label: str = Field(description="Display label") + + +CommonNames = NewType("CommonNames", dict[LanguageTag, StrippedString]) + + +class FeatureWithDict( + FeatureBase[Literal["test"], Literal["dictfeat"]], +): + """A feature with dict fields.""" + + name: str = Field(description="Feature name") + names: CommonNames | None = Field(None, description="Localized names") + alt_names: dict[LanguageTag, StrippedString] | None = Field( + None, description="Alternate localized names" + ) + tags: dict[str, str] | None = 
Field(None, description="Arbitrary tags") + metadata: dict[str, int] = Field(description="Numeric metadata") + + +class FeatureWithUrl(FeatureBase[Literal["test"], Literal["linked"]]): + """A feature with Pydantic URL and email fields.""" + + website: HttpUrl | None = None + emails: list[EmailStr] | None = None + + +HTTP_URL_SPEC = extract_pydantic_type(HttpUrl) +EMAIL_STR_SPEC = extract_pydantic_type(EmailStr) + + +class SegmentBase(BaseModel): + """Common base for test segments.""" + + geometry: str + subtype: str + + +class RoadSegment(SegmentBase): + subtype: Literal["road"] + class_: Annotated[str, Field(alias="class")] + speed_limit: int | None = None + + +class RailSegment(SegmentBase): + subtype: Literal["rail"] + class_: Annotated[int, Field(alias="class")] + rail_gauge: float | None = None + + +class WaterSegment(SegmentBase): + subtype: Literal["water"] + + +TestSegment = Annotated[ + RoadSegment | RailSegment | WaterSegment, + Field(description="Test segment union"), +] + + +class ContactInfo(BaseModel): + """Contact information for a venue.""" + + email: str = Field(description="Email address") + phone: str | None = Field(None, description="Phone number") + + +class VenueWithContact(SegmentBase): + """A segment variant with a nested sub-model field.""" + + subtype: Literal["venue"] + contact: ContactInfo + + +TestSegmentWithSubModel = Annotated[ + RoadSegment | VenueWithContact, + Field(description="Test segment union with sub-model member"), +] + + +def make_union_spec( + name: str = "TestUnion", + *, + description: str | None = None, + annotated_fields: list[AnnotatedField] | None = None, + members: list[type[BaseModel]] | None = None, + source_annotation: object = None, + common_base: type[BaseModel] | None = None, + entry_point: str | None = None, +) -> UnionSpec: + """Build a UnionSpec with sensible defaults for tests.""" + return UnionSpec( + name=name, + description=description, + annotated_fields=annotated_fields or [], + members=members or [], 
+ discriminator_field=None, + discriminator_mapping=None, + source_annotation=source_annotation, + common_base=common_base or BaseModel, + entry_point=entry_point, + ) + + +def find_model_class(name: str, models: dict[object, object]) -> type[BaseModel]: + """Find a discovered model class by name.""" + matches = [v for v in models.values() if getattr(v, "__name__", None) == name] + assert matches, f"{name} model not found" + match = matches[0] + assert isinstance(match, type) + assert issubclass(match, BaseModel) + return match + + +def find_field(spec: ModelSpec, name: str) -> FieldSpec: + """Find a field by name in a ModelSpec, raising if missing.""" + return next(f for f in spec.fields if f.name == name) + + +def find_member(spec: EnumSpec, name: str) -> EnumMemberSpec: + """Find a member by name in an EnumSpec, raising if missing.""" + return next(m for m in spec.members if m.name == name) + + +T = TypeVar("T") + + +def lookup_by_name(mapping: dict[TypeIdentity, T], name: str) -> T: + """Look up a value in a TypeIdentity-keyed dict by name, raising KeyError if absent.""" + for tid, value in mapping.items(): + if tid.name == name: + return value + raise KeyError(name) + + +def has_name(mapping: Mapping[TypeIdentity, object], name: str) -> bool: + """Check whether a TypeIdentity-keyed mapping contains a key with the given name.""" + return any(tid.name == name for tid in mapping) + + +def assert_literal_field( + spec: ModelSpec, field_name: str, expected_value: object +) -> None: + """Assert a field is a single-value Literal with the expected value.""" + field = find_field(spec, field_name) + assert field.type_info.kind == TypeKind.LITERAL + assert field.type_info.literal_values == (expected_value,) + + +def flat_specs_from_discovery( + theme: str | None = None, +) -> list[ModelSpec]: + """Build a flat list of ModelSpecs from discovery, with entry_point set.""" + models = discover_models() + if theme: + models = {k: v for k, v in models.items() if k.theme == 
theme} + result = [] + for key, cls in models.items(): + if not is_model_class(cls): + continue + result.append(extract_model(cls, entry_point=key.entry_point)) + return result + + +def assert_golden(actual: str, golden_path: Path, *, update: bool) -> None: + """Compare rendered output against a golden file. + + When update is True, writes actual content to the golden file + instead of comparing. + """ + if update: + golden_path.parent.mkdir(parents=True, exist_ok=True) + golden_path.write_text(actual) + return + expected = golden_path.read_text() + if actual != expected: + diff = "\n".join( + unified_diff( + expected.splitlines(), + actual.splitlines(), + fromfile=str(golden_path), + tofile="actual", + lineterm="", + ) + ) + pytest.fail(f"Golden file mismatch:\n{diff}") diff --git a/packages/overture-schema-codegen/tests/conftest.py b/packages/overture-schema-codegen/tests/conftest.py new file mode 100644 index 000000000..775fc628c --- /dev/null +++ b/packages/overture-schema-codegen/tests/conftest.py @@ -0,0 +1,82 @@ +"""Shared pytest fixtures for overture-schema-codegen tests.""" + +import overture.schema.system.primitive as _system_primitive +import pytest +from click.testing import CliRunner +from codegen_test_support import find_model_class +from overture.schema.codegen.extraction.model_extraction import extract_model +from overture.schema.codegen.extraction.primitive_extraction import ( + extract_primitives, + partition_primitive_and_geometry_names, +) +from overture.schema.codegen.extraction.specs import ModelSpec +from overture.schema.codegen.markdown.renderer import ( + render_geometry_from_values, + render_primitives_from_specs, +) +from overture.schema.core.discovery import discover_models +from overture.schema.system.primitive import GeometryType +from pydantic import BaseModel + + +def pytest_addoption(parser: pytest.Parser) -> None: + parser.addoption( + "--update-golden", + action="store_true", + default=False, + help="Regenerate golden files 
instead of comparing against them", + ) + + +@pytest.fixture +def update_golden(request: pytest.FixtureRequest) -> bool: + return bool(request.config.getoption("--update-golden")) + + +@pytest.fixture +def cli_runner() -> CliRunner: + """Provide a Click CLI test runner.""" + return CliRunner() + + +@pytest.fixture +def all_discovered_models() -> dict: + """Discover and return all registered Overture models.""" + return discover_models() + + +@pytest.fixture +def building_class(all_discovered_models: dict) -> type[BaseModel]: + """Get the Building model class.""" + return find_model_class("Building", all_discovered_models) + + +@pytest.fixture +def building_spec(building_class: type[BaseModel]) -> ModelSpec: + """Extract the Building model spec.""" + return extract_model(building_class) + + +@pytest.fixture +def place_class(all_discovered_models: dict) -> type[BaseModel]: + """Get the Place model class.""" + return find_model_class("Place", all_discovered_models) + + +@pytest.fixture +def division_class(all_discovered_models: dict) -> type[BaseModel]: + """Get the Division model class.""" + return find_model_class("Division", all_discovered_models) + + +@pytest.fixture(scope="module") +def primitives_markdown() -> str: + """Render the primitives.md page from the system primitive module.""" + primitive_names, _ = partition_primitive_and_geometry_names(_system_primitive) + return render_primitives_from_specs(extract_primitives(primitive_names)) + + +@pytest.fixture(scope="module") +def geometry_markdown() -> str: + """Render the geometry.md page from system GeometryType values.""" + return render_geometry_from_values([m.value for m in GeometryType]) diff --git a/packages/overture-schema-codegen/tests/golden/markdown/common_names.md b/packages/overture-schema-codegen/tests/golden/markdown/common_names.md new file mode 100644 index 000000000..c73d708c9 --- /dev/null +++ b/packages/overture-schema-codegen/tests/golden/markdown/common_names.md @@ -0,0 +1,7 @@ +# 
CommonNames + +Underlying type: `map` + +## Used By + +- `FeatureWithDict` diff --git a/packages/overture-schema-codegen/tests/golden/markdown/feature_with_address.md b/packages/overture-schema-codegen/tests/golden/markdown/feature_with_address.md new file mode 100644 index 000000000..fdbfdc7a8 --- /dev/null +++ b/packages/overture-schema-codegen/tests/golden/markdown/feature_with_address.md @@ -0,0 +1,15 @@ +# FeatureWithAddress + +A feature with an address field. + +## Fields + +| Name | Type | Description | +| -----: | :----: | ------------- | +| `theme` | `"test"` | | +| `type` | `"addressed"` | | +| `title` | `string` | Feature title | +| `address` | `Address` | | +| `address.street` | `string` | Street name | +| `address.city` | `string` | City name | +| `address.zip_code` | `string` (optional) | Postal code | diff --git a/packages/overture-schema-codegen/tests/golden/markdown/feature_with_dict.md b/packages/overture-schema-codegen/tests/golden/markdown/feature_with_dict.md new file mode 100644 index 000000000..499787d06 --- /dev/null +++ b/packages/overture-schema-codegen/tests/golden/markdown/feature_with_dict.md @@ -0,0 +1,15 @@ +# FeatureWithDict + +A feature with dict fields. + +## Fields + +| Name | Type | Description | +| -----: | :----: | ------------- | +| `theme` | `"test"` | | +| `type` | `"dictfeat"` | | +| `name` | `string` | Feature name | +| `names` | `CommonNames` (map, optional) | Localized names | +| `alt_names` | `map` (optional) | Alternate localized names | +| `tags` | `map` (optional) | Arbitrary tags | +| `metadata` | `map` | Numeric metadata | diff --git a/packages/overture-schema-codegen/tests/golden/markdown/feature_with_sources.md b/packages/overture-schema-codegen/tests/golden/markdown/feature_with_sources.md new file mode 100644 index 000000000..c3e4bc39b --- /dev/null +++ b/packages/overture-schema-codegen/tests/golden/markdown/feature_with_sources.md @@ -0,0 +1,13 @@ +# FeatureWithSources + +A feature with a Sources field. 
+ +## Fields + +| Name | Type | Description | +| -----: | :----: | ------------- | +| `theme` | `"test"` | | +| `type` | `"sourced"` | | +| `name` | `string` | Feature name | +| `sources[]` | `Sources` (list, optional) | Source data references | +| `sources[].dataset` | `string` | Source dataset name | diff --git a/packages/overture-schema-codegen/tests/golden/markdown/hex_color.md b/packages/overture-schema-codegen/tests/golden/markdown/hex_color.md new file mode 100644 index 000000000..847a1b9a5 --- /dev/null +++ b/packages/overture-schema-codegen/tests/golden/markdown/hex_color.md @@ -0,0 +1,19 @@ +# HexColor + +A color represented as an #RRGGBB or #RGB hexadecimal string. + +For example: + +- `"#ff0000"` or `#f00` for pure red 🟥 +- `"#ffa500"` for bright orange 🟧 +- `"#000000"` or `"#000"` for black ⬛ + +Underlying type: `string` + +## Constraints + +- Allows only hexadecimal color codes (e.g., #FF0000 or #FFF). (`HexColorConstraint`, pattern: `^#[0-9A-Fa-f]{3}([0-9A-Fa-f]{3})?$`) + +## Used By + +- `Instrument` diff --git a/packages/overture-schema-codegen/tests/golden/markdown/id.md b/packages/overture-schema-codegen/tests/golden/markdown/id.md new file mode 100644 index 000000000..b2bfa2995 --- /dev/null +++ b/packages/overture-schema-codegen/tests/golden/markdown/id.md @@ -0,0 +1,15 @@ +# Id + +A unique identifier. + +Underlying type: `string` + +## Constraints + +- Minimum length: 1 +- Allows only strings that contain no whitespace characters. (`NoWhitespaceConstraint`, pattern: `^\S+$`) + +## Used By + +- `Instrument` +- `Venue` diff --git a/packages/overture-schema-codegen/tests/golden/markdown/instrument.md b/packages/overture-schema-codegen/tests/golden/markdown/instrument.md new file mode 100644 index 000000000..727f1b559 --- /dev/null +++ b/packages/overture-schema-codegen/tests/golden/markdown/instrument.md @@ -0,0 +1,20 @@ +# Instrument + +A musical instrument. + +Instruments produce sound through vibration. 
They are classified +by how sound is produced. + +## Fields + +| Name | Type | Description | +| -----: | :----: | ------------- | +| `id` | `Id` | Unique identifier | +| `category` | `"music"` | | +| `kind` | `"instrument"` | | +| `name` | `string` | Common name | +| `tuning` | `float64` (optional) | Concert pitch in Hz.

Standard tuning is 440 Hz. | +| `num_strings` | `int32` (optional) | | +| `family` | `InstrumentFamily` (optional) | | +| `color` | `HexColor` (optional) | Body color | +| `tags` | `list` (optional) | *All items must be unique. (`UniqueItemsConstraint`)* | diff --git a/packages/overture-schema-codegen/tests/golden/markdown/instrument_family.md b/packages/overture-schema-codegen/tests/golden/markdown/instrument_family.md new file mode 100644 index 000000000..d8489cc4f --- /dev/null +++ b/packages/overture-schema-codegen/tests/golden/markdown/instrument_family.md @@ -0,0 +1,13 @@ +# InstrumentFamily + +Classification by sound production method. + +## Values + +- `string` - Sound from vibrating strings +- `wind` - Sound from vibrating air column +- `percussion` + +## Used By + +- `Instrument` diff --git a/packages/overture-schema-codegen/tests/golden/markdown/simple_kind.md b/packages/overture-schema-codegen/tests/golden/markdown/simple_kind.md new file mode 100644 index 000000000..f0aca0300 --- /dev/null +++ b/packages/overture-schema-codegen/tests/golden/markdown/simple_kind.md @@ -0,0 +1,6 @@ +# SimpleKind + +## Values + +- `small` +- `large` diff --git a/packages/overture-schema-codegen/tests/golden/markdown/sources.md b/packages/overture-schema-codegen/tests/golden/markdown/sources.md new file mode 100644 index 000000000..ec0343cb6 --- /dev/null +++ b/packages/overture-schema-codegen/tests/golden/markdown/sources.md @@ -0,0 +1,14 @@ +# Sources + +Source data references + +Underlying type: `list` + +## Constraints + +- Minimum length: 1 +- All items must be unique. (`UniqueItemsConstraint`) + +## Used By + +- `FeatureWithSources` diff --git a/packages/overture-schema-codegen/tests/golden/markdown/venue.md b/packages/overture-schema-codegen/tests/golden/markdown/venue.md new file mode 100644 index 000000000..edb0578ef --- /dev/null +++ b/packages/overture-schema-codegen/tests/golden/markdown/venue.md @@ -0,0 +1,22 @@ +# Venue + +A concert venue. 
+ +A location where musical performances take place. + +## Fields + +| Name | Type | Description | +| -----: | :----: | ------------- | +| `id` | `Id` | Unique identifier | +| `category` | `"music"` | | +| `kind` | `"venue"` | | +| `name` | `string` (optional) | Venue name

*At least one of `name`, `description` must be set* | +| `description` | `string` (optional) | *At least one of `name`, `description` must be set* | +| `geometry` | `geometry` | *Allowed geometry types: Point, Polygon* | +| `capacity` | `int64` (optional) | *`≥ 1`* | +| `resident_ensemble` | `Id` (optional) | A unique identifier

*References `Instrument` (belongs to)* | + +## Constraints + +- At least one of `name`, `description` must be set diff --git a/packages/overture-schema-codegen/tests/golden/markdown/widget.md b/packages/overture-schema-codegen/tests/golden/markdown/widget.md new file mode 100644 index 000000000..c056d27a3 --- /dev/null +++ b/packages/overture-schema-codegen/tests/golden/markdown/widget.md @@ -0,0 +1,8 @@ +# Widget + +## Fields + +| Name | Type | Description | +| -----: | :----: | ------------- | +| `active` | `boolean` | | +| `label` | `string` | Display label | diff --git a/packages/overture-schema-codegen/tests/test_cli.py b/packages/overture-schema-codegen/tests/test_cli.py new file mode 100644 index 000000000..eecd45627 --- /dev/null +++ b/packages/overture-schema-codegen/tests/test_cli.py @@ -0,0 +1,434 @@ +"""Tests for CLI entrypoint.""" + +import json +import re +from pathlib import Path + +import pytest +from click.testing import CliRunner +from overture.schema.codegen.cli import cli +from overture.schema.codegen.extraction.specs import ModelSpec + + +class TestCliList: + """Tests for the list command.""" + + def test_list_command_exists(self, cli_runner: CliRunner) -> None: + """list command should be available.""" + result = cli_runner.invoke(cli, ["list"]) + assert result.exit_code == 0 + + def test_list_shows_discovered_models(self, cli_runner: CliRunner) -> None: + """list command should show discovered models.""" + result = cli_runner.invoke(cli, ["list"]) + + assert "Building" in result.output + assert "Place" in result.output + + +class TestCliGenerate: + """Tests for the generate command.""" + + def test_generate_command_exists(self, cli_runner: CliRunner) -> None: + """generate command should be available.""" + result = cli_runner.invoke(cli, ["generate", "--help"]) + + assert result.exit_code == 0 + assert "Generate" in result.output or "generate" in result.output + + def test_generate_requires_format(self, cli_runner: CliRunner) -> None: + 
"""generate command should require --format.""" + result = cli_runner.invoke(cli, ["generate"]) + assert result.exit_code != 0 + + def test_generate_markdown_to_stdout(self, cli_runner: CliRunner) -> None: + """generate --format markdown should output markdown to stdout.""" + result = cli_runner.invoke(cli, ["generate", "--format", "markdown"]) + + assert result.exit_code == 0 + assert "# Building" in result.output or "# " in result.output + + def test_generate_with_theme_filter(self, cli_runner: CliRunner) -> None: + """generate --theme should filter to specific theme.""" + result = cli_runner.invoke( + cli, ["generate", "--format", "markdown", "--theme", "buildings"] + ) + + assert result.exit_code == 0 + assert "Building" in result.output + assert "Place" not in result.output + + def test_generate_markdown_feature_at_theme_level( + self, cli_runner: CliRunner, tmp_path: Path + ) -> None: + """Markdown features go directly in theme directory.""" + result = cli_runner.invoke( + cli, + [ + "generate", + "--format", + "markdown", + "--theme", + "buildings", + "--output-dir", + str(tmp_path), + ], + ) + assert result.exit_code == 0 + + # Feature models at theme level + assert (tmp_path / "buildings" / "building.md").exists() + assert (tmp_path / "buildings" / "building_part.md").exists() + + # NOT in subdirectories + assert not (tmp_path / "buildings" / "building" / "building.md").exists() + + def test_feature_pages_have_sidebar_position( + self, cli_runner: CliRunner, tmp_path: Path + ) -> None: + """Feature pages include sidebar_position frontmatter.""" + result = cli_runner.invoke( + cli, + [ + "generate", + "--format", + "markdown", + "--theme", + "buildings", + "--output-dir", + str(tmp_path), + ], + ) + assert result.exit_code == 0 + + content = (tmp_path / "buildings" / "building.md").read_text() + assert content.startswith("---\nsidebar_position: 1\n---\n") + + def test_generate_markdown_shared_types_mirror_modules( + self, cli_runner: CliRunner, tmp_path: 
Path + ) -> None: + """Core/system types land in module-mirrored directories.""" + result = cli_runner.invoke( + cli, + ["generate", "--format", "markdown", "--output-dir", str(tmp_path)], + ) + assert result.exit_code == 0 + + core_dir = tmp_path / "core" + assert core_dir.exists(), "core/ directory should exist" + subdirs = [d.name for d in core_dir.iterdir() if d.is_dir()] + assert len(subdirs) > 0, "core/ should have subdirectories" + + def test_generate_multiple_themes_to_output_dir( + self, cli_runner: CliRunner, tmp_path: Path + ) -> None: + """generate all themes should create subdirectories for each theme.""" + result = cli_runner.invoke( + cli, + [ + "generate", + "--format", + "markdown", + "--output-dir", + str(tmp_path), + ], + ) + + assert result.exit_code == 0 + + subdirs = [d.name for d in tmp_path.iterdir() if d.is_dir()] + assert "buildings" in subdirs + assert "places" in subdirs + + def test_generate_no_duplicate_files( + self, cli_runner: CliRunner, tmp_path: Path + ) -> None: + """No type should produce duplicate output files.""" + result = cli_runner.invoke( + cli, + [ + "generate", + "--format", + "markdown", + "--output-dir", + str(tmp_path), + ], + ) + + assert result.exit_code == 0 + + all_files = list(tmp_path.rglob("*.md")) + all_paths = [str(f.relative_to(tmp_path)) for f in all_files] + assert len(all_paths) == len(set(all_paths)), ( + f"Duplicate files: {[p for p in all_paths if all_paths.count(p) > 1]}" + ) + + +class TestCliGenerateLinkIntegrity: + """Verify all markdown links resolve to existing files.""" + + def test_all_links_resolve(self, cli_runner: CliRunner, tmp_path: Path) -> None: + """Every markdown link target should exist as a file.""" + result = cli_runner.invoke( + cli, + [ + "generate", + "--format", + "markdown", + "--output-dir", + str(tmp_path), + ], + ) + + assert result.exit_code == 0 + + link_re = re.compile(r"\[.*?\]\(([^)]+\.md(?:#[^)]*)?)\)") + broken: list[str] = [] + + for md_file in 
tmp_path.rglob("*.md"): + content = md_file.read_text() + for match in link_re.finditer(content): + href = match.group(1).split("#")[0] + # Resolve relative path from the file's directory + target = (md_file.parent / href).resolve() + if not target.exists(): + rel = md_file.relative_to(tmp_path) + broken.append(f"{rel}: {href}") + + assert not broken, "Broken links:\n" + "\n".join(broken) + + +class TestCliGenerateCategoryFiles: + """Tests for _category_.json generation.""" + + def test_generates_category_files( + self, cli_runner: CliRunner, tmp_path: Path + ) -> None: + """Should generate _category_.json files in output directories.""" + result = cli_runner.invoke( + cli, + [ + "generate", + "--format", + "markdown", + "--theme", + "buildings", + "--output-dir", + str(tmp_path), + ], + ) + + assert result.exit_code == 0 + + # Theme directory should have a category file + cat_file = tmp_path / "buildings" / "_category_.json" + assert cat_file.exists() + data = json.loads(cat_file.read_text()) + assert data["label"] == "Buildings" + + def test_core_directory_has_category_file( + self, cli_runner: CliRunner, tmp_path: Path + ) -> None: + """core/ directory should have _category_.json.""" + result = cli_runner.invoke( + cli, + [ + "generate", + "--format", + "markdown", + "--output-dir", + str(tmp_path), + ], + ) + + assert result.exit_code == 0 + + cat_file = tmp_path / "core" / "_category_.json" + assert cat_file.exists() + data = json.loads(cat_file.read_text()) + assert data["label"] == "Core" + + def test_feature_dirs_positioned_before_non_feature_dirs( + self, cli_runner: CliRunner, tmp_path: Path + ) -> None: + """Feature directories should have lower position than non-feature directories.""" + result = cli_runner.invoke( + cli, + [ + "generate", + "--format", + "markdown", + "--output-dir", + str(tmp_path), + ], + ) + assert result.exit_code == 0 + + def pos(dir_name: str) -> int: + data = json.loads((tmp_path / dir_name / "_category_.json").read_text()) + 
result: int = data["position"] + return result + + # Feature directories (contain feature pages) should sort before + # non-feature directories (core, system -- shared types only) + feature_positions = [pos("buildings"), pos("places"), pos("transportation")] + non_feature_positions = [pos("core"), pos("system")] + + assert max(feature_positions) < min(non_feature_positions) + + def test_subdirectories_have_no_position( + self, cli_runner: CliRunner, tmp_path: Path + ) -> None: + """Only top-level directories get position values.""" + result = cli_runner.invoke( + cli, + [ + "generate", + "--format", + "markdown", + "--output-dir", + str(tmp_path), + ], + ) + assert result.exit_code == 0 + + data = json.loads( + (tmp_path / "core" / "scoping" / "_category_.json").read_text() + ) + assert "position" not in data + + +class TestCliGenerateEnums: + """Tests for enum generation in the generate command.""" + + def test_generate_markdown_includes_enum_files( + self, cli_runner: CliRunner, tmp_path: Path + ) -> None: + """generate --format markdown should create enum documentation files.""" + result = cli_runner.invoke( + cli, + [ + "generate", + "--format", + "markdown", + "--theme", + "buildings", + "--output-dir", + str(tmp_path), + ], + ) + + assert result.exit_code == 0 + + # Enum files exist somewhere under the buildings directory + all_md = list((tmp_path / "buildings").rglob("*.md")) + all_names = [f.stem for f in all_md] + + assert "building" in all_names + + # Should have enum files beyond the feature models + non_feature = [n for n in all_names if n not in ("building", "building_part")] + assert len(non_feature) > 0, "Should generate enum documentation files" + + +class TestCliEntryPoint: + """generate populates entry_point from discovery keys.""" + + def test_generate_sets_entry_point_on_specs( + self, cli_runner: CliRunner, monkeypatch: pytest.MonkeyPatch + ) -> None: + captured: list[ModelSpec] = [] + + def spy(feature_specs: list, schema_root: str, 
output_dir: object) -> None: + captured.extend(feature_specs) + + monkeypatch.setattr("overture.schema.codegen.cli._generate_markdown", spy) + result = cli_runner.invoke( + cli, ["generate", "--format", "markdown", "--theme", "buildings"] + ) + + assert result.exit_code == 0 + assert len(captured) > 0 + for spec in captured: + assert spec.entry_point is not None, f"{spec.name} missing entry_point" + assert ":" in spec.entry_point, ( + f"entry_point should be entry-point style: {spec.entry_point!r}" + ) + + +class TestCliHelp: + """Tests for CLI help.""" + + def test_main_help(self, cli_runner: CliRunner) -> None: + """--help should show usage information.""" + result = cli_runner.invoke(cli, ["--help"]) + + assert result.exit_code == 0 + assert "generate" in result.output + assert "list" in result.output + + +class TestGenerateWithSegment: + """Integration test: Segment union produces markdown output.""" + + def test_segment_appears_in_markdown_output( + self, cli_runner: CliRunner, tmp_path: Path + ) -> None: + """Generate markdown and verify Segment page exists.""" + result = cli_runner.invoke( + cli, + [ + "generate", + "--format", + "markdown", + "--theme", + "transportation", + "--output-dir", + str(tmp_path), + ], + ) + assert result.exit_code == 0 + + # Segment page should exist + segment_files = list(tmp_path.rglob("segment.md")) + assert len(segment_files) >= 1, f"No segment.md found in {tmp_path}" + + content = segment_files[0].read_text() + assert "# Segment" in content + assert "subtype" in content + + +class TestReverseReferences: + """Integration test: Reverse references appear in generated markdown.""" + + def test_used_by_sections_appear_in_markdown( + self, cli_runner: CliRunner, tmp_path: Path + ) -> None: + """Generate markdown and verify Used By sections appear.""" + result = cli_runner.invoke( + cli, + [ + "generate", + "--format", + "markdown", + "--theme", + "buildings", + "--output-dir", + str(tmp_path), + ], + ) + assert result.exit_code == 
0 + + # Find a supplementary type that should have Used By section + # For example, if Building references some enum or NewType + all_md = list(tmp_path.rglob("*.md")) + + # At least one supplementary type should have a Used By section + has_used_by = False + for md_file in all_md: + content = md_file.read_text() + if "## Used By" in content: + has_used_by = True + break + + assert has_used_by, "No 'Used By' sections found in any generated markdown" diff --git a/packages/overture-schema-codegen/tests/test_constraint_description.py b/packages/overture-schema-codegen/tests/test_constraint_description.py new file mode 100644 index 000000000..9961ef2b2 --- /dev/null +++ b/packages/overture-schema-codegen/tests/test_constraint_description.py @@ -0,0 +1,458 @@ +"""Tests for constraint description (model-level and field-level).""" + +from annotated_types import Ge, Gt, Interval, Le, Lt, MaxLen, MinLen +from overture.schema.codegen.extraction.field_constraints import ( + constraint_display_text, + describe_field_constraint, +) +from overture.schema.codegen.extraction.model_constraints import ( + analyze_model_constraints, +) +from overture.schema.codegen.extraction.specs import TypeIdentity +from overture.schema.codegen.extraction.type_analyzer import ConstraintSource +from overture.schema.system.model_constraint import ( + FieldEqCondition, + ForbidIfConstraint, + MinFieldsSetConstraint, + ModelConstraint, + NoExtraFieldsConstraint, + Not, + RadioGroupConstraint, + RequireAnyOfConstraint, + RequireIfConstraint, +) +from overture.schema.system.primitive import GeometryType, GeometryTypeConstraint +from overture.schema.system.ref import Reference, Relationship +from overture.schema.system.ref.id import Identified + + +def describe_model_constraints( + constraints: tuple[ModelConstraint, ...], +) -> list[str]: + descriptions, _ = analyze_model_constraints(constraints) + return descriptions + + +def field_constraint_notes( + constraints: tuple[ModelConstraint, ...], +) -> 
dict[str, list[str]]: + _, field_notes = analyze_model_constraints(constraints) + return field_notes + + +class TestDescribeSingleConstraint: + """Each constraint type produces readable prose.""" + + def test_require_any_of(self) -> None: + constraint = RequireAnyOfConstraint._create_internal( + "@require_any_of", "name", "description" + ) + result = describe_model_constraints((constraint,)) + + assert result == ["At least one of `name`, `description` must be set"] + + def test_radio_group(self) -> None: + constraint = RadioGroupConstraint._create_internal( + "@radio_group", "is_land", "is_territorial" + ) + result = describe_model_constraints((constraint,)) + + assert result == ["Exactly one of `is_land`, `is_territorial` must be `true`"] + + def test_min_fields_set(self) -> None: + constraint = MinFieldsSetConstraint._create_internal("@min_fields_set", 3) + result = describe_model_constraints((constraint,)) + + assert result == ["At least 3 fields must be set"] + + def test_require_if_field_eq(self) -> None: + constraint = RequireIfConstraint._create_internal( + "@require_if", ["admin_level"], FieldEqCondition("subtype", "country") + ) + result = describe_model_constraints((constraint,)) + + assert result == ["`admin_level` is required when `subtype` = `country`"] + + def test_require_if_negated_condition(self) -> None: + """Not(FieldEqCondition) uses not-equal sign.""" + constraint = RequireIfConstraint._create_internal( + "@require_if", + ["parent_division_id"], + Not(FieldEqCondition("subtype", "country")), + ) + result = describe_model_constraints((constraint,)) + + assert result == ["`parent_division_id` is required when `subtype` ≠ `country`"] + + def test_forbid_if_field_eq(self) -> None: + constraint = ForbidIfConstraint._create_internal( + "@forbid_if", + ["parent_division_id"], + FieldEqCondition("subtype", "country"), + ) + result = describe_model_constraints((constraint,)) + + assert result == [ + "`parent_division_id` is forbidden when `subtype` = 
`country`" + ] + + def test_multi_field_uses_plural_verb(self) -> None: + """Multiple field names produce 'are required', not 'is required'.""" + constraint = RequireIfConstraint._create_internal( + "@require_if", + ["foo", "bar"], + FieldEqCondition("flag", "on"), + ) + result = describe_model_constraints((constraint,)) + + assert result == ["`foo`, `bar` are required when `flag` = `on`"] + + +class TestDescribeFiltering: + """Filtering and fallback behavior.""" + + def test_no_extra_fields_filtered_out(self) -> None: + """@no_extra_fields produces no output.""" + constraint = NoExtraFieldsConstraint._create_internal("@no_extra_fields") + result = describe_model_constraints((constraint,)) + + assert result == [] + + def test_unknown_constraint_uses_name_fallback(self) -> None: + """Unrecognized constraint type falls back to constraint.name.""" + + class FutureConstraint(ModelConstraint): + pass + + constraint = FutureConstraint("@future_thing") + result = describe_model_constraints((constraint,)) + + assert result == ["`@future_thing`"] + + +class TestConsolidation: + """Consolidation of same-field conditional constraints.""" + + def test_consolidate_require_if_same_field(self) -> None: + """Multiple @require_if with same fields, different FieldEqCondition values, merge.""" + constraints = tuple( + RequireIfConstraint._create_internal( + "@require_if", + ["admin_level"], + FieldEqCondition("subtype", val), + ) + for val in ("country", "dependency", "macroregion") + ) + result = describe_model_constraints(constraints) + + assert result == [ + "`admin_level` is required when `subtype` is one of: " + "`country`, `dependency`, `macroregion`" + ] + + def test_no_consolidation_for_different_fields(self) -> None: + """@require_if with different field_names are not consolidated.""" + c1 = RequireIfConstraint._create_internal( + "@require_if", ["foo"], FieldEqCondition("flag", "a") + ) + c2 = RequireIfConstraint._create_internal( + "@require_if", ["bar"], 
FieldEqCondition("flag", "b") + ) + result = describe_model_constraints((c1, c2)) + + assert len(result) == 2 + + def test_no_consolidation_for_negated_conditions(self) -> None: + """Negated conditions are not consolidated.""" + c1 = RequireIfConstraint._create_internal( + "@require_if", ["foo"], Not(FieldEqCondition("flag", "a")) + ) + c2 = RequireIfConstraint._create_internal( + "@require_if", ["foo"], Not(FieldEqCondition("flag", "b")) + ) + result = describe_model_constraints((c1, c2)) + + assert len(result) == 2 + + def test_consolidate_forbid_if_same_field(self) -> None: + """Multiple @forbid_if with same fields also consolidate.""" + constraints = tuple( + ForbidIfConstraint._create_internal( + "@forbid_if", + ["secret"], + FieldEqCondition("role", val), + ) + for val in ("guest", "anonymous") + ) + result = describe_model_constraints(constraints) + + assert result == [ + "`secret` is forbidden when `role` is one of: `guest`, `anonymous`" + ] + + +class TestMixedConstraints: + """End-to-end with mixed constraint types.""" + + def test_division_like_model(self) -> None: + """Mixed constraints render in declaration order with consolidation.""" + constraints = ( + RequireAnyOfConstraint._create_internal("@require_any_of", "foo", "bar"), + ForbidIfConstraint._create_internal( + "@forbid_if", + ["parent_id"], + FieldEqCondition("subtype", "country"), + ), + RequireIfConstraint._create_internal( + "@require_if", + ["parent_id"], + Not(FieldEqCondition("subtype", "country")), + ), + RequireIfConstraint._create_internal( + "@require_if", + ["level"], + FieldEqCondition("subtype", "country"), + ), + RequireIfConstraint._create_internal( + "@require_if", + ["level"], + FieldEqCondition("subtype", "region"), + ), + RadioGroupConstraint._create_internal("@radio_group", "is_land", "is_sea"), + ) + result = describe_model_constraints(constraints) + + assert result == [ + "At least one of `foo`, `bar` must be set", + "`parent_id` is forbidden when `subtype` = `country`", + 
"`parent_id` is required when `subtype` ≠ `country`", + "`level` is required when `subtype` is one of: `country`, `region`", + "Exactly one of `is_land`, `is_sea` must be `true`", + ] + + +class TestFieldConstraintNotes: + """field_constraint_notes maps field names to their constraint descriptions.""" + + def test_require_any_of_maps_all_fields(self) -> None: + """RequireAnyOfConstraint maps each field name to the description.""" + constraint = RequireAnyOfConstraint._create_internal( + "@require_any_of", "name", "description" + ) + result = field_constraint_notes((constraint,)) + + expected = "At least one of `name`, `description` must be set" + assert result == {"name": [expected], "description": [expected]} + + def test_require_if_includes_condition_field(self) -> None: + """RequireIfConstraint includes both constrained and condition fields.""" + constraint = RequireIfConstraint._create_internal( + "@require_if", ["admin_level"], FieldEqCondition("subtype", "country") + ) + result = field_constraint_notes((constraint,)) + + expected = "`admin_level` is required when `subtype` = `country`" + assert result["admin_level"] == [expected] + assert result["subtype"] == [expected] + + def test_forbid_if_with_negated_condition_includes_condition_field(self) -> None: + """ForbidIfConstraint with Not(FieldEqCondition) includes condition field.""" + constraint = ForbidIfConstraint._create_internal( + "@forbid_if", + ["parent_id"], + Not(FieldEqCondition("subtype", "country")), + ) + result = field_constraint_notes((constraint,)) + + expected = "`parent_id` is forbidden when `subtype` ≠ `country`" + assert result["parent_id"] == [expected] + assert result["subtype"] == [expected] + + def test_consolidated_constraints_map_all_fields(self) -> None: + """Consolidated constraints map to all participating fields.""" + constraints = tuple( + RequireIfConstraint._create_internal( + "@require_if", + ["admin_level"], + FieldEqCondition("subtype", val), + ) + for val in ("country", 
"dependency") + ) + result = field_constraint_notes(constraints) + + expected = ( + "`admin_level` is required when `subtype` is one of: " + "`country`, `dependency`" + ) + assert result["admin_level"] == [expected] + assert result["subtype"] == [expected] + + def test_no_extra_fields_produces_no_annotations(self) -> None: + """NoExtraFieldsConstraint produces no field annotations.""" + constraint = NoExtraFieldsConstraint._create_internal("@no_extra_fields") + result = field_constraint_notes((constraint,)) + + assert result == {} + + def test_min_fields_set_produces_no_annotations(self) -> None: + """MinFieldsSetConstraint produces no field annotations.""" + constraint = MinFieldsSetConstraint._create_internal("@min_fields_set", 3) + result = field_constraint_notes((constraint,)) + + assert result == {} + + def test_radio_group_maps_all_fields(self) -> None: + """RadioGroupConstraint maps each field name to the description.""" + constraint = RadioGroupConstraint._create_internal( + "@radio_group", "is_land", "is_sea" + ) + result = field_constraint_notes((constraint,)) + + expected = "Exactly one of `is_land`, `is_sea` must be `true`" + assert result == {"is_land": [expected], "is_sea": [expected]} + + def test_multiple_constraints_on_one_field(self) -> None: + """Field appearing in multiple constraints gets all descriptions.""" + c1 = RequireAnyOfConstraint._create_internal( + "@require_any_of", "name", "description" + ) + c2 = RequireIfConstraint._create_internal( + "@require_if", ["name"], FieldEqCondition("subtype", "venue") + ) + result = field_constraint_notes((c1, c2)) + + assert len(result["name"]) == 2 + + +class TestDescribeFieldConstraint: + """Tests for describe_field_constraint readable output.""" + + def test_ge(self) -> None: + assert describe_field_constraint(Ge(ge=0)) == "`≥ 0`" + + def test_le(self) -> None: + assert describe_field_constraint(Le(le=100)) == "`≤ 100`" + + def test_gt(self) -> None: + assert describe_field_constraint(Gt(gt=0)) == 
"`> 0`" + + def test_lt(self) -> None: + assert describe_field_constraint(Lt(lt=100)) == "`< 100`" + + def test_min_len(self) -> None: + assert describe_field_constraint(MinLen(min_length=1)) == "Minimum length: 1" + + def test_max_len(self) -> None: + assert describe_field_constraint(MaxLen(max_length=10)) == "Maximum length: 10" + + def test_interval_closed(self) -> None: + assert describe_field_constraint(Interval(ge=0, le=100)) == "`0 ≤ x ≤ 100`" + + def test_interval_open(self) -> None: + assert describe_field_constraint(Interval(gt=0, lt=100)) == "`0 < x < 100`" + + def test_interval_half_open(self) -> None: + assert describe_field_constraint(Interval(ge=0, lt=100)) == "`0 ≤ x < 100`" + + def test_interval_lower_only(self) -> None: + assert describe_field_constraint(Interval(ge=0)) == "`≥ 0`" + + def test_interval_upper_only(self) -> None: + assert describe_field_constraint(Interval(le=100)) == "`≤ 100`" + + def test_geometry_type_single(self) -> None: + constraint = GeometryTypeConstraint(GeometryType.POINT) + assert describe_field_constraint(constraint) == "Allowed geometry types: Point" + + def test_geometry_type_multiple(self) -> None: + constraint = GeometryTypeConstraint(GeometryType.POINT, GeometryType.POLYGON) + assert ( + describe_field_constraint(constraint) + == "Allowed geometry types: Point, Polygon" + ) + + def test_geometry_type_all_types(self) -> None: + constraint = GeometryTypeConstraint( + GeometryType.POINT, + GeometryType.LINE_STRING, + GeometryType.POLYGON, + ) + assert ( + describe_field_constraint(constraint) + == "Allowed geometry types: LineString, Point, Polygon" + ) + + def test_reference_belongs_to(self) -> None: + class Target(Identified): + pass + + constraint = Reference(Relationship.BELONGS_TO, Target) + assert ( + describe_field_constraint(constraint) == "References `Target` (belongs to)" + ) + + def test_reference_connects_to(self) -> None: + class Other(Identified): + pass + + constraint = 
Reference(Relationship.CONNECTS_TO, Other) + assert ( + describe_field_constraint(constraint) == "References `Other` (connects to)" + ) + + def test_reference_link_fn_receives_type_identity(self) -> None: + """link_fn callback receives TypeIdentity wrapping the relatee class.""" + + class Target(Identified): + pass + + received: list[TypeIdentity] = [] + + def link_fn(tid: TypeIdentity) -> str: + received.append(tid) + return f"[`{tid.name}`](link)" + + constraint = Reference(Relationship.BELONGS_TO, Target) + result = describe_field_constraint(constraint, link_fn=link_fn) + + assert len(received) == 1 + assert received[0].obj is Target + assert received[0].name == "Target" + assert result == "References [`Target`](link) (belongs to)" + + def test_reference_link_fn_used_in_output(self) -> None: + """link_fn return value appears verbatim in the description.""" + + class Target(Identified): + pass + + constraint = Reference(Relationship.CONNECTS_TO, Target) + result = describe_field_constraint( + constraint, link_fn=lambda tid: f"[`{tid.name}`](path/to/target)" + ) + assert result == "References [`Target`](path/to/target) (connects to)" + + +class TestConstraintDisplayText: + """constraint_display_text forwards link_fn to describe_field_constraint.""" + + def test_link_fn_forwarded_to_reference_constraint(self) -> None: + """link_fn is forwarded when constraint is a Reference.""" + + class Target(Identified): + pass + + constraint = Reference(Relationship.BELONGS_TO, Target) + cs = ConstraintSource(source_ref=None, source_name=None, constraint=constraint) + + received: list[TypeIdentity] = [] + + def link_fn(tid: TypeIdentity) -> str: + received.append(tid) + return f"[`{tid.name}`](link)" + + result = constraint_display_text(cs, link_fn=link_fn) + + assert len(received) == 1 + assert received[0].obj is Target + assert result == "References [`Target`](link) (belongs to)" diff --git a/packages/overture-schema-codegen/tests/test_enum_extraction.py 
b/packages/overture-schema-codegen/tests/test_enum_extraction.py new file mode 100644 index 000000000..2e5367e3b --- /dev/null +++ b/packages/overture-schema-codegen/tests/test_enum_extraction.py @@ -0,0 +1,149 @@ +"""Tests for enum extraction.""" + +from enum import Enum + +from codegen_test_support import find_member +from overture.schema.codegen.extraction.enum_extraction import extract_enum +from overture.schema.codegen.extraction.specs import EnumMemberSpec, EnumSpec +from overture.schema.system.doc import DocumentedEnum + + +class TestEnumMemberSpec: + """Tests for EnumMemberSpec dataclass.""" + + def test_stores_name_value_description(self) -> None: + """EnumMemberSpec should store name, value, and description.""" + member = EnumMemberSpec( + name="GABLED", value="gabled", description="A gabled roof" + ) + + assert member.name == "GABLED" + assert member.value == "gabled" + assert member.description == "A gabled roof" + + def test_description_can_be_none(self) -> None: + """EnumMemberSpec description should be optional.""" + member = EnumMemberSpec(name="FLAT", value="flat", description=None) + + assert member.description is None + + +class TestEnumSpec: + """Tests for EnumSpec dataclass.""" + + def test_stores_name_description_members(self) -> None: + """EnumSpec should store name, description, and members list.""" + members = [ + EnumMemberSpec(name="A", value="a", description=None), + EnumMemberSpec(name="B", value="b", description="The letter B"), + ] + + spec = EnumSpec( + name="Letters", description="A collection of letters", members=members + ) + + assert spec.name == "Letters" + assert spec.description == "A collection of letters" + assert len(spec.members) == 2 + + +class TestExtractEnumSimple: + """Tests for extract_enum with simple str Enum classes.""" + + def test_extracts_simple_str_enum(self) -> None: + """Should extract name, description, and members from simple str Enum.""" + + class RoofShape(str, Enum): + """The shape of the roof.""" + + 
FLAT = "flat" + GABLED = "gabled" + DOMED = "dome" + + result = extract_enum(RoofShape) + + assert result.name == "RoofShape" + assert result.description == "The shape of the roof." + assert len(result.members) == 3 + + # Check member extraction + flat = find_member(result, "FLAT") + assert flat.value == "flat" + assert flat.description is None + + gabled = find_member(result, "GABLED") + assert gabled.value == "gabled" + + def test_enum_without_docstring(self) -> None: + """Should handle enum without docstring.""" + + class SimpleEnum(str, Enum): + A = "a" + B = "b" + + result = extract_enum(SimpleEnum) + + assert result.name == "SimpleEnum" + assert result.description is None + + +class TestExtractEnumDocumented: + """Tests for extract_enum with DocumentedEnum classes.""" + + def test_extracts_documented_enum_with_member_descriptions(self) -> None: + """Should extract per-member descriptions from DocumentedEnum.""" + + class Side(str, DocumentedEnum): + """The side on which something appears.""" + + LEFT = ("left", "On the left side") + RIGHT = ("right", "On the right side") + + result = extract_enum(Side) + + assert result.name == "Side" + assert result.description == "The side on which something appears." 
+ assert len(result.members) == 2 + + left = find_member(result, "LEFT") + assert left.value == "left" + assert left.description == "On the left side" + + right = find_member(result, "RIGHT") + assert right.value == "right" + assert right.description == "On the right side" + + def test_documented_enum_with_mixed_documentation(self) -> None: + """DocumentedEnum can have some members documented and others not.""" + + class ConnectionState(str, DocumentedEnum): + """Connection states.""" + + CONNECTED = "connected" + DISCONNECTED = "disconnected" + QUIESCING = ("quiescing", "Gracefully shutting down") + + result = extract_enum(ConnectionState) + + connected = find_member(result, "CONNECTED") + assert connected.value == "connected" + assert connected.description is None + + quiescing = find_member(result, "QUIESCING") + assert quiescing.value == "quiescing" + assert quiescing.description == "Gracefully shutting down" + + +class TestEnumSpecSourceType: + """Tests for source_type on EnumSpec.""" + + def test_enum_spec_source_type_defaults_to_none(self) -> None: + spec = EnumSpec(name="Test", description=None) + assert spec.source_type is None + + def test_extract_enum_sets_source_type(self) -> None: + class Color(str, Enum): + RED = "red" + + spec = extract_enum(Color) + assert spec.source_type is Color diff --git a/packages/overture-schema-codegen/tests/test_example_loader.py b/packages/overture-schema-codegen/tests/test_example_loader.py new file mode 100644 index 000000000..19a562676 --- /dev/null +++ b/packages/overture-schema-codegen/tests/test_example_loader.py @@ -0,0 +1,1006 @@ +"""Tests for example_loader module.""" + +import logging +import sys +import types +from collections.abc import Iterator +from pathlib import Path +from textwrap import dedent +from typing import Annotated, Literal + +import pytest +from overture.schema.codegen.extraction.examples import ( + ExampleRecord, + _denull, + _inject_literal_fields, + collect_dict_paths, + flatten_example, + 
load_examples, + load_examples_from_toml, + order_example_rows, + resolve_pyproject_path, + validate_example, +) +from overture.schema.codegen.extraction.specs import FieldSpec, ModelSpec +from overture.schema.codegen.extraction.type_analyzer import TypeInfo, TypeKind +from pydantic import BaseModel, ConfigDict, Field, Tag, ValidationError + + +class TestFlattenExample: + """Tests for flatten_example function.""" + + def test_simple_fields(self) -> None: + """Flatten simple key-value pairs.""" + raw = {"id": "123", "version": 1, "name": "test"} + result = flatten_example(raw) + assert result == [("id", "123"), ("version", 1), ("name", "test")] + + def test_nested_dict(self) -> None: + """Flatten nested dict to dot notation.""" + raw = {"names": {"primary": "foo", "common": {"en": "bar"}}} + result = flatten_example(raw) + assert result == [ + ("names.primary", "foo"), + ("names.common.en", "bar"), + ] + + def test_list_of_dicts(self) -> None: + """Flatten list of dicts with array notation.""" + raw = {"sources": [{"dataset": "OSM", "record_id": "w123"}]} + result = flatten_example(raw) + assert result == [ + ("sources[0].dataset", "OSM"), + ("sources[0].record_id", "w123"), + ] + + def test_bbox_flattened_at_top_level(self) -> None: + """Bbox fields are flattened like any other nested dict.""" + raw = { + "id": "123", + "bbox": {"xmin": -176.6, "xmax": -176.64}, + "version": 1, + } + result = flatten_example(raw) + assert result == [ + ("id", "123"), + ("bbox.xmin", -176.6), + ("bbox.xmax", -176.64), + ("version", 1), + ] + + def test_plain_list_kept_as_value(self) -> None: + """Plain lists (non-dict items) are kept as values.""" + raw = {"phones": ["+1234", "+5678"]} + result = flatten_example(raw) + assert result == [("phones", ["+1234", "+5678"])] + + def test_empty_dict(self) -> None: + """Empty dict produces empty list.""" + raw: dict[str, object] = {} + result = flatten_example(raw) + assert result == [] + + def test_empty_list(self) -> None: + """Empty list 
is kept as value.""" + raw: dict[str, object] = {"tags": []} + result = flatten_example(raw) + assert result == [("tags", [])] + + def test_list_of_list_of_dicts(self) -> None: + """Flatten list[list[dict]] with double-index notation.""" + raw = { + "hierarchies": [ + [ + {"division_id": "aaa", "name": "Country"}, + {"division_id": "bbb", "name": "Region"}, + ], + ] + } + result = flatten_example(raw) + assert result == [ + ("hierarchies[0][0].division_id", "aaa"), + ("hierarchies[0][0].name", "Country"), + ("hierarchies[0][1].division_id", "bbb"), + ("hierarchies[0][1].name", "Region"), + ] + + def test_multiple_list_items(self) -> None: + """Handle multiple items in list of dicts.""" + raw = { + "sources": [ + {"dataset": "OSM", "confidence": 0.9}, + {"dataset": "MSFT", "confidence": 0.8}, + ] + } + result = flatten_example(raw) + assert result == [ + ("sources[0].dataset", "OSM"), + ("sources[0].confidence", 0.9), + ("sources[1].dataset", "MSFT"), + ("sources[1].confidence", 0.8), + ] + + def test_dict_field_kept_as_leaf(self) -> None: + """Dict values at dict_paths are kept as leaf values.""" + raw = { + "name": "test", + "tags": {"color": "red", "size": "large"}, + } + result = flatten_example(raw, dict_paths=frozenset({"tags"})) + assert result == [ + ("name", "test"), + ("tags", {"color": "red", "size": "large"}), + ] + + def test_nested_dict_path_kept_as_leaf(self) -> None: + """Dict values at nested dict_paths are kept as leaf values.""" + raw = { + "names": { + "primary": "Tower", + "common": {"en": "Tower", "fr": "Tour"}, + }, + } + result = flatten_example(raw, dict_paths=frozenset({"names.common"})) + assert result == [ + ("names.primary", "Tower"), + ("names.common", {"en": "Tower", "fr": "Tour"}), + ] + + def test_empty_dict_paths_preserves_behavior(self) -> None: + """Empty dict_paths (default) recurses all dicts as before.""" + raw = {"tags": {"color": "red"}} + result = flatten_example(raw) + assert result == [("tags.color", "red")] + + def 
test_dict_inside_list_kept_as_leaf(self) -> None: + """Dict at indexed path matches schema path in dict_paths.""" + raw = { + "items": [ + {"name": "a", "tags": {"color": "red"}}, + {"name": "b", "tags": {"size": "large"}}, + ], + } + result = flatten_example(raw, dict_paths=frozenset({"items[].tags"})) + assert result == [ + ("items[0].name", "a"), + ("items[0].tags", {"color": "red"}), + ("items[1].name", "b"), + ("items[1].tags", {"size": "large"}), + ] + + +class TestOrderExampleRows: + """Tests for order_example_rows function.""" + + def test_order_by_field_names(self) -> None: + """Order rows by position in field_names.""" + flat_rows = [("version", 1), ("id", "123"), ("name", "test")] + field_names = ["id", "name", "version"] + result = order_example_rows(flat_rows, field_names) + assert result == [("id", "123"), ("name", "test"), ("version", 1)] + + def test_extract_base_field_from_dot_notation(self) -> None: + """Extract base field from dotted keys.""" + flat_rows = [ + ("names.primary", "foo"), + ("id", "123"), + ("names.common.en", "bar"), + ] + field_names = ["id", "names"] + result = order_example_rows(flat_rows, field_names) + assert result == [ + ("id", "123"), + ("names.primary", "foo"), + ("names.common.en", "bar"), + ] + + def test_extract_base_field_from_array_notation(self) -> None: + """Extract base field from array notation.""" + flat_rows = [ + ("sources[0].dataset", "OSM"), + ("id", "123"), + ("sources[0].record_id", "w123"), + ("sources[1].dataset", "MSFT"), + ] + field_names = ["id", "sources"] + result = order_example_rows(flat_rows, field_names) + assert result == [ + ("id", "123"), + ("sources[0].dataset", "OSM"), + ("sources[0].record_id", "w123"), + ("sources[1].dataset", "MSFT"), + ] + + def test_order_with_mixed_notation(self) -> None: + """Order rows with mixed simple, dotted, and array notation.""" + flat_rows = [ + ("version", 1), + ("sources[0].dataset", "OSM"), + ("id", "123"), + ("names.primary", "foo"), + ] + field_names = 
["id", "names", "sources", "version"] + result = order_example_rows(flat_rows, field_names) + assert result == [ + ("id", "123"), + ("names.primary", "foo"), + ("sources[0].dataset", "OSM"), + ("version", 1), + ] + + def test_unknown_fields_sort_to_end(self) -> None: + """Unknown fields sort to end, maintaining relative order.""" + flat_rows = [ + ("unknown2", "b"), + ("id", "123"), + ("unknown1", "a"), + ("version", 1), + ] + field_names = ["id", "version"] + result = order_example_rows(flat_rows, field_names) + assert result == [ + ("id", "123"), + ("version", 1), + ("unknown2", "b"), + ("unknown1", "a"), + ] + + +class TestLoadExamplesFromToml: + """Tests for load_examples_from_toml function.""" + + def test_load_example_list(self, tmp_path: Path) -> None: + """Load examples for a model from TOML.""" + toml_path = tmp_path / "pyproject.toml" + toml_path.write_text( + dedent(""" + [project] + name = "test-package" + + [[examples.Building]] + id = "123" + version = 1 + + [[examples.Building]] + id = "456" + version = 2 + """) + ) + + result = load_examples_from_toml(toml_path, "Building") + assert len(result) == 2 + assert result[0] == {"id": "123", "version": 1} + assert result[1] == {"id": "456", "version": 2} + + def test_model_not_found_returns_empty(self, tmp_path: Path) -> None: + """Return empty list when model has no examples.""" + toml_path = tmp_path / "pyproject.toml" + toml_path.write_text( + dedent(""" + [project] + name = "test-package" + + [[examples.Building]] + id = "123" + """) + ) + + result = load_examples_from_toml(toml_path, "Road") + assert result == [] + + def test_no_examples_section_returns_empty(self, tmp_path: Path) -> None: + """Return empty list when no examples section exists.""" + toml_path = tmp_path / "pyproject.toml" + toml_path.write_text( + dedent(""" + [project] + name = "test-package" + """) + ) + + result = load_examples_from_toml(toml_path, "Building") + assert result == [] + + +class MockProject: + """A temporary project 
directory with registered mock modules.""" + + def __init__(self, root: Path, pyproject: Path, mod_name: str) -> None: + self.root = root + self.pyproject = pyproject + self.mod_name = mod_name + self._registered_modules: list[str] = [mod_name] + + def write_pyproject(self, content: str) -> None: + self.pyproject.write_text(content) + + def add_submodule(self, *subdirs: str) -> str: + """Register a deeper module under this project's src directory. + + Returns the module name for use in __module__ attributes. + """ + pkg_dir = self.root / "src" / Path(*subdirs) + pkg_dir.mkdir(parents=True, exist_ok=True) + module_file = pkg_dir / "module.py" + module_file.write_text("# module") + + sub_mod_name = f"{self.mod_name}_{'_'.join(subdirs)}" + mod = types.ModuleType(sub_mod_name) + mod.__file__ = str(module_file) + sys.modules[sub_mod_name] = mod + self._registered_modules.append(sub_mod_name) + return sub_mod_name + + def cleanup(self) -> None: + for name in self._registered_modules: + sys.modules.pop(name, None) + + +@pytest.fixture +def mock_project(tmp_path: Path) -> Iterator[MockProject]: + """Create a project directory with a mock module registered in sys.modules. + + Yields a MockProject with root, pyproject path, and mod_name. + Writes a minimal pyproject.toml by default; tests can overwrite via + ``project.write_pyproject()``. 
+ """ + root = tmp_path / "project" + root.mkdir() + pyproject = root / "pyproject.toml" + pyproject.write_text("[project]\nname = 'test'") + + src_dir = root / "src" + src_dir.mkdir() + module_file = src_dir / "module.py" + module_file.write_text("# module") + + mod_name = f"_test_mock_{id(tmp_path)}" + mod = types.ModuleType(mod_name) + mod.__file__ = str(module_file) + sys.modules[mod_name] = mod + + project = MockProject(root=root, pyproject=pyproject, mod_name=mod_name) + yield project + project.cleanup() + + +class TestResolvePyprojectPath: + """Tests for resolve_pyproject_path function.""" + + def test_finds_pyproject_in_parent_dirs(self, mock_project: MockProject) -> None: + """Walk up from module location to find pyproject.toml.""" + deeper_mod = mock_project.add_submodule("pkg") + + class MockModel: + __module__ = deeper_mod + + result = resolve_pyproject_path(MockModel) + assert result == mock_project.pyproject + + def test_returns_none_when_not_found(self, tmp_path: Path) -> None: + """Return None when pyproject.toml doesn't exist.""" + module_dir = tmp_path / "src" + module_dir.mkdir() + module_file = module_dir / "module.py" + module_file.write_text("# module") + + mod_name = f"_test_resolve_nf_{id(tmp_path)}" + mod = types.ModuleType(mod_name) + mod.__file__ = str(module_file) + sys.modules[mod_name] = mod + try: + + class MockModel: + __module__ = mod_name + + result = resolve_pyproject_path(MockModel) + assert result is None + finally: + sys.modules.pop(mod_name, None) + + def test_returns_none_when_no_module(self) -> None: + """Return None when model's module is not in sys.modules.""" + + class MockModel: + __module__ = "_nonexistent_module_for_test" + + result = resolve_pyproject_path(MockModel) + assert result is None + + +class TestLoadExamples: + """Tests for load_examples entry point.""" + + def test_end_to_end(self, mock_project: MockProject) -> None: + """Load, flatten, and order examples end-to-end.""" + mock_project.write_pyproject( + 
dedent(""" + [project] + name = "test" + + [[examples.Building]] + version = 1 + names = { primary = "Tower" } + id = "123" + + [examples.Building.bbox] + xmin = 1.0 + xmax = 2.0 + + [[examples.Building.sources]] + dataset = "OSM" + record_id = "w456" + """) + ) + + class MockModel(BaseModel): + __module__ = mock_project.mod_name + id: str + version: int + names: dict[str, object] + sources: list[dict[str, object]] + + field_names = ["id", "bbox", "names", "sources", "version"] + result = load_examples(MockModel, "Building", field_names) + + assert len(result) == 1 + record = result[0] + assert isinstance(record, ExampleRecord) + + assert record.rows == [ + ("id", "123"), + ("bbox.xmin", 1.0), + ("bbox.xmax", 2.0), + ("names.primary", "Tower"), + ("sources[0].dataset", "OSM"), + ("sources[0].record_id", "w456"), + ("version", 1), + ] + + def test_returns_empty_on_missing_pyproject(self) -> None: + """Return empty list when model's module not in sys.modules.""" + + class MockModel(BaseModel): + __module__ = "_nonexistent_module_for_load_test" + + result = load_examples(MockModel, "Building", ["id"]) + assert result == [] + + def test_returns_empty_on_missing_model(self, mock_project: MockProject) -> None: + """Return empty list when model has no examples.""" + + class MockModel(BaseModel): + __module__ = mock_project.mod_name + + result = load_examples(MockModel, "Building", ["id"]) + assert result == [] + + def test_invalid_examples_skipped_with_warning( + self, mock_project: MockProject, caplog: pytest.LogCaptureFixture + ) -> None: + """Invalid examples are skipped and warning logged.""" + mock_project.write_pyproject( + dedent(""" + [project] + name = "test" + + [[examples.MockModel]] + name = "valid" + count = 1 + + [[examples.MockModel]] + name = "invalid" + count = "not_an_int" + + [[examples.MockModel]] + name = "also_valid" + count = 2 + """) + ) + + class MockModel(BaseModel): + __module__ = mock_project.mod_name + name: str + count: int + + 
caplog.set_level(logging.WARNING) + + result = load_examples(MockModel, "MockModel", ["name", "count"]) + + assert len(result) == 2 + assert result[0].rows == [("name", "valid"), ("count", 1)] + assert result[1].rows == [("name", "also_valid"), ("count", 2)] + + assert any( + "MockModel" in record.message + and "validation" in record.message.lower() + and str(mock_project.pyproject) in record.message + for record in caplog.records + ) + + def test_dict_paths_keep_dicts_as_leaves(self, mock_project: MockProject) -> None: + """Dict fields listed in dict_paths stay as leaf values.""" + mock_project.write_pyproject( + dedent(""" + [project] + name = "test" + + [[examples.MockModel]] + name = "Tower" + + [examples.MockModel.tags] + color = "red" + size = "large" + """) + ) + + class MockModel(BaseModel): + __module__ = mock_project.mod_name + name: str + tags: dict[str, str] + + result = load_examples( + MockModel, + "MockModel", + ["name", "tags"], + dict_paths=frozenset({"tags"}), + ) + + assert len(result) == 1 + assert result[0].rows == [ + ("name", "Tower"), + ("tags", {"color": "red", "size": "large"}), + ] + + def test_denulled_values_in_output(self, mock_project: MockProject) -> None: + """Flattened output contains None not "null" strings.""" + mock_project.write_pyproject( + dedent(""" + [project] + name = "test" + + [[examples.MockModel]] + name = "test" + value = "null" + """) + ) + + class MockModel(BaseModel): + __module__ = mock_project.mod_name + name: str + value: int | None + + result = load_examples(MockModel, "MockModel", ["name", "value"]) + + assert len(result) == 1 + assert result[0].rows == [("name", "test"), ("value", None)] + + +class TestDenull: + """Tests for _denull function.""" + + def test_converts_null_string_to_none(self) -> None: + """Top-level "null" strings become None.""" + assert _denull({"a": "null"}) == {"a": None} + + def test_nested_dict(self) -> None: + """Recurse into nested dicts.""" + data = {"a": {"b": "null"}} + assert 
_denull(data) == {"a": {"b": None}} + + def test_list_of_dicts(self) -> None: + """Recurse into dicts inside lists.""" + data = {"items": [{"x": "null"}]} + assert _denull(data) == {"items": [{"x": None}]} + + def test_mixed_types_unchanged(self) -> None: + """Non-"null" strings, ints, bools, and plain lists pass through.""" + data = { + "name": "hello", + "count": 42, + "flag": True, + "tags": ["a", "b"], + "score": 3.14, + } + assert _denull(data) == data + + def test_no_mutation(self) -> None: + """Original dict is not modified.""" + original = {"a": "null", "b": {"c": "null"}} + _denull(original) + assert original == {"a": "null", "b": {"c": "null"}} + + def test_empty_dict(self) -> None: + """Empty dict returns empty dict.""" + assert _denull({}) == {} + + def test_deeply_nested(self) -> None: + """Handle multiple levels of nesting.""" + data = {"a": {"b": {"c": "null"}}} + assert _denull(data) == {"a": {"b": {"c": None}}} + + def test_null_strings_in_plain_list(self) -> None: + """Convert "null" strings inside plain lists.""" + data = {"tags": ["a", "null", "b"]} + assert _denull(data) == {"tags": ["a", None, "b"]} + + +class TestInjectLiteralFields: + """Tests for _inject_literal_fields function.""" + + def test_injects_single_value_literal(self) -> None: + """Inject field with single-value Literal annotation.""" + + class MockModel(BaseModel): + theme: Literal["buildings"] + name: str + + data = {"name": "Tower"} + result = _inject_literal_fields(MockModel.model_fields, data) + assert result == {"name": "Tower", "theme": "buildings"} + + def test_skips_non_literal_field(self) -> None: + """Do not inject fields without Literal annotations.""" + + class MockModel(BaseModel): + name: str + count: int + + data = {"name": "Tower"} + result = _inject_literal_fields(MockModel.model_fields, data) + assert result == {"name": "Tower"} + + def test_skips_already_present_field(self) -> None: + """Do not overwrite fields already in data.""" + + class 
MockModel(BaseModel): + theme: Literal["buildings"] + name: str + + data = {"theme": "custom", "name": "Tower"} + result = _inject_literal_fields(MockModel.model_fields, data) + assert result == {"theme": "custom", "name": "Tower"} + + def test_respects_validation_alias(self) -> None: + """Use validation_alias when injecting.""" + + class MockModel(BaseModel): + class_: Literal["building"] = Field(validation_alias="class") + name: str + + data = {"name": "Tower"} + result = _inject_literal_fields(MockModel.model_fields, data) + assert result == {"name": "Tower", "class": "building"} + + def test_no_mutation(self) -> None: + """Original data dict is not modified.""" + + class MockModel(BaseModel): + theme: Literal["buildings"] + name: str + + data = {"name": "Tower"} + original_data = data.copy() + _inject_literal_fields(MockModel.model_fields, data) + assert data == original_data + + def test_multiple_literal_fields(self) -> None: + """Inject multiple Literal fields.""" + + class MockModel(BaseModel): + theme: Literal["buildings"] + type: Literal["building"] + name: str + + data = {"name": "Tower"} + result = _inject_literal_fields(MockModel.model_fields, data) + assert result == {"name": "Tower", "theme": "buildings", "type": "building"} + + def test_skips_multi_value_literal(self) -> None: + """Do not inject Literal with multiple values.""" + + class MockModel(BaseModel): + status: Literal["active", "inactive"] + name: str + + data = {"name": "Tower"} + result = _inject_literal_fields(MockModel.model_fields, data) + assert result == {"name": "Tower"} + + def test_respects_alias_fallback(self) -> None: + """Fall back to alias if validation_alias not set.""" + + class MockModel(BaseModel): + class_: Literal["building"] = Field(alias="class") + name: str + + data = {"name": "Tower"} + result = _inject_literal_fields(MockModel.model_fields, data) + assert result == {"name": "Tower", "class": "building"} + + def test_unwraps_optional_literal(self) -> None: + """Inject 
Optional[Literal["x"]] fields (union-wrapped by Pydantic).""" + + class MockModel(BaseModel): + theme: Literal["buildings"] | None = None + name: str + + data = {"name": "Tower"} + result = _inject_literal_fields(MockModel.model_fields, data) + assert result == {"name": "Tower", "theme": "buildings"} + + +class TestValidateExample: + """Tests for validate_example function.""" + + def test_valid_data_passes(self) -> None: + """Valid data is validated and denulled dict returned.""" + + class MockModel(BaseModel): + name: str + count: int + + raw = {"name": "test", "count": 42} + result = validate_example(MockModel, raw) + assert result == {"name": "test", "count": 42} + + def test_invalid_data_raises_validation_error(self) -> None: + """Invalid data raises ValidationError.""" + + class MockModel(BaseModel): + count: int + + raw = {"count": "not_an_int"} + with pytest.raises(ValidationError): + validate_example(MockModel, raw) + + def test_denulled_dict_returned(self) -> None: + """Denulled dict is returned, not raw or preprocessed.""" + + class MockModel(BaseModel): + name: str + value: int | None + + raw = {"name": "test", "value": "null"} + result = validate_example(MockModel, raw) + assert result == {"name": "test", "value": None} + + def test_literals_injected_before_validation(self) -> None: + """Missing Literal fields are injected before validation.""" + + class MockModel(BaseModel): + theme: Literal["buildings"] + name: str + + raw = {"name": "Tower"} + result = validate_example(MockModel, raw) + # Returned dict is denulled, NOT preprocessed (no injected literals) + assert result == {"name": "Tower"} + + +class TestValidateExampleWithUnion: + """Tests for validate_example with discriminated unions via TypeAdapter.""" + + def test_validates_union_via_type_adapter(self) -> None: + """TypeAdapter validates against a discriminated union.""" + + class Dog(BaseModel): + kind: Literal["dog"] + bark: str + + class Cat(BaseModel): + kind: Literal["cat"] + purr: bool + 
+ PetUnion = Annotated[ + Annotated[Dog, Tag("dog")] | Annotated[Cat, Tag("cat")], + Field(discriminator="kind"), + ] + + raw = {"kind": "dog", "bark": "woof"} + result = validate_example(PetUnion, raw, model_fields=Dog.model_fields) + assert result == {"kind": "dog", "bark": "woof"} + + def test_invalid_union_example_raises(self) -> None: + """Invalid data against union raises ValidationError.""" + + class Dog(BaseModel): + kind: Literal["dog"] + bark: str + + class Cat(BaseModel): + kind: Literal["cat"] + purr: bool + + PetUnion = Annotated[ + Annotated[Dog, Tag("dog")] | Annotated[Cat, Tag("cat")], + Field(discriminator="kind"), + ] + + raw = {"kind": "dog", "bark": 42} # bark should be str + with pytest.raises(ValidationError): + validate_example(PetUnion, raw, model_fields=Dog.model_fields) + + def test_null_cross_arm_fields_accepted(self) -> None: + """Null fields from other union arms are accepted in flat-schema examples. + + Parquet files have columns for all union arms. A road segment row + includes ``rail_flags=null`` because the column exists in the table. + Validation should accept these cross-arm nulls. 
+ """ + + class _Base(BaseModel): + model_config = ConfigDict(extra="forbid") + kind: str + name: str + + class Dog(_Base): + kind: Literal["dog"] + bark: str | None = None + + class Cat(_Base): + kind: Literal["cat"] + purr: bool | None = None + + PetUnion = Annotated[ + Annotated[Dog, Tag("dog")] | Annotated[Cat, Tag("cat")], + Field(discriminator="kind"), + ] + + # Flat schema: Dog example includes Cat's "purr" field as null + raw = {"kind": "dog", "name": "Rex", "bark": "woof", "purr": "null"} + result = validate_example(PetUnion, raw, model_fields=_Base.model_fields) + # Returned dict preserves the original denulled data + assert result == { + "kind": "dog", + "name": "Rex", + "bark": "woof", + "purr": None, + } + + +class TestIntegration: + """Integration tests with real schema models.""" + + def test_real_building_examples_validate(self) -> None: + """Validate real Building examples from the schema package.""" + pytest.importorskip("overture.schema.buildings.building") + + from overture.schema.buildings.building import Building # noqa: PLC0415 + + # Find the pyproject.toml for the Building model + pyproject_path = resolve_pyproject_path(Building) + assert pyproject_path is not None, "Could not find pyproject.toml for Building" + + # Load raw examples from TOML + raw_examples = load_examples_from_toml(pyproject_path, "Building") + assert len(raw_examples) > 0, "No Building examples found in pyproject.toml" + + # Validate each example + for idx, raw_example in enumerate(raw_examples): + # Should not raise ValidationError + validated = validate_example(Building, raw_example) + assert isinstance(validated, dict), f"Example {idx}: Expected dict result" + + def test_real_segment_examples_validate(self) -> None: + """Validate real Segment examples (discriminated union with cross-arm fields).""" + pytest.importorskip("overture.schema.transportation") + + from overture.schema.transportation import Segment # noqa: PLC0415 + from 
overture.schema.transportation.segment.models import ( # noqa: PLC0415 + RoadSegment, + TransportationSegment, + ) + + pyproject_path = resolve_pyproject_path(RoadSegment) + assert pyproject_path is not None + + raw_examples = load_examples_from_toml(pyproject_path, "Segment") + assert len(raw_examples) > 0, "No Segment examples found" + + for idx, raw_example in enumerate(raw_examples): + validated = validate_example( + Segment, + raw_example, + model_fields=TransportationSegment.model_fields, + ) + assert isinstance(validated, dict), f"Example {idx}: Expected dict result" + + +def _field( + name: str, + *, + kind: TypeKind = TypeKind.PRIMITIVE, + base_type: str = "str", + is_dict: bool = False, + list_depth: int = 0, + is_required: bool = True, + model: ModelSpec | None = None, + starts_cycle: bool = False, +) -> FieldSpec: + """Build a FieldSpec with sensible defaults for testing.""" + return FieldSpec( + name=name, + type_info=TypeInfo( + base_type=base_type, kind=kind, is_dict=is_dict, list_depth=list_depth + ), + description=None, + is_required=is_required, + model=model, + starts_cycle=starts_cycle, + ) + + +class TestCollectDictPaths: + """Tests for collect_dict_paths.""" + + def test_no_dict_fields(self) -> None: + """Model with only primitive fields returns empty set.""" + fields = [_field("name")] + assert collect_dict_paths(fields) == frozenset() + + def test_top_level_dict_field(self) -> None: + """Dict field at top level is collected.""" + fields = [ + _field("name"), + _field("tags", is_dict=True, is_required=False), + ] + assert collect_dict_paths(fields) == frozenset({"tags"}) + + def test_nested_dict_in_sub_model(self) -> None: + """Dict field inside a sub-model produces dotted path.""" + inner_fields = [ + _field("primary"), + _field("common", is_dict=True, is_required=False), + ] + inner_model = ModelSpec(name="Names", description=None, fields=inner_fields) + fields = [ + _field("names", kind=TypeKind.MODEL, base_type="Names", model=inner_model) 
+ ] + assert collect_dict_paths(fields) == frozenset({"names.common"}) + + def test_list_of_model_with_dict(self) -> None: + """Dict inside list-of-model uses [] in path.""" + inner_fields = [_field("tags", is_dict=True, is_required=False)] + inner_model = ModelSpec(name="Item", description=None, fields=inner_fields) + fields = [ + _field( + "items", + kind=TypeKind.MODEL, + base_type="Item", + list_depth=1, + model=inner_model, + ), + ] + assert collect_dict_paths(fields) == frozenset({"items[].tags"}) + + def test_nested_list_depth(self) -> None: + """list[list[Model]] produces [][] in path.""" + inner_fields = [_field("tags", is_dict=True)] + inner_model = ModelSpec(name="Item", description=None, fields=inner_fields) + fields = [ + _field( + "items", + kind=TypeKind.MODEL, + base_type="Item", + list_depth=2, + model=inner_model, + ), + ] + assert collect_dict_paths(fields) == frozenset({"items[][].tags"}) + + def test_cycle_stops_recursion(self) -> None: + """Fields with starts_cycle=True are not recursed into.""" + inner_fields = [_field("data", is_dict=True, is_required=False)] + inner_model = ModelSpec(name="Node", description=None, fields=inner_fields) + fields = [ + _field( + "child", + kind=TypeKind.MODEL, + base_type="Node", + is_required=False, + model=inner_model, + starts_cycle=True, + ), + ] + assert collect_dict_paths(fields) == frozenset() diff --git a/packages/overture-schema-codegen/tests/test_golden_markdown.py b/packages/overture-schema-codegen/tests/test_golden_markdown.py new file mode 100644 index 000000000..42320ee69 --- /dev/null +++ b/packages/overture-schema-codegen/tests/test_golden_markdown.py @@ -0,0 +1,130 @@ +"""Golden-file snapshot tests for Markdown renderer output.""" + +from enum import Enum +from pathlib import Path + +import pytest +from codegen_test_support import ( + CommonNames, + FeatureWithAddress, + FeatureWithDict, + FeatureWithSources, + HexColor, + Id, + Instrument, + InstrumentFamily, + SimpleKind, + Sources, + Venue, 
+ Widget, + assert_golden, +) +from overture.schema.codegen.extraction.enum_extraction import extract_enum +from overture.schema.codegen.extraction.model_extraction import ( + expand_model_tree, + extract_model, +) +from overture.schema.codegen.extraction.newtype_extraction import extract_newtype +from overture.schema.codegen.extraction.specs import TypeIdentity +from overture.schema.codegen.layout.type_collection import ( + collect_all_supplementary_types, +) +from overture.schema.codegen.markdown.renderer import ( + render_enum, + render_feature, + render_newtype, +) +from overture.schema.codegen.markdown.reverse_references import ( + UsedByEntry, + compute_reverse_references, +) +from pydantic import BaseModel + +GOLDEN_DIR = Path(__file__).parent / "golden" / "markdown" + +FEATURE_CASES = [ + (Instrument, "instrument.md"), + (Venue, "venue.md"), + (Widget, "widget.md"), + (FeatureWithSources, "feature_with_sources.md"), + (FeatureWithAddress, "feature_with_address.md"), + (FeatureWithDict, "feature_with_dict.md"), +] + +ENUM_CASES = [ + (InstrumentFamily, "instrument_family.md"), + (SimpleKind, "simple_kind.md"), +] + +NEWTYPE_CASES = [ + (HexColor, "hex_color.md"), + (Id, "id.md"), + (Sources, "sources.md"), + (CommonNames, "common_names.md"), +] + + +@pytest.fixture(scope="module") +def reverse_refs() -> dict[TypeIdentity, list[UsedByEntry]]: + """Compute reverse references for all test models.""" + feature_specs = [] + for model_class, _ in FEATURE_CASES: + assert isinstance(model_class, type) and issubclass(model_class, BaseModel) + spec = extract_model(model_class) + expand_model_tree(spec) + feature_specs.append(spec) + + all_specs = collect_all_supplementary_types(feature_specs) + return compute_reverse_references(feature_specs, all_specs) + + +@pytest.mark.parametrize( + ("model_class", "golden_filename"), + FEATURE_CASES, + ids=[name for _, name in FEATURE_CASES], +) +def test_feature_golden( + model_class: type[BaseModel], + golden_filename: str, + 
update_golden: bool, + reverse_refs: dict[TypeIdentity, list[UsedByEntry]], +) -> None: + spec = extract_model(model_class) + expand_model_tree(spec) + used_by = reverse_refs.get(spec.identity) + actual = render_feature(spec, used_by=used_by) + assert_golden(actual, GOLDEN_DIR / golden_filename, update=update_golden) + + +@pytest.mark.parametrize( + ("enum_class", "golden_filename"), + ENUM_CASES, + ids=[name for _, name in ENUM_CASES], +) +def test_enum_golden( + enum_class: type[Enum], + golden_filename: str, + update_golden: bool, + reverse_refs: dict[TypeIdentity, list[UsedByEntry]], +) -> None: + spec = extract_enum(enum_class) + used_by = reverse_refs.get(spec.identity) + actual = render_enum(spec, used_by=used_by) + assert_golden(actual, GOLDEN_DIR / golden_filename, update=update_golden) + + +@pytest.mark.parametrize( + ("newtype_callable", "golden_filename"), + NEWTYPE_CASES, + ids=[name for _, name in NEWTYPE_CASES], +) +def test_newtype_golden( + newtype_callable: object, + golden_filename: str, + update_golden: bool, + reverse_refs: dict[TypeIdentity, list[UsedByEntry]], +) -> None: + spec = extract_newtype(newtype_callable) + used_by = reverse_refs.get(spec.identity) + actual = render_newtype(spec, used_by=used_by) + assert_golden(actual, GOLDEN_DIR / golden_filename, update=update_golden) diff --git a/packages/overture-schema-codegen/tests/test_integration_real_models.py b/packages/overture-schema-codegen/tests/test_integration_real_models.py new file mode 100644 index 000000000..b4dd9419f --- /dev/null +++ b/packages/overture-schema-codegen/tests/test_integration_real_models.py @@ -0,0 +1,279 @@ +"""Integration tests against real Overture models. + +These tests validate the extraction layer against actual models from +the installed Overture schema packages. 
+""" + +import pytest +from codegen_test_support import assert_literal_field +from overture.schema.codegen.extraction.model_extraction import extract_model +from overture.schema.codegen.extraction.specs import ( + FeatureSpec, + ModelSpec, + UnionSpec, + filter_model_classes, + is_model_class, + is_union_alias, +) +from overture.schema.codegen.extraction.type_analyzer import TypeKind +from overture.schema.codegen.extraction.union_extraction import extract_union +from overture.schema.codegen.layout.module_layout import entry_point_class +from overture.schema.codegen.markdown.pipeline import generate_markdown_pages +from overture.schema.codegen.markdown.renderer import render_feature +from overture.schema.core.discovery import discover_models +from overture.schema.transportation import Segment +from overture.schema.transportation.segment.models import RoadSegment +from pydantic import BaseModel + + +class TestDiscoverModels: + """Tests for model discovery.""" + + def test_discover_models_returns_dict(self) -> None: + """discover_models() should return a dictionary.""" + models = discover_models() + assert isinstance(models, dict) + + def test_discover_models_finds_building( + self, building_class: type[BaseModel] + ) -> None: + """Should discover the Building model.""" + assert issubclass(building_class, BaseModel) + + def test_discover_models_finds_place(self, place_class: type[BaseModel]) -> None: + """Should discover the Place model.""" + assert issubclass(place_class, BaseModel) + + def test_discover_models_returns_multiple_themes(self) -> None: + """Should discover models from multiple themes.""" + models = discover_models() + assert len(models) >= 3, f"Expected at least 3 models, got {len(models)}" + + +class TestExtractBuildingModel: + """Tests for extracting the Building model.""" + + def test_extract_building_has_name(self, building_spec: ModelSpec) -> None: + """Building model spec should have correct name.""" + assert building_spec.name == "Building" + + 
def test_extract_building_has_theme_type(self, building_spec: ModelSpec) -> None: + """Building should have theme='buildings', type='building' as Literal fields.""" + assert_literal_field(building_spec, "theme", "buildings") + assert_literal_field(building_spec, "type", "building") + + def test_extract_building_has_fields(self, building_spec: ModelSpec) -> None: + """Building should have multiple fields.""" + assert len(building_spec.fields) > 0, "Building should have at least one field" + field_names = {f.name for f in building_spec.fields} + assert "id" in field_names + + def test_building_field_types_are_valid(self, building_spec: ModelSpec) -> None: + """All Building fields should have valid TypeInfo.""" + for field in building_spec.fields: + assert field.type_info is not None + assert field.type_info.kind in TypeKind + + +class TestExtractPlaceModel: + """Tests for extracting the Place model.""" + + def test_extract_place_has_theme_type(self, place_class: type[BaseModel]) -> None: + """Place should have theme='places', type='place' as Literal fields.""" + spec = extract_model(place_class) + assert_literal_field(spec, "theme", "places") + assert_literal_field(spec, "type", "place") + + def test_place_has_fields(self, place_class: type[BaseModel]) -> None: + """Place model should have fields.""" + spec = extract_model(place_class) + assert len(spec.fields) > 0 + + +class TestExtractDivisionModel: + """Tests for extracting Division model.""" + + def test_extract_division_theme_type(self, division_class: type[BaseModel]) -> None: + """Division should have theme='divisions', type='division' as Literal fields.""" + spec = extract_model(division_class) + assert_literal_field(spec, "theme", "divisions") + assert_literal_field(spec, "type", "division") + + +class TestFieldTypeAnalysis: + """Tests that analyze_type handles real model field types correctly.""" + + def test_no_analyze_type_crashes(self, all_discovered_models: dict) -> None: + """extract_model should not 
crash on any discovered model.""" + for model_class in filter_model_classes(all_discovered_models): + spec = extract_model(model_class) + assert spec.name == model_class.__name__ + + def test_all_field_types_resolved(self, all_discovered_models: dict) -> None: + """All fields should have resolved TypeInfo.""" + for model_class in filter_model_classes(all_discovered_models): + spec = extract_model(model_class) + for field in spec.fields: + assert field.type_info.base_type, ( + f"No base_type for {spec.name}.{field.name}" + ) + assert field.type_info.kind in TypeKind, ( + f"Invalid kind for {spec.name}.{field.name}" + ) + + +class TestMarkdownRenderingRealModels: + """Tests for markdown rendering with real models.""" + + def test_render_building_content(self, building_class: type[BaseModel]) -> None: + """Building renders with title, field table, and expected fields.""" + markdown = render_feature(extract_model(building_class)) + + assert "# Building" in markdown + assert "| Name |" in markdown + assert "| Type |" in markdown + assert "id" in markdown + assert "geometry" in markdown + + def test_render_all_models_without_crash(self, all_discovered_models: dict) -> None: + """render_feature should not crash on any discovered model.""" + for model_class in filter_model_classes(all_discovered_models): + markdown = render_feature(extract_model(model_class)) + assert isinstance(markdown, str) + assert len(markdown) > 0 + + +class TestDiscriminatedUnions: + """Tests for discriminated union types like Segment. + + Segment is registered as a discriminated union (type alias), not a class. + The extraction layer handles the individual union members (RoadSegment, + RailSegment, WaterSegment) but not the union itself. 
+ """ + + def test_segment_is_not_a_class(self) -> None: + """Segment discovery returns a type alias, not a class.""" + models = discover_models() + segment_entries = [ + (k, v) for k, v in models.items() if "segment" in str(k).lower() + ] + + assert len(segment_entries) == 1 + _key, segment = segment_entries[0] + + assert not isinstance(segment, type) + + def test_individual_segment_types_extractable(self) -> None: + """Individual segment member types have expected theme/type literals.""" + spec = extract_union("Segment", Segment) + for member_cls in spec.members: + member_spec = extract_model(member_cls) + assert_literal_field(member_spec, "theme", "transportation") + assert_literal_field(member_spec, "type", "segment") + + def test_road_segment_has_road_specific_fields(self) -> None: + """RoadSegment should have road-specific fields.""" + spec = extract_model(RoadSegment) + field_names = {f.name for f in spec.fields} + + assert "subtype" in field_names + + +class TestSegmentUnionExtraction: + """Tests for extracting the real Segment discriminated union.""" + + @pytest.fixture + def segment_spec(self) -> UnionSpec: + """Extract Segment union spec.""" + return extract_union("Segment", Segment) + + def test_segment_extract_union_succeeds(self, segment_spec: UnionSpec) -> None: + """extract_union works on the real Segment type alias.""" + assert segment_spec.name == "Segment" + assert len(segment_spec.members) == 3 + + def test_segment_has_shared_fields(self, segment_spec: UnionSpec) -> None: + """Segment UnionSpec has shared fields from TransportationSegment.""" + shared = [ + af for af in segment_spec.annotated_fields if af.variant_sources is None + ] + shared_names = {af.field_spec.name for af in shared} + # All segments share these base fields + assert "geometry" in shared_names + assert "subtype" in shared_names + assert "id" in shared_names + + def test_segment_has_variant_fields(self, segment_spec: UnionSpec) -> None: + """Segment UnionSpec has 
variant-specific fields.""" + variant = [ + af for af in segment_spec.annotated_fields if af.variant_sources is not None + ] + variant_names = {af.field_spec.name for af in variant} + # RoadSegment has these specific fields + assert "road_flags" in variant_names + assert "road_surface" in variant_names + assert len(variant_names) > 0 + + def test_segment_discriminator_extracted_from_callable( + self, segment_spec: UnionSpec + ) -> None: + """Segment callable discriminator is resolved via _field_name.""" + assert segment_spec.discriminator_field == "subtype" + assert segment_spec.discriminator_mapping is not None + assert len(segment_spec.discriminator_mapping) == 3 + # Keys are str(enum_member), e.g. "Subtype.ROAD" + road_key = next(k for k in segment_spec.discriminator_mapping if "ROAD" in k) + assert segment_spec.discriminator_mapping[road_key] is RoadSegment + + def test_segment_common_base_is_base_model(self, segment_spec: UnionSpec) -> None: + """Segment common_base is the shared base class.""" + assert segment_spec.common_base is not None + assert issubclass(segment_spec.common_base, BaseModel) + # Verify common base has expected fields + assert "geometry" in segment_spec.common_base.model_fields + assert "id" in segment_spec.common_base.model_fields + + +class TestPydanticTypePages: + """End-to-end: pipeline produces pages for referenced Pydantic built-in types.""" + + _SCHEMA_ROOT = "overture.schema" + + @pytest.fixture(scope="class") + def pages(self) -> list: + """Generate all pages from real discovered models.""" + models = discover_models() + feature_specs: list[FeatureSpec] = [] + for key, entry in models.items(): + if is_model_class(entry): + feature_specs.append(extract_model(entry, entry_point=key.entry_point)) + elif is_union_alias(entry): + feature_specs.append( + extract_union( + entry_point_class(key.entry_point), + entry, + entry_point=key.entry_point, + ) + ) + return generate_markdown_pages(feature_specs, self._SCHEMA_ROOT) + + def 
test_http_url_page_exists(self, pages: list) -> None: + """Pipeline produces a page for HttpUrl under pydantic/networks/.""" + paths = {str(p.path) for p in pages} + assert any("pydantic/networks/http_url" in path for path in paths) + + def test_email_str_page_exists(self, pages: list) -> None: + """Pipeline produces a page for EmailStr under pydantic/networks/.""" + paths = {str(p.path) for p in pages} + assert any("pydantic/networks/email_str" in path for path in paths) + + def test_http_url_page_content(self, pages: list) -> None: + """HttpUrl page has expected heading and Pydantic docs link.""" + page = next(p for p in pages if "pydantic/networks/http_url" in str(p.path)) + assert "# HttpUrl" in page.content + assert "docs.pydantic.dev" in page.content + + def test_place_links_to_http_url(self, pages: list) -> None: + """Place feature page links to the HttpUrl type page.""" + place_page = next(p for p in pages if p.path.stem == "place" and p.is_feature) + assert "HttpUrl" in place_page.content diff --git a/packages/overture-schema-codegen/tests/test_markdown_renderer.py b/packages/overture-schema-codegen/tests/test_markdown_renderer.py new file mode 100644 index 000000000..e22154196 --- /dev/null +++ b/packages/overture-schema-codegen/tests/test_markdown_renderer.py @@ -0,0 +1,1436 @@ +"""Tests for Markdown renderer.""" + +from collections.abc import Callable +from enum import Enum +from pathlib import PurePosixPath +from typing import Annotated, Literal, NewType + +import pytest +from annotated_types import Ge, Interval +from codegen_test_support import ( + EMAIL_STR_SPEC, + HTTP_URL_SPEC, + STR_TYPE, + CommonNames, + FeatureBase, + FeatureWithAddress, + FeatureWithSources, + Instrument, + SimpleModel, + SourceItem, + Sources, + TreeNode, + Venue, + make_union_spec, +) +from overture.schema.codegen.extraction.examples import ExampleRecord +from overture.schema.codegen.extraction.model_extraction import ( + expand_model_tree, + extract_model, +) +from 
overture.schema.codegen.extraction.newtype_extraction import extract_newtype +from overture.schema.codegen.extraction.specs import ( + AnnotatedField, + EnumMemberSpec, + EnumSpec, + FieldSpec, + PrimitiveSpec, + TypeIdentity, +) +from overture.schema.codegen.extraction.type_analyzer import ConstraintSource +from overture.schema.codegen.markdown.link_computation import LinkContext +from overture.schema.codegen.markdown.renderer import ( + _format_constraint, + _format_example_value, + _linkify_bare_urls, + _sanitize_for_table_cell, + render_enum, + render_feature, + render_newtype, + render_primitives_from_specs, + render_pydantic_type, +) +from overture.schema.codegen.markdown.reverse_references import UsedByEntry, UsedByKind +from overture.schema.system.field_constraint import ( + CountryCodeAlpha2Constraint, + JsonPointerConstraint, + UniqueItemsConstraint, +) +from overture.schema.system.model_constraint import no_extra_fields +from overture.schema.system.primitive import int32 +from overture.schema.system.ref import Id +from overture.schema.system.string import HexColor, NoWhitespaceString +from pydantic import BaseModel, Field + +_FLAT_MEMBER = EnumMemberSpec(name="FLAT", value="flat", description=None) + +_ROOF_SHAPE_SPEC = EnumSpec( + name="RoofShape", + description="The shape of the roof.", + members=[_FLAT_MEMBER], +) + + +class TestSanitizeForTableCell: + """Tests for _sanitize_for_table_cell.""" + + def test_single_line_unchanged(self) -> None: + """Single-line text passes through unchanged.""" + assert ( + _sanitize_for_table_cell("A simple description.") == "A simple description." + ) + + def test_single_newline_becomes_space(self) -> None: + """Single newline within a paragraph becomes a space.""" + assert _sanitize_for_table_cell("Line one.\nLine two.") == "Line one. Line two." + + def test_blank_line_becomes_double_br(self) -> None: + """Blank line (paragraph break) becomes

.""" + assert ( + _sanitize_for_table_cell("Para one.\n\nPara two.") + == "Para one.

Para two." + ) + + def test_blank_line_with_whitespace(self) -> None: + """Blank line containing only whitespace is treated as blank.""" + assert ( + _sanitize_for_table_cell("Para one.\n \nPara two.") + == "Para one.

Para two." + ) + + def test_multiple_blank_lines_collapsed(self) -> None: + """Multiple consecutive blank lines collapse to one

.""" + assert _sanitize_for_table_cell("A.\n\n\nB.") == "A.

B." + + def test_pipe_escaped(self) -> None: + """Pipe characters escaped to avoid breaking table columns.""" + assert _sanitize_for_table_cell("foo | bar") == "foo \\| bar" + + def test_pipe_and_newline_both_handled(self) -> None: + """Pipes and newlines handled together.""" + assert _sanitize_for_table_cell("a | b\nc | d") == "a \\| b c \\| d" + + def test_strips_leading_trailing_whitespace(self) -> None: + """Leading/trailing whitespace stripped.""" + assert _sanitize_for_table_cell(" hello ") == "hello" + + +class TestLinkifyBareUrls: + """Tests for _linkify_bare_urls.""" + + def test_www_url_gets_linked(self) -> None: + """www. URLs become Markdown links with https:// href.""" + assert ( + _linkify_bare_urls("see www.example.com for details") + == "see [www.example.com](https://www.example.com) for details" + ) + + def test_https_url_gets_linked(self) -> None: + """https:// URLs become self-referencing Markdown links.""" + assert ( + _linkify_bare_urls("see https://example.com/path") + == "see [https://example.com/path](https://example.com/path)" + ) + + def test_http_url_gets_linked(self) -> None: + """http:// URLs become self-referencing Markdown links.""" + assert ( + _linkify_bare_urls("see http://example.com") + == "see [http://example.com](http://example.com)" + ) + + def test_existing_markdown_link_unchanged(self) -> None: + """URLs already inside [text](url) are left alone.""" + text = "[example](https://example.com)" + assert _linkify_bare_urls(text) == text + + def test_text_without_urls_unchanged(self) -> None: + """Plain text passes through unchanged.""" + assert _linkify_bare_urls("no urls here") == "no urls here" + + def test_url_in_parentheses(self) -> None: + """URL inside sentence parentheses gets linked.""" + result = _linkify_bare_urls("from the OA (www.openaddresses.io) project") + assert "[www.openaddresses.io](https://www.openaddresses.io)" in result + + def test_trailing_period_excluded(self) -> None: + """Trailing sentence punctuation 
is not part of the URL.""" + assert ( + _linkify_bare_urls("found on https://www.wikidata.org/.") + == "found on [https://www.wikidata.org/](https://www.wikidata.org/)." + ) + + def test_trailing_comma_excluded(self) -> None: + """Trailing comma is not part of the URL.""" + assert ( + _linkify_bare_urls("see https://example.com, and more") + == "see [https://example.com](https://example.com), and more" + ) + + def test_url_in_backtick_code_span_unchanged(self) -> None: + """URLs inside backtick code spans are not linkified.""" + text = "use `https://example.com` as the base" + assert _linkify_bare_urls(text) == text + + def test_url_in_double_backtick_code_span_unchanged(self) -> None: + """URLs inside double-backtick code spans are not linkified.""" + text = "use ``https://example.com/path`` as the base" + assert _linkify_bare_urls(text) == text + + def test_mixed_code_span_and_bare_url(self) -> None: + """Code-span URLs preserved while bare URLs are linkified.""" + text = "see `https://a.com` and https://b.com" + result = _linkify_bare_urls(text) + assert "`https://a.com`" in result + assert "[https://b.com](https://b.com)" in result + + +class TestRenderFeatureBasic: + """Tests for render_feature with basic models.""" + + def test_renders_title_from_model_name(self) -> None: + """Should render model name as H1 title.""" + spec = extract_model(SimpleModel) + result = render_feature(spec) + + assert "# SimpleModel" in result + + def test_renders_description_from_docstring(self) -> None: + """Should render model docstring as description.""" + + class DescribedModel(BaseModel): + """This is the model description.""" + + value: int + + spec = extract_model(DescribedModel) + result = render_feature(spec) + + assert "This is the model description." 
in result + + def test_renders_fields_section(self) -> None: + """Should include Fields section header.""" + + class ModelWithField(BaseModel): + """Model with a field.""" + + name: str + + spec = extract_model(ModelWithField) + result = render_feature(spec) + + assert "## Fields" in result + + def test_renders_field_table_header(self) -> None: + """Should render field table with proper headers.""" + + class ModelWithField(BaseModel): + """Model with a field.""" + + name: str + + spec = extract_model(ModelWithField) + result = render_feature(spec) + + assert "| Name | Type | Description |" in result + assert "| -----: | :----: | ------------- |" in result + + +class TestRenderFeatureFieldTable: + """Tests for field table rendering.""" + + def test_renders_required_field(self) -> None: + """Should render required field without (optional) suffix.""" + + class ModelWithRequired(BaseModel): + """Model with required field.""" + + name: str = Field(description="The name") + + spec = extract_model(ModelWithRequired) + result = render_feature(spec) + + assert "| `name` |" in result + assert "| `string` |" in result + assert "The name" in result + + def test_renders_optional_field(self) -> None: + """Should render optional field with (optional) suffix.""" + + class ModelWithOptional(BaseModel): + """Model with optional field.""" + + nickname: str | None = Field(None, description="Optional nickname") + + spec = extract_model(ModelWithOptional) + result = render_feature(spec) + + assert "| `nickname` |" in result + assert "(optional)" in result + assert "Optional nickname" in result + + def test_renders_typed_fields(self) -> None: + """Should render field types correctly.""" + + class ModelWithTypes(BaseModel): + """Model with various types.""" + + count: int + price: float + active: bool + + spec = extract_model(ModelWithTypes) + result = render_feature(spec) + + # Check that fields are present (exact type format may vary) + assert "`count`" in result + assert "`price`" in 
result + assert "`active`" in result + + def test_multiline_description_sanitized_in_table(self) -> None: + """Multiline field description rendered with
in table cell.""" + + class ModelWithMultilineDesc(BaseModel): + """Model.""" + + name: str = Field(description="First line.\n\nSecond paragraph.") + + spec = extract_model(ModelWithMultilineDesc) + result = render_feature(spec) + + assert "First line.

Second paragraph." in result + # The table should not be broken by a blank line + lines = result.splitlines() + table_start = next(i for i, line in enumerate(lines) if "| Name |" in line) + for i in range(table_start, len(lines)): + if lines[i].strip() == "": + break + assert lines[i].startswith("|"), f"Table broken at line {i}: {lines[i]}" + + +class TestRenderFeatureWithThemeType: + """Tests for rendering Feature-like models with theme/type.""" + + def test_renders_theme_and_type_fields(self) -> None: + """Should render theme and type as Literal fields.""" + + class Place(FeatureBase[Literal["places"], Literal["place"]]): + """A place feature.""" + + name: str + + spec = extract_model(Place) + result = render_feature(spec) + + # Theme and type should appear somewhere in output + assert "places" in result + assert "place" in result + + +class TestRenderFeatureLiteralField: + """Tests for rendering Literal-typed fields.""" + + def test_literal_field_renders_as_quoted_value(self) -> None: + """Literal field should render as quoted string in backticks.""" + + class TestFeature(FeatureBase[Literal["test_theme"], Literal["test_type"]]): + """Test feature.""" + + name: str + + spec = extract_model(TestFeature) + result = render_feature(spec) + + assert '| `"test_theme"` |' in result + assert '| `"test_type"` |' in result + + +class TestRenderFeatureNewTypeDisplay: + """Tests for NewType rendering in Markdown.""" + + def test_newtype_wrapping_list_renders_name_with_list_qualifier( + self, + ) -> None: + """NewType wrapping a list renders as name with (list, optional).""" + + class Item(BaseModel): + value: str + + TestSources = NewType( + "TestSources", Annotated[list[Item], UniqueItemsConstraint()] + ) + + class ModelWithSources(BaseModel): + """Model with sources.""" + + sources: TestSources | None = None + + spec = extract_model(ModelWithSources) + expand_model_tree(spec) + result = render_feature(spec) + + assert "`TestSources`" in result + assert "(list, optional)" 
in result + + def test_hex_color_renders_as_newtype_name(self) -> None: + """HexColor (unregistered NewType) renders as code-formatted name.""" + + class ModelWithColor(BaseModel): + """Model with color.""" + + color: HexColor | None = None + + spec = extract_model(ModelWithColor) + result = render_feature(spec) + + assert "`HexColor`" in result + assert "(optional)" in result + + def test_registered_primitive_renders_through_registry(self) -> None: + """Registered primitive (int32) renders via registry, not as NewType link.""" + + class ModelWithCount(BaseModel): + """Model with count.""" + + count: int32 + + spec = extract_model(ModelWithCount) + result = render_feature(spec) + + assert "| `int32` |" in result + # Should NOT be linked + assert "](int32.md)" not in result + + def test_plain_str_renders_as_string(self) -> None: + """Plain str field renders as 'string'.""" + + class ModelWithName(BaseModel): + """Model with name.""" + + name: str + + spec = extract_model(ModelWithName) + result = render_feature(spec) + + assert "| `string` |" in result + + def test_enum_renders_as_code_without_context(self) -> None: + """Enum fields render as inline code without LinkContext.""" + + class Status(str, Enum): + ACTIVE = "active" + + class ModelWithEnum(BaseModel): + """Model with enum.""" + + status: Status + + spec = extract_model(ModelWithEnum) + result = render_feature(spec) + + assert "| `Status` |" in result + + def test_model_field_renders_as_code_without_context(self) -> None: + """BaseModel field renders as inline code without LinkContext.""" + + class Inner(BaseModel): + value: str + + class Outer(BaseModel): + """Model with nested model.""" + + inner: Inner + + spec = extract_model(Outer) + expand_model_tree(spec) + result = render_feature(spec) + + assert "| `Inner` |" in result + + +class TestRenderFeatureInlineExpansion: + """Tests for inline expansion of nested model fields.""" + + def test_direct_model_fields_expanded_with_dot_prefix(self) -> None: + 
"""Direct model field expands sub-fields with dot notation.""" + spec = extract_model(FeatureWithAddress) + expand_model_tree(spec) + result = render_feature(spec) + + assert "| `address.street` |" in result + assert "| `address.city` |" in result + assert "| `address.zip_code` |" in result + + def test_list_of_model_fields_expanded_with_bracket_dot_prefix(self) -> None: + """List-of-model field expands sub-fields with []. notation.""" + spec = extract_model(FeatureWithSources) + expand_model_tree(spec) + result = render_feature(spec) + + assert "| `sources[]` |" in result + assert "| `sources[].dataset` |" in result + + def test_cycle_detection_prevents_infinite_recursion(self) -> None: + """Recursive model emits parent row but does not recurse.""" + spec = extract_model(TreeNode) + expand_model_tree(spec) + result = render_feature(spec) + + # The parent field row appears + assert "| `parent` |" in result + # But no recursion into parent.label + assert "parent.label" not in result + + def test_primitive_field_unchanged(self) -> None: + """Primitive fields produce a single row without expansion.""" + spec = extract_model(SimpleModel) + result = render_feature(spec) + + lines = [line for line in result.splitlines() if "| `name` |" in line] + assert len(lines) == 1 + + def test_parent_row_preserved_before_expansion(self) -> None: + """The parent field row still appears before expanded sub-fields.""" + spec = extract_model(FeatureWithAddress) + expand_model_tree(spec) + result = render_feature(spec) + + # Parent row for 'address' itself appears + assert "| `address` |" in result + # And it appears before the expanded fields + lines = result.splitlines() + address_line = next( + i for i, line in enumerate(lines) if "| `address` |" in line + ) + street_line = next( + i for i, line in enumerate(lines) if "| `address.street` |" in line + ) + assert address_line < street_line + + +class TestRenderFeatureConstraints: + """Tests for model-level constraint rendering in 
feature pages.""" + + def test_venue_has_constraints_section(self) -> None: + """Venue's @require_any_of renders as a Constraints section.""" + spec = extract_model(Venue) + result = render_feature(spec) + + assert "## Constraints" in result + assert "At least one of `name`, `description` must be set" in result + + def test_constraints_section_between_fields_and_examples(self) -> None: + """Constraints section appears after Fields, before Examples.""" + spec = extract_model(Venue) + examples = [ExampleRecord(rows=[("name", "test")])] + result = render_feature(spec, examples=examples) + + lines = result.splitlines() + fields_line = next(i for i, line in enumerate(lines) if "## Fields" in line) + constraints_line = next( + i for i, line in enumerate(lines) if "## Constraints" in line + ) + examples_line = next(i for i, line in enumerate(lines) if "## Examples" in line) + + assert fields_line < constraints_line < examples_line + + def test_no_constraints_section_without_constraints(self) -> None: + """Models without model-level constraints omit Constraints section.""" + + class Plain(BaseModel): + """Plain model.""" + + name: str + + spec = extract_model(Plain) + result = render_feature(spec) + + assert "## Constraints" not in result + + def test_no_constraints_section_with_only_no_extra_fields(self) -> None: + """Model with only @no_extra_fields omits Constraints section.""" + + @no_extra_fields + class Strict(BaseModel): + """Strict model.""" + + name: str + + spec = extract_model(Strict) + result = render_feature(spec) + + assert "## Constraints" not in result + + +class TestRenderFeatureConstraintNotes: + """Tests for inline constraint notes in field description cells.""" + + def test_venue_name_field_includes_constraint_note(self) -> None: + """Venue's name field description cell includes constraint note in italics.""" + spec = extract_model(Venue) + result = render_feature(spec) + + # Find the row for 'name' field + lines = result.splitlines() + name_line = 
next(line for line in lines if "| `name` |" in line) + assert "Venue name" in name_line + assert "*At least one of `name`, `description` must be set*" in name_line + assert "
" in name_line + + def test_field_with_no_description_gets_constraint_note(self) -> None: + """Field with no existing description still gets the constraint note.""" + spec = extract_model(Venue) + result = render_feature(spec) + + # description field on Venue has no Field(description=...) + lines = result.splitlines() + desc_line = next(line for line in lines if "| `description` |" in line) + assert "*At least one of `name`, `description` must be set*" in desc_line + + +class TestRenderFeatureFieldConstraints: + """Tests for field-level constraint annotation from TypeInfo.""" + + def test_venue_geometry_shows_allowed_types(self) -> None: + """Venue's geometry field shows GeometryTypeConstraint as a note.""" + spec = extract_model(Venue) + expand_model_tree(spec) + result = render_feature(spec) + + lines = result.splitlines() + geo_line = next(line for line in lines if "| `geometry` |" in line) + assert "*Allowed geometry types: Point, Polygon*" in geo_line + + def test_venue_reference_links_when_context_available(self) -> None: + """Reference constraint links the target type when LinkContext has the page.""" + spec = extract_model(Venue) + expand_model_tree(spec) + ctx = LinkContext( + page_path=PurePosixPath("music/venue.md"), + registry={ + TypeIdentity(Instrument, "Instrument"): PurePosixPath( + "music/instrument.md" + ) + }, + ) + result = render_feature(spec, link_ctx=ctx) + + lines = result.splitlines() + ref_line = next(line for line in lines if "| `resident_ensemble` |" in line) + assert "[`Instrument`](instrument.md)" in ref_line + assert "belongs to" in ref_line + + def test_venue_reference_unlinked_without_context(self) -> None: + """Reference constraint renders as plain code when no LinkContext.""" + spec = extract_model(Venue) + expand_model_tree(spec) + result = render_feature(spec) + + lines = result.splitlines() + ref_line = next(line for line in lines if "| `resident_ensemble` |" in line) + assert "References `Instrument`" in ref_line + assert 
"belongs to" in ref_line + + +class TestRenderEnumBasic: + """Tests for render_enum with simple enums.""" + + def test_renders_title_from_enum_name(self) -> None: + """Should render enum name as H1 title.""" + result = render_enum(_ROOF_SHAPE_SPEC) + + assert "# RoofShape" in result + + def test_renders_description_from_docstring(self) -> None: + """Should render enum docstring as description.""" + result = render_enum(_ROOF_SHAPE_SPEC) + + assert "The shape of the roof." in result + + def test_renders_values_section(self) -> None: + """Should include Values section header.""" + result = render_enum(_ROOF_SHAPE_SPEC) + + assert "## Values" in result + + def test_renders_values_as_bullet_list(self) -> None: + """Should render each value as a bullet point.""" + spec = EnumSpec( + name="RoofShape", + description="The shape of the roof.", + members=[ + EnumMemberSpec(name="FLAT", value="flat", description=None), + EnumMemberSpec(name="GABLED", value="gabled", description=None), + EnumMemberSpec(name="DOME", value="dome", description=None), + ], + ) + + result = render_enum(spec) + + assert "- `flat`" in result + assert "- `gabled`" in result + assert "- `dome`" in result + + +class TestRenderEnumDocumented: + """Tests for render_enum with DocumentedEnum (per-value descriptions).""" + + def test_renders_member_descriptions(self) -> None: + """Should render per-value descriptions after the value.""" + spec = EnumSpec( + name="Side", + description="The side on which something appears.", + members=[ + EnumMemberSpec( + name="LEFT", value="left", description="On the left side" + ), + EnumMemberSpec( + name="RIGHT", value="right", description="On the right side" + ), + ], + ) + + result = render_enum(spec) + + assert "- `left` - On the left side" in result + assert "- `right` - On the right side" in result + + def test_renders_mixed_documented_undocumented(self) -> None: + """Should handle mix of documented and undocumented members.""" + spec = EnumSpec( + 
name="ConnectionState", + description="Connection states.", + members=[ + EnumMemberSpec(name="CONNECTED", value="connected", description=None), + EnumMemberSpec( + name="QUIESCING", + value="quiescing", + description="Gracefully shutting down", + ), + ], + ) + + result = render_enum(spec) + + # Undocumented: just the value + assert "- `connected`" in result + # Documented: value + description + assert "- `quiescing` - Gracefully shutting down" in result + + +class TestRenderEnumNoDescription: + """Tests for enums without class docstrings.""" + + def test_enum_without_description(self) -> None: + """Should render enum without description section when None.""" + spec = EnumSpec( + name="SimpleEnum", + description=None, + members=[ + EnumMemberSpec(name="A", value="a", description=None), + EnumMemberSpec(name="B", value="b", description=None), + ], + ) + + result = render_enum(spec) + + # Should still have title and values + assert "# SimpleEnum" in result + assert "## Values" in result + assert "- `a`" in result + assert "- `b`" in result + # Should not have empty lines where description would be + lines = result.strip().split("\n") + # Title should be followed by blank line then Values header + assert lines[0] == "# SimpleEnum" + + +class TestRenderNewType: + """Tests for render_newtype.""" + + def test_renders_title(self) -> None: + """Should render NewType name as H1 title.""" + spec = extract_newtype(HexColor) + result = render_newtype(spec) + + assert "# HexColor" in result + + def test_renders_underlying_type(self) -> None: + """Should show the resolved underlying type below the description.""" + spec = extract_newtype(HexColor) + result = render_newtype(spec) + + assert "# HexColor\n" in result + assert "Underlying type: `string`" in result + + def test_renders_constraints(self) -> None: + """Should render constraints section with description and pattern.""" + spec = extract_newtype(HexColor) + result = render_newtype(spec) + + assert "## Constraints" in 
result + assert "Allows only hexadecimal color codes" in result + assert "`HexColorConstraint`" in result + assert "pattern:" in result + + def test_renders_id_with_provenance_without_link(self) -> None: + """Id page shows constraints without provenance links when no context.""" + spec = extract_newtype(Id) + result = render_newtype(spec) + + assert "# Id" in result + assert "NoWhitespaceConstraint" in result + # No link without LinkContext + assert "no_whitespace_string.md" not in result + + def test_builtin_underlying_type_not_linked(self) -> None: + """Built-in underlying type (string) stays in plain backticks.""" + spec = extract_newtype(HexColor) + result = render_newtype(spec) + + assert "Underlying type: `string`" in result + + def test_list_model_underlying_type_without_context(self) -> None: + """List-of-model underlying type renders without link when no context.""" + spec = extract_newtype(Sources) + result = render_newtype(spec) + + assert "Underlying type: `list`" in result + + def test_dict_underlying_types_without_context(self) -> None: + """Dict key/value NewTypes render without links when no context.""" + spec = extract_newtype(CommonNames) + result = render_newtype(spec) + + assert "map" in result + + +class TestPlacementAwareLinks: + """Tests for rendering with LinkContext for cross-directory links.""" + + def test_feature_links_to_shared_type_via_registry(self) -> None: + """Feature in theme subdir links to shared type in types/ dir.""" + + class ModelWithColor(BaseModel): + """Model with color.""" + + color: HexColor | None = None + + spec = extract_model(ModelWithColor) + page_path = PurePosixPath("buildings/building/building.md") + ctx = LinkContext( + page_path, + { + TypeIdentity(HexColor, "HexColor"): PurePosixPath( + "types/strings/hex_color.md" + ) + }, + ) + + result = render_feature(spec, link_ctx=ctx) + + assert "[`HexColor`](../../types/strings/hex_color.md)" in result + + def test_feature_links_to_theme_level_type(self) -> None: + 
"""Feature in subdir links to type at theme level.""" + + class RoofShape(str, Enum): + FLAT = "flat" + + class ModelWithRoof(BaseModel): + """Model with roof.""" + + roof: RoofShape + + spec = extract_model(ModelWithRoof) + page_path = PurePosixPath("buildings/building/building.md") + ctx = LinkContext( + page_path, + { + TypeIdentity(RoofShape, "RoofShape"): PurePosixPath( + "buildings/roof_shape.md" + ) + }, + ) + + result = render_feature(spec, link_ctx=ctx) + + assert "[`RoofShape`](../roof_shape.md)" in result + + def test_feature_links_to_sibling_in_same_subdir(self) -> None: + """Feature links to type in its own subdirectory.""" + + class BuildingClass(str, Enum): + RESIDENTIAL = "residential" + + class ModelWithClass(BaseModel): + """Model.""" + + building_class: BuildingClass + + spec = extract_model(ModelWithClass) + page_path = PurePosixPath("buildings/building/building.md") + ctx = LinkContext( + page_path, + { + TypeIdentity(BuildingClass, "BuildingClass"): PurePosixPath( + "buildings/building/building_class.md" + ) + }, + ) + + result = render_feature(spec, link_ctx=ctx) + + assert "[`BuildingClass`](building_class.md)" in result + + def test_without_context_renders_as_code(self) -> None: + """Without LinkContext, types render as inline code (no link).""" + + class ModelWithColor(BaseModel): + """Model with color.""" + + color: HexColor | None = None + + spec = extract_model(ModelWithColor) + result = render_feature(spec) + + assert "`HexColor`" in result + assert "hex_color.md" not in result + + def test_newtype_underlying_type_linked_via_registry(self) -> None: + """NewType header links underlying model type through placement registry.""" + spec = extract_newtype(Sources) + page_path = PurePosixPath("types/references/sources.md") + ctx = LinkContext( + page_path, + { + TypeIdentity(SourceItem, "SourceItem"): PurePosixPath( + "types/references/source_item.md" + ) + }, + ) + + result = render_newtype(spec, link_ctx=ctx) + + assert 
"[`SourceItem`](source_item.md)" in result + + def test_newtype_underlying_type_not_linked_when_absent(self) -> None: + """Underlying type stays backtick-only when missing from registry.""" + spec = extract_newtype(Sources) + page_path = PurePosixPath("types/references/sources.md") + ctx = LinkContext(page_path, {}) + + result = render_newtype(spec, link_ctx=ctx) + + assert "`list`" in result + assert "[`SourceItem`]" not in result + + def test_newtype_provenance_link_uses_registry(self) -> None: + """NewType provenance links resolve through placement registry.""" + spec = extract_newtype(Id) + page_path = PurePosixPath("types/references/id.md") + registry = { + TypeIdentity(NoWhitespaceString, "NoWhitespaceString"): PurePosixPath( + "types/strings/no_whitespace_string.md" + ), + } + ctx = LinkContext(page_path, registry) + + result = render_newtype(spec, link_ctx=ctx) + + assert "../strings/no_whitespace_string.md" in result + + +class TestFormatExampleValue: + """Tests for _format_example_value.""" + + def test_none_renders_as_null(self) -> None: + """None renders as backtick-quoted null.""" + + assert _format_example_value(None) == "`null`" + + def test_string_null_renders_with_backticks(self) -> None: + """String 'null' renders as a backtick-wrapped string.""" + + assert _format_example_value("null") == "`null`" + + def test_bool_true_renders_lowercase(self) -> None: + """Boolean True renders as backtick-quoted lowercase true.""" + + assert _format_example_value(True) == "`true`" + + def test_bool_false_renders_lowercase(self) -> None: + """Boolean False renders as backtick-quoted lowercase false.""" + + assert _format_example_value(False) == "`false`" + + def test_empty_string_renders_empty(self) -> None: + """Empty string renders as empty string.""" + + assert _format_example_value("") == "" + + def test_short_string_has_backticks(self) -> None: + """Non-empty strings render with backticks.""" + + assert _format_example_value("OpenStreetMap") == 
"`OpenStreetMap`" + + def test_long_string_truncated(self) -> None: + """Strings longer than 100 chars are truncated with ellipsis.""" + + long = "x" * 150 + result = _format_example_value(long) + assert result == f"`{'x' * 97}...`" + assert len(result) == 100 + 2 # 100 content + 2 backticks + + def test_integer_has_backticks(self) -> None: + """Integers render with backticks.""" + + assert _format_example_value(42) == "`42`" + assert _format_example_value(0) == "`0`" + assert _format_example_value(-17) == "`-17`" + + def test_float_has_backticks(self) -> None: + """Floats render with backticks.""" + + assert _format_example_value(3.14) == "`3.14`" + assert _format_example_value(-2.5) == "`-2.5`" + + def test_list_renders_comma_separated(self) -> None: + """Lists render as backtick-wrapped comma-separated values.""" + + assert _format_example_value([1, 2, 3]) == "`[1, 2, 3]`" + assert _format_example_value(["a", "b"]) == '`["a", "b"]`' + assert _format_example_value([]) == "`[]`" + + def test_long_list_truncated(self) -> None: + """Lists longer than truncation limit are truncated with ellipsis.""" + long_list = list(range(200)) + result = _format_example_value(long_list) + assert result.startswith("`[0, 1, 2,") + assert result.endswith("...`") + inner = result[1:-1] # strip backticks + assert len(inner) <= 100 + + def test_long_dict_truncated(self) -> None: + """Dicts longer than truncation limit are truncated with ellipsis.""" + long_dict = {f"key_{i}": f"value_{i}" for i in range(50)} + result = _format_example_value(long_dict) + assert result.startswith('`{"key_0":') + assert result.endswith("...`") + inner = result[1:-1] + assert len(inner) <= 100 + + def test_pipe_character_not_escaped_in_backticks(self) -> None: + """Pipe characters need no escaping inside backticks.""" + + assert _format_example_value("foo|bar") == "`foo|bar`" + assert _format_example_value("a|b|c") == "`a|b|c`" + + +class TestRenderFeatureWithExamples: + """Tests for render_feature with 
examples support.""" + + def test_accepts_examples_parameter(self) -> None: + """render_feature accepts examples parameter.""" + spec = extract_model(SimpleModel) + examples = [ExampleRecord(rows=[("name", "test")])] + + # Should not raise + result = render_feature(spec, examples=examples) + assert "# SimpleModel" in result + + def test_renders_single_example_without_heading(self) -> None: + """Single example renders without 'Example 1' heading.""" + + class ModelWithCount(BaseModel): + """A simple model.""" + + name: str + count: int + + spec = extract_model(ModelWithCount) + examples = [ExampleRecord(rows=[("name", "test"), ("count", 42)])] + + result = render_feature(spec, examples=examples) + assert "## Examples" in result + assert "| Column | Value |" in result + assert "| `name` | `test` |" in result + assert "| `count` | `42` |" in result + # Should NOT have "Example 1" heading + assert "### Example 1" not in result + + def test_renders_multiple_examples_with_headings(self) -> None: + """Multiple examples render with 'Example N' headings.""" + spec = extract_model(SimpleModel) + examples = [ + ExampleRecord(rows=[("name", "first")]), + ExampleRecord(rows=[("name", "second")]), + ] + + result = render_feature(spec, examples=examples) + assert "## Examples" in result + assert "### Example 1" in result + assert "### Example 2" in result + assert "| `name` | `first` |" in result + assert "| `name` | `second` |" in result + + def test_formats_example_values(self) -> None: + """Example values are formatted using _format_example_value.""" + + class TestModel(BaseModel): + """Test model.""" + + text: str + count: int + active: bool + optional: str | None + + spec = extract_model(TestModel) + examples = [ + ExampleRecord( + rows=[ + ("text", "hello"), + ("count", 42), + ("active", True), + ("optional", None), + ] + ) + ] + + result = render_feature(spec, examples=examples) + # String with backticks + assert "| `text` | `hello` |" in result + # Number with backticks + 
assert "| `count` | `42` |" in result + # Boolean with backticks, lowercase + assert "| `active` | `true` |" in result + # None as null + assert "| `optional` | `null` |" in result + + def test_no_examples_omits_section(self) -> None: + """When examples is None, Examples section is not rendered.""" + spec = extract_model(SimpleModel) + result = render_feature(spec, examples=None) + + assert "## Examples" not in result + + def test_empty_examples_list_omits_section(self) -> None: + """When examples is empty list, Examples section is not rendered.""" + spec = extract_model(SimpleModel) + result = render_feature(spec, examples=[]) + + assert "## Examples" not in result + + +class TestRenderPrimitivesPage: + """Tests for the aggregate primitives page.""" + + def test_contains_title(self, primitives_markdown: str) -> None: + assert "# Primitive Types" in primitives_markdown + + def test_contains_signed_integers(self, primitives_markdown: str) -> None: + assert "| `int8` |" in primitives_markdown + assert "| `int16` |" in primitives_markdown + assert "| `int32` |" in primitives_markdown + assert "| `int64` |" in primitives_markdown + + def test_contains_unsigned_integers(self, primitives_markdown: str) -> None: + assert "| `uint8` |" in primitives_markdown + assert "| `uint16` |" in primitives_markdown + assert "| `uint32` |" in primitives_markdown + + def test_contains_floats(self, primitives_markdown: str) -> None: + assert "| `float32` |" in primitives_markdown + assert "| `float64` |" in primitives_markdown + + def test_ranges_match_schema_constraints(self, primitives_markdown: str) -> None: + """Range strings derive from ge/le constraints in the schema.""" + assert "-128 to 127" in primitives_markdown + assert "-32,768 to 32,767" in primitives_markdown + assert "-2,147,483,648 to 2,147,483,647" in primitives_markdown + assert "-2^63 to 2^63-1" in primitives_markdown + assert "0 to 255" in primitives_markdown + assert "0 to 65,535" in primitives_markdown + assert "0 
to 4,294,967,295" in primitives_markdown + + def test_descriptions_from_docstrings(self, primitives_markdown: str) -> None: + """Descriptions derive from first line of NewType docstrings.""" + assert "Portable 8-bit signed integer." in primitives_markdown + assert "Portable 16-bit unsigned integer." in primitives_markdown + assert "Portable IEEE 32-bit floating point number." in primitives_markdown + + def test_float_precision(self, primitives_markdown: str) -> None: + """Float entries show IEEE 754 precision.""" + assert "~7 decimal digits" in primitives_markdown + assert "~15 decimal digits" in primitives_markdown + + def test_pipe_in_description_escaped(self) -> None: + """Pipe characters in primitive descriptions are escaped.""" + specs = [ + PrimitiveSpec( + name="int8", + description="Range: -128 | 127", + bounds=Interval(ge=-128, le=127), + ), + ] + result = render_primitives_from_specs(specs) + assert "Range: -128 \\| 127" in result + + +class TestRenderGeometryPage: + """Tests for the aggregate geometry page.""" + + def test_contains_title(self, geometry_markdown: str) -> None: + assert "# Geometry Types" in geometry_markdown + + def test_contains_geometry_types(self, geometry_markdown: str) -> None: + assert "Geometry" in geometry_markdown + assert "BBox" in geometry_markdown + assert "GeometryType" in geometry_markdown + + def test_lists_geometry_type_values(self, geometry_markdown: str) -> None: + assert "`point`" in geometry_markdown or "`POINT`" in geometry_markdown + + +class TestRenderUnionTemplate: + """Tests for UnionSpec template rendering with synthetic specs.""" + + def test_shared_fields_have_no_variant_tag(self) -> None: + """Shared fields render without variant annotation.""" + spec = make_union_spec( + description="A test union.", + annotated_fields=[ + AnnotatedField( + field_spec=FieldSpec( + name="id", + type_info=STR_TYPE, + description="ID", + is_required=True, + ), + variant_sources=None, + ), + ], + ) + result = render_feature(spec) 
+ assert "| `id` |" in result + assert "*(" not in result # no variant tag + + def test_variant_fields_have_inline_tag(self) -> None: + """Variant-specific fields get *(Variant)* tag.""" + spec = make_union_spec( + name="Segment", + annotated_fields=[ + AnnotatedField( + field_spec=FieldSpec( + name="speed_limit", + type_info=STR_TYPE, + description=None, + is_required=False, + ), + variant_sources=("RoadSegment",), + ), + ], + ) + result = render_feature(spec) + assert "| `speed_limit` *(Road)* |" in result + + +class TestFormatConstraintDisplay: + """Tests for FieldConstraint display with on-demand description/pattern extraction.""" + + def test_description_and_pattern(self) -> None: + """Constraint with docstring and pattern renders both.""" + cs = ConstraintSource( + source_ref=None, source_name=None, constraint=CountryCodeAlpha2Constraint() + ) + result = _format_constraint(cs, None) + assert "Allows only ISO 3166-1 alpha-2 country codes." in result.display + assert "`CountryCodeAlpha2Constraint`" in result.display + assert "pattern: `^[A-Z]{2}$`" in result.display + + def test_description_without_pattern(self) -> None: + """Constraint with docstring but no pattern renders description only.""" + cs = ConstraintSource( + source_ref=None, source_name=None, constraint=JsonPointerConstraint() + ) + result = _format_constraint(cs, None) + assert "Allows only valid JSON Pointer values (RFC 6901)." 
in result.display + assert "`JsonPointerConstraint`" in result.display + assert "pattern" not in result.display + + def test_no_description_falls_through(self) -> None: + """Plain string metadata has no docstring and falls through.""" + cs = ConstraintSource( + source_ref=None, source_name=None, constraint="plain string metadata" + ) + result = _format_constraint(cs, None) + assert result.display == "`plain string metadata`" + + def test_annotated_types_uses_operator_notation_not_docstring(self) -> None: + """annotated-types constraints use operator notation, not their __doc__.""" + cs = ConstraintSource(source_ref=None, source_name=None, constraint=Ge(ge=0)) + result = _format_constraint(cs, None) + assert result.display == "`≥ 0`" + assert "Ge(ge=x)" not in result.display + + def test_constraint_class_not_linked(self) -> None: + """Constraint class name stays in backticks (no pages generated for constraints).""" + cs = ConstraintSource( + source_ref=None, source_name=None, constraint=CountryCodeAlpha2Constraint() + ) + result = _format_constraint(cs, None) + assert "`CountryCodeAlpha2Constraint`" in result.display + assert "[`CountryCodeAlpha2Constraint`](" not in result.display + + +def _feature_spec() -> object: + return extract_model(SimpleModel) + + +def _enum_spec() -> object: + return _ROOF_SHAPE_SPEC + + +def _newtype_spec() -> object: + return extract_newtype(HexColor) + + +_USED_BY_CASES = [ + pytest.param(_feature_spec, render_feature, id="feature"), + pytest.param(_enum_spec, render_enum, id="enum"), + pytest.param(_newtype_spec, render_newtype, id="newtype"), +] + + +class TestUsedByRendering: + """Tests for rendering 'Used By' section across all render functions.""" + + @pytest.mark.parametrize(("spec_factory", "render_fn"), _USED_BY_CASES) + def test_entries_render_without_links_when_no_context( + self, + spec_factory: Callable[[], object], + render_fn: Callable[..., str], + ) -> None: + """Without LinkContext, 'Used By' entries render as inline 
code.""" + _building = object() + _building_id = object() + used_by = [ + UsedByEntry( + identity=TypeIdentity(_building, "Building"), kind=UsedByKind.MODEL + ), + UsedByEntry( + identity=TypeIdentity(_building_id, "BuildingId"), + kind=UsedByKind.NEWTYPE, + ), + ] + + result = render_fn(spec_factory(), used_by=used_by) + + assert "## Used By" in result + assert "- `Building`" in result + assert "- `BuildingId`" in result + + @pytest.mark.parametrize( + ("spec_factory", "render_fn", "page_path", "expected_link"), + [ + pytest.param( + _feature_spec, + render_feature, + PurePosixPath("types/strings/hex_color.md"), + "../../buildings/building/building.md", + id="feature", + ), + pytest.param( + _enum_spec, + render_enum, + PurePosixPath("buildings/roof_shape.md"), + "building/building.md", + id="enum", + ), + pytest.param( + _newtype_spec, + render_newtype, + PurePosixPath("types/strings/hex_color.md"), + "../../buildings/building/building.md", + id="newtype", + ), + ], + ) + def test_link_context_uses_registry( + self, + spec_factory: Callable[[], object], + render_fn: Callable[..., str], + page_path: PurePosixPath, + expected_link: str, + ) -> None: + """Used-by entries resolve links through placement registry.""" + _building = object() + _building_identity = TypeIdentity(_building, "Building") + registry = { + _building_identity: PurePosixPath("buildings/building/building.md"), + } + ctx = LinkContext(page_path, registry) + used_by = [UsedByEntry(identity=_building_identity, kind=UsedByKind.MODEL)] + + result = render_fn(spec_factory(), link_ctx=ctx, used_by=used_by) + + assert "## Used By" in result + assert f"[`Building`]({expected_link})" in result + + @pytest.mark.parametrize(("spec_factory", "render_fn"), _USED_BY_CASES) + def test_no_used_by_omits_section( + self, + spec_factory: Callable[[], object], + render_fn: Callable[..., str], + ) -> None: + """When used_by is None, 'Used By' section is not rendered.""" + result = render_fn(spec_factory(), 
used_by=None) + + assert "## Used By" not in result + + @pytest.mark.parametrize(("spec_factory", "render_fn"), _USED_BY_CASES) + def test_empty_used_by_omits_section( + self, + spec_factory: Callable[[], object], + render_fn: Callable[..., str], + ) -> None: + """When used_by is empty list, 'Used By' section is not rendered.""" + result = render_fn(spec_factory(), used_by=[]) + + assert "## Used By" not in result + + +class TestRenderPydanticType: + """Tests for render_pydantic_type.""" + + def test_heading_is_pascal_case(self) -> None: + result = render_pydantic_type(HTTP_URL_SPEC) + assert result.startswith("# HttpUrl\n") + + def test_description_rendered(self) -> None: + result = render_pydantic_type(HTTP_URL_SPEC) + assert "A type that will accept any http or https URL." in result + + def test_no_description_omits_paragraph(self) -> None: + result = render_pydantic_type(EMAIL_STR_SPEC) + lines = result.strip().split("\n") + assert lines[0] == "# EmailStr" + + def test_pydantic_docs_link(self) -> None: + result = render_pydantic_type(HTTP_URL_SPEC) + assert ( + "https://docs.pydantic.dev/latest/api/networks/#pydantic.networks.HttpUrl" + in result + ) + + def test_used_by_section(self) -> None: + place_cls = type("Place", (), {}) + place_id = TypeIdentity(place_cls, "Place") + used_by = [UsedByEntry(place_id, UsedByKind.MODEL)] + ctx = LinkContext( + page_path=PurePosixPath("pydantic/networks/http_url.md"), + registry={place_id: PurePosixPath("places/place/place.md")}, + ) + result = render_pydantic_type(HTTP_URL_SPEC, link_ctx=ctx, used_by=used_by) + assert "## Used By" in result + assert "Place" in result diff --git a/packages/overture-schema-codegen/tests/test_markdown_type_format.py b/packages/overture-schema-codegen/tests/test_markdown_type_format.py new file mode 100644 index 000000000..e54426f5f --- /dev/null +++ b/packages/overture-schema-codegen/tests/test_markdown_type_format.py @@ -0,0 +1,317 @@ +"""Tests for markdown type formatting.""" + +from enum 
import Enum +from pathlib import PurePosixPath +from typing import Literal, NewType + +from overture.schema.codegen.extraction.specs import FieldSpec, TypeIdentity +from overture.schema.codegen.extraction.type_analyzer import ( + TypeInfo, + TypeKind, + analyze_type, +) +from overture.schema.codegen.markdown.link_computation import LinkContext +from overture.schema.codegen.markdown.type_format import ( + format_dict_type, + format_type, + format_underlying_type, +) +from overture.schema.system.primitive import int32 +from pydantic import BaseModel, HttpUrl + + +class _ModelA(BaseModel): + x: int + + +class _ModelB(BaseModel): + y: str + + +class TestFormatType: + """Tests for format_type.""" + + def test_plain_str_renders_as_string(self) -> None: + ti = analyze_type(str) + assert format_type(_make_field(ti)) == "`string`" + + def test_optional_adds_qualifier(self) -> None: + ti = analyze_type(str | None) + assert format_type(_make_field(ti, is_required=False)) == "`string` (optional)" + + def test_literal_renders_as_quoted_value(self) -> None: + ti = analyze_type(Literal["places"]) + assert format_type(_make_field(ti)) == '`"places"`' + + def test_multi_value_literal_renders_comma_separated(self) -> None: + ti = analyze_type(Literal["a", "b", "c"]) + assert format_type(_make_field(ti)) == '`"a"` \\| `"b"` \\| `"c"`' + + def test_enum_without_context_renders_as_code(self) -> None: + class Color(str, Enum): + RED = "red" + + ti = analyze_type(Color) + assert format_type(_make_field(ti)) == "`Color`" + + def test_enum_with_link_context(self) -> None: + class Color(str, Enum): + RED = "red" + + ti = analyze_type(Color) + field = _make_field(ti) + ctx = LinkContext( + page_path=PurePosixPath("buildings/building/building.md"), + registry={ + TypeIdentity(Color, "Color"): PurePosixPath("types/enums/color.md") + }, + ) + assert format_type(field, ctx) == "[`Color`](../../types/enums/color.md)" + + def test_list_of_primitives(self) -> None: + ti = analyze_type(list[str]) + 
assert format_type(_make_field(ti)) == "`list<string>`" + + def test_nested_list_of_primitives(self) -> None: + ti = analyze_type(list[list[str]]) + assert format_type(_make_field(ti)) == "`list<list<string>>`" + + def test_registered_primitive_not_linked(self) -> None: + ti = analyze_type(int32) + result = format_type(_make_field(ti)) + assert result == "`int32`" + assert "](int32.md)" not in result + + +class TestFormatDictType: + """Tests for format_dict_type.""" + + def test_simple_dict_renders_as_map(self) -> None: + ti = analyze_type(dict[str, int]) + result = format_dict_type(ti) + assert result == "map" + + def test_dict_with_newtype_shows_semantic_name(self) -> None: + MyKey = NewType("MyKey", str) + ti = analyze_type(dict[MyKey, int]) + result = format_dict_type(ti) + assert result == "map" + + +def _make_field( + ti: TypeInfo, *, name: str = "x", is_required: bool = True +) -> FieldSpec: + """Build a FieldSpec for test convenience.""" + return FieldSpec(name=name, type_info=ti, description=None, is_required=is_required) + + +class TestFormatUnionType: + """Tests for UNION-kind TypeInfo in format_type.""" + + def test_union_renders_all_members(self) -> None: + ti = analyze_type(_ModelA | _ModelB) + result = format_type(_make_field(ti)) + assert "`_ModelA`" in result + assert "`_ModelB`" in result + # Pipe separator escaped for table cells + assert r"\|" in result + + def test_union_with_link_context_links_each_member(self) -> None: + ti = analyze_type(_ModelA | _ModelB) + ctx = LinkContext( + page_path=PurePosixPath("theme/feature/feature.md"), + registry={ + TypeIdentity(_ModelA, "_ModelA"): PurePosixPath( + "theme/feature/types/model_a.md" + ), + TypeIdentity(_ModelB, "_ModelB"): PurePosixPath( + "theme/feature/types/model_b.md" + ), + }, + ) + result = format_type(_make_field(ti), ctx) + assert "[`_ModelA`](types/model_a.md)" in result + assert "[`_ModelB`](types/model_b.md)" in result + + def test_optional_union_adds_qualifier(self) -> None: + ti = analyze_type(_ModelA | 
_ModelB | None) + result = format_type(_make_field(ti, is_required=False)) + assert "(optional)" in result + assert "`_ModelA`" in result + assert "`_ModelB`" in result + + def test_list_of_union_adds_qualifier(self) -> None: + ti = TypeInfo( + base_type="_ModelA", + kind=TypeKind.UNION, + list_depth=1, + union_members=(_ModelA, _ModelB), + ) + result = format_type(_make_field(ti)) + assert "(list)" in result + assert "`_ModelA`" in result + assert "`_ModelB`" in result + + def test_union_members_unlinked_without_context(self) -> None: + ti = analyze_type(_ModelA | _ModelB) + result = format_type(_make_field(ti)) + # No markdown links without context + assert "]()" not in result + assert "[`" not in result + + def test_union_partial_links(self) -> None: + """Members with pages get linked; members without don't.""" + ti = analyze_type(_ModelA | _ModelB) + ctx = LinkContext( + page_path=PurePosixPath("theme/feature/feature.md"), + registry={ + TypeIdentity(_ModelA, "_ModelA"): PurePosixPath( + "theme/feature/types/model_a.md" + ) + }, + ) + result = format_type(_make_field(ti), ctx) + assert "[`_ModelA`](types/model_a.md)" in result + assert "`_ModelB`" in result + # _ModelB should NOT be linked + assert "[`_ModelB`]" not in result + + +class TestPydanticTypeLinking: + """Tests for PRIMITIVE types with pages getting linked.""" + + def test_pydantic_type_linked_when_in_registry(self) -> None: + ti = analyze_type(HttpUrl) + field = _make_field(ti) + ctx = LinkContext( + page_path=PurePosixPath("places/place/place.md"), + registry={ + TypeIdentity(HttpUrl, "HttpUrl"): PurePosixPath( + "pydantic/networks/http_url.md" + ) + }, + ) + result = format_type(field, ctx) + assert "[`HttpUrl`]" in result + assert "pydantic/networks/http_url.md" in result + + def test_pydantic_type_unlinked_without_registry_entry(self) -> None: + ti = analyze_type(HttpUrl) + field = _make_field(ti) + ctx = LinkContext( + page_path=PurePosixPath("places/place/place.md"), + registry={}, + ) + 
result = format_type(field, ctx) + assert result == "`HttpUrl`" + assert "[" not in result + + def test_list_of_pydantic_type_linked(self) -> None: + ti = analyze_type(list[HttpUrl]) + field = _make_field(ti) + ctx = LinkContext( + page_path=PurePosixPath("places/place/place.md"), + registry={ + TypeIdentity(HttpUrl, "HttpUrl"): PurePosixPath( + "pydantic/networks/http_url.md" + ) + }, + ) + result = format_type(field, ctx) + assert "HttpUrl" in result + assert "pydantic/networks/http_url.md" in result + + def test_registered_primitive_links_to_aggregate_page(self) -> None: + """int32 links to the primitives aggregate page when in registry.""" + ti = analyze_type(int32) + field = _make_field(ti) + ctx = LinkContext( + page_path=PurePosixPath("places/place/place.md"), + registry={ + TypeIdentity(int32, "int32"): PurePosixPath( + "system/primitive/primitives.md" + ) + }, + ) + result = format_type(field, ctx) + assert "[`int32`]" in result + assert "system/primitive/primitives.md" in result + + +class TestListOfSemanticNewtype: + """Tests for list[SemanticNewType] rendering. + + When a scalar NewType appears inside list[], the type renders as + list<NewTypeName> rather than NewTypeName (list). The (list) qualifier + is reserved for NewTypes that internally wrap a list. 
+ """ + + def test_list_of_scalar_newtype_renders_list_syntax(self) -> None: + """list[ScalarNewType] renders as list, not Name (list).""" + ScalarNT = NewType("ScalarNT", str) + ti = analyze_type(list[ScalarNT]) + result = format_type(_make_field(ti)) + assert "list<" in result + assert "ScalarNT" in result + assert "(list)" not in result + + def test_newtype_wrapping_list_renders_qualifier(self) -> None: + """NewType wrapping list[X] renders as Name (list).""" + ListNT = NewType("ListNT", list[str]) + ti = analyze_type(ListNT) + result = format_type(_make_field(ti)) + assert "(list)" in result + assert "ListNT" in result + + def test_list_of_scalar_newtype_with_link(self) -> None: + """list[ScalarNewType] with link context renders linked list.""" + ScalarNT = NewType("ScalarNT", str) + ti = analyze_type(list[ScalarNT]) + field = _make_field(ti) + ctx = LinkContext( + page_path=PurePosixPath("places/place/place.md"), + registry={ + TypeIdentity(ScalarNT, "ScalarNT"): PurePosixPath("system/scalar_nt.md") + }, + ) + result = format_type(field, ctx) + assert "list<" in result + assert "ScalarNT" in result + assert "system/scalar_nt.md" in result + assert "(list)" not in result + + def test_nested_list_of_scalar_newtype_renders_nested_list_syntax(self) -> None: + """list[list[ScalarNewType]] renders as list>.""" + ScalarNT = NewType("ScalarNT", str) + ti = analyze_type(list[list[ScalarNT]]) + result = format_type(_make_field(ti)) + assert "list<" in result + assert "list<`" in result or "`list None: + ti = analyze_type(_ModelA | _ModelB) + result = format_underlying_type(ti) + assert result == "`_ModelA` | `_ModelB`" + + def test_union_with_link_context(self) -> None: + ti = analyze_type(_ModelA | _ModelB) + ctx = LinkContext( + page_path=PurePosixPath("types/my_union.md"), + registry={ + TypeIdentity(_ModelA, "_ModelA"): PurePosixPath( + "theme/feature/types/model_a.md" + ), + TypeIdentity(_ModelB, "_ModelB"): PurePosixPath( + "theme/feature/types/model_b.md" + ), + 
}, + ) + result = format_underlying_type(ti, ctx) + assert "[`_ModelA`](../theme/feature/types/model_a.md)" in result + assert "[`_ModelB`](../theme/feature/types/model_b.md)" in result diff --git a/packages/overture-schema-codegen/tests/test_model_extractor.py b/packages/overture-schema-codegen/tests/test_model_extractor.py new file mode 100644 index 000000000..f2b2bd257 --- /dev/null +++ b/packages/overture-schema-codegen/tests/test_model_extractor.py @@ -0,0 +1,549 @@ +"""Tests for model extraction.""" + +from typing import Annotated, Literal + +from codegen_test_support import ( + FeatureBase, + FeatureWithAddress, + Instrument, + SourceItem, + TreeNode, + Venue, + assert_literal_field, + find_field, +) +from overture.schema.codegen.extraction.model_extraction import ( + expand_model_tree, + extract_model, +) +from overture.schema.codegen.extraction.specs import ModelSpec +from overture.schema.system.field_constraint import UniqueItemsConstraint +from overture.schema.system.model_constraint import ( + FieldEqCondition, + FieldGroupConstraint, + require_any_of, + require_if, +) +from overture.schema.system.primitive import ( + Geometry, + GeometryType, + GeometryTypeConstraint, +) +from overture.schema.system.string import HexColor +from pydantic import BaseModel, Field + + +class TestModelConstraints: + """Model-level constraint extraction.""" + + def test_unconstrained_model_has_empty_constraints(self) -> None: + """Models without decorators produce an empty constraints tuple.""" + + class Plain(BaseModel): + name: str + + spec = extract_model(Plain) + + assert spec.constraints == () + + def test_extracts_require_any_of(self) -> None: + """Should extract @require_any_of from a decorated model.""" + spec = extract_model(Venue) + + assert len(spec.constraints) == 1 + (constraint,) = spec.constraints + assert constraint.name == "@require_any_of" + assert isinstance(constraint, FieldGroupConstraint) + assert constraint.field_names == ("name", "description") + + 
def test_stacked_constraints_preserve_order(self) -> None: + """Multiple decorators extracted in stacking order (inner-first).""" + + @require_if(["bar"], FieldEqCondition("baz", "x")) + @require_any_of("foo", "bar") + class Stacked(BaseModel): + foo: str | None = None + bar: str | None = None + baz: str | None = None + + spec = extract_model(Stacked) + + assert len(spec.constraints) == 2 + assert spec.constraints[0].name == "@require_any_of" + assert spec.constraints[1].name == "@require_if" + + +class TestExtractModelSimple: + """Tests for extract_model with simple Pydantic models.""" + + def test_extract_simple_model(self) -> None: + """Should extract basic model information.""" + + class SimpleModel(BaseModel): + """A simple test model.""" + + name: str + + result = extract_model(SimpleModel) + + assert result.name == "SimpleModel" + assert result.description == "A simple test model." + assert len(result.fields) == 1 + assert result.fields[0].name == "name" + assert result.fields[0].type_info.base_type == "str" + assert result.fields[0].is_required is True + + def test_extract_model_does_not_set_entry_point(self) -> None: + class M(BaseModel): + x: int + + result = extract_model(M) + assert result.entry_point is None + + def test_extract_model_with_optional_field(self) -> None: + """Should handle optional fields correctly.""" + + class ModelWithOptional(BaseModel): + """Model with optional field.""" + + name: str + nickname: str | None = None + + result = extract_model(ModelWithOptional) + + assert len(result.fields) == 2 + + name_field = find_field(result, "name") + assert name_field.is_required is True + + nickname_field = find_field(result, "nickname") + assert nickname_field.is_required is False + assert nickname_field.type_info.is_optional is True + + def test_extract_model_with_field_description(self) -> None: + """Should extract field descriptions from Field().""" + + class ModelWithDescription(BaseModel): + """Model with field descriptions.""" + + name: 
str = Field(description="The name of the entity") + + result = extract_model(ModelWithDescription) + + assert result.fields[0].description == "The name of the entity" + + def test_extract_model_with_list_field(self) -> None: + """Should handle list fields correctly.""" + + class ModelWithList(BaseModel): + """Model with list field.""" + + tags: list[str] + + result = extract_model(ModelWithList) + + tags_field = result.fields[0] + assert tags_field.name == "tags" + assert tags_field.type_info.is_list is True + assert tags_field.type_info.base_type == "str" + + +class TestExtractModelWithThemeType: + """Tests for extracting theme/type from Feature-like models.""" + + def test_extract_theme_and_type_from_generic(self) -> None: + """Should extract theme and type as Literal fields.""" + + class Place(FeatureBase[Literal["places"], Literal["place"]]): + """A place feature.""" + + name: str + + result = extract_model(Place) + assert_literal_field(result, "theme", "places") + assert_literal_field(result, "type", "place") + + def test_extract_different_theme_type(self) -> None: + """Should handle different theme/type values as Literal fields.""" + + class Building(FeatureBase[Literal["buildings"], Literal["building"]]): + """A building feature.""" + + height: float | None = None + + result = extract_model(Building) + assert_literal_field(result, "theme", "buildings") + assert_literal_field(result, "type", "building") + + def test_non_feature_model_has_no_theme_type(self) -> None: + """Regular models without Generic base should have no theme/type fields.""" + + class RegularModel(BaseModel): + """A regular model.""" + + value: int + + result = extract_model(RegularModel) + + field_names = [f.name for f in result.fields] + assert "theme" not in field_names + assert "type" not in field_names + + +class TestExtractModelFieldAlias: + """Tests for field alias handling in extract_model.""" + + def test_field_with_alias_uses_alias_name(self) -> None: + """Fields with alias should 
use alias as the field name, not Python attr name.""" + + class ModelWithAlias(BaseModel): + """Model with aliased field.""" + + class_: str | None = Field(default=None, alias="class") + + result = extract_model(ModelWithAlias) + + # Should use alias 'class', not Python name 'class_' + class_field = result.fields[0] + assert class_field.name == "class" + + def test_field_without_alias_uses_python_name(self) -> None: + """Fields without alias should use Python attribute name.""" + + class ModelWithoutAlias(BaseModel): + """Model without alias.""" + + name: str + + result = extract_model(ModelWithoutAlias) + + assert result.fields[0].name == "name" + + +class TestExtractModelDocstring: + """Tests for docstring extraction and cleaning.""" + + def test_multiline_docstring_has_indentation_stripped(self) -> None: + """Multi-line docstrings should have leading whitespace stripped. + + Docstrings defined in classes have leading whitespace on continuation + lines. This should be stripped so they render as normal paragraphs + in Markdown, not as code blocks. + """ + + class ModelWithMultilineDoc(BaseModel): + """A model with multi-line docstring. + + This is a second paragraph that would have leading + whitespace in the raw __doc__ attribute. 
+ """ + + name: str + + result = extract_model(ModelWithMultilineDoc) + + # Description should NOT have leading whitespace on continuation lines + assert result.description is not None + assert "\n " not in result.description + # Should still have the content + assert "second paragraph" in result.description + + +class TestFieldOrderingWithMixins: + """Tests for field ordering when a model has multiple inheritance.""" + + def test_mixin_fields_come_after_primary_chain_and_own(self) -> None: + """Fields from mixin bases should appear after primary chain and own fields.""" + + class PrimaryBase(BaseModel): + base_field: str + + class MixinA(BaseModel): + a_field: str + + class MixinB(BaseModel): + b_field: str + + class Child(PrimaryBase, MixinA, MixinB): + """A child model with mixins.""" + + own_field: str + + result = extract_model(Child) + field_names = [f.name for f in result.fields] + + assert field_names == ["base_field", "own_field", "a_field", "b_field"] + + def test_single_inheritance_order_unchanged(self) -> None: + """Single-inheritance models should keep Pydantic's default order.""" + + class Parent(BaseModel): + parent_field: str + + class Child(Parent): + """A child model.""" + + child_field: str + + result = extract_model(Child) + field_names = [f.name for f in result.fields] + + assert field_names == ["parent_field", "child_field"] + + def test_mixin_fields_in_declaration_order(self) -> None: + """Mixin fields should appear in class declaration order, not reversed MRO.""" + + class Primary(BaseModel): + p: str + + class MixinFirst(BaseModel): + first: str + + class MixinSecond(BaseModel): + second: str + + class MixinThird(BaseModel): + third: str + + class Model(Primary, MixinFirst, MixinSecond, MixinThird): + """Model with three mixins.""" + + own: str + + result = extract_model(Model) + field_names = [f.name for f in result.fields] + + # Mixins in declaration order: First, Second, Third + assert field_names == ["p", "own", "first", "second", 
"third"] + + def test_deep_primary_chain_before_mixins(self) -> None: + """Fields from the entire primary chain should precede mixin fields.""" + + class GrandParent(BaseModel): + gp_field: str + + class Parent(GrandParent): + p_field: str + + class Mixin(BaseModel): + m_field: str + + class Child(Parent, Mixin): + """Child with deep primary chain.""" + + own_field: str + + result = extract_model(Child) + field_names = [f.name for f in result.fields] + + assert field_names == ["gp_field", "p_field", "own_field", "m_field"] + + def test_recursive_mixin_reordering(self) -> None: + """Mixins on primary-chain classes should also be reordered.""" + + class CoreBase(BaseModel): + core: str + + class ParentMixin(BaseModel): + pm: str + + class Parent(CoreBase, ParentMixin): + p: str + + class ChildMixin(BaseModel): + cm: str + + class Child(Parent, ChildMixin): + """Child where primary-chain parent has its own mixin.""" + + own: str + + result = extract_model(Child) + field_names = [f.name for f in result.fields] + + # CoreBase (Parent's primary) -> Parent own -> ParentMixin -> Child own -> ChildMixin + assert field_names == ["core", "p", "pm", "own", "cm"] + + +class TestExpandModelTree: + """Tests for expand_model_tree.""" + + def test_model_without_sub_models_unchanged(self) -> None: + """Fields without MODEL kind get model=None.""" + + class Simple(BaseModel): + name: str + count: int + + spec = extract_model(Simple) + expand_model_tree(spec) + + for f in spec.fields: + assert f.model is None + assert f.starts_cycle is False + + def test_nested_model_gets_expanded(self) -> None: + """MODEL-kind fields get their model populated.""" + spec = extract_model(FeatureWithAddress) + expand_model_tree(spec) + + addr_field = find_field(spec, "address") + assert addr_field.model is not None + assert addr_field.model.name == "Address" + assert addr_field.starts_cycle is False + + # Sub-model fields should exist + sub_names = [f.name for f in addr_field.model.fields] + assert 
"street" in sub_names + assert "city" in sub_names + + def test_cycle_detected_and_marked(self) -> None: + """Self-referential model gets starts_cycle=True.""" + spec = extract_model(TreeNode) + expand_model_tree(spec) + + parent_field = find_field(spec, "parent") + assert parent_field.model is not None + assert parent_field.model is spec # Same object -- cycle + assert parent_field.starts_cycle is True + + def test_shared_reference_not_marked_as_cycle(self) -> None: + """Two models referencing the same sub-model share it without cycle.""" + + class Shared(BaseModel): + value: str + + class ModelA(BaseModel): + ref: Shared + + class ModelB(BaseModel): + ref: Shared + + cache: dict[type, ModelSpec] = {} + spec_a = extract_model(ModelA) + expand_model_tree(spec_a, cache) + + spec_b = extract_model(ModelB) + expand_model_tree(spec_b, cache) + + ref_a = find_field(spec_a, "ref") + ref_b = find_field(spec_b, "ref") + + # Same ModelSpec object, neither is a cycle + assert ref_a.model is ref_b.model + assert ref_a.starts_cycle is False + assert ref_b.starts_cycle is False + + def test_list_of_model_gets_expanded(self) -> None: + """list[Model] fields also get their model populated.""" + + class HasList(BaseModel): + items: list[SourceItem] + + spec = extract_model(HasList) + expand_model_tree(spec) + + items_field = find_field(spec, "items") + assert items_field.model is not None + assert items_field.model.name == "SourceItem" + + +class TestFieldInfoMetadataConstraints: + """Constraints from field_info.metadata are merged into TypeInfo. + + Pydantic strips the Annotated wrapper from some fields and moves the + metadata to field_info.metadata. extract_model merges these back into + TypeInfo.constraints so they aren't silently dropped. 
+ """ + + def test_geometry_type_constraint_extracted(self) -> None: + """GeometryTypeConstraint on geometry field should appear in constraints.""" + spec = extract_model(Venue) + geometry_field = find_field(spec, "geometry") + + constraint_types = [ + type(cs.constraint) for cs in geometry_field.type_info.constraints + ] + assert GeometryTypeConstraint in constraint_types + + def test_geometry_type_constraint_has_null_source(self) -> None: + """Constraints from field_info.metadata have source_ref=None (not from a NewType).""" + spec = extract_model(Venue) + geometry_field = find_field(spec, "geometry") + + geo_constraints = [ + cs + for cs in geometry_field.type_info.constraints + if isinstance(cs.constraint, GeometryTypeConstraint) + ] + assert len(geo_constraints) == 1 + assert geo_constraints[0].source_ref is None + + def test_metadata_constraints_not_duplicated(self) -> None: + """Fields where Pydantic preserves Annotated don't get duplicate constraints. + + When field_info.metadata is empty (Pydantic kept the Annotated wrapper), + no extra constraints are added. 
+ """ + spec = extract_model(Instrument) + tags_field = find_field(spec, "tags") + + unique_constraints = [ + cs + for cs in tags_field.type_info.constraints + if isinstance(cs.constraint, UniqueItemsConstraint) + ] + assert len(unique_constraints) == 1 + + def test_standalone_annotated_field_extracts_metadata(self) -> None: + """Direct Annotated[Type, constraint] fields (non-optional, non-union) + get their constraints from field_info.metadata.""" + + class Model(BaseModel): + geo: Annotated[ + Geometry, + GeometryTypeConstraint(GeometryType.POINT), + ] + + spec = extract_model(Model) + geo_field = find_field(spec, "geo") + + constraint_types = [ + type(cs.constraint) for cs in geo_field.type_info.constraints + ] + assert GeometryTypeConstraint in constraint_types + + +class TestFieldDescriptionFallback: + """Tests for field description fallback from NewType Field metadata.""" + + def test_field_inherits_newtype_description(self) -> None: + """Field with no explicit description gets NewType's Field description.""" + + class TestModel(BaseModel): + color: HexColor + + spec = extract_model(TestModel) + field = find_field(spec, "color") + assert field.description is not None + assert "color" in field.description.lower() + + def test_explicit_description_not_overridden(self) -> None: + """Field with explicit description keeps its own, ignores NewType's.""" + + class TestModel(BaseModel): + color: HexColor = Field(description="Custom color description") + + spec = extract_model(TestModel) + field = find_field(spec, "color") + assert field.description == "Custom color description" + + def test_field_without_newtype_description_stays_none(self) -> None: + """Field typed as plain str (no NewType description) keeps None.""" + + class TestModel(BaseModel): + name: str + + spec = extract_model(TestModel) + field = find_field(spec, "name") + assert field.description is None diff --git a/packages/overture-schema-codegen/tests/test_module_layout.py 
class TestComputeSchemaRoot:
    """Behavior of compute_schema_root for various module-path inputs."""

    def test_multiple_paths_common_prefix(self) -> None:
        """Several sibling theme modules share the dotted prefix as root."""
        modules = [
            "overture.schema.buildings",
            "overture.schema.places",
            "overture.schema.divisions",
        ]
        assert compute_schema_root(modules) == "overture.schema"

    def test_single_path_drops_last_component(self) -> None:
        """A lone path yields its parent package as the root."""
        root = compute_schema_root(["overture.schema.buildings"])
        assert root == "overture.schema"

    def test_mixed_depth_paths(self) -> None:
        """Paths of different depth still reduce to the common prefix."""
        modules = [
            "overture.schema.buildings",
            "overture.schema.core.names.primary_name",
        ]
        assert compute_schema_root(modules) == "overture.schema"

    def test_divergent_namespaces(self) -> None:
        """Paths with no shared prefix produce an empty root."""
        modules = ["overture.schema.buildings", "acme.transit"]
        assert compute_schema_root(modules) == ""

    def test_empty_raises(self) -> None:
        """An empty path list is rejected with ValueError."""
        with pytest.raises(ValueError):
            compute_schema_root([])

    def test_single_component_path(self) -> None:
        """A one-component path has nothing left once the last part is dropped."""
        assert compute_schema_root(["buildings"]) == ""

    def test_identical_paths_deduplicated(self) -> None:
        """Duplicate paths behave the same as a single path."""
        modules = ["overture.schema.buildings", "overture.schema.buildings"]
        assert compute_schema_root(modules) == "overture.schema"
class TestModuleRelpath:
    """module_relpath strips a root prefix from a dotted module path."""

    def test_strips_root_prefix(self) -> None:
        """The root prefix and its trailing dot are removed."""
        rel = module_relpath("overture.schema.buildings", "overture.schema")
        assert rel == "buildings"

    def test_deep_path(self) -> None:
        """Deeply nested modules keep every component past the root."""
        rel = module_relpath(
            "overture.schema.core.names.primary_name", "overture.schema"
        )
        assert rel == "core.names.primary_name"

    def test_module_equals_root(self) -> None:
        """A module identical to the root maps to the empty string."""
        assert module_relpath("overture.schema", "overture.schema") == ""

    def test_empty_root(self) -> None:
        """With an empty root, the module path is returned unchanged."""
        assert module_relpath("buildings", "") == "buildings"

    def test_nonmatching_raises(self) -> None:
        """A module outside the root namespace is rejected with ValueError."""
        with pytest.raises(ValueError):
            module_relpath("acme.transit", "overture.schema")
+ """ + registry: dict[str, object] = {} + for mod_path, is_pkg in entries: + if is_pkg: + registry[mod_path] = type("pkg", (), {"__path__": ["/fake"]})() + else: + registry[mod_path] = type("mod", (), {})() + return registry + + +class TestIsPackageModule: + def test_package_has_path(self) -> None: + registry = _make_registry(("my.package", True)) + assert is_package_module("my.package", registry) is True + + def test_file_module_no_path(self) -> None: + registry = _make_registry(("my.module", False)) + assert is_package_module("my.module", registry) is False + + def test_missing_module_raises(self) -> None: + with pytest.raises(ValueError): + is_package_module("nonexistent", {}) + + +class TestComputeOutputDir: + def test_package_keeps_all_parts(self) -> None: + reg = _make_registry(("overture.schema.buildings", True)) + result = compute_output_dir("overture.schema.buildings", "overture.schema", reg) + assert result == PurePosixPath("buildings") + + def test_file_module_drops_last(self) -> None: + reg = _make_registry(("overture.schema.core.names.primary_name", False)) + result = compute_output_dir( + "overture.schema.core.names.primary_name", "overture.schema", reg + ) + assert result == PurePosixPath("core/names") + + def test_deep_package(self) -> None: + reg = _make_registry(("overture.schema.core.names", True)) + result = compute_output_dir( + "overture.schema.core.names", "overture.schema", reg + ) + assert result == PurePosixPath("core/names") + + def test_file_module_in_theme(self) -> None: + reg = _make_registry(("overture.schema.buildings.enums", False)) + result = compute_output_dir( + "overture.schema.buildings.enums", "overture.schema", reg + ) + assert result == PurePosixPath("buildings") + + def test_file_module_deep(self) -> None: + reg = _make_registry(("overture.schema.divisions.division.models", False)) + result = compute_output_dir( + "overture.schema.divisions.division.models", "overture.schema", reg + ) + assert result == 
PurePosixPath("divisions/division") + + def test_root_module_returns_dot(self) -> None: + reg = _make_registry(("overture.schema", True)) + result = compute_output_dir("overture.schema", "overture.schema", reg) + assert result == PurePosixPath(".") + + def test_file_module_one_level_returns_dot(self) -> None: + reg = _make_registry(("overture.schema.types", False)) + result = compute_output_dir("overture.schema.types", "overture.schema", reg) + assert result == PurePosixPath(".") diff --git a/packages/overture-schema-codegen/tests/test_naming.py b/packages/overture-schema-codegen/tests/test_naming.py new file mode 100644 index 000000000..77e4d5773 --- /dev/null +++ b/packages/overture-schema-codegen/tests/test_naming.py @@ -0,0 +1,23 @@ +"""Tests for PascalCase to snake_case conversion.""" + +import pytest +from overture.schema.codegen.extraction.case_conversion import to_snake_case + + +class TestToSnakeCase: + """Tests for snake_case conversion helper.""" + + @pytest.mark.parametrize( + ("input_name", "expected"), + [ + ("Building", "building"), + ("BuildingPart", "building_part"), + ("RoadSegment", "road_segment"), + ("Place", "place"), + ("simple", "simple"), # Already lowercase + ("HTTPServer", "http_server"), # Consecutive caps + ], + ) + def test_converts_pascal_to_snake(self, input_name: str, expected: str) -> None: + """PascalCase names should convert to snake_case.""" + assert to_snake_case(input_name) == expected diff --git a/packages/overture-schema-codegen/tests/test_newtype_extraction.py b/packages/overture-schema-codegen/tests/test_newtype_extraction.py new file mode 100644 index 000000000..6cd73c5c2 --- /dev/null +++ b/packages/overture-schema-codegen/tests/test_newtype_extraction.py @@ -0,0 +1,74 @@ +"""Tests for NewType extraction.""" + +from typing import Annotated, NewType + +from codegen_test_support import STR_TYPE +from overture.schema.codegen.extraction.newtype_extraction import extract_newtype +from overture.schema.codegen.extraction.specs 
class TestExtractNewType:
    """Tests for the extract_newtype function."""

    def test_extract_hex_color(self) -> None:
        """HexColor extracts with its own name recorded on the TypeInfo."""
        spec = extract_newtype(HexColor)
        assert spec.name == "HexColor"
        assert spec.type_info.newtype_name == "HexColor"

    def test_extract_id(self) -> None:
        """Id resolves through its wrapped NewType chain to NoWhitespaceString."""
        spec = extract_newtype(Id)
        assert spec.name == "Id"
        assert spec.type_info.newtype_name == "Id"
        assert spec.type_info.base_type == "NoWhitespaceString"

    def test_extract_newtype_wrapping_list(self) -> None:
        """A NewType over an annotated list is recognized as a list type."""

        class Item(BaseModel):
            value: str

        TestSources = NewType(
            "TestSources", Annotated[list[Item], UniqueItemsConstraint()]
        )
        spec = extract_newtype(TestSources)
        assert spec.name == "TestSources"
        assert spec.type_info.is_list is True
        assert spec.type_info.newtype_name == "TestSources"

    def test_extract_newtype_without_doc_uses_field_description(self) -> None:
        """With no custom __doc__, the Field(description=...) text is used."""
        TestType = NewType(
            "TestType",
            Annotated[str, Field(description="A test type description")],
        )
        assert extract_newtype(TestType).description == "A test type description"

    def test_extract_newtype_with_doc_ignores_field_description(self) -> None:
        """A custom __doc__ wins over Field(description=...).

        HexColor defines both; is_custom_docstring returns True, so the
        docstring is taken as the description.
        """
        spec = extract_newtype(HexColor)
        assert spec.description is not None
        assert "example" in spec.description.lower() or "#" in spec.description
p.name == "int32") + assert int32_id.obj is _system_primitive.int32 + + +class TestExtractPrimitives: + """Tests for extract_primitives function.""" + + def test_accepts_type_identities(self) -> None: + prims, _ = partition_primitive_and_geometry_names(_system_primitive) + specs = extract_primitives(prims) + assert len(specs) > 0 + names = [s.name for s in specs] + assert "int32" in names + + def test_extracts_bounds(self) -> None: + prims, _ = partition_primitive_and_geometry_names(_system_primitive) + specs = extract_primitives(prims) + int32_spec = next(s for s in specs if s.name == "int32") + assert int32_spec.bounds.ge == -(2**31) + assert int32_spec.bounds.le == 2**31 - 1 + + +class TestExtractNumericBounds: + """Tests for extract_numeric_bounds function.""" + + def test_signed_integer_bounds(self) -> None: + """Should extract ge/le from a constrained integer NewType.""" + spec = extract_newtype(int32) + bounds = extract_numeric_bounds(spec.type_info) + + assert bounds.ge == -(2**31) + assert bounds.le == 2**31 - 1 + + def test_unsigned_integer_bounds(self) -> None: + """Should extract 0-based bounds from unsigned NewType.""" + spec = extract_newtype(uint8) + bounds = extract_numeric_bounds(spec.type_info) + + assert bounds.ge == 0 + assert bounds.le == 255 + + def test_int64_bounds(self) -> None: + """Should extract large bounds from int64.""" + spec = extract_newtype(int64) + bounds = extract_numeric_bounds(spec.type_info) + + assert bounds.ge == -(2**63) + assert bounds.le == 2**63 - 1 + + def test_unconstrained_type(self) -> None: + """Should return empty Interval for types without numeric constraints.""" + spec = extract_newtype(float32) + bounds = extract_numeric_bounds(spec.type_info) + + assert bounds.ge is None + assert bounds.gt is None + assert bounds.le is None + assert bounds.lt is None + + def test_exclusive_bounds(self) -> None: + """Should extract gt/lt from constraints using exclusive bounds.""" + ExclusiveBounded = NewType( + 
"ExclusiveBounded", Annotated[int, Field(gt=0, lt=100)] + ) + type_info = analyze_type(ExclusiveBounded) + bounds = extract_numeric_bounds(type_info) + + assert bounds.gt == 0 + assert bounds.lt == 100 + assert bounds.ge is None + assert bounds.le is None + + def test_mixed_bounds(self) -> None: + """Should extract a mix of inclusive and exclusive bounds.""" + MixedBounded = NewType("MixedBounded", Annotated[int, Field(ge=0, lt=256)]) + type_info = analyze_type(MixedBounded) + bounds = extract_numeric_bounds(type_info) + + assert bounds.ge == 0 + assert bounds.lt == 256 + assert bounds.gt is None + assert bounds.le is None diff --git a/packages/overture-schema-codegen/tests/test_pydantic_extraction.py b/packages/overture-schema-codegen/tests/test_pydantic_extraction.py new file mode 100644 index 000000000..1d8803d16 --- /dev/null +++ b/packages/overture-schema-codegen/tests/test_pydantic_extraction.py @@ -0,0 +1,29 @@ +"""Tests for Pydantic type extraction.""" + +from overture.schema.codegen.extraction.pydantic_extraction import extract_pydantic_type +from overture.schema.codegen.extraction.specs import PydanticTypeSpec +from pydantic import EmailStr, HttpUrl + + +class TestExtractPydanticType: + def test_extracts_http_url(self) -> None: + spec = extract_pydantic_type(HttpUrl) + assert isinstance(spec, PydanticTypeSpec) + assert spec.name == "HttpUrl" + assert spec.source_type is HttpUrl + assert spec.source_module == "networks" + assert spec.description is not None + assert "http" in spec.description.lower() + + def test_extracts_email_str(self) -> None: + spec = extract_pydantic_type(EmailStr) + assert isinstance(spec, PydanticTypeSpec) + assert spec.name == "EmailStr" + assert spec.source_type is EmailStr + assert spec.source_module == "networks" + + def test_admonition_label_filtered_from_description(self) -> None: + spec = extract_pydantic_type(EmailStr) + # EmailStr.__doc__ starts with "Info:" (bare admonition label). 
+ # _usable_description filters this, returning None. + assert spec.description is None diff --git a/packages/overture-schema-codegen/tests/test_reverse_references.py b/packages/overture-schema-codegen/tests/test_reverse_references.py new file mode 100644 index 000000000..fb8e1e41a --- /dev/null +++ b/packages/overture-schema-codegen/tests/test_reverse_references.py @@ -0,0 +1,227 @@ +"""Tests for reverse reference computation.""" + +from enum import Enum as PyEnum +from typing import NewType + +import pytest +from codegen_test_support import ( + FeatureWithAddress, + FeatureWithUrl, + Instrument, + RoadSegment, + TreeNode, + Venue, + has_name, + lookup_by_name, + make_union_spec, +) +from overture.schema.codegen.extraction.enum_extraction import extract_enum +from overture.schema.codegen.extraction.model_extraction import ( + expand_model_tree, + extract_model, +) +from overture.schema.codegen.extraction.newtype_extraction import extract_newtype +from overture.schema.codegen.extraction.specs import PydanticTypeSpec, TypeIdentity +from overture.schema.codegen.layout.type_collection import ( + collect_all_supplementary_types, +) +from overture.schema.codegen.markdown.reverse_references import ( + UsedByKind, + compute_reverse_references, +) +from overture.schema.system.ref import Id +from overture.schema.system.string import NoWhitespaceString +from pydantic import BaseModel + + +@pytest.mark.parametrize( + ("model_class", "model_name", "target_name"), + [ + (Instrument, "Instrument", "InstrumentFamily"), + (Instrument, "Instrument", "HexColor"), + (FeatureWithAddress, "FeatureWithAddress", "Address"), + ], + ids=["enum", "newtype", "sub-model"], +) +def test_model_referencing_type_produces_used_by_entry( + model_class: type, + model_name: str, + target_name: str, +) -> None: + """Model referencing a type produces a 'used by' entry on that type.""" + model_spec = extract_model(model_class, entry_point=model_name) + expand_model_tree(model_spec) + all_specs = 
collect_all_supplementary_types([model_spec]) + + assert has_name(all_specs, target_name) + + result = compute_reverse_references([model_spec], all_specs) + + entries = lookup_by_name(result, target_name) + assert len(entries) == 1 + assert entries[0].identity.name == model_name + assert entries[0].kind == UsedByKind.MODEL + + +def test_newtype_inheriting_from_newtype_produces_used_by_entry() -> None: + """NewType inheriting constraints from another NewType produces a 'used by' entry.""" + # Id wraps NoWhitespaceString, which is also a NewType + # When we extract Id, its constraints include ConstraintSource(source_ref=NoWhitespaceString, ...) + id_spec = extract_newtype(Id) + nws_spec = extract_newtype(NoWhitespaceString) + + all_specs = { + TypeIdentity(Id, "Id"): id_spec, + TypeIdentity(NoWhitespaceString, "NoWhitespaceString"): nws_spec, + } + + result = compute_reverse_references([], all_specs) + + # NoWhitespaceString should have a used_by entry from Id + entries = lookup_by_name(result, "NoWhitespaceString") + assert len(entries) == 1 + assert entries[0].identity.name == "Id" + assert entries[0].kind == UsedByKind.NEWTYPE + + +def test_union_members_have_used_by_entries() -> None: + """Union members have 'used by' entries pointing to the union feature.""" + # Create a union spec with RoadSegment as a member + union_spec = make_union_spec( + name="TestSegment", + description="Test segment union", + members=[RoadSegment], + entry_point="TestSegment", + ) + + # Extract the member + road_spec = extract_model(RoadSegment) + expand_model_tree(road_spec) + all_specs = {TypeIdentity(RoadSegment, "RoadSegment"): road_spec} + + result = compute_reverse_references([union_spec], all_specs) + + entries = lookup_by_name(result, "RoadSegment") + assert len(entries) == 1 + assert entries[0].identity.name == "TestSegment" + assert entries[0].kind == UsedByKind.MODEL + + +def test_self_references_filtered_out() -> None: + """Self-references are filtered out (handles recursive 
types).""" + tree_spec = extract_model(TreeNode, entry_point="TreeNode") + expand_model_tree(tree_spec) + + # Manually add TreeNode to all_specs to test self-reference filtering + all_specs = {TypeIdentity(TreeNode, "TreeNode"): tree_spec} + + result = compute_reverse_references([tree_spec], all_specs) + + # TreeNode should not appear in result since it only references itself + with pytest.raises(KeyError): + lookup_by_name(result, "TreeNode") + + +def test_deduplication_same_type_multiple_fields() -> None: + """Deduplication works when same type is referenced via multiple fields.""" + instrument_spec = extract_model(Instrument, entry_point="Instrument") + venue_spec = extract_model(Venue, entry_point="Venue") + expand_model_tree(instrument_spec) + expand_model_tree(venue_spec) + all_specs = collect_all_supplementary_types([instrument_spec, venue_spec]) + + assert has_name(all_specs, "Id") + + result = compute_reverse_references([instrument_spec, venue_spec], all_specs) + + entries = lookup_by_name(result, "Id") + # Both Instrument and Venue reference Id + assert len(entries) == 2 + names = {e.identity.name for e in entries} + assert names == {"Instrument", "Venue"} + # All should be MODELs + assert all(e.kind == UsedByKind.MODEL for e in entries) + + +def test_pydantic_type_has_used_by_from_feature() -> None: + """Pydantic type in all_specs gets used-by entries from features referencing it.""" + model_spec = extract_model(FeatureWithUrl, entry_point="FeatureWithUrl") + expand_model_tree(model_spec) + all_specs = collect_all_supplementary_types([model_spec]) + + assert has_name(all_specs, "HttpUrl") + assert isinstance(lookup_by_name(all_specs, "HttpUrl"), PydanticTypeSpec) + + result = compute_reverse_references([model_spec], all_specs) + + entries = lookup_by_name(result, "HttpUrl") + assert any(e.identity.name == "FeatureWithUrl" for e in entries) + + +def test_sort_tiebreaker_uses_module_for_same_name_referrers() -> None: + """Referrers with the same name sort 
deterministically by module.""" + + # Two model classes named "Feature" from different modules. + class SharedEnum(PyEnum): + A = "a" + + class FeatureAlpha(BaseModel): + value: SharedEnum + + class FeatureBeta(BaseModel): + value: SharedEnum + + FeatureAlpha.__name__ = "Feature" + FeatureAlpha.__module__ = "alpha.models" + FeatureBeta.__name__ = "Feature" + FeatureBeta.__module__ = "beta.models" + + spec_a = extract_model(FeatureAlpha, entry_point="Feature") + spec_b = extract_model(FeatureBeta, entry_point="Feature") + expand_model_tree(spec_a) + expand_model_tree(spec_b) + + enum_id = TypeIdentity(SharedEnum, "SharedEnum") + all_specs = {enum_id: extract_enum(SharedEnum)} + + result = compute_reverse_references([spec_a, spec_b], all_specs) + + entries = lookup_by_name(result, "SharedEnum") + assert len(entries) == 2 + # Both named "Feature" — module provides the tiebreaker + modules = [e.identity.module for e in entries] + assert modules == ["alpha.models", "beta.models"] + + +def test_sorting_models_before_newtypes() -> None: + """Sorting produces models before NewTypes, alphabetical within groups.""" + # Create a test where the same type (Id) is referenced by: + # - Two models (Instrument and Venue) - both MODEL referrers + # - A NewType wrapper around Id + # Create a synthetic NewType that wraps Id + CustomId = NewType("CustomId", Id) + + instrument_spec = extract_model(Instrument, entry_point="Instrument") + venue_spec = extract_model(Venue, entry_point="Venue") + expand_model_tree(instrument_spec) + expand_model_tree(venue_spec) + all_specs = collect_all_supplementary_types([instrument_spec, venue_spec]) + + # Add the CustomId NewType which references Id + custom_id_spec = extract_newtype(CustomId) + all_specs[TypeIdentity(CustomId, "CustomId")] = custom_id_spec + + result = compute_reverse_references([instrument_spec, venue_spec], all_specs) + + # Id should have entries from both Instrument and Venue (MODELs) and CustomId (NEWTYPE) + entries = 
lookup_by_name(result, "Id") + assert len(entries) == 3 + + # Check sorting: MODELs first, then NEWTYPE + # Within MODELs: alphabetical (Instrument, Venue) + assert entries[0].kind == UsedByKind.MODEL + assert entries[0].identity.name == "Instrument" + assert entries[1].kind == UsedByKind.MODEL + assert entries[1].identity.name == "Venue" + assert entries[2].kind == UsedByKind.NEWTYPE + assert entries[2].identity.name == "CustomId" diff --git a/packages/overture-schema-codegen/tests/test_specs.py b/packages/overture-schema-codegen/tests/test_specs.py new file mode 100644 index 000000000..0780e2fda --- /dev/null +++ b/packages/overture-schema-codegen/tests/test_specs.py @@ -0,0 +1,305 @@ +"""Tests for spec data structures and predicates.""" + +from typing import Annotated + +import pytest +from codegen_test_support import ( + STR_TYPE, + InstrumentFamily, + SimpleModel, + make_union_spec, +) +from overture.schema.codegen.extraction.model_extraction import extract_model +from overture.schema.codegen.extraction.specs import ( + AnnotatedField, + EnumSpec, + FeatureSpec, + FieldSpec, + ModelSpec, + NewTypeSpec, + TypeIdentity, + is_union_alias, +) +from overture.schema.codegen.extraction.type_analyzer import TypeInfo, TypeKind +from pydantic import BaseModel, Field + + +class TestFeatureSpecProtocol: + """Tests for FeatureSpec protocol compliance.""" + + def test_model_spec_satisfies_feature_spec(self) -> None: + """ModelSpec satisfies the FeatureSpec protocol.""" + + class Simple(BaseModel): + name: str + + spec = extract_model(Simple) + # Protocol compliance check + assert isinstance(spec, FeatureSpec) + # Verify protocol attributes + assert spec.name == "Simple" + assert isinstance(spec.fields, list) + assert spec.source_type is Simple + + +class TestFieldSpec: + """Tests for FieldSpec dataclass.""" + + def test_fieldspec_stores_basic_attributes(self) -> None: + """FieldSpec should store name, type_info, description, is_required.""" + field_spec = FieldSpec( + 
name="test_field", + type_info=STR_TYPE, + description="A test field", + is_required=True, + ) + + assert field_spec.name == "test_field" + assert field_spec.type_info == STR_TYPE + assert field_spec.description == "A test field" + assert field_spec.is_required is True + + def test_fieldspec_optional_field(self) -> None: + """FieldSpec should handle optional fields.""" + optional_str = TypeInfo( + base_type="str", kind=TypeKind.PRIMITIVE, is_optional=True + ) + + field_spec = FieldSpec( + name="optional_field", + type_info=optional_str, + description=None, + is_required=False, + ) + + assert field_spec.is_required is False + assert field_spec.description is None + + +class TestModelSpec: + """Tests for ModelSpec dataclass.""" + + def test_modelspec_stores_basic_attributes(self) -> None: + """ModelSpec should store name, description, fields.""" + field = FieldSpec( + name="id", + type_info=STR_TYPE, + description="Unique identifier", + is_required=True, + ) + + model_spec = ModelSpec( + name="TestModel", + description="A test model", + fields=[field], + ) + + assert model_spec.name == "TestModel" + assert model_spec.description == "A test model" + assert len(model_spec.fields) == 1 + assert model_spec.fields[0].name == "id" + + def test_entry_point_defaults_to_none(self) -> None: + spec = ModelSpec(name="M", description=None) + assert spec.entry_point is None + + +class TestAnnotatedField: + """Tests for AnnotatedField wrapper.""" + + def test_stores_field_and_variant_sources(self) -> None: + """AnnotatedField pairs a FieldSpec with variant provenance.""" + fs = FieldSpec(name="x", type_info=STR_TYPE, description=None, is_required=True) + af = AnnotatedField(field_spec=fs, variant_sources=("RoadSegment",)) + assert af.field_spec is fs + assert af.variant_sources == ("RoadSegment",) + + def test_none_variant_sources_means_shared(self) -> None: + """variant_sources=None indicates a shared field.""" + fs = FieldSpec(name="x", type_info=STR_TYPE, description=None, 
is_required=True) + af = AnnotatedField(field_spec=fs, variant_sources=None) + assert af.variant_sources is None + + +class TestFieldSpecModelTree: + """Tests for FieldSpec model and starts_cycle fields.""" + + def test_model_defaults_to_none(self) -> None: + field_spec = FieldSpec( + name="test", type_info=STR_TYPE, description=None, is_required=True + ) + assert field_spec.model is None + + def test_starts_cycle_defaults_to_false(self) -> None: + field_spec = FieldSpec( + name="test", type_info=STR_TYPE, description=None, is_required=True + ) + assert field_spec.starts_cycle is False + + def test_model_can_hold_model_spec(self) -> None: + type_info = TypeInfo(base_type="Address", kind=TypeKind.MODEL) + sub = ModelSpec(name="Address", description=None) + field_spec = FieldSpec( + name="address", + type_info=type_info, + description=None, + is_required=True, + model=sub, + ) + assert field_spec.model is sub + + def test_starts_cycle_can_be_set(self) -> None: + type_info = TypeInfo(base_type="Node", kind=TypeKind.MODEL) + sub = ModelSpec(name="Node", description=None) + field_spec = FieldSpec( + name="parent", + type_info=type_info, + description=None, + is_required=False, + model=sub, + starts_cycle=True, + ) + assert field_spec.starts_cycle is True + assert field_spec.model is sub + + def test_starts_cycle_without_model_is_nonsensical(self) -> None: + """starts_cycle=True with model=None is expressible but invalid. + + expand_model_tree never produces this combination -- starts_cycle + is only set when model points to the cycle-causing ModelSpec. + Document the invariant so violations stand out. 
+ """ + type_info = TypeInfo(base_type="Node", kind=TypeKind.MODEL) + field_spec = FieldSpec( + name="parent", + type_info=type_info, + description=None, + is_required=False, + starts_cycle=True, + ) + # Expressible but meaningless: cycle to nowhere + assert field_spec.starts_cycle is True + assert field_spec.model is None + + +class TestIsUnionAlias: + """Tests for is_union_alias predicate.""" + + def test_annotated_union_of_models_returns_true(self) -> None: + """Annotated[Union of BaseModels] is a union alias.""" + + class A(BaseModel): + x: int + + class B(BaseModel): + y: str + + union_type = Annotated[A | B, Field(description="test")] + assert is_union_alias(union_type) is True + + def test_model_class_returns_false(self) -> None: + """A concrete BaseModel class is not a union alias.""" + + class A(BaseModel): + x: int + + assert is_union_alias(A) is False + + def test_plain_string_returns_false(self) -> None: + """A plain string is not a union alias.""" + assert is_union_alias("not a type") is False + + def test_non_model_union_returns_false(self) -> None: + """A union of non-model types is not a union alias.""" + assert is_union_alias(str | int) is False + + +class TestUnionSpec: + """Tests for UnionSpec data structure.""" + + def test_fields_property_returns_plain_field_specs(self) -> None: + """UnionSpec.fields property returns list[FieldSpec] from annotated_fields.""" + fs1 = FieldSpec( + name="a", type_info=STR_TYPE, description=None, is_required=True + ) + fs2 = FieldSpec( + name="b", type_info=STR_TYPE, description=None, is_required=False + ) + spec = make_union_spec( + annotated_fields=[ + AnnotatedField(field_spec=fs1, variant_sources=None), + AnnotatedField(field_spec=fs2, variant_sources=("X",)), + ], + ) + assert spec.fields == [fs1, fs2] + + +class TestTypeIdentity: + def test_frozen(self) -> None: + ti = TypeIdentity(obj=int, name="int") + with pytest.raises(AttributeError): + ti.obj = str # type: ignore[misc] + + def test_same_obj_equal(self) 
-> None: + a = TypeIdentity(obj=int, name="int") + b = TypeIdentity(obj=int, name="integer") + assert a == b + + def test_same_obj_same_hash(self) -> None: + a = TypeIdentity(obj=int, name="int") + b = TypeIdentity(obj=int, name="integer") + assert hash(a) == hash(b) + + def test_different_obj_not_equal(self) -> None: + a = TypeIdentity(obj=int, name="int") + b = TypeIdentity(obj=str, name="int") + assert a != b + + def test_works_as_dict_key(self) -> None: + ti = TypeIdentity(obj=int, name="int") + d = {ti: "value"} + assert d[TypeIdentity(obj=int, name="different")] == "value" + + def test_not_equal_to_non_identity(self) -> None: + ti = TypeIdentity(obj=int, name="int") + non_identity_type: object = int + non_identity_str: object = "int" + assert ti != non_identity_type + assert ti != non_identity_str + + +class TestSpecIdentity: + def test_model_spec_identity(self) -> None: + spec = ModelSpec(name="Foo", description=None, source_type=SimpleModel) + ident = spec.identity + assert isinstance(ident, TypeIdentity) + assert ident.obj is SimpleModel + assert ident.name == "Foo" + + def test_enum_spec_identity(self) -> None: + spec = EnumSpec(name="Color", description=None, source_type=InstrumentFamily) + ident = spec.identity + assert ident.obj is InstrumentFamily + assert ident.name == "Color" + + def test_newtype_spec_identity(self) -> None: + from overture.schema.system.primitive import int32 + + spec = NewTypeSpec( + name="int32", description=None, type_info=STR_TYPE, source_type=int32 + ) + ident = spec.identity + assert ident.obj is int32 + assert ident.name == "int32" + + def test_union_spec_identity(self) -> None: + sentinel = object() + spec = make_union_spec("TestUnion", source_annotation=sentinel) + ident = spec.identity + assert ident.obj is sentinel + assert ident.name == "TestUnion" + + def test_model_spec_satisfies_feature_protocol_with_identity(self) -> None: + spec = ModelSpec(name="Foo", description=None, source_type=SimpleModel) + feature: 
FeatureSpec = spec + assert feature.identity.obj is SimpleModel diff --git a/packages/overture-schema-codegen/tests/test_type_analyzer.py b/packages/overture-schema-codegen/tests/test_type_analyzer.py new file mode 100644 index 000000000..d48d12211 --- /dev/null +++ b/packages/overture-schema-codegen/tests/test_type_analyzer.py @@ -0,0 +1,676 @@ +"""Tests for type analysis.""" + +from enum import Enum +from typing import Annotated, Any, Literal, NewType, Optional + +import pytest +from annotated_types import Ge +from overture.schema.codegen.extraction.type_analyzer import ( + TypeInfo, + TypeKind, + UnsupportedUnionError, + analyze_type, + single_literal_value, +) +from overture.schema.system.primitive import float64, int32 +from overture.schema.system.ref import Id +from overture.schema.system.string import ( + HexColor, + NoWhitespaceConstraint, + NoWhitespaceString, + SnakeCaseString, +) +from pydantic import BaseModel, Field, Tag +from typing_extensions import Sentinel + + +@pytest.fixture() +def id_type_info() -> TypeInfo: + return analyze_type(Id) + + +@pytest.fixture() +def hex_color_type_info() -> TypeInfo: + return analyze_type(HexColor) + + +class TestAnalyzeTypePrimitives: + """Tests for primitive type analysis.""" + + @pytest.mark.parametrize("annotation", [str, int, float, bool]) + def test_builtin_returns_primitive_type_info(self, annotation: type) -> None: + """Builtin type annotations return PRIMITIVE TypeInfo with matching base_type.""" + result = analyze_type(annotation) + + assert result.base_type == annotation.__name__ + assert result.kind == TypeKind.PRIMITIVE + assert result.is_optional is False + assert result.is_list is False + + +class TestAnalyzeTypeSentinel: + """Tests for Sentinel type filtering in unions. + + Pydantic uses ``typing_extensions.Sentinel`` instances (like ````) + in union types for optional fields. The type analyzer filters these out + alongside ``None`` when processing unions. 
+ """ + + @pytest.fixture() + def missing_sentinel(self) -> object: + return Sentinel("MISSING") + + def test_sentinel_filtered_from_union(self, missing_sentinel: object) -> None: + """Sentinel is filtered out, leaving the concrete type.""" + result = analyze_type(str | missing_sentinel) # type: ignore[arg-type] + + assert result.base_type == "str" + assert result.kind == TypeKind.PRIMITIVE + assert result.is_optional is False + + def test_sentinel_with_none_sets_optional(self, missing_sentinel: object) -> None: + """Sentinel + None both filtered; None triggers is_optional.""" + result = analyze_type(str | missing_sentinel | None) # type: ignore[arg-type] + + assert result.base_type == "str" + assert result.kind == TypeKind.PRIMITIVE + assert result.is_optional is True + + +class TestAnalyzeTypeOptional: + """Tests for Optional type analysis.""" + + def test_pipe_none_sets_is_optional(self) -> None: + """str | None returns TypeInfo with is_optional=True.""" + result = analyze_type(str | None) + + assert result.base_type == "str" + assert result.kind == TypeKind.PRIMITIVE + assert result.is_optional is True + assert result.is_list is False + + def test_type_with_literal_and_none(self) -> None: + """str | Literal[""] | None filters Literal and marks optional.""" + result = analyze_type(str | Literal[""] | None) + + assert result.base_type == "str" + assert result.kind == TypeKind.PRIMITIVE + assert result.is_optional is True + + def test_typing_optional_sets_is_optional(self) -> None: + """Optional[str] from typing module returns TypeInfo with is_optional=True.""" + result = analyze_type(Optional[str]) # noqa: UP045 + + assert result.base_type == "str" + assert result.kind == TypeKind.PRIMITIVE + assert result.is_optional is True + assert result.is_list is False + + +class TestAnalyzeTypeUnionLiteralFiltering: + """Tests for filtering Literal arms out of unions.""" + + def test_type_with_literal_alternative(self) -> None: + """str | Literal[""] filters out the 
Literal and analyzes the concrete type.""" + result = analyze_type(str | Literal[""]) + + assert result.base_type == "str" + assert result.kind == TypeKind.PRIMITIVE + assert result.is_optional is False + + +class TestAnalyzeTypeList: + """Tests for list type analysis.""" + + def test_list_str_sets_is_list(self) -> None: + """list[str] returns TypeInfo with is_list=True.""" + result = analyze_type(list[str]) + + assert result.base_type == "str" + assert result.kind == TypeKind.PRIMITIVE + assert result.is_optional is False + assert result.is_list is True + + def test_nested_list_sets_depth_2(self) -> None: + """list[list[str]] records two levels of nesting.""" + result = analyze_type(list[list[str]]) + + assert result.list_depth == 2 + assert result.base_type == "str" + assert result.kind == TypeKind.PRIMITIVE + + +class TestAnalyzeTypeComposite: + """Tests for composite/nested type analysis.""" + + def test_list_optional_str(self) -> None: + """list[str | None] sets both is_list and is_optional.""" + result = analyze_type(list[str | None]) + + assert result.base_type == "str" + assert result.is_list is True + assert result.is_optional is True + + def test_optional_list_str(self) -> None: + """list[str] | None sets both is_list and is_optional.""" + result = analyze_type(list[str] | None) + + assert result.base_type == "str" + assert result.is_list is True + assert result.is_optional is True + + def test_annotated_optional_str(self) -> None: + """Annotated[str | None, ...] extracts constraints and sets is_optional.""" + result = analyze_type(Annotated[str | None, "description"]) + + assert result.base_type == "str" + assert result.is_optional is True + assert len(result.constraints) == 1 + assert result.constraints[0].source_ref is None + assert result.constraints[0].constraint == "description" + + def test_annotated_list_str(self) -> None: + """Annotated[list[str], ...] 
extracts constraints and sets is_list.""" + result = analyze_type(Annotated[list[str], Field(min_length=1)]) + + assert result.base_type == "str" + assert result.is_list is True + assert len(result.constraints) == 1 + assert result.constraints[0].source_ref is None + + +class TestAnalyzeTypeAnnotated: + """Tests for Annotated type analysis.""" + + def test_annotated_int_with_ge_extracts_constraint(self) -> None: + """Annotated[int, Field(ge=0)] unpacks FieldInfo to extract Ge constraint.""" + result = analyze_type(Annotated[int, Field(ge=0)]) + + assert result.base_type == "int" + assert result.kind == TypeKind.PRIMITIVE + assert len(result.constraints) == 1 + cs = result.constraints[0] + assert cs.source_ref is None + assert isinstance(cs.constraint, Ge) + assert cs.constraint.ge == 0 + + def test_annotated_without_constraints(self) -> None: + """Annotated[str, 'description'] extracts non-Field metadata.""" + result = analyze_type(Annotated[str, "just a description"]) + + assert result.base_type == "str" + assert len(result.constraints) == 1 + assert result.constraints[0].source_ref is None + assert result.constraints[0].constraint == "just a description" + + +class TestAnalyzeTypeLiteral: + """Tests for Literal type analysis.""" + + def test_literal_string_extracts_values(self) -> None: + """Literal["active"] stores the value in literal_values tuple.""" + result = analyze_type(Literal["active"]) + + assert result.kind == TypeKind.LITERAL + assert result.literal_values == ("active",) + + def test_literal_int_extracts_values(self) -> None: + """Literal[42] stores the value in literal_values tuple.""" + result = analyze_type(Literal[42]) + + assert result.kind == TypeKind.LITERAL + assert result.literal_values == (42,) + + def test_multi_value_literal_stores_all_args(self) -> None: + """Literal["a", "b"] stores all args in literal_values tuple.""" + result = analyze_type(Literal["a", "b"]) + + assert result.kind == TypeKind.LITERAL + assert result.literal_values == 
("a", "b") + + def test_optional_literal_extracts_values(self) -> None: + """Optional[Literal["x"]] unwraps to Literal with is_optional set.""" + result = analyze_type(Literal["x"] | None) + + assert result.kind == TypeKind.LITERAL + assert result.literal_values == ("x",) + assert result.is_optional is True + + +class TestAnalyzeTypeEnum: + """Tests for Enum type analysis.""" + + def test_enum_subclass_returns_kind_enum(self) -> None: + """Enum subclass returns TypeInfo with kind=ENUM.""" + + class Color(Enum): + RED = "red" + GREEN = "green" + + result = analyze_type(Color) + + assert result.base_type == "Color" + assert result.kind == TypeKind.ENUM + + +class TestAnalyzeTypeModel: + """Tests for BaseModel type analysis.""" + + def test_basemodel_subclass_returns_kind_model(self) -> None: + """BaseModel subclass returns TypeInfo with kind=MODEL.""" + + class Person(BaseModel): + name: str + + result = analyze_type(Person) + + assert result.base_type == "Person" + assert result.kind == TypeKind.MODEL + + +class TestAnalyzeTypeNewType: + """Tests for NewType primitive analysis.""" + + def test_int32_returns_newtype_name(self) -> None: + """int32 NewType returns TypeInfo with base_type='int32'.""" + result = analyze_type(int32) + + assert result.base_type == "int32" + assert result.kind == TypeKind.PRIMITIVE + + def test_float64_returns_newtype_name(self) -> None: + """float64 NewType returns TypeInfo with base_type='float64'.""" + result = analyze_type(float64) + + assert result.base_type == "float64" + assert result.kind == TypeKind.PRIMITIVE + + def test_optional_int32(self) -> None: + """int32 | None sets is_optional and preserves base_type.""" + result = analyze_type(int32 | None) + + assert result.base_type == "int32" + assert result.is_optional is True + + +class TestNewtypeName: + """Tests for outermost NewType name tracking.""" + + def test_single_layer_newtype(self) -> None: + """Single NewType like int32 sets newtype_name to its name.""" + result = 
analyze_type(int32) + + assert result.newtype_name == "int32" + assert result.base_type == "int32" + + def test_nested_newtype_preserves_outermost(self, id_type_info: TypeInfo) -> None: + """Nested NewType chain uses outermost name for newtype_name.""" + assert id_type_info.newtype_name == "Id" + assert id_type_info.base_type == "NoWhitespaceString" + + def test_plain_type_has_no_newtype_name(self) -> None: + """Plain types without NewType wrapping have newtype_name=None.""" + result = analyze_type(str) + + assert result.newtype_name is None + + def test_newtype_ref_set_for_newtype(self, id_type_info: TypeInfo) -> None: + """newtype_ref points to the outermost NewType callable.""" + assert id_type_info.newtype_ref is Id + + def test_newtype_ref_none_for_plain_type(self) -> None: + """Plain types have newtype_ref=None.""" + result = analyze_type(str) + + assert result.newtype_ref is None + + +class TestNewtypeWrappingList: + """Tests for NewType wrapping a list type.""" + + def test_newtype_wrapping_list(self) -> None: + """NewType wrapping a list sets is_list and preserves newtype_name.""" + TestSources = NewType("TestSources", Annotated[list[str], Field(min_length=1)]) + result = analyze_type(TestSources) + + assert result.is_list is True + assert result.newtype_name == "TestSources" + + def test_scalar_newtype_is_not_list(self) -> None: + """Scalar NewType like int32 has is_list=False.""" + result = analyze_type(int32) + + assert result.is_list is False + + def test_plain_list_has_no_newtype_name(self) -> None: + """Plain list[str] without NewType has newtype_name=None.""" + result = analyze_type(list[str]) + + assert result.newtype_name is None + assert result.is_list is True + + def test_newtype_wrapping_list_of_models(self) -> None: + """list[NewType wrapping list[Model]] records depth 2, outer depth 1.""" + + class _Item(BaseModel): + name: str + + Inner = NewType("Inner", Annotated[list[_Item], Field(min_length=1)]) + result = analyze_type(list[Inner]) + + 
assert result.list_depth == 2 + assert result.newtype_outer_list_depth == 1 + assert result.base_type == "Inner" + assert result.kind == TypeKind.MODEL + assert result.source_type is _Item + + +class TestNewtypeOuterListDepth: + """Tests for newtype_outer_list_depth tracking.""" + + def test_list_of_scalar_newtype_has_outer_depth(self) -> None: + """list[ScalarNewType] records the list layer as outside the NewType.""" + ScalarNT = NewType("ScalarNT", str) + result = analyze_type(list[ScalarNT]) + + assert result.newtype_outer_list_depth == 1 + assert result.list_depth == 1 + + def test_newtype_wrapping_list_has_zero_outer_depth(self) -> None: + """NewType wrapping list[X] records no list layers outside the NewType.""" + ListNT = NewType("ListNT", Annotated[list[str], Field(min_length=1)]) + result = analyze_type(ListNT) + + assert result.newtype_outer_list_depth == 0 + assert result.list_depth == 1 + + @pytest.mark.parametrize( + "annotation", + [ + list[str], # list without NewType + int32, # scalar NewType + str, # plain type + ], + ids=["plain_list", "scalar_newtype", "plain_type"], + ) + def test_zero_outer_depth_without_newtype_boundary( + self, annotation: object + ) -> None: + """Types without a NewType inside a list have newtype_outer_list_depth=0.""" + result = analyze_type(annotation) + + assert result.newtype_outer_list_depth == 0 + + def test_nested_list_of_scalar_newtype_has_outer_depth_2(self) -> None: + """list[list[ScalarNewType]] records two outer list layers.""" + ScalarNT = NewType("ScalarNT", str) + result = analyze_type(list[list[ScalarNT]]) + + assert result.newtype_outer_list_depth == 2 + assert result.list_depth == 2 + + +class TestConstraintProvenance: + """Tests for flattened constraints with provenance tracking.""" + + def test_nested_newtype_flattens_constraints(self, id_type_info: TypeInfo) -> None: + """Id -> NoWhitespaceString -> str flattens all constraints with sources.""" + source_names = { + cs.source_name for cs in 
id_type_info.constraints if cs.source_name + } + assert "Id" in source_names + assert "NoWhitespaceString" in source_names + + def test_nested_newtype_includes_inner_constraints( + self, id_type_info: TypeInfo + ) -> None: + """Inner NewType constraints are collected with provenance.""" + nws_constraints = [ + cs for cs in id_type_info.constraints if cs.source_ref is NoWhitespaceString + ] + constraint_types = {type(cs.constraint) for cs in nws_constraints} + assert NoWhitespaceConstraint in constraint_types + + def test_direct_annotation_has_none_source(self) -> None: + """Constraints from direct Annotated (no NewType) have source_ref=None.""" + result = analyze_type(Annotated[str, "direct"]) + + assert len(result.constraints) == 1 + assert result.constraints[0].source_ref is None + assert result.constraints[0].constraint == "direct" + + def test_single_newtype_constraints_attributed( + self, hex_color_type_info: TypeInfo + ) -> None: + """HexColor constraints are attributed to the HexColor callable.""" + assert all(cs.source_ref is HexColor for cs in hex_color_type_info.constraints) + assert len(hex_color_type_info.constraints) > 0 + + def test_source_ref_is_newtype_callable( + self, hex_color_type_info: TypeInfo + ) -> None: + """source_ref is the actual NewType callable, not a string.""" + cs = hex_color_type_info.constraints[0] + assert cs.source_ref is HexColor + + def test_constraint_preserves_original_object( + self, hex_color_type_info: TypeInfo + ) -> None: + """ConstraintSource.constraint holds the original constraint object.""" + hcc = next( + cs + for cs in hex_color_type_info.constraints + if type(cs.constraint).__name__ == "HexColorConstraint" + ) + assert hcc.constraint.__class__.__name__ == "HexColorConstraint" + + +class TestTypeInfoDescription: + """Tests for TypeInfo.description from Field(description=...) 
metadata.""" + + def test_newtype_with_field_description( + self, hex_color_type_info: TypeInfo + ) -> None: + """Should extract Field description from HexColor.""" + assert hex_color_type_info.description is not None + assert "color" in hex_color_type_info.description.lower() + + def test_newtype_without_field_description(self) -> None: + """Should have None description for types without Field(description=...).""" + result = analyze_type(int) + assert result.description is None + + def test_plain_annotated_with_field_description(self) -> None: + """Should extract description from Annotated with Field(description=...).""" + MyType = Annotated[str, Field(description="A test description")] + result = analyze_type(MyType) + assert result.description == "A test description" + + def test_outermost_description_wins(self, id_type_info: TypeInfo) -> None: + """Outermost FieldInfo.description takes precedence in nested NewTypes.""" + assert id_type_info.description is not None + assert "unique identifier" in id_type_info.description.lower() + + def test_newtype_without_field_has_none_description(self) -> None: + """NewType with constraints but no Field(description=...) 
has None.""" + result = analyze_type(SnakeCaseString) + assert result.description is None + + +class TestAnalyzeTypeAny: + """Tests for typing.Any analysis.""" + + def test_any_returns_primitive(self) -> None: + """Any annotation returns TypeInfo with base_type='Any' and kind=PRIMITIVE.""" + result = analyze_type(Any) + + assert result.base_type == "Any" + assert result.kind == TypeKind.PRIMITIVE + + def test_dict_with_any_value(self) -> None: + """dict[str, Any] analyzes without error.""" + result = analyze_type(dict[str, Any]) + + assert result.is_dict is True + assert result.dict_value_type is not None + assert result.dict_value_type.base_type == "Any" + + +class TestAnalyzeTypeDict: + """Tests for dict type analysis.""" + + @pytest.fixture() + def dict_str_int(self) -> TypeInfo: + return analyze_type(dict[str, int]) + + def test_dict_str_int_sets_is_dict(self, dict_str_int: TypeInfo) -> None: + """dict[str, int] returns TypeInfo with is_dict=True.""" + assert dict_str_int.is_dict is True + assert dict_str_int.is_optional is False + assert dict_str_int.is_list is False + + def test_dict_key_type_analyzed(self, dict_str_int: TypeInfo) -> None: + """dict[str, int] has dict_key_type describing the key.""" + assert dict_str_int.dict_key_type is not None + assert dict_str_int.dict_key_type.base_type == "str" + assert dict_str_int.dict_key_type.kind == TypeKind.PRIMITIVE + + def test_dict_value_type_analyzed(self, dict_str_int: TypeInfo) -> None: + """dict[str, int] has dict_value_type describing the value.""" + assert dict_str_int.dict_value_type is not None + assert dict_str_int.dict_value_type.base_type == "int" + assert dict_str_int.dict_value_type.kind == TypeKind.PRIMITIVE + + def test_optional_dict(self) -> None: + """dict[str, str] | None sets is_dict and is_optional.""" + result = analyze_type(dict[str, str] | None) + + assert result.is_dict is True + assert result.is_optional is True + + def test_newtype_wrapping_dict(self) -> None: + """NewType wrapping 
dict preserves newtype_name and sets is_dict.""" + TestMapping = NewType("TestMapping", dict[str, str]) + result = analyze_type(TestMapping) + + assert result.is_dict is True + assert result.newtype_name == "TestMapping" + + def test_bare_dict_raises_type_error(self) -> None: + """Bare dict without type arguments raises TypeError.""" + with pytest.raises(TypeError, match="Bare dict"): + analyze_type(dict) + + +class TestAnalyzeTypeErrors: + """Tests for error handling.""" + + def test_unsupported_annotation_raises_type_error(self) -> None: + """Unsupported annotation type raises TypeError.""" + with pytest.raises(TypeError, match="Unsupported annotation type"): + analyze_type("not a type") + + def test_multi_type_union_raises_clear_error(self) -> None: + """Multi-type unions like str | int raise UnsupportedUnionError.""" + with pytest.raises( + UnsupportedUnionError, match="Multi-type unions not supported" + ): + analyze_type(str | int) + + def test_multi_type_union_with_none_raises_clear_error(self) -> None: + """Multi-type optional unions like str | int | None raise UnsupportedUnionError.""" + with pytest.raises( + UnsupportedUnionError, match="Multi-type unions not supported" + ): + analyze_type(str | int | None) + + def test_bare_list_raises_type_error(self) -> None: + """Bare list without type argument raises TypeError.""" + with pytest.raises(TypeError, match="Bare list without type argument"): + analyze_type(list) + + +class UnionModelA(BaseModel): + x: int + + +class UnionModelB(BaseModel): + y: str + + +class TestAnalyzeTypeUnion: + """Tests for discriminated union analysis.""" + + def test_all_model_union_returns_union_kind(self) -> None: + """Annotated[Union of BaseModel subclasses] returns TypeKind.UNION.""" + union_type = Annotated[UnionModelA | UnionModelB, Field(description="test")] + result = analyze_type(union_type) + + assert result.kind == TypeKind.UNION + assert result.union_members is not None + assert len(result.union_members) == 2 + assert 
UnionModelA in result.union_members + assert UnionModelB in result.union_members + + def test_annotated_wrapped_members_unwrapped(self) -> None: + """Union members wrapped in Annotated[X, Tag(...)] are unwrapped.""" + union_type = Annotated[ + Annotated[UnionModelA, Tag("a")] | Annotated[UnionModelB, Tag("b")], + Field(description="disc"), + ] + result = analyze_type(union_type) + + assert result.kind == TypeKind.UNION + assert result.union_members is not None + assert len(result.union_members) == 2 + assert UnionModelA in result.union_members + assert UnionModelB in result.union_members + + def test_mixed_model_nonmodel_union_still_raises(self) -> None: + """Union of model + non-model types still raises UnsupportedUnionError.""" + with pytest.raises(UnsupportedUnionError): + analyze_type(UnionModelA | str) + + def test_non_model_multi_union_still_raises(self) -> None: + """Multi-type union of non-models still raises UnsupportedUnionError.""" + with pytest.raises(UnsupportedUnionError): + analyze_type(str | int) + + def test_union_base_type_is_first_member_name(self) -> None: + """UNION TypeInfo base_type is the first member's class name.""" + result = analyze_type( + Annotated[UnionModelA | UnionModelB, Field(description="test")] + ) + assert result.base_type == "UnionModelA" + + def test_optional_union_sets_is_optional(self) -> None: + """Union with None among model members sets is_optional.""" + result = analyze_type( + Annotated[UnionModelA | UnionModelB, Field(description="test")] | None + ) + assert result.kind == TypeKind.UNION + assert result.is_optional is True + + +class TestSingleLiteralValue: + """Tests for single_literal_value convenience accessor.""" + + def test_single_value_literal(self) -> None: + """Literal["x"] returns the literal value.""" + assert single_literal_value(Literal["x"]) == "x" + + def test_single_int_literal(self) -> None: + """Literal[42] returns the integer value.""" + assert single_literal_value(Literal[42]) == 42 + + def 
test_multi_value_literal_returns_none(self) -> None: + """Multi-value Literal returns None (no single default).""" + assert single_literal_value(Literal["a", "b"]) is None + + def test_non_literal_returns_none(self) -> None: + """Non-Literal types return None.""" + assert single_literal_value(str) is None + + def test_unsupported_type_returns_none(self) -> None: + """Types that raise during analysis return None.""" + assert single_literal_value("not a type") is None diff --git a/packages/overture-schema-codegen/tests/test_type_collection.py b/packages/overture-schema-codegen/tests/test_type_collection.py new file mode 100644 index 000000000..154b39e2c --- /dev/null +++ b/packages/overture-schema-codegen/tests/test_type_collection.py @@ -0,0 +1,128 @@ +"""Tests for type collection module.""" + +from codegen_test_support import ( + FeatureWithAddress, + FeatureWithSources, + FeatureWithUrl, + Instrument, + TestSegmentWithSubModel, + has_name, + lookup_by_name, +) +from overture.schema.codegen.extraction.model_extraction import ( + expand_model_tree, + extract_model, +) +from overture.schema.codegen.extraction.specs import ( + EnumSpec, + ModelSpec, + NewTypeSpec, + PydanticTypeSpec, + SupplementarySpec, + TypeIdentity, +) +from overture.schema.codegen.layout.type_collection import ( + collect_all_supplementary_types, +) +from pydantic import BaseModel + + +def _make_feature_with_sub_model(sub_model: type) -> type[BaseModel]: + """Build a feature class whose only field references sub_model.""" + return type( + f"FeatureWith{sub_model.__name__}", + (BaseModel,), + {"__annotations__": {"sub": sub_model}, "sub": None}, + ) + + +def _expanded_supplementary(model_class: type) -> dict[TypeIdentity, SupplementarySpec]: + spec = extract_model(model_class) + expand_model_tree(spec) + return collect_all_supplementary_types([spec]) + + +class TestCollectAllSupplementarySpecs: + """Tests for collect_all_supplementary_types returning specs from expanded trees.""" + + def 
test_returns_enum_specs(self) -> None: + result = _expanded_supplementary(Instrument) + + assert has_name(result, "InstrumentFamily") + assert isinstance(lookup_by_name(result, "InstrumentFamily"), EnumSpec) + + def test_returns_newtype_specs(self) -> None: + result = _expanded_supplementary(Instrument) + + assert has_name(result, "HexColor") + assert isinstance(lookup_by_name(result, "HexColor"), NewTypeSpec) + + def test_returns_model_specs_from_expanded_tree(self) -> None: + result = _expanded_supplementary(FeatureWithAddress) + + assert has_name(result, "Address") + assert isinstance(lookup_by_name(result, "Address"), ModelSpec) + + def test_collects_transitive_types(self) -> None: + """Types referenced by sub-models are also collected.""" + result = _expanded_supplementary(FeatureWithSources) + + # Sources is a semantic NewType; SourceItem is a sub-model + # referenced transitively via the expanded tree + assert has_name(result, "Sources") + assert has_name(result, "SourceItem") + + def test_same_name_different_types_both_collected(self) -> None: + """Two types with the same __name__ from different modules are both collected.""" + ModelA = type("Address", (BaseModel,), {"__annotations__": {"x": str}}) + ModelB = type("Address", (BaseModel,), {"__annotations__": {"y": int}}) + + outer_a = extract_model(_make_feature_with_sub_model(ModelA)) + expand_model_tree(outer_a) + + outer_b = extract_model(_make_feature_with_sub_model(ModelB)) + expand_model_tree(outer_b) + + result = collect_all_supplementary_types([outer_a, outer_b]) + + address_entries = [ + spec for tid, spec in result.items() if tid.name == "Address" + ] + assert len(address_entries) == 2 + + +class TestCollectUnionMemberSubModels: + """Tests for union members with nested sub-model fields.""" + + def test_union_member_with_sub_model_collects_sub_model(self) -> None: + """Sub-models inside union members are collected without RuntimeError.""" + + class FeatureWithUnionSubModel(BaseModel): + segment: 
TestSegmentWithSubModel + + result = _expanded_supplementary(FeatureWithUnionSubModel) + + assert has_name(result, "ContactInfo") + assert isinstance(lookup_by_name(result, "ContactInfo"), ModelSpec) + + +class TestCollectPydanticTypes: + """Tests for Pydantic built-in type collection.""" + + def test_collects_pydantic_type_from_field(self) -> None: + """Pydantic types referenced in fields are collected.""" + result = _expanded_supplementary(FeatureWithUrl) + assert has_name(result, "HttpUrl") + assert isinstance(lookup_by_name(result, "HttpUrl"), PydanticTypeSpec) + + def test_collects_pydantic_type_inside_list(self) -> None: + """Pydantic types wrapped in list[] are collected.""" + result = _expanded_supplementary(FeatureWithUrl) + assert has_name(result, "EmailStr") + assert isinstance(lookup_by_name(result, "EmailStr"), PydanticTypeSpec) + + def test_does_not_collect_builtin_primitives(self) -> None: + """Plain primitives like str are not collected as PydanticTypeSpec.""" + result = _expanded_supplementary(FeatureWithUrl) + assert not has_name(result, "str") + assert not has_name(result, "int") diff --git a/packages/overture-schema-codegen/tests/test_type_placement.py b/packages/overture-schema-codegen/tests/test_type_placement.py new file mode 100644 index 000000000..62ef7449a --- /dev/null +++ b/packages/overture-schema-codegen/tests/test_type_placement.py @@ -0,0 +1,247 @@ +"""Tests for type placement module.""" + +from pathlib import PurePosixPath + +import overture.schema.system.primitive as _system_primitive +from codegen_test_support import ( + EMAIL_STR_SPEC, + HTTP_URL_SPEC, + STR_TYPE, + flat_specs_from_discovery, + lookup_by_name, + make_union_spec, +) +from overture.schema.codegen.extraction.model_extraction import expand_model_tree +from overture.schema.codegen.extraction.primitive_extraction import ( + partition_primitive_and_geometry_names, +) +from overture.schema.codegen.extraction.specs import ( + AnnotatedField, + FeatureSpec, + FieldSpec, + 
ModelSpec, + SupplementarySpec, + TypeIdentity, +) +from overture.schema.codegen.layout.type_collection import ( + collect_all_supplementary_types, +) +from overture.schema.codegen.markdown.link_computation import LinkContext, relative_link +from overture.schema.codegen.markdown.path_assignment import ( + GEOMETRY_PAGE, + PRIMITIVES_PAGE, + build_placement_registry, +) +from pydantic import BaseModel + +_PRIMITIVE_NAMES, _GEOMETRY_NAMES = partition_primitive_and_geometry_names( + _system_primitive +) + +_SCHEMA_ROOT = "overture.schema" + + +def _build_registry( + feature_specs: list[ModelSpec], +) -> tuple[dict[TypeIdentity, PurePosixPath], dict[TypeIdentity, SupplementarySpec]]: + """Build placement registry with standard aggregate names.""" + cache: dict[type, ModelSpec] = {} + for spec in feature_specs: + expand_model_tree(spec, cache) + all_specs = collect_all_supplementary_types(feature_specs) + registry = build_placement_registry( + feature_specs, all_specs, _PRIMITIVE_NAMES, _GEOMETRY_NAMES, _SCHEMA_ROOT + ) + return registry, all_specs + + +class TestRelativeLink: + """Test relative path computation between pages.""" + + def test_same_directory(self) -> None: + source = PurePosixPath("buildings/building.md") + target = PurePosixPath("buildings/facade_material.md") + assert relative_link(source, target) == "facade_material.md" + + def test_sibling_directory(self) -> None: + source = PurePosixPath("buildings/building.md") + target = PurePosixPath("core/names/names.md") + assert relative_link(source, target) == "../core/names/names.md" + + def test_within_core(self) -> None: + source = PurePosixPath("core/names/names.md") + target = PurePosixPath("core/sources/sources.md") + assert relative_link(source, target) == "../sources/sources.md" + + def test_to_aggregate_page(self) -> None: + source = PurePosixPath("core/names/names.md") + target = PurePosixPath("system/primitive/primitives.md") + assert relative_link(source, target) == 
"../../system/primitive/primitives.md" + + +class TestBuildPlacementRegistry: + """Test the full placement registry builder with module-mirrored paths.""" + + def test_features_at_theme_level(self) -> None: + """Features land directly in their theme directory.""" + specs = flat_specs_from_discovery("buildings") + registry, _ = _build_registry(specs) + + assert lookup_by_name(registry, "Building") == PurePosixPath( + "buildings/building.md" + ) + assert lookup_by_name(registry, "BuildingPart") == PurePosixPath( + "buildings/building_part.md" + ) + + def test_shared_types_mirror_source_modules(self) -> None: + """Core/system types land in directories matching their module path.""" + specs = flat_specs_from_discovery("buildings") + registry, _ = _build_registry(specs) + + names = {tid.name for tid in registry} + if "Names" in names: + assert str(lookup_by_name(registry, "Names")).startswith("core/") + + def test_no_duplicate_paths(self) -> None: + """No two individual types share an output path.""" + specs = flat_specs_from_discovery() + registry, _ = _build_registry(specs) + + aggregate_pages = { + PurePosixPath("system/primitive/primitives.md"), + PurePosixPath("system/primitive/geometry.md"), + } + individual = [p for p in registry.values() if p not in aggregate_pages] + assert len(individual) == len(set(individual)), ( + "Duplicate output paths detected" + ) + + def test_aggregate_pages_at_system_primitive(self) -> None: + """Primitive and geometry aggregate pages under system/primitive/.""" + assert PRIMITIVES_PAGE == PurePosixPath("system/primitive/primitives.md") + assert GEOMETRY_PAGE == PurePosixPath("system/primitive/geometry.md") + + def test_supplementary_types_nested_under_types(self) -> None: + """Supplementary types in a feature directory go under types/.""" + specs = flat_specs_from_discovery("buildings") + registry, _ = _build_registry(specs) + + # BuildingClass is a supplementary type from the buildings module + assert lookup_by_name(registry, 
"BuildingClass") == PurePosixPath( + "buildings/types/building_class.md" + ) + + def test_submodule_supplementary_types_nested_under_types(self) -> None: + """Supplementary types in a feature subdirectory go under types/.""" + specs = flat_specs_from_discovery("divisions") + registry, _ = _build_registry(specs) + + # AreaClass is from overture.schema.divisions.division_area.enums, + # a subdirectory of the divisions feature directory. + assert lookup_by_name(registry, "AreaClass") == PurePosixPath( + "divisions/types/division_area/area_class.md" + ) + + def test_shared_types_not_nested(self) -> None: + """Core/system supplementary types stay at their module-mirrored path.""" + specs = flat_specs_from_discovery("buildings") + registry, _ = _build_registry(specs) + + # Names is from overture.schema.core -- no features there, no nesting + names = {tid.name for tid in registry} + if "Names" in names: + path = str(lookup_by_name(registry, "Names")) + assert path.startswith("core/") + assert "/types/" not in path + + +class TestPlacementWithUnionSpec: + """Tests for placement registry with UnionSpec.""" + + def test_union_spec_gets_placement(self) -> None: + """UnionSpec is placed alongside ModelSpec in the registry.""" + + class Base(BaseModel): + name: str + + class A(Base): + x: int + + union_spec = make_union_spec( + annotated_fields=[ + AnnotatedField( + field_spec=FieldSpec( + name="name", + type_info=STR_TYPE, + description=None, + is_required=True, + ), + variant_sources=None, + ), + ], + members=[A], + common_base=Base, + entry_point="test.package:TestUnion", + ) + + feature_specs: list[FeatureSpec] = [union_spec] + all_specs = collect_all_supplementary_types(feature_specs) + registry = build_placement_registry( + feature_specs, all_specs, [], [], "test.package" + ) + assert any(tid.name == "TestUnion" for tid in registry) + + +class TestLinkContextWithTypeIdentity: + """Tests for LinkContext using TypeIdentity keys.""" + + def 
test_same_name_different_identity_separate_paths(self) -> None: + """Two types with the same name but different objects resolve to different paths.""" + obj_a = type("Address", (), {}) + obj_b = type("Address", (), {}) + registry = { + TypeIdentity(obj_a, "Address"): PurePosixPath("places/types/address.md"), + TypeIdentity(obj_b, "Address"): PurePosixPath("addresses/address.md"), + } + ctx = LinkContext(page_path=PurePosixPath("places/place.md"), registry=registry) + assert ctx.resolve_link(TypeIdentity(obj_a, "Address")) == "types/address.md" + assert ( + ctx.resolve_link(TypeIdentity(obj_b, "Address")) + == "../addresses/address.md" + ) + + +class TestPydanticTypePlacement: + """Tests for placement of Pydantic built-in types.""" + + def test_pydantic_type_placed_under_module_dir(self) -> None: + registry = build_placement_registry( + feature_specs=[], + all_specs={HTTP_URL_SPEC.identity: HTTP_URL_SPEC}, + primitive_names=[], + geometry_names=[], + schema_root="overture.schema", + ) + assert lookup_by_name(registry, "HttpUrl") == PurePosixPath( + "pydantic/networks/http_url.md" + ) + + def test_multiple_pydantic_types_same_module(self) -> None: + specs: dict[TypeIdentity, SupplementarySpec] = { + HTTP_URL_SPEC.identity: HTTP_URL_SPEC, + EMAIL_STR_SPEC.identity: EMAIL_STR_SPEC, + } + registry = build_placement_registry( + feature_specs=[], + all_specs=specs, + primitive_names=[], + geometry_names=[], + schema_root="overture.schema", + ) + assert lookup_by_name(registry, "HttpUrl") == PurePosixPath( + "pydantic/networks/http_url.md" + ) + assert lookup_by_name(registry, "EmailStr") == PurePosixPath( + "pydantic/networks/email_str.md" + ) diff --git a/packages/overture-schema-codegen/tests/test_type_registry.py b/packages/overture-schema-codegen/tests/test_type_registry.py new file mode 100644 index 000000000..b9d02d2ac --- /dev/null +++ b/packages/overture-schema-codegen/tests/test_type_registry.py @@ -0,0 +1,143 @@ +"""Tests for type registry.""" + +import pytest 
+from overture.schema.codegen.extraction.type_analyzer import TypeInfo, TypeKind +from overture.schema.codegen.extraction.type_registry import ( + PRIMITIVE_TYPES, + TypeMapping, + get_type_mapping, + resolve_type_name, +) + + +class TestTypeMapping: + """Tests for TypeMapping dataclass.""" + + def test_typemapping_accepts_markdown(self) -> None: + """TypeMapping should construct with markdown field.""" + mapping = TypeMapping(markdown="int32") + + assert mapping.markdown == "int32" + + def test_for_target_returns_markdown(self) -> None: + """for_target should return markdown representation for markdown target.""" + mapping = TypeMapping(markdown="int32") + + assert mapping.for_target("markdown") == "int32" + + def test_for_target_rejects_unknown_target(self) -> None: + """for_target should raise ValueError for unknown targets.""" + mapping = TypeMapping(markdown="int32") + + with pytest.raises(ValueError, match="Unknown target 'scala'"): + mapping.for_target("scala") + + +class TestPrimitiveTypes: + """Tests for PRIMITIVE_TYPES registry.""" + + def test_registry_contains_expected_types(self) -> None: + """Registry should contain all expected primitive types.""" + expected_types = { + "int8", + "int16", + "int32", + "int64", + "uint8", + "uint16", + "uint32", + "float32", + "float64", + "str", + "bool", + "int", + "float", + "Geometry", + "BBox", + } + + assert set(PRIMITIVE_TYPES.keys()) == expected_types + + def test_bbox_mapping(self) -> None: + """BBox should map to bbox.""" + bbox = PRIMITIVE_TYPES["BBox"] + + assert bbox.markdown == "bbox" + + +class TestGetTypeMapping: + """Tests for get_type_mapping function.""" + + def test_returns_mapping_for_known_type(self) -> None: + """Should return TypeMapping for known primitive type.""" + result = get_type_mapping("int32") + + assert result is not None + assert result.markdown == "int32" + + def test_returns_none_for_unknown_type(self) -> None: + """Should return None for unknown type names.""" + result = 
get_type_mapping("unknown_type") + + assert result is None + + def test_returns_mapping_for_builtin_int(self) -> None: + """Should map Python int to int64.""" + result = get_type_mapping("int") + + assert result is not None + assert result.markdown == "int64" + + def test_returns_mapping_for_builtin_float(self) -> None: + """Should map Python float to float64.""" + result = get_type_mapping("float") + + assert result is not None + assert result.markdown == "float64" + + +class TestResolveTypeNameNewTypeFallback: + """Tests for resolve_type_name with unregistered NewTypes.""" + + def test_unregistered_newtype_falls_back_to_source_type(self) -> None: + """Unregistered NewType resolves to source_type name.""" + ti = TypeInfo( + base_type="Sources", + kind=TypeKind.MODEL, + newtype_name="Sources", + source_type=type("SourceItem", (), {}), + ) + result = resolve_type_name(ti, "markdown") + + assert result == "SourceItem" + + def test_registered_newtype_unaffected(self) -> None: + """Registered NewType (int32) still resolves through the registry.""" + ti = TypeInfo( + base_type="int32", + kind=TypeKind.PRIMITIVE, + newtype_name="int32", + source_type=int, + ) + result = resolve_type_name(ti, "markdown") + + assert result == "int32" + + +class TestResolveTypeName: + """Tests for resolve_type_name with list/optional flags.""" + + def _make_type_info(self, **kwargs: object) -> TypeInfo: + defaults = {"base_type": "str", "kind": TypeKind.PRIMITIVE} + defaults.update(kwargs) + return TypeInfo(**defaults) # type: ignore[arg-type] + + def test_ignores_list_depth(self) -> None: + """resolve_type_name returns the base type regardless of list_depth.""" + ti = self._make_type_info(list_depth=1) + assert resolve_type_name(ti, "markdown") == "string" + + def test_ignores_is_optional(self) -> None: + """resolve_type_name returns the base type regardless of is_optional.""" + ti = self._make_type_info(is_optional=True) + assert resolve_type_name(ti, "markdown") == "string" diff --git 
a/packages/overture-schema-codegen/tests/test_union_extraction.py b/packages/overture-schema-codegen/tests/test_union_extraction.py new file mode 100644 index 000000000..a8b685c48 --- /dev/null +++ b/packages/overture-schema-codegen/tests/test_union_extraction.py @@ -0,0 +1,91 @@ +"""Tests for union extraction.""" + +import pytest +from codegen_test_support import ( + RailSegment, + RoadSegment, + SegmentBase, + TestSegment, + WaterSegment, +) +from overture.schema.codegen.extraction.specs import FieldSpec, UnionSpec +from overture.schema.codegen.extraction.union_extraction import extract_union + + +class TestExtractUnion: + """Tests for extract_union function.""" + + @pytest.fixture + def segment_spec(self) -> UnionSpec: + return extract_union("TestSegment", TestSegment) + + def test_extracts_name_and_description(self, segment_spec: UnionSpec) -> None: + """UnionSpec captures the union name and docstring.""" + assert segment_spec.name == "TestSegment" + assert segment_spec.description == "Test segment union" + + def test_finds_common_base(self, segment_spec: UnionSpec) -> None: + """Identifies SegmentBase as the common base class.""" + assert segment_spec.common_base is SegmentBase + + def test_shared_fields_first(self, segment_spec: UnionSpec) -> None: + """Shared fields from common base come first with variant_sources=None.""" + shared = [ + af for af in segment_spec.annotated_fields if af.variant_sources is None + ] + shared_names = [af.field_spec.name for af in shared] + assert "geometry" in shared_names + assert "subtype" in shared_names + # Shared fields are at the start + first_variant_idx = next( + ( + i + for i, af in enumerate(segment_spec.annotated_fields) + if af.variant_sources is not None + ), + len(segment_spec.annotated_fields), + ) + for af in segment_spec.annotated_fields[:first_variant_idx]: + assert af.variant_sources is None + + def test_variant_specific_fields_have_sources( + self, segment_spec: UnionSpec + ) -> None: + """Variant-only fields 
carry their source class names.""" + speed = next( + af + for af in segment_spec.annotated_fields + if af.field_spec.name == "speed_limit" + ) + assert speed.variant_sources == ("RoadSegment",) + gauge = next( + af + for af in segment_spec.annotated_fields + if af.field_spec.name == "rail_gauge" + ) + assert gauge.variant_sources == ("RailSegment",) + + def test_heterogeneous_same_name_produces_separate_rows( + self, segment_spec: UnionSpec + ) -> None: + """class_ in Road (str) vs Rail (int): separate rows, not merged.""" + class_fields = [ + af for af in segment_spec.annotated_fields if af.field_spec.name == "class" + ] + assert len(class_fields) == 2 + sources = {af.variant_sources for af in class_fields} + assert ("RoadSegment",) in sources + assert ("RailSegment",) in sources + + def test_members_lists_all_member_classes(self, segment_spec: UnionSpec) -> None: + """UnionSpec.members contains all union member classes.""" + assert set(segment_spec.members) == {RoadSegment, RailSegment, WaterSegment} + + def test_source_annotation_preserved(self, segment_spec: UnionSpec) -> None: + """source_annotation holds the original Annotated[Union[...]].""" + assert segment_spec.source_annotation is TestSegment + + def test_fields_property_returns_plain_list(self, segment_spec: UnionSpec) -> None: + """spec.fields returns list[FieldSpec] without provenance.""" + for f in segment_spec.fields: + assert isinstance(f, FieldSpec) diff --git a/packages/overture-schema-core/src/overture/schema/core/discovery.py b/packages/overture-schema-core/src/overture/schema/core/discovery.py index 15da3abc4..b9290d29a 100644 --- a/packages/overture-schema-core/src/overture/schema/core/discovery.py +++ b/packages/overture-schema-core/src/overture/schema/core/discovery.py @@ -21,15 +21,15 @@ class ModelKey: The theme name (e.g., "buildings", "places"), or None for non-themed models type : str The feature type (e.g., "building", "place") - class_name : str - The fully qualified class name from 
the entry point value + entry_point : str + The entry point value in "module:Class" format """ namespace: str theme: str | None type: str - class_name: str + entry_point: str def discover_models( @@ -87,7 +87,7 @@ def discover_models( namespace=ns, theme=theme, type=feature_type, - class_name=entry_point.value, + entry_point=entry_point.value, ) models[key] = model_class except Exception as e: diff --git a/packages/overture-schema-core/src/overture/schema/core/models.py b/packages/overture-schema-core/src/overture/schema/core/models.py index ed43eabe9..dbb77a281 100644 --- a/packages/overture-schema-core/src/overture/schema/core/models.py +++ b/packages/overture-schema-core/src/overture/schema/core/models.py @@ -65,9 +65,9 @@ def __validate_ext_fields__(self) -> Self: maybe_plural = "s" if len(invalid_extra_fields) > 1 else "" raise ValueError( f"invalid extra field name{maybe_plural}: {', '.join(invalid_extra_fields)} " - "(extra fields are temporarily allowed, but only if their names start with 'ext_', " - "but all extra field name support in {self.__class__.__name__} is on a deprecation path" - "and will be removed)" + f"(extra fields are temporarily allowed, but only if their names start with 'ext_', " + f"but all extra field name support in {self.__class__.__name__} is on a deprecation path " + f"and will be removed)" ) return self diff --git a/packages/overture-schema-core/src/overture/schema/core/names.py b/packages/overture-schema-core/src/overture/schema/core/names.py index e968b24a7..aed77d985 100644 --- a/packages/overture-schema-core/src/overture/schema/core/names.py +++ b/packages/overture-schema-core/src/overture/schema/core/names.py @@ -110,7 +110,7 @@ Field(json_schema_extra={"additionalProperties": False}), ], ) -"""A mapping from language to the most commonly used or recognized name in that language.""" +CommonNames.__doc__ = """A mapping from language to the most commonly used or recognized name in that language.""" class NameVariant(str, 
DocumentedEnum): diff --git a/packages/overture-schema-core/src/overture/schema/core/scoping/opening_hours.py b/packages/overture-schema-core/src/overture/schema/core/scoping/opening_hours.py index a8efc8c16..8f8acbe6a 100644 --- a/packages/overture-schema-core/src/overture/schema/core/scoping/opening_hours.py +++ b/packages/overture-schema-core/src/overture/schema/core/scoping/opening_hours.py @@ -15,7 +15,7 @@ ), ], ) -""" +OpeningHours.__doc__ = """ Time span or time spans during which something is open or active, specified in the OpenStreetMap opening hours specification: https://wiki.openstreetmap.org/wiki/Key:opening_hours/specification. """ diff --git a/packages/overture-schema-divisions-theme/pyproject.toml b/packages/overture-schema-divisions-theme/pyproject.toml index 409598de9..21fe72551 100644 --- a/packages/overture-schema-divisions-theme/pyproject.toml +++ b/packages/overture-schema-divisions-theme/pyproject.toml @@ -37,3 +37,138 @@ packages = ["src/overture"] "overture:divisions:division" = "overture.schema.divisions:Division" "overture:divisions:division_area" = "overture.schema.divisions:DivisionArea" "overture:divisions:division_boundary" = "overture.schema.divisions:DivisionBoundary" + +[[examples.Division]] +id = "350e85f6-68ba-4114-9906-c2844815988b" +geometry = "POINT (-175.2551522 -21.1353686)" +country = "TO" +version = 1 +subtype = "locality" +class = "village" +wikidata = "null" +region = "TO-04" +perspectives = "null" +hierarchies = [ + [ + {division_id = "fef8748b-0c91-46ad-9f2d-976d8d2de3e9", subtype = "country", name = "Tonga"}, + {division_id = "4d67561a-2292-41bd-8996-7853d276a42c", subtype = "region", name = "Tongatapu"}, + {division_id = "8730f0cc-d436-4f11-a7d3-49085813ef44", subtype = "county", name = "Vahe Kolomotu'a"}, + {division_id = "350e85f6-68ba-4114-9906-c2844815988b", subtype = "locality", name = "Sia'atoutai"}, + ], +] +parent_division_id = "8730f0cc-d436-4f11-a7d3-49085813ef44" +norms = "null" +population = 534 
+capital_division_ids = "null" +capital_of_divisions = "null" +theme = "divisions" +type = "division" + +[examples.Division.bbox] +xmin = -175.25515747070312 +xmax = -175.255126953125 +ymin = -21.1353702545166 +ymax = -21.13536834716797 + +[[examples.Division.sources]] +property = "" +dataset = "OpenStreetMap" +record_id = "n3173231082@4" +update_time = "2014-12-18T09:17:03Z" +confidence = "null" +between = "null" + +[examples.Division.cartography] +prominence = 29 +min_zoom = "null" +max_zoom = "null" +sort_key = "null" + +[examples.Division.names] +primary = "Sia'atoutai" +common = "null" + +[[examples.Division.names.rules]] +variant = "alternate" +language = "null" +perspectives = "null" +value = "Nafualu" +between = "null" +side = "null" + +[examples.Division.local_type] +en = "village" + +[[examples.DivisionArea]] +id = "eb9b112f-ec3c-47f7-b519-6f9f2e6fc2bd" +geometry = "MULTIPOLYGON (((-174.9553949 -21.4730179, -174.9514163 -21.4719978, -174.9520108 -21.4681253, -174.9566122 -21.4687535, -174.9553949 -21.4730179)), ((-174.9634398 -21.3476807, -174.9753507 -21.3833656, -174.9702168 -21.4037277, -174.950488 -21.4269887, -174.9082983 -21.4577763, -174.9004303 -21.4398142, -174.9048159 -21.3698688, -174.9165467 -21.3035402, -174.9126977 -21.2903268, -174.9199765 -21.2834922, -174.9634398 -21.3476807)))" +country = "TO" +version = 2 +subtype = "region" +class = "land" +is_land = true +is_territorial = false +region = "TO-01" +division_id = "21597af0-b564-463c-a356-42c29e712b7d" +theme = "divisions" +type = "division_area" + +[examples.DivisionArea.bbox] +xmin = -174.97535705566406 +xmax = -174.90040588378906 +ymin = -21.473018646240234 +ymax = -21.283489227294922 + +[[examples.DivisionArea.sources]] +property = "" +dataset = "OpenStreetMap" +record_id = "r7247527@3" +update_time = "2020-12-30T18:41:56Z" +confidence = "null" +between = "null" + +[examples.DivisionArea.names] +primary = "ʻEua" +common = "null" +rules = "null" + +[[examples.DivisionBoundary]] +id = 
"2bdf68e4-860d-3d8c-a472-ccf439a5302a" +geometry = "LINESTRING (-147.064823 -15.4231537, -147.0519131 -15.2885069, -147.048482 -15.1511701)" +country = "PF" +version = 1 +subtype = "county" +class = "maritime" +is_land = false +is_territorial = true +division_ids = [ + "ae266459-63a4-4508-8295-0101e27d039b", + "d4a6873d-885a-4f2a-bc0f-37e9d9e874e4" +] +region = "null" +is_disputed = false +perspectives = "null" +theme = "divisions" +type = "division_boundary" + +[examples.DivisionBoundary.bbox] +xmin = -147.06483459472656 +xmax = -147.04847717285156 +ymin = -15.4231538772583 +ymax = -15.151169776916504 + +[[examples.DivisionBoundary.sources]] +property = "" +dataset = "OpenStreetMap" +record_id = "r6063055@9" +update_time = "2023-07-20T00:28:40Z" +confidence = "null" +between = "null" + +[[examples.DivisionBoundary.sources]] +property = "" +dataset = "OpenStreetMap" +record_id = "r6063063@12" +update_time = "2023-07-20T00:28:40Z" +confidence = "null" +between = "null" diff --git a/packages/overture-schema-places-theme/pyproject.toml b/packages/overture-schema-places-theme/pyproject.toml index afbf1aee4..1d851b356 100644 --- a/packages/overture-schema-places-theme/pyproject.toml +++ b/packages/overture-schema-places-theme/pyproject.toml @@ -36,3 +36,60 @@ packages = ["src/overture"] [project.entry-points."overture.models"] "overture:places:place" = "overture.schema.places:Place" + +[[examples.Place]] +id = "99003ee6-e75b-4dd6-8a8a-53a5a716c50d" +geometry = "POINT (-150.46875 -79.1713346)" +version = 1 +confidence = 0.7337175792507205 +websites = [ + "https://www.superhotel.co.jp/s_hotels/beppu/" +] +socials = [ + "https://www.facebook.com/107663894904826" +] +emails = "null" +phones = [ + "+81977009000" +] +operating_status = "open" +theme = "places" +type = "place" + +[examples.Place.bbox] +xmin = -150.46875 +xmax = -150.46875 +ymin = -79.17134094238281 +ymax = -79.17133331298828 + +[[examples.Place.sources]] +property = "" +dataset = "meta" +record_id = 
"107663894904826" +update_time = "2025-06-30T07:00:00.000Z" +confidence = 0.7337175792507205 +between = "null" + +[examples.Place.names] +primary = "スーパーホテル別府駅前" +common = "null" +rules = "null" + +[examples.Place.categories] +primary = "hotel" +alternate = "null" + +[examples.Place.brand] +wikidata = "null" + +[examples.Place.brand.names] +primary = "SUPER HOTEL" +common = "null" +rules = "null" + +[[examples.Place.addresses]] +freeform = "秋田県横手市駅前町13−8" +locality = "横手市" +postcode = "013-0036" +region = "null" +country = "JP" diff --git a/packages/overture-schema-system/src/overture/schema/system/feature.py b/packages/overture-schema-system/src/overture/schema/system/feature.py index de3f14df6..fd067df54 100644 --- a/packages/overture-schema-system/src/overture/schema/system/feature.py +++ b/packages/overture-schema-system/src/overture/schema/system/feature.py @@ -30,6 +30,33 @@ from overture.schema.system.ref import Id +def resolve_discriminator_field_name(discriminator: object) -> str | None: + """Resolve a Pydantic discriminator value to its field name string. + + Handles the three forms a discriminator can take: + - A plain string (used directly as the field name). + - A ``pydantic.Discriminator`` whose ``.discriminator`` attribute is a string. + - A ``pydantic.Discriminator`` whose ``.discriminator`` is a callable + produced by ``Feature.field_discriminator``, which stores the field name + as ``_field_name`` on the callable. + + Returns None if *discriminator* is None or its field name cannot be + determined. 
+ """ + if discriminator is None: + return None + if isinstance(discriminator, str): + return discriminator + inner = getattr(discriminator, "discriminator", None) + if isinstance(inner, str): + return inner + if callable(inner): + field_name = getattr(inner, "_field_name", None) + if isinstance(field_name, str): + return field_name + return None + + class Feature(BaseModel): """ A feature is something you can point to on a map—like a building, road, lake, or park—with the @@ -206,7 +233,10 @@ def field_discriminator( Returns ------- Discriminator - Discriminator that enables discriminated unions that include features + Discriminator that enables discriminated unions that include features. + The inner callable carries a ``_field_name`` attribute set to *field*, + allowing introspection code to recover the discriminator field name + without hardcoding it. Raises ------ @@ -296,6 +326,7 @@ def get_discriminator_value(data: object) -> Any: else getattr(data, field, None) ) + get_discriminator_value._field_name = field # type: ignore[attr-defined] return Discriminator(get_discriminator_value) @model_serializer(mode="wrap") diff --git a/packages/overture-schema-system/src/overture/schema/system/field_constraint/collection.py b/packages/overture-schema-system/src/overture/schema/system/field_constraint/collection.py index a39159217..f8a699d91 100644 --- a/packages/overture-schema-system/src/overture/schema/system/field_constraint/collection.py +++ b/packages/overture-schema-system/src/overture/schema/system/field_constraint/collection.py @@ -43,7 +43,7 @@ def _is_collection_type(source: type[Any]) -> bool: class UniqueItemsConstraint(CollectionConstraint): - """Ensures all items in a collection are unique.""" + """All items must be unique.""" def validate(self, value: list[Any] | None, info: ValidationInfo) -> None: # Skip validation for None values (used with optional fields) diff --git 
a/packages/overture-schema-system/src/overture/schema/system/field_constraint/string.py b/packages/overture-schema-system/src/overture/schema/system/field_constraint/string.py index 8c2d90415..68737f5db 100644 --- a/packages/overture-schema-system/src/overture/schema/system/field_constraint/string.py +++ b/packages/overture-schema-system/src/overture/schema/system/field_constraint/string.py @@ -324,7 +324,7 @@ def __get_pydantic_json_schema__( class PhoneNumberConstraint(StringConstraint): - """Constraint for international phone numbers.""" + """Allows only international phone numbers.""" def __init__(self) -> None: self.pattern = re.compile(r"^\+\d{1,3}[\s\-\(\)0-9]+$") @@ -359,7 +359,7 @@ def __get_pydantic_json_schema__( class RegionCodeConstraint(StringConstraint): - """ISO 3166-2 principal subdivision code constraint.""" + """Allows only ISO 3166-2 principal subdivision codes.""" def __init__(self) -> None: self.pattern = re.compile(r"^[A-Z]{2}-[A-Z0-9]{1,3}$") @@ -392,7 +392,7 @@ def __get_pydantic_json_schema__( class WikidataIdConstraint(StringConstraint): - """Constraint for Wikidata identifiers (Q followed by digits).""" + """Allows only Wikidata identifiers (Q followed by digits).""" def __init__(self) -> None: self.pattern = re.compile(r"^Q\d+$") diff --git a/packages/overture-schema-system/src/overture/schema/system/primitive/__init__.py b/packages/overture-schema-system/src/overture/schema/system/primitive/__init__.py index 96bfd0250..2b27ef89e 100644 --- a/packages/overture-schema-system/src/overture/schema/system/primitive/__init__.py +++ b/packages/overture-schema-system/src/overture/schema/system/primitive/__init__.py @@ -24,7 +24,7 @@ ) uint8 = NewType("uint8", Annotated[int, Field(ge=0, le=255)]) # type: ignore [type-arg] -""" +uint8.__doc__ = """ Portable 8-bit unsigned integer. 
This is an `int` at runtime, but using `uint8` for Pydantic model fields instead of `int` makes them @@ -32,7 +32,7 @@ """ uint16 = NewType("uint16", Annotated[int, Field(ge=0, le=65535)]) # type: ignore[type-arg] -""" +uint16.__doc__ = """ Portable 16-bit unsigned integer. This is an `int` at runtime, but using `uint16` for Pydantic model fields instead of `int` makes @@ -40,7 +40,7 @@ """ uint32 = NewType("uint32", Annotated[int, Field(ge=0, le=4294967295)]) # type: ignore[type-arg] -""" +uint32.__doc__ = """ Portable 32-bit unsigned integer. This is an `int` at runtime, but using `uint32` for Pydantic model fields instead of `int` makes @@ -48,7 +48,7 @@ """ int8 = NewType("int8", Annotated[int, Field(ge=-128, le=127)]) # type: ignore[type-arg] -""" +int8.__doc__ = """ Portable 8-bit signed integer. This is an `int` at runtime, but using `int8` for Pydantic model fields instead of `int` makes them @@ -56,7 +56,7 @@ """ int16 = NewType("int16", Annotated[int, Field(ge=-32768, le=32767)]) # type: ignore[type-arg] -""" +int16.__doc__ = """ Portable 16-bit signed integer. This is an `int` at runtime, but using `int16` for Pydantic model fields instead of `int` makes them @@ -64,7 +64,7 @@ """ int32 = NewType("int32", Annotated[int, Field(ge=-(2**31), le=2**31 - 1)]) # type: ignore[type-arg] -""" +int32.__doc__ = """ Portable 32-bit signed integer. This is an `int` at runtime, but using `int32` for Pydantic model fields instead of `int` makes them @@ -72,7 +72,7 @@ """ int64 = NewType("int64", Annotated[int, Field(ge=-(2**63), le=2**63 - 1)]) # type: ignore[type-arg] -""" +int64.__doc__ = """ Portable 64-bit signed integer. This is an `int` at runtime, but using `int64` for Pydantic model fields instead of `int` makes them @@ -80,7 +80,7 @@ """ float32 = NewType("float32", float) # type: ignore[type-arg] -""" +float32.__doc__ = """ Portable IEEE 32-bit floating point number. 
This is a `float` at runtime, but using `float32` for Pydantic model fields instead of `float` makes @@ -88,7 +88,7 @@ """ float64 = NewType("float64", float) # type: ignore[type-arg] -""" +float64.__doc__ = """ Portable IEEE 64-bit floating point number. This is a `float` at runtime, but using `float64` for Pydantic model fields instead of `float` makes diff --git a/packages/overture-schema-system/src/overture/schema/system/ref/id.py b/packages/overture-schema-system/src/overture/schema/system/ref/id.py index eb467f142..2ffa5dad8 100644 --- a/packages/overture-schema-system/src/overture/schema/system/ref/id.py +++ b/packages/overture-schema-system/src/overture/schema/system/ref/id.py @@ -18,7 +18,7 @@ ), ], ) -""" +Id.__doc__ = """ A unique identifier. """ diff --git a/packages/overture-schema-system/src/overture/schema/system/string.py b/packages/overture-schema-system/src/overture/schema/system/string.py index cac9112ca..6533a192a 100644 --- a/packages/overture-schema-system/src/overture/schema/system/string.py +++ b/packages/overture-schema-system/src/overture/schema/system/string.py @@ -35,7 +35,7 @@ Field(description="An ISO 3166-1 alpha-2 country code"), ], ) # type: ignore [type-arg] -""" +CountryCodeAlpha2.__doc__ = """ An ISO-3166-1 alpha-2 country code. """ @@ -49,7 +49,7 @@ ), ], ) # type: ignore [type-arg] -""" +HexColor.__doc__ = """ A color represented as an #RRGGBB or #RGB hexadecimal string. For example: @@ -67,7 +67,7 @@ Field(description="A JSON Pointer (as described in RFC-6901)"), ], ) # type: ignore [type-arg] -""" +JsonPointer.__doc__ = """ A JSON Pointer As described in `the JSON Pointer specification, RFC-6901`_. @@ -91,7 +91,7 @@ ), ], ) # type: ignore [type-arg] -""" +LanguageTag.__doc__ = """ A BCP-47 language tag. As described in `Tags for Identifying Languages, BCP-47`_. 
@@ -114,7 +114,7 @@ Field(description="A string that contains no whitespace characters"), ], ) # type: ignore [type-arg] -""" +NoWhitespaceString.__doc__ = """ A string that contains no whitespace characters. """ @@ -124,7 +124,7 @@ str, PhoneNumberConstraint(), Field(description="An international phone number") ], ) # type: ignore [type-arg] -""" +PhoneNumber.__doc__ = """ An international phone number. """ @@ -136,12 +136,12 @@ Field(description="An ISO 3166-2 principal subdivision code"), ], ) # type: ignore [type-arg] -""" +RegionCode.__doc__ = """ An ISO 3166-2 principal subdivision code. """ SnakeCaseString = NewType("SnakeCaseString", Annotated[str, SnakeCaseConstraint()]) -""" +SnakeCaseString.__doc__ = """ A string that looks like a snake case identifier, like a Python variable name (*e.g.*, `foo_bar`). """ @@ -155,7 +155,7 @@ ), ], ) # type: ignore [type-arg] -""" +StrippedString.__doc__ = """ A string without leading or trailing whitespace. """ @@ -167,7 +167,7 @@ Field(description="A wikidata ID, as found on https://www.wikidata.org/"), ], ) # type: ignore [type-arg] -""" +WikidataId.__doc__ = """ A wikidata ID, as found on https://www.wikidata.org/. 
- `"Q42"` diff --git a/packages/overture-schema-system/tests/test_feature.py b/packages/overture-schema-system/tests/test_feature.py index ec0dfc795..834cd7881 100644 --- a/packages/overture-schema-system/tests/test_feature.py +++ b/packages/overture-schema-system/tests/test_feature.py @@ -332,6 +332,18 @@ class BarModel(BaseModel): ): tap.validate_json(json.dumps(data)) + def test_field_discriminator_attaches_field_name(self) -> None: + """The callable returned by field_discriminator carries _field_name for introspection.""" + + class A(Feature): + kind: Literal["a"] + + class B(Feature): + kind: Literal["b"] + + disc = Feature.field_discriminator("kind", A, B) + assert disc.discriminator._field_name == "kind" # type: ignore[union-attr] + def test_error_field_not_str(self) -> None: with pytest.raises( TypeError, match="`field` must be a `str`, but 42 has type `int`" diff --git a/packages/overture-schema-system/tests/util.py b/packages/overture-schema-system/tests/util.py index 0ce2e4932..f9725f5ba 100644 --- a/packages/overture-schema-system/tests/util.py +++ b/packages/overture-schema-system/tests/util.py @@ -10,10 +10,10 @@ def subset_conflicts( Parameters ---------- - a : dict[str, object] + a : Mapping[str, object] Candidate subset of `b` - b : dict[str, object] - Candidate supserset of `a` + b : Mapping[str, object] + Candidate superset of `a` Returns ------- diff --git a/packages/overture-schema-transportation-theme/pyproject.toml b/packages/overture-schema-transportation-theme/pyproject.toml index 702a8aea8..3302845b2 100644 --- a/packages/overture-schema-transportation-theme/pyproject.toml +++ b/packages/overture-schema-transportation-theme/pyproject.toml @@ -37,3 +37,86 @@ packages = ["src/overture"] [project.entry-points."overture.models"] "overture:transportation:connector" = "overture.schema.transportation:Connector" "overture:transportation:segment" = "overture.schema.transportation:Segment" + +[[examples.Connector]] +id = 
"39542bee-230f-4b91-b7e5-a9b58e0c59b1" +geometry = "POINT (-176.5472979 -43.9679472)" +version = 1 +theme = "transportation" +type = "connector" + +[examples.Connector.bbox] +xmin = -176.54730224609375 +xmax = -176.54727172851562 +ymin = -43.96794891357422 +ymax = -43.96794128417969 + +[[examples.Connector.sources]] +property = "" +dataset = "OpenStreetMap" +record_id = "null" +update_time = "null" +confidence = "null" +between = "null" + +[[examples.Segment]] +id = "1bc62f3b-08b5-42b8-89fe-36f685f60455" +geometry = "LINESTRING (-176.5636191 -43.954404, -176.5643637 -43.9538145, -176.5647264 -43.9535274, -176.5649947 -43.953251)" +version = 1 +subtype = "road" +class = "residential" +routes = "null" +subclass_rules = "null" +access_restrictions = "null" +level_rules = "null" +destinations = "null" +prohibited_transitions = "null" +road_flags = "null" +speed_limits = "null" +width_rules = "null" +subclass = "null" +rail_flags = "null" +theme = "transportation" +type = "segment" + +[examples.Segment.bbox] +xmin = -176.5650177001953 +xmax = -176.56361389160156 +ymin = -43.954410552978516 +ymax = -43.953250885009766 + +[[examples.Segment.sources]] +property = "" +dataset = "OpenStreetMap" +record_id = "w53435546@6" +update_time = "2021-05-03T06:37:03Z" +confidence = "null" +between = "null" + +[examples.Segment.names] +primary = "Meteorological Lane" +common = "null" + +[[examples.Segment.names.rules]] +variant = "common" +language = "null" +perspectives = "null" +value = "Meteorological Lane" +between = "null" +side = "null" + +[[examples.Segment.connectors]] +connector_id = "15b2c131-9137-4add-88c6-2acd3fa61355" +at = 0.0 + +[[examples.Segment.connectors]] +connector_id = "23ae2702-ef77-4d2e-b39d-77360b696d20" +at = 0.523536154 + +[[examples.Segment.connectors]] +connector_id = "8e944ce1-4b81-49eb-a823-7d98779c855c" +at = 1.0 + +[[examples.Segment.road_surface]] +value = "gravel" +between = "null" diff --git a/pyproject.toml b/pyproject.toml index 
c51f94221..1679e49a3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -56,12 +56,14 @@ dev = [ ] [tool.pytest.ini_options] +verbosity_subtests = 0 pythonpath = [ "packages/overture-schema-addresses-theme/tests", "packages/overture-schema-annex/tests", "packages/overture-schema-base-theme/tests", "packages/overture-schema-buildings-theme/tests", "packages/overture-schema-cli/tests", + "packages/overture-schema-codegen/tests", "packages/overture-schema-core/tests", "packages/overture-schema-divisions-theme/tests", "packages/overture-schema-places-theme/tests", diff --git a/uv.lock b/uv.lock index d35d4080e..08122cf2a 100644 --- a/uv.lock +++ b/uv.lock @@ -14,6 +14,7 @@ members = [ "overture-schema-base-theme", "overture-schema-buildings-theme", "overture-schema-cli", + "overture-schema-codegen", "overture-schema-core", "overture-schema-divisions-theme", "overture-schema-places-theme", @@ -785,6 +786,26 @@ dev = [ { name = "ruff" }, ] +[[package]] +name = "overture-schema-codegen" +source = { editable = "packages/overture-schema-codegen" } +dependencies = [ + { name = "click" }, + { name = "jinja2" }, + { name = "overture-schema-core" }, + { name = "overture-schema-system" }, + { name = "tomli", marker = "python_full_version < '3.11'" }, +] + +[package.metadata] +requires-dist = [ + { name = "click", specifier = ">=8.0" }, + { name = "jinja2", specifier = ">=3.0" }, + { name = "overture-schema-core", editable = "packages/overture-schema-core" }, + { name = "overture-schema-system", editable = "packages/overture-schema-system" }, + { name = "tomli", marker = "python_full_version < '3.11'", specifier = ">=2.0" }, +] + [[package]] name = "overture-schema-core" source = { editable = "packages/overture-schema-core" }