diff --git a/Makefile b/Makefile index 6788c8850..0ae1d38e6 100644 --- a/Makefile +++ b/Makefile @@ -16,7 +16,7 @@ test-all: uv-sync @uv run pytest -W error packages/ test: uv-sync - @uv run pytest -W error packages/ -x + @uv run pytest -W error packages/ -x -q --tb=short coverage: uv-sync @uv run pytest packages/ --cov overture.schema --cov-report=term --cov-report=html && open htmlcov/index.html diff --git a/packages/overture-schema-addresses-theme/pyproject.toml b/packages/overture-schema-addresses-theme/pyproject.toml index 485f21a8b..ad840f40d 100644 --- a/packages/overture-schema-addresses-theme/pyproject.toml +++ b/packages/overture-schema-addresses-theme/pyproject.toml @@ -38,3 +38,36 @@ testpaths = ["tests"] [project.entry-points."overture.models"] "overture:addresses:address" = "overture.schema.addresses:Address" + +[[examples.Address]] +id = "416ab01c-d836-4c4f-aedc-2f30941ce94d" +geometry = "POINT (-176.5637854 -43.9471955)" +country = "NZ" +postcode = "null" +street = "Tikitiki Hill Road" +number = "54" +unit = "null" +postal_city = "null" +version = 1 +theme = "addresses" +type = "address" + +[examples.Address.bbox] +xmin = -176.56381225585938 +xmax = -176.56378173828125 +ymin = -43.94719696044922 +ymax = -43.94718933105469 + +[[examples.Address.address_levels]] +value = "Chatham Islands" + +[[examples.Address.address_levels]] +value = "Chatham Island" + +[[examples.Address.sources]] +property = "" +dataset = "OpenAddresses/LINZ" +record_id = "null" +update_time = "null" +confidence = "null" +between = "null" diff --git a/packages/overture-schema-base-theme/pyproject.toml b/packages/overture-schema-base-theme/pyproject.toml index 09a46f613..dedd9e0db 100644 --- a/packages/overture-schema-base-theme/pyproject.toml +++ b/packages/overture-schema-base-theme/pyproject.toml @@ -41,3 +41,210 @@ packages = ["src/overture"] "overture:base:land_cover" = "overture.schema.base:LandCover" "overture:base:land_use" = "overture.schema.base:LandUse" 
"overture:base:water" = "overture.schema.base:Water" + +[[examples.Bathymetry]] +id = "5d40bd6c-db14-5492-b29f-5e25a59032bc" +geometry = "MULTIPOLYGON (((-170.71296928 -76.744313428, -170.719841483 -76.757076376, -170.731061124 -76.761566192, -170.775652756 -76.76338726, -170.853616381 -76.76253958, -170.918562293 -76.755380155, -170.970490492 -76.741908984, -170.998699301 -76.729180777, -171.003188718 -76.717195533, -170.990421551 -76.703765214, -170.960397802 -76.68888982, -170.940748072 -76.674697941, -170.931472364 -76.661189576, -170.927114414 -76.637296658, -170.927674224 -76.603019188, -170.939335393 -76.574637428, -170.962097922 -76.552151379, -170.999015387 -76.535715361, -171.050087788 -76.525329373, -171.079133298 -76.50751024, -171.086151917 -76.482257963, -171.098653755 -76.462747286, -171.11663881 -76.448978211, -171.146691397 -76.437601179, -171.188811514 -76.428616191, -171.296181785 -76.4228609, -171.468802209 -76.420335306, -171.566055241 -76.41501101, -171.587940879 -76.406888013, -171.59004284 -76.387987744, -171.572361122 -76.358310204, -171.549343725 -76.334488281, -171.520990649 -76.316521976, -171.453759127 -76.301763636, -171.347649159 -76.290213262, -171.30597166 -76.267707269, -171.328726628 -76.234245658, -171.36676019 -76.195627518, -171.420072345 -76.151852851, -171.444766298 -76.12494912, -171.44084205 -76.114916326, -171.378107286 -76.099627787, -171.256562007 -76.079083503, -171.228218647 -76.058825682, -171.293077208 -76.038854322, -171.421365419 -76.023534207, -171.613083278 -76.012865337, -171.76411833 -75.99938969, -171.874470572 -75.983107266, -172.121928361 -75.958403596, -172.506491695 -75.925278679, -172.744527804 -75.899736153, -172.836036689 -75.88177602, -172.904681746 -75.862406785, -172.950462974 -75.841628448, -173.000855857 -75.830396498, -173.055860393 -75.828710933, -173.177561398 -75.810743709, -173.365958872 -75.776494827, -173.493573084 -75.759370386, -173.560404033 -75.759370386, -173.620925776 -75.77158365, 
-173.675138312 -75.796010178, -173.733786206 -75.808642966, -173.796869456 -75.809482015, -173.847216433 -75.805553449, -173.884827135 -75.79685727, -173.90475244 -75.789177124, -173.906992347 -75.782513013, -173.881736947 -75.76894365, -173.828986239 -75.748469035, -173.797974615 -75.732298475, -173.788702075 -75.72043197, -173.82491541 -75.701013882, -173.90661462 -75.674044211, -173.977087913 -75.656066882, -174.03633529 -75.647081894, -174.150190099 -75.643010485, -174.31865234 -75.643852655, -174.444433211 -75.652836726, -174.527532713 -75.669962696, -174.581709229 -75.687086831, -174.606962758 -75.704209131, -174.631095834 -75.708279163, -174.654108458 -75.699296928, -174.688637451 -75.699296928, -174.734682816 -75.708279163, -174.797846917 -75.708699866, -174.878129754 -75.700559037, -174.939903816 -75.70870181, -174.9831691 -75.733128185, -175.025841122 -75.746602837, -175.06791988 -75.749125768, -175.09922327 -75.755318987, -175.119751293 -75.765182495, -175.127900229 -75.775197415, -175.123670077 -75.785363749, -175.111718372 -75.791289392, -175.092045112 -75.792974345, -175.049907399 -75.780622976, -174.985305232 -75.754235285, -174.935355308 -75.74552996, -174.900057628 -75.754507001, -174.886060973 -75.766815613, -174.893365345 -75.782455795, -174.907537393 -75.791536245, -174.928577117 -75.794056963, -174.971105378 -75.818213107, -175.035122174 -75.864004677, -175.060941949 -75.892403254, -175.048564703 -75.903408839, -175.020469049 -75.909193043, -174.976654988 -75.909755867, -174.944760829 -75.90482541, -174.924786572 -75.894401673, -174.92111336 -75.881479168, -174.933741192 -75.866057897, -174.900484967 -75.857513625, -174.821344686 -75.855846351, -174.752433709 -75.839289534, -174.693752038 -75.807843172, -174.652894268 -75.780747792, -174.629860399 -75.758003392, -174.571227588 -75.745793709, -174.476995837 -75.744118743, -174.398722205 -75.751841803, -174.336406693 -75.768962888, -174.300477946 -75.783262828, -174.290935964 -75.794741623, 
-174.28812912 -75.812412878, -174.292057414 -75.836276591, -174.289237223 -75.852155302, -174.279668547 -75.860049012, -174.205113931 -75.879998026, -174.065573375 -75.912002343, -173.957779122 -75.924071248, -173.881731171 -75.916204739, -173.846521251 -75.926706189, -173.852149361 -75.955575598, -173.845408416 -75.979439305, -173.826298414 -75.99829731, -173.76424232 -76.018956172, -173.659240133 -76.041415889, -173.560434089 -76.057698465, -173.467824188 -76.067803901, -173.404678836 -76.077625909, -173.370998032 -76.087164489, -173.332530272 -76.106814524, -173.289275555 -76.136576014, -173.231864101 -76.154545405, -173.160295911 -76.1607227, -173.093917454 -76.17278471, -173.032728732 -76.190731436, -173.009710709 -76.205560908, -173.024863387 -76.217273124, -173.048718935 -76.225374126, -173.081277354 -76.229863912, -173.219658797 -76.237442552, -173.463863265 -76.248110046, -173.60352174 -76.25793895, -173.638634223 -76.266929265, -173.658723482 -76.274676093, -173.663789516 -76.281179435, -173.661403366 -76.289363255, -173.651565032 -76.299227554, -173.627282775 -76.313843189, -173.588556596 -76.33321016, -173.575369172 -76.355231445, -173.587720504 -76.379907046, -173.573965869 -76.402499893, -173.53410527 -76.423009985, -173.518376226 -76.437156259, -173.526778738 -76.444938715, -173.559015515 -76.446303683, -173.615086557 -76.441251162, -173.686785609 -76.421600788, -173.774112673 -76.387352563, -173.854573513 -76.372333877, -173.928168128 -76.37654473, -173.968906731 -76.383732772, -173.97678932 -76.393898005, -173.979325549 -76.410884215, -173.976515417 -76.434691403, -174.000646474 -76.454452818, -174.051718722 -76.470168462, -174.08231827 -76.482963711, -174.092445119 -76.492838563, -174.075053216 -76.514344245, -174.030142562 -76.547480757, -174.016669929 -76.575274601, -174.034635317 -76.597725777, -174.037021169 -76.62030279, -174.023827484 -76.64300564, -174.034634583 -76.661942018, -174.069442464 -76.677111923, -174.086843964 -76.690616859, 
-174.086839082 -76.702456825, -174.080513222 -76.712456309, -174.067866385 -76.72061531, -174.036259441 -76.725116584, -173.98569239 -76.725960131, -173.93723318 -76.720486558, -173.89088181 -76.708695864, -173.780274695 -76.695221211, -173.605411835 -76.6800626, -173.487930602 -76.662096294, -173.427830996 -76.641322294, -173.370307559 -76.630935294, -173.315360292 -76.630935294, -173.249406002 -76.637251344, -173.17244469 -76.649883444, -173.110795196 -76.653532162, -173.06445752 -76.648197497, -173.029349452 -76.637355272, -173.005470993 -76.621005486, -173.01753216 -76.605236858, -173.065532955 -76.590049388, -173.096548505 -76.576599032, -173.11057881 -76.564885791, -173.108053605 -76.552301955, -173.08897289 -76.538847523, -173.051362225 -76.527628807, -172.99522161 -76.518645807, -172.891534181 -76.516119525, -172.740299938 -76.52004996, -172.648684331 -76.524540794, -172.61668736 -76.529592027, -172.584268588 -76.541098757, -172.551428016 -76.559060982, -172.533042741 -76.576141146, -172.529112765 -76.592339249, -172.540195073 -76.604524646, -172.566289666 -76.612697339, -172.576243291 -76.621303431, -172.570055947 -76.630342924, -172.555183534 -76.636123529, -172.531626051 -76.638645245, -172.517040304 -76.643518276, -172.511426292 -76.650742621, -172.551848294 -76.672312544, -172.63830631 -76.708228042, -172.701431121 -76.728711408, -172.741222726 -76.733762641, -172.81460886 -76.72534004, -172.921589524 -76.703443605, -173.006960733 -76.697273314, -173.070722487 -76.706829166, -173.101615682 -76.719791531, -173.099640316 -76.736160408, -173.033958817 -76.759064999, -172.904571183 -76.788505304, -172.847033841 -76.810916113, -172.861346791 -76.826297424, -172.924787296 -76.856444925, -173.037355356 -76.901358615, -173.149640378 -76.935043659, -173.26164236 -76.957500057, -173.354942309 -76.968728255, -173.429540223 -76.968728255, -173.487771718 -76.964657535, -173.529636796 -76.956516094, -173.572768938 -76.955559014, -173.617168145 -76.961786296, 
-173.614655836 -76.97446809, -173.565232013 -76.993604396, -173.461502424 -77.006682128, -173.303467069 -77.013701287, -173.163373388 -77.02787859, -173.041221382 -77.049214037, -172.918094542 -77.059179951, -172.793992869 -77.057776334, -172.720418717 -77.044861043, -172.697372088 -77.020434079, -172.675885915 -77.003730799, -172.655960197 -76.994751205, -172.60882792 -76.987594764, -172.534489083 -76.982261476, -172.480072837 -76.983094424, -172.445579184 -76.990093609, -172.428332542 -76.998610734, -172.428332911 -77.008645799, -172.435068344 -77.018150822, -172.448538839 -77.027125803, -172.490777829 -77.039613708, -172.561785312 -77.055614535, -172.628175119 -77.080598263, -172.68994725 -77.114564892, -172.751818039 -77.133793765, -172.813787485 -77.138284883, -172.900229764 -77.131828165, -173.011144875 -77.114423613, -173.119679588 -77.128474884, -173.2258339 -77.17398198, -173.273849553 -77.202664633, -173.263726547 -77.214522842, -173.165895559 -77.239681117, -172.980356589 -77.278139457, -172.880291531 -77.312658914, -172.865700386 -77.343239487, -172.867667457 -77.371126102, -172.886192744 -77.39631876, -172.999732531 -77.429966955, -173.208286817 -77.472070689, -173.335454668 -77.509278677, -173.381236082 -77.541590921, -173.403703936 -77.570407724, -173.40285823 -77.595729086, -173.378288408 -77.634921, -173.329994472 -77.687983467, -173.241287742 -77.735563094, -173.112168219 -77.777659882, -173.054064387 -77.81089869, -173.066976248 -77.835279519, -173.063736051 -77.854657976, -173.044343797 -77.869034061, -172.890349983 -77.896435115, -172.60175461 -77.936861139, -172.376181212 -77.961986812, -172.213629791 -77.971812135, -172.023427102 -77.967320559, -171.805573145 -77.948512083, -171.581263004 -77.918894833, -171.350496677 -77.87846881, -171.217147208 -77.851799157, -171.181214596 -77.838885875, -171.160572341 -77.826074082, -171.155220441 -77.813363779, -171.178789134 -77.790158543, -171.231278422 -77.756458375, -171.27338337 -77.70988804, 
-171.305103978 -77.65044754, -171.293875473 -77.602346602, -171.239697854 -77.565585227, -171.168401509 -77.532887375, -171.079986438 -77.504253044, -171.028614514 -77.483042244, -171.014285737 -77.469254974, -171.016677114 -77.456576914, -171.035788644 -77.445008064, -171.086879845 -77.431646501, -171.169950715 -77.416492226, -171.216537864 -77.403175691, -171.226641293 -77.391696895, -171.228607057 -77.378968685, -171.222435157 -77.364991059, -171.168824693 -77.334840949, -171.067775664 -77.288518355, -171.000402018 -77.24121644, -170.966703754 -77.192935206, -170.894838531 -77.157002595, -170.784806349 -77.133418606, -170.725150821 -77.11627156, -170.715871945 -77.105561456, -170.710674146 -77.077210652, -170.709557424 -77.031219147, -170.697909144 -76.992502178, -170.675729304 -76.961059744, -170.654536164 -76.940848729, -170.634329723 -76.931869135, -170.581564681 -76.922044903, -170.496241038 -76.911376032, -170.429709562 -76.893409727, -170.381970254 -76.868145986, -170.285260999 -76.838950739, -170.139581798 -76.805823986, -170.061542334 -76.78431495, -170.051142608 -76.77442363, -170.076677284 -76.763148845, -170.138146365 -76.750490597, -170.192753568 -76.731526593, -170.240498896 -76.706256833, -170.315896371 -76.686462585, -170.418945993 -76.67214385, -170.498267121 -76.665405567, -170.553859754 -76.666247738, -170.609039198 -76.673409769, -170.663805452 -76.68689166, -170.695686968 -76.698414281, -170.704683743 -76.70797763, -170.710444514 -76.723277346, -170.71296928 -76.744313428), (-172.46185717 -77.485683162, -172.491725041 -77.49003391, -172.535448064 -77.490594163, -172.566986057 -77.488349711, -172.586339021 -77.483300552, -172.598540475 -77.476173053, -172.60359042 -77.466967216, -172.601627836 -77.458872071, -172.592652724 -77.451887618, -172.556765055 -77.448396429, -172.49396483 -77.448398503, -172.453726685 -77.452881992, -172.436050621 -77.461846897, -172.429868964 -77.468114837, -172.435181715 -77.47168581, -172.44584445 -77.477541919, 
-172.46185717 -77.485683162), (-172.812798475 -76.363628771, -172.855573928 -76.365453015, -172.885037626 -76.36040045, -172.90720433 -76.351027386, -172.92207404 -76.337333821, -172.9168827 -76.324750727, -172.89163031 -76.313278104, -172.862193885 -76.307261221, -172.828573425 -76.30670008, -172.792121028 -76.311189877, -172.752836694 -76.320730613, -172.732062811 -76.331770033, -172.729799379 -76.344308139, -172.756711267 -76.354927718, -172.812798475 -76.363628771), (-171.932998671 -76.183124002, -172.010021088 -76.180457336, -172.070931389 -76.166984091, -172.113033554 -76.150312062, -172.136327583 -76.130441248, -172.133522137 -76.111120124, -172.104617217 -76.092348689, -172.06028165 -76.080296327, -172.000515436 -76.074963039, -171.918725408 -76.076928027, -171.814911566 -76.086191292, -171.745182124 -76.097695899, -171.709537083 -76.111441849, -171.696346087 -76.126554541, -171.705609136 -76.143033974, -171.731004713 -76.156183802, -171.77253282 -76.166004024, -171.83986414 -76.174984091, -171.932998671 -76.183124002), (-173.16885937 -76.066345013, -173.199147981 -76.070696107, -173.23950163 -76.071257052, -173.269213382 -76.065813298, -173.288283234 -76.054364845, -173.2799961 -76.038973879, -173.244351978 -76.0196404, -173.207608446 -76.007588038, -173.169765504 -76.002816794, -173.139490241 -76.003094691, -173.116782658 -76.008421729, -173.104589039 -76.016938854, -173.102909386 -76.028646065, -173.111183172 -76.03940804, -173.129410398 -76.049224779, -173.148635798 -76.05820377, -173.16885937 -76.066345013)))" +version = 0 +depth = 500 +theme = "base" +type = "bathymetry" + +[examples.Bathymetry.bbox] +xmin = -175.12791442871094 +xmax = -170.05111694335938 +ymin = -77.9718246459961 +ymax = -75.64299774169922 + +[[examples.Bathymetry.sources]] +property = "" +dataset = "ETOPO/GLOBathy" +record_id = "2024-12-09T00:00:00.000Z" +update_time = "null" +confidence = "null" +between = "null" + +[examples.Bathymetry.cartography] +prominence = "null" +min_zoom = 
"null" +max_zoom = "null" +sort_key = 12 + +[[examples.Infrastructure]] +id = "e9e3d506-89c0-3473-8cee-5e5ac6596d6c" +geometry = "POINT (-179.9999994 -82.42408)" +version = 0 +level = "null" +subtype = "pedestrian" +class = "information" +height = "null" +surface = "null" +wikidata = "Q800558" +theme = "base" +type = "infrastructure" + +[examples.Infrastructure.bbox] +xmin = -180.0 +xmax = -179.99998474121094 +ymin = -82.42408752441406 +ymax = -82.42407989501953 + +[[examples.Infrastructure.sources]] +property = "" +dataset = "OpenStreetMap" +record_id = "n7674174803@2" +update_time = "2023-04-07T17:37:48.000Z" +confidence = "null" +between = "null" + +[examples.Infrastructure.names] +primary = "1306 km to South Pole" +common = "null" +rules = "null" + +[examples.Infrastructure.source_tags] +description = "1036 km to South Pole." +information = "route_marker" +note = "The road continue in west side of the map" +start_date = "2007" +tourism = "information" +wikipedia = "en:South Pole Traverse" + +[[examples.Land]] +id = "70fc3596-a987-3fea-820c-c016c0a2f0da" +geometry = "POINT (-178.7 -85.45)" +version = 0 +level = "null" +subtype = "physical" +class = "cliff" +surface = "null" +wikidata = "Q5282342" +elevation = "null" +theme = "base" +type = "land" + +[examples.Land.bbox] +xmin = -178.7000274658203 +xmax = -178.6999969482422 +ymin = -85.45001220703125 +ymax = -85.44999694824219 + +[[examples.Land.sources]] +property = "" +dataset = "OpenStreetMap" +record_id = "n11693475112@1" +update_time = "2024-03-05T09:23:39.000Z" +confidence = "null" +between = "null" + +[examples.Land.names] +primary = "Dismal Buttress" +common = "null" +rules = "null" + +[examples.Land.source_tags] +natural = "cliff" +"ref:linz:place_id" = "12318" +wikipedia = "en:Dismal Buttress" + +[[examples.LandCover]] +id = "c347312d-012b-5e73-8bd3-a10d04b2981d" +geometry = "POLYGON ((-179.99877531181616 65.95172539425603, -179.99740705536922 65.95265577758867, -179.99751722434937 65.9532545912543, 
-179.9974078443441 65.9541507615366, -179.9965398649702 65.95451215813897, -179.99644396804533 65.95493010632842, -179.99602533095998 65.95502533095993, -179.99468737767813 65.95677071067811, -179.9933586639601 65.9576086639598, -179.99313729490444 65.95812767174695, -179.99314866836227 65.95857649030111, -179.99370507548738 65.95907781410224, -179.99372539425596 65.95947468818369, -179.99395850935272 65.95967260714353, -179.99410866395988 65.96030800303998, -179.99534017576838 65.96101799736452, -179.99575621846904 65.96104928900519, -179.9959057775888 65.96150961146397, -179.9965950523775 65.96161426988128, -179.99663895270027 65.96216619349144, -179.99807649030126 65.96218466463768, -179.99819891654494 65.96189707483568, -179.99799883949768 65.96160842248709, -179.99825961146388 65.96142755541139, -179.99830761159433 65.9610635173197, -179.99936104612706 65.9609995273612, -179.9993797906372 65.96051410937864, -179.99964133604004 65.96039133604008, -179.9997 65.96016912258357, -179.99936104729989 65.95958380650865, -179.99900447103303 65.95954329910117, -179.9987608894112 65.95924038853603, -179.99806463264497 65.95902716440592, -179.99798856507215 65.95838313921075, -179.99834294463088 65.95801088941111, -179.9983374593203 65.9575910941953, -179.99855761159426 65.95723018431977, -179.99921013502978 65.95698784186104, -179.99931463264488 65.95663950159415, -179.99990450886096 65.95637680202988, -179.99997427859432 65.9560635173197, -180.00019127274402 65.9558913550169, -180.00019127274405 65.95544197881631, -180.0000389948438 65.9553039610106, -179.9996246090062 65.9553159274193, -179.99935793918766 65.95327531026125, -179.9988434361254 65.95288259953995, -179.99885243016726 65.95244253241113, -179.9991661934914 65.95227771429981, -179.9991960389287 65.95187767174694, -179.99877531181616 65.95172539425603))" +version = 0 +subtype = "barren" +theme = "base" +type = "land_cover" + +[examples.LandCover.bbox] +xmin = -180.0001983642578 +xmax = -179.99313354492188 
+ymin = 65.95172119140625 +ymax = 65.96218872070312 + +[[examples.LandCover.sources]] +property = "" +dataset = "ESA WorldCover" +record_id = "null" +update_time = "2024-11-07T00:00:00.000Z" +confidence = "null" +between = "null" + +[examples.LandCover.cartography] +prominence = "null" +min_zoom = 8 +max_zoom = 15 +sort_key = 3 + +[[examples.LandUse]] +id = "1e1f6095-5bd2-3fdb-a422-41351b848e9d" +geometry = "POLYGON ((-176.5623454 -43.9567812, -176.5627644 -43.9561272, -176.5626898 -43.9557432, -176.5624297 -43.9553592, -176.562679 -43.9551603, -176.5629058 -43.9552064, -176.5631441 -43.9551769, -176.5632428 -43.9550676, -176.5633066 -43.9548702, -176.5634402 -43.9548071, -176.5639052 -43.9546682, -176.5642479 -43.9544118, -176.5647302 -43.9542142, -176.5651547 -43.954277, -176.5658293 -43.9545243, -176.5659454 -43.9543521, -176.566934 -43.9547987, -176.5669179 -43.955018, -176.5682465 -43.9553205, -176.5671004 -43.9579593, -176.5662034 -43.9600044, -176.5655366 -43.9597247, -176.5646109 -43.9595326, -176.564467 -43.9592563, -176.5639885 -43.9589226, -176.5637013 -43.9586925, -176.563223 -43.9586237, -176.5623454 -43.9567812))" +version = 0 +level = "null" +subtype = "golf" +class = "golf_course" +surface = "null" +wikidata = "null" +elevation = "null" +theme = "base" +type = "land_use" + +[examples.LandUse.bbox] +xmin = -176.56825256347656 +xmax = -176.56231689453125 +ymin = -43.96001052856445 +ymax = -43.95420837402344 + +[[examples.LandUse.sources]] +property = "" +dataset = "OpenStreetMap" +record_id = "w56117029@3" +update_time = "2010-04-24T22:35:13.000Z" +confidence = "null" +between = "null" + +[examples.LandUse.names] +primary = "Chatham Islands Golf Club" +common = "null" +rules = "null" + +[examples.LandUse.source_tags] +"LINZ:source_version" = "V16" +attribution = "http://wiki.osm.org/wiki/Attribution#LINZ" +leisure = "golf_course" +source_ref = "http://www.linz.govt.nz/topography/topo-maps/" + +[[examples.Water]] +id = 
"6bbb5fe5-bf26-3efa-b120-0a7079b60840" +geometry = "POINT (-177.031799 -84.934793)" +version = 0 +level = "null" +subtype = "physical" +class = "cape" +wikidata = "Q33140589" +is_salt = "null" +is_intermittent = "null" +theme = "base" +type = "water" + +[examples.Water.bbox] +xmin = -177.03179931640625 +xmax = -177.0317840576172 +ymin = -84.93480682373047 +ymax = -84.9347915649414 + +[[examples.Water.sources]] +property = "" +dataset = "OpenStreetMap" +record_id = "n11109190647@2" +update_time = "2024-02-11T05:52:05.000Z" +confidence = "null" +between = "null" + +[examples.Water.names] +primary = "Thanksgiving Point" +common = "null" +rules = "null" + +[examples.Water.source_tags] +natural = "cape" +"ref:linz:place_id" = "13433" diff --git a/packages/overture-schema-buildings-theme/pyproject.toml b/packages/overture-schema-buildings-theme/pyproject.toml index c0ca9297c..8d169f4f4 100644 --- a/packages/overture-schema-buildings-theme/pyproject.toml +++ b/packages/overture-schema-buildings-theme/pyproject.toml @@ -37,3 +37,81 @@ packages = ["src/overture"] [project.entry-points."overture.models"] "overture:buildings:building" = "overture.schema.buildings:Building" "overture:buildings:building_part" = "overture.schema.buildings:BuildingPart" + +[[examples.Building]] +id = "148f35b1-7bc1-4180-9280-10d39b13883b" +geometry = "POLYGON ((-176.6435004 -43.9938042, -176.6435738 -43.9937107, -176.6437726 -43.9937913, -176.6436992 -43.9938849, -176.6435004 -43.9938042))" +version = 1 +level = "null" +subtype = "null" +class = "null" +height = "null" +names = "null" +has_parts = false +is_underground = false +num_floors = "null" +num_floors_underground = "null" +min_height = "null" +min_floor = "null" +facade_color = "null" +facade_material = "null" +roof_material = "null" +roof_shape = "null" +roof_direction = "null" +roof_orientation = "null" +roof_color = "null" +roof_height = "null" +theme = "buildings" +type = "building" + +[examples.Building.bbox] +xmin = 
-176.643798828125 +xmax = -176.64349365234375 +ymin = -43.9938850402832 +ymax = -43.993709564208984 + +[[examples.Building.sources]] +property = "" +dataset = "OpenStreetMap" +record_id = "w519166507@1" +update_time = "2017-08-27T21:39:50.000Z" +confidence = "null" +between = "null" + +[[examples.BuildingPart]] +id = "19412d64-51ac-3d6a-ac2f-8a8c8b91bb60" +geometry = "POLYGON ((-73.2462509 -39.8108937, -73.2462755 -39.8109047, -73.246291 -39.8109182, -73.2463022 -39.8109382, -73.2463039 -39.810959, -73.2462962 -39.81098, -73.2462796 -39.8109977, -73.2462674 -39.8110052, -73.2462281 -39.8110153, -73.2461998 -39.811013, -73.2461743 -39.8110034, -73.2461566 -39.8109898, -73.246144 -39.8109702, -73.2461418 -39.8109427, -73.2461511 -39.8109221, -73.2461669 -39.8109066, -73.2461908 -39.8108947, -73.2462184 -39.8108898, -73.2462509 -39.8108937))" +version = 0 +level = 3 +height = "null" +names = "null" +is_underground = false +num_floors = "null" +num_floors_underground = "null" +min_height = "null" +min_floor = "null" +facade_color = "null" +facade_material = "null" +roof_material = "null" +roof_shape = "null" +roof_direction = "null" +roof_orientation = "null" +roof_color = "null" +roof_height = "null" +building_id = "bd663bd4-1844-4d7d-a400-114de051cf49" +theme = "buildings" +type = "building_part" + +[examples.BuildingPart.bbox] +xmin = -73.24630737304688 +xmax = -73.24613952636719 +ymin = -39.81101608276367 +ymax = -39.81088638305664 + +[[examples.BuildingPart.sources]] +property = "" +dataset = "OpenStreetMap" +record_id = "w223076787@2" +update_time = "2014-10-31T22:55:36.000Z" +confidence = "null" +between = "null" diff --git a/packages/overture-schema-cli/src/overture/schema/cli/commands.py b/packages/overture-schema-cli/src/overture/schema/cli/commands.py index 8fdd8bdf4..a75b16cd4 100644 --- a/packages/overture-schema-cli/src/overture/schema/cli/commands.py +++ b/packages/overture-schema-cli/src/overture/schema/cli/commands.py @@ -798,7 +798,7 @@ def 
dump_namespace( sorted_types = sorted(theme_types[theme], key=lambda x: x[0].type) for key, model_class in sorted_types: stdout.print( - f" [bright_black]→[/bright_black] [bold cyan]{key.type}[/bold cyan] [dim magenta]({key.class_name})[/dim magenta]" + f" [bright_black]→[/bright_black] [bold cyan]{key.type}[/bold cyan] [dim magenta]({key.entry_point})[/dim magenta]" ) docstring = get_model_docstring(model_class) if docstring: diff --git a/packages/overture-schema-cli/src/overture/schema/cli/type_analysis.py b/packages/overture-schema-cli/src/overture/schema/cli/type_analysis.py index 9316c80c0..4362d7f76 100644 --- a/packages/overture-schema-cli/src/overture/schema/cli/type_analysis.py +++ b/packages/overture-schema-cli/src/overture/schema/cli/type_analysis.py @@ -8,6 +8,8 @@ from pydantic import BaseModel from pydantic.fields import FieldInfo +from overture.schema.system.feature import resolve_discriminator_field_name + from .types import ErrorLocation, ValidationErrorDict # Type aliases for structural tuple elements @@ -29,11 +31,23 @@ class UnionMetadata: nested_unions: dict[str, "UnionMetadata"] +def _extract_literal_value(model: type[BaseModel], field_name: str) -> str | None: + """Extract the single Literal value from a model field as a string, if present.""" + field_info = model.model_fields.get(field_name) + if field_info is None or field_info.annotation is None: + return None + if get_origin(field_info.annotation) is Literal: + args = get_args(field_info.annotation) + return str(args[0]) if args else None + return None + + def _process_union_member( member: Any, # noqa: ANN401 discriminator_to_model: dict[str, type[BaseModel]], model_name_to_model: dict[str, type[BaseModel]], nested_unions: dict[str, UnionMetadata], + discriminator_field: str | None = None, ) -> None: """Process a single union member, handling nesting recursively. 
@@ -43,6 +57,7 @@ def _process_union_member( discriminator_to_model: Dict to populate with discriminator value mappings model_name_to_model: Dict to populate with model name mappings nested_unions: Dict to populate with nested union metadata + discriminator_field: The discriminator field name from the parent union annotation """ member_origin = get_origin(member) @@ -63,12 +78,24 @@ def _process_union_member( nested_metadata = introspect_union(member) nested_unions[str(member)] = nested_metadata discriminator_to_model.update(nested_metadata.discriminator_to_model) + # The nested union's discriminator_to_model uses the nested discriminator + # field (e.g. "subtype"). Re-extract using the parent discriminator field + # (e.g. "type") so leaf models are also reachable by the parent's values. + if discriminator_field is not None: + for model in nested_metadata.model_name_to_model.values(): + value = _extract_literal_value(model, discriminator_field) + if value is not None: + discriminator_to_model[value] = model return # Unwrap Annotated to get the actual type (e.g., Annotated[Building, Tag('building')]) # and process it recursively _process_union_member( - member_args[0], discriminator_to_model, model_name_to_model, nested_unions + member_args[0], + discriminator_to_model, + model_name_to_model, + nested_unions, + discriminator_field, ) return @@ -76,17 +103,10 @@ def _process_union_member( if inspect.isclass(member) and issubclass(member, BaseModel): model_name_to_model[member.__name__] = member - # Extract discriminator values from known discriminator fields only - # Restrict to known discriminator names to avoid false positives from other Literal fields - discriminator_fields = ("type", "theme", "subtype") - for field_name, field_info in member.model_fields.items(): - if field_name not in discriminator_fields: - continue - annotation = field_info.annotation - if get_origin(annotation) is Literal: - literal_args = get_args(annotation) - if literal_args: - 
discriminator_to_model[literal_args[0]] = member + if discriminator_field is not None: + value = _extract_literal_value(member, discriminator_field) + if value is not None: + discriminator_to_model[value] = member def introspect_union(union_type: Any) -> UnionMetadata: # noqa: ANN401 @@ -163,9 +183,9 @@ def introspect_union(union_type: Any) -> UnionMetadata: # noqa: ANN401 if isinstance(metadata, FieldInfo) and hasattr( metadata, "discriminator" ): - disc = metadata.discriminator - # discriminator can be a string or Discriminator object - discriminator_field = str(disc) if disc is not None else None + discriminator_field = resolve_discriminator_field_name( + metadata.discriminator + ) break # Get union members @@ -183,7 +203,11 @@ def introspect_union(union_type: Any) -> UnionMetadata: # noqa: ANN401 # Process each union member for member in union_members: _process_union_member( - member, discriminator_to_model, model_name_to_model, nested_unions + member, + discriminator_to_model, + model_name_to_model, + nested_unions, + discriminator_field, ) return UnionMetadata( diff --git a/packages/overture-schema-cli/tests/test_type_analysis.py b/packages/overture-schema-cli/tests/test_type_analysis.py index 12fb10e75..0a21e168a 100644 --- a/packages/overture-schema-cli/tests/test_type_analysis.py +++ b/packages/overture-schema-cli/tests/test_type_analysis.py @@ -29,14 +29,9 @@ class ModelB(BaseModel): UnionType = Annotated[ModelA | ModelB, Field(discriminator="type")] - # Test simple discriminated union error path loc = ("a", "required_a") metadata = introspect_union(UnionType) structural = create_structural_tuple(loc, metadata) - print(f"\nloc: {loc}") - print(f"structural: {structural}") - assert len(structural) == len(loc) - # First element should be discriminator, second should be field assert structural == ("discriminator", "field") def test_mixed_union_structural_tuple(self) -> None: @@ -56,17 +51,11 @@ class Sources(BaseModel): # Test discriminated side loc1 = 
("tagged-union[ModelA]", "a", "required_a") structural1 = create_structural_tuple(loc1, metadata) - print("\nDiscriminated side:") - print(f"loc: {loc1}") - print(f"structural: {structural1}") assert structural1 == ("union", "discriminator", "field") # Test non-discriminated side loc2 = ("Sources", "datasets") structural2 = create_structural_tuple(loc2, metadata) - print("\nNon-discriminated side:") - print(f"loc: {loc2}") - print(f"structural: {structural2}") assert structural2 == ("model", "field") def test_list_context_structural_tuple(self) -> None: @@ -78,13 +67,9 @@ class ModelA(BaseModel): UnionType = Annotated[ModelA, Field(discriminator="type")] - # Test list context loc = (1, "a", "required_a") metadata = introspect_union(list[UnionType]) structural = create_structural_tuple(loc, metadata) - print("\nList context:") - print(f"loc: {loc}") - print(f"structural: {structural}") assert structural == ("list_index", "discriminator", "field") def test_nested_discriminated_structural_tuple(self) -> None: @@ -114,13 +99,9 @@ class Sources(BaseModel): FeatureUnion = Annotated[Building | SegmentUnion, Field(discriminator="type")] MixedUnion = FeatureUnion | Sources - # Test nested discriminator path (type=segment, subtype=road) loc = ("tagged-union[SegmentUnion]", "segment", "road", "road_class") metadata = introspect_union(MixedUnion) structural = create_structural_tuple(loc, metadata) - print("\nNested discriminated:") - print(f"loc: {loc}") - print(f"structural: {structural}") assert structural == ("union", "discriminator", "discriminator", "field") @@ -253,34 +234,71 @@ class ModelA(BaseModel): assert metadata.discriminator_field == "type" assert "a" in metadata.discriminator_to_model - @pytest.mark.parametrize( - "literal_value,expected_in_mapping", - [ - pytest.param("building", True, id="literal_building"), - pytest.param("place", True, id="literal_place"), - pytest.param("nonexistent", False, id="not_present"), - ], - ) - def 
test_introspect_extracts_all_literals( - self, literal_value: str, expected_in_mapping: bool - ) -> None: - """Test that introspect_union extracts all Literal field values.""" + +class TestDiscriminatorDiscovery: + """Tests for runtime discriminator field discovery (not hardcoded).""" + + def test_nonstandard_discriminator_field_name(self) -> None: + """Discriminator field not named type/theme/subtype is discovered at runtime.""" + + class Cat(BaseModel): + kind: Literal["cat"] + indoor: bool + + class Dog(BaseModel): + kind: Literal["dog"] + breed: str + + UnionType = Annotated[Cat | Dog, Field(discriminator="kind")] + metadata = introspect_union(UnionType) + + assert metadata.is_discriminated is True + assert metadata.discriminator_field == "kind" + assert metadata.discriminator_to_model["cat"] == Cat + assert metadata.discriminator_to_model["dog"] == Dog + + def test_non_discriminator_literal_fields_excluded(self) -> None: + """Literal fields that aren't the discriminator are not in the mapping.""" class Building(BaseModel): type: Literal["building"] - subtype: Literal["residential"] + status: Literal["active"] class Place(BaseModel): type: Literal["place"] - category: Literal["restaurant"] + status: Literal["active"] UnionType = Annotated[Building | Place, Field(discriminator="type")] metadata = introspect_union(UnionType) - if expected_in_mapping: - assert literal_value in metadata.discriminator_to_model - else: - assert literal_value not in metadata.discriminator_to_model + assert "building" in metadata.discriminator_to_model + assert "place" in metadata.discriminator_to_model + assert "active" not in metadata.discriminator_to_model + + def test_callable_discriminator_extracts_field_name(self) -> None: + """Callable discriminators (Feature.field_discriminator) are supported.""" + from pydantic import Discriminator + + class ModelA(BaseModel): + kind: Literal["a"] + + class ModelB(BaseModel): + kind: Literal["b"] + + def get_kind(data: object) -> str | None: + 
return data.get("kind") if isinstance(data, dict) else None + + get_kind._field_name = "kind" # type: ignore[attr-defined] + + UnionType = Annotated[ + ModelA | ModelB, Field(discriminator=Discriminator(get_kind)) + ] + metadata = introspect_union(UnionType) + + assert metadata.is_discriminated is True + assert metadata.discriminator_field == "kind" + assert metadata.discriminator_to_model["a"] == ModelA + assert metadata.discriminator_to_model["b"] == ModelB class TestStructuralTupleCaching: diff --git a/packages/overture-schema-codegen/README.md b/packages/overture-schema-codegen/README.md new file mode 100644 index 000000000..f09467f77 --- /dev/null +++ b/packages/overture-schema-codegen/README.md @@ -0,0 +1,118 @@ +# Overture Schema Codegen + +Generates documentation from Overture Maps Pydantic schema definitions. + +Pydantic's `model_json_schema()` flattens the schema's domain vocabulary into JSON +Schema primitives. NewType names disappear, constraint provenance is lost (which NewType +contributed which bound), custom constraint classes lose their identity (a +`GeometryTypeConstraint` becomes an anonymous `enum` array), and discriminated union +structure collapses into `anyOf` arrays with duplicated fields. + +Navigating Python's type annotation machinery -- NewType chains, nested `Annotated` +wrappers, union filtering, generic resolution -- is complex. The codegen does it once. +`analyze_type()` unwraps annotations into `TypeInfo`, a flat target-independent +representation. Extractors build specs from `TypeInfo`. Renderers consume specs without +touching the type system. New output targets (Arrow schemas, PySpark expressions) add +renderers, not extraction logic. 
+ +## Usage + +```bash +# Generate markdown documentation for all themes +overture-codegen generate --format markdown --output-dir docs/schema/reference + +# Generate for a single theme +overture-codegen generate --format markdown --theme buildings --output-dir out/ + +# List discovered models +overture-codegen list +``` + +The generator discovers models via `overture.models` entry points (provided by theme +packages like `overture-schema-buildings-theme`), extracts type information, and renders +output pages with cross-page links, constraint descriptions, and validated examples. + +## Architecture + +Four layers with strict downward imports -- no layer references the one above it: + +```text +Rendering Output formatting, all presentation decisions + ^ +Output Layout What to generate, where it goes, how outputs link + ^ +Extraction TypeInfo, FieldSpec, ModelSpec, UnionSpec + ^ +Discovery discover_models() from overture-schema-core +``` + +**Discovery** loads registered Pydantic models via entry points. The return dict +includes both concrete `BaseModel` subclasses (like `Building`) and discriminated union +type aliases (like `Segment`). Both satisfy the `FeatureSpec` protocol and flow through +the same pipeline. + +**Extraction** unwraps type annotations into specs. `analyze_type()` is the central +function -- a single iterative loop that peels NewType, Annotated, Union, and container +wrappers, accumulating constraints tagged with the NewType that contributed them. +Domain-specific extractors (`model_extraction`, `union_extraction`, `enum_extraction`, +`newtype_extraction`, `primitive_extraction`) call `analyze_type()` for field types and +produce spec dataclasses. + +**Output Layout** determines what artifacts to generate and where they go. Supplementary +type collection walks expanded feature trees to find referenced enums, NewTypes, and +sub-models. Path assignment maps every type to an output file path mirroring the Python +module structure. 
Link computation and reverse references enable cross-page navigation.
+
+**Rendering** consumes specs and owns all presentation decisions. Markdown output uses
+Jinja2 templates for feature pages (with field tables, constraint sections, and
+examples), enum pages, NewType pages, and aggregate primitive/geometry reference pages.
+
+`markdown/pipeline.py` orchestrates the full pipeline without I/O, returning
+`list[RenderedPage]`. The CLI writes files to disk with Docusaurus frontmatter.
+
+## Programmatic use
+
+```python
+from overture.schema.codegen.extraction.type_analyzer import analyze_type, TypeKind
+
+info = analyze_type(some_annotation)
+assert info.kind == TypeKind.PRIMITIVE
+assert info.base_type == "int32"
+assert info.newtype_name == "FeatureVersion"
+# Constraints carry provenance:
+for cs in info.constraints:
+    print(f"{cs.constraint} from {cs.source}")
+```
+
+## Fetching sample data
+
+Theme packages include example records in their `pyproject.toml` files under
+`[[examples.<ModelName>]]` sections (e.g. `[[examples.Address]]`). The codegen
+validates these against Pydantic models and renders them in feature pages.
+
+To fetch a fresh sample from the latest Overture release using DuckDB:
+
+```bash
+duckdb -json \
+    -c "load spatial" \
+    -c "attach 'http://labs.overturemaps.org/data/latest.ddb' as overture" \
+    -c "select to_json(columns(*))
+        from (
+            select * REPLACE ST_AsText(geometry) as geometry
+            from overture.place
+            USING SAMPLE 1
+        )" \
+    | jq .
+```
+
+The `latest.ddb` database always points to the current release. Tables use
+the type name directly (`overture.place`, `overture.segment`,
+`overture.building`, etc.). Convert the JSON output to TOML for inclusion in
+the theme's `pyproject.toml`.
+ +## Further reading + +- [Design document](docs/design.md) -- architecture, extension points, data flow + diagrams +- [Walkthrough](docs/walkthrough.md) -- module-by-module narrative tracing Segment + through the full pipeline diff --git a/packages/overture-schema-codegen/docs/design.md b/packages/overture-schema-codegen/docs/design.md new file mode 100644 index 000000000..f541fb359 --- /dev/null +++ b/packages/overture-schema-codegen/docs/design.md @@ -0,0 +1,262 @@ +# Code Generator Design + +Code generator that produces documentation and code from Overture Maps Pydantic schema +definitions. + +## Problem + +Overture Maps schema definitions live in Pydantic models across theme packages. Each +model carries type annotations, field constraints, docstrings, and relationships +(inheritance, composition, discriminated unions). Generating documentation or code from +these models requires introspecting all of that structure and rendering it into output +formats. + +Pydantic's internal representation is JSON-schema-oriented and discards the vocabulary +the code generator needs to preserve. `model_json_schema()` flattens `FeatureVersion` (a +NewType wrapping `int32` wrapping `Annotated[int, Field(ge=0, le=2^31-1)]`) to `{"type": +"integer", "minimum": 0}` -- the NewType names `FeatureVersion` and `int32` are gone, +custom constraint classes (`GeometryTypeConstraint`, `UniqueItemsConstraint`) are gone, +Python class references are gone, and constraint provenance (which NewType contributed +which bound) is gone. `FieldInfo.annotation` gives the raw annotation, but Pydantic does +not unwrap NewType chains or track multi-depth constraint provenance. + +The schema's domain language -- custom primitives (`int32`, `float64`), semantic +NewTypes (`FeatureVersion`, `Sources`), and custom constraint classes -- needs to +survive extraction intact. 
A single field annotation like `NewType("Foo", +Annotated[list[SomeModel] | None, Field(ge=0)])` encodes optionality, collection type, +element type, constraints, and semantic naming in nested Python typing constructs. Type +definitions regularly nest `Annotated` inside `NewType` inside `Annotated` -- +`FeatureVersion = NewType("FeatureVersion", int32)` where `int32 = NewType("int32", +Annotated[int, Field(ge=...)])` -- and constraints at each depth need to be tagged with +the NewType that contributed them. + +The code generator solves this by extracting type information once into a flat, +navigable representation (`TypeInfo`), then passing that to renderers that produce +output without touching Python's type system. + +## Inputs and Outputs + +**Inputs**: Pydantic `BaseModel` subclasses discovered via `overture.models` entry +points, plus example data from theme `pyproject.toml` files. Examples serve two +purposes: rendered examples in documentation pages, and a starting point for generating +tests that verify behavior of generated code. + +**Current Outputs**: Markdown documentation pages with field tables, cross-page links, +constraint descriptions, and examples. + +**Planned outputs**: Arrow schemas, PySpark expressions. + +## Architecture + +Four layers with strict downward imports -- no layer references the one above it: + +```text +Rendering Output formatting, all presentation decisions + ^ +Output Layout What to generate, where it goes, how outputs link + ^ +Extraction TypeInfo, FieldSpec, ModelSpec, EnumSpec, ... + ^ +Discovery discover_models() from overture-schema-core +``` + +`markdown/pipeline.py` orchestrates the pipeline without I/O: it expands feature trees, +collects supplementary types, builds placement registries, computes reverse references, +and calls renderers -- returning `RenderedPage` objects. The CLI (`cli.py`) is a thin +Click wrapper that calls `generate_markdown_pages()` and writes files to disk. 
+ +```mermaid +graph TD + subgraph Discovery + DM["discover_models()"] + end + + DM -->|"dict[ModelKey, type]"| EX + + subgraph Extraction + EX["extraction/type_analyzer / extractors"] + EX -->|"ModelSpec, UnionSpec"| TREE["expand_model_tree()"] + end + + TREE -->|"FeatureSpec[]"| OL + + subgraph "Output Layout" + OL["layout/type_collection"] + OL -->|"SupplementarySpec{}"| PA["markdown/path_assignment"] + PA -->|"dict[str, Path]"| LC["markdown/link_computation"] + RR["markdown/reverse_references"] + end + + subgraph Rendering + R["markdown/renderer"] + TR["extraction/type_registry"] -.->|"type name resolution"| R + end + + subgraph Orchestration + MP["markdown/pipeline"] + end + + OL --> MP + LC --> MP + RR --> MP + MP --> R + R -->|"RenderedPage[]"| MP + MP -->|"list[RenderedPage]"| CLI["cli.py → disk"] +``` + +## Extraction + +### `analyze_type` -- iterative type unwrapping + +`analyze_type(annotation)` is a single iterative function that peels type annotation +layers in a fixed order, accumulating information into an `_UnwrapState`: + +1. **NewType**: Records the outermost name (user-facing semantic identity, e.g. + `FeatureVersion`) and updates the "current" name (used for constraint provenance and + as `base_type` at terminal) +2. **Annotated**: Collects constraints from metadata, each tagged with whichever NewType + was most recently entered. Extracts `Field.description` when present +3. **Union**: Filters out `None` (marks optional), `Sentinel`, and `Literal` sentinel + arms. If multiple concrete `BaseModel` arms remain, classifies as `UNION`; otherwise + continues with the single remaining arm +4. **list / dict**: Increments `list_depth` for each `list[...]` layer, sets dict flags, + continues into element types +5. 
**Terminal**: Classifies as `PRIMITIVE`, `LITERAL`, `ENUM`, `MODEL`, or `UNION` + +The result is `TypeInfo` -- a flat dataclass that fully describes the unwrapped type: +classification (`TypeKind`), optional/dict flags, `list_depth` (count of `list[...]` +layers), `newtype_outer_list_depth` (list layers outside the outermost NewType boundary), +accumulated constraints with provenance, NewType names, source type, literal values, and +(for UNION kind) the tuple of concrete `BaseModel` member types. Dict types carry +recursively analyzed `TypeInfo` for their key and value types. + +Multi-depth `Annotated` layers (common in practice, since NewTypes wrap `Annotated` +types that wrap further NewTypes) are handled naturally by the loop -- each iteration +processes the next wrapper. Constraints from each `Annotated` layer are tagged with the +NewType active at that depth. + +### Extractors by domain + +Extraction is split by entity kind: + +- `extraction/model_extraction.py`: Pydantic model -> `ModelSpec` (fields in MRO-aware + documentation order, alias-resolved names, model-level constraints) +- `extraction/enum_extraction.py`: Enum class -> `EnumSpec` +- `extraction/newtype_extraction.py`: NewType -> `NewTypeSpec` +- `extraction/union_extraction.py`: Discriminated union alias -> `UnionSpec` +- `extraction/primitive_extraction.py`: Numeric primitives -> `PrimitiveSpec` + +Each calls `analyze_type()` for field types. Tree expansion (`expand_model_tree()`) +walks MODEL-kind fields to populate nested model references, with a shared cache and +cycle detection (`starts_cycle=True`). + +### Unions and the FeatureSpec protocol + +Discriminated unions (e.g. `Segment = Annotated[Union[RoadSegment, ...], +Discriminator(...)]`) are type aliases, not classes. `UnionSpec` captures the union +structure: member types, discriminator field and value mapping, and a merged field list. 
+Fields shared across all variants appear once; fields present in some variants are +wrapped in `AnnotatedField` with `variant_sources` indicating which members contribute +them. The common base class is identified so shared fields can be deduplicated. + +`FeatureSpec` is a `Protocol` satisfied by both `ModelSpec` and `UnionSpec`. Code that +operates on "any top-level feature" -- tree expansion, supplementary type collection, +rendering dispatch -- uses `FeatureSpec` rather than a concrete type, so union and model +features flow through the same pipeline. + +### Constraints + +Field-level constraints come from `Annotated` metadata -- `Ge`, `Le`, `Interval`, custom +constraint classes. Each is tagged with the NewType that contributed it via +`ConstraintSource`. + +Model-level constraints come from decorators (`@require_any_of`, `@require_if`, +`@forbid_if`) and are extracted via `ModelConstraint.get_model_constraints()`. + +## Output Layout + +Determines the full set of artifacts to generate, where each lives on disk, and how they +reference each other. + +### Supplementary type collection + +`collect_all_supplementary_types()` walks the expanded field trees of all feature specs, +extracting enums, semantic NewTypes, and sub-models that need their own output. Returns +`dict[str, SupplementarySpec]`. + +### Module-mirrored output paths + +Output paths derive from the source Python module path relative to a computed schema +root (`compute_schema_root()` finds the longest common prefix of all entry point module +paths). `compute_output_dir()` maps a Python module to an output directory. Feature +models land in their module-derived directory. Supplementary types land at their own +module-derived path, with a `types/` segment inserted when they fall under a feature +directory. + +### Link computation + +`LinkContext` carries the current output's path and the full type-to-path registry. 
When +a renderer formats a type reference, it looks up the target in the registry and computes +a relative path. Links exist only for types with registry entries, avoiding broken +references to ungenerated outputs. + +### Reverse references + +`compute_reverse_references()` walks feature specs to build `dict[type_name, +list[UsedByEntry]]` for "Used By" sections. + +## Rendering + +Renderers consume specs and own all presentation decisions -- formatting, casing, link +syntax. Extraction and the type registry carry no presentation logic. + +### Type registry + +`extraction/type_registry.py` maps type names to per-target string representations via +`TypeMapping`. `format_type_string()` wraps the resolved name with list/optional +qualifiers. `is_semantic_newtype()` distinguishes NewTypes that deserve their own +identity (like `FeatureVersion` wrapping `int32`) from pass-through aliases to +registered primitives. + +### Markdown renderer + +Jinja2 templates for feature, enum, NewType, primitives, and geometry pages. +`render_feature()` expands MODEL-kind fields inline with dot-notation (e.g., +`sources[].dataset`), stopping at cycle boundaries. `format_type()` in +`markdown/type_format.py` converts `TypeInfo` into link-aware display strings using +`LinkContext`. + +### Constraint prose + +`extraction/field_constraints.py` and `extraction/model_constraints.py` convert +constraint objects into human-readable descriptions. Field constraints produce inline +text. Model constraints produce section-level descriptions and per-field notes, with +consolidation for related conditional constraints (`require_if` / `forbid_if` grouped by +trigger). + +### Example loader + +Loads example data from theme `pyproject.toml` files, validates against Pydantic models, +and flattens to dot-notation rows for display in feature pages. Also provides a starting +point for generated test data. 
+ +`collect_dict_paths` walks the `FieldSpec` tree to identify dict-typed fields (like +`tags: dict[str, str]`), returning their dot-paths as a `frozenset`. `flatten_example` +checks this set before recursing into dicts -- paths in the set are kept as leaf values +rather than being split into dot-notation rows. The pipeline computes `dict_paths` from +`spec.fields` and threads it through `load_examples`. + +## Extension Points + +**Adding a new output target** (Arrow schemas next, PySpark expressions after): Add a +column to `TypeMapping` in `extraction/type_registry.py` for type-name resolution. Write +a new renderer module that consumes specs and the type registry. The extraction layer and +output layout are target-independent. + +**Adding a new type kind**: Add a variant to `TypeKind` in `extraction/type_analyzer.py`. +Handle it in the terminal classification of `analyze_type()`. Add an extraction function +and spec dataclass if needed. Update renderers to handle the new kind. + +**Adding a new constraint type**: The iterative unwrapper collects it automatically (any +`Annotated` metadata becomes a `ConstraintSource`). Add a case to +`describe_field_constraint()` for the prose representation. diff --git a/packages/overture-schema-codegen/docs/walkthrough.md b/packages/overture-schema-codegen/docs/walkthrough.md new file mode 100644 index 000000000..b51e3f0a4 --- /dev/null +++ b/packages/overture-schema-codegen/docs/walkthrough.md @@ -0,0 +1,757 @@ +# Walkthrough: overture-schema-codegen + +Pydantic's serialization machinery destroys the vocabulary that documentation needs. The +codegen recovers it. + +Consider the transportation theme's `Segment` type -- a discriminated union of +`RoadSegment`, `RailSegment`, and `WaterSegment`. All three share fields inherited from +`TransportationSegment`. Each adds variant-specific fields. The discriminator field +`subtype` carries a `Literal` value (`"road"`, `"rail"`, `"water"`) that selects the +arm. 
Call `model_json_schema()` and the union collapses into an `anyOf` array with +duplicated field definitions, the discriminator mapping disappears, and the common-base +relationship between variants is unrecoverable. + +The same loss happens at the field level. `FeatureVersion = NewType("FeatureVersion", +int32)` where `int32 = NewType("int32", Annotated[int, Field(ge=0, le=2147483647)])` +becomes `{"type": "integer", "minimum": 0, "maximum": 2147483647}`. Three things +vanished: the name "FeatureVersion," the name "int32," and the fact that `ge=0` came +from the `int32` layer rather than `FeatureVersion`. Custom constraint classes like +`GeometryTypeConstraint` lose their identity -- the class name, its docstring, and its +relationship to a specific NewType dissolve into anonymous JSON Schema keywords. + +Documentation needs all of this. The codegen exists to preserve it. + +Navigating Python's type annotation machinery -- NewType chains, nested `Annotated` +wrappers, union filtering, generic resolution -- is complex. The codegen does it once. +`analyze_type()` unwraps annotations into `TypeInfo`, a flat target-independent +representation. Extractors build specs from `TypeInfo`. Renderers consume specs without +re-entering the type system. New output targets add renderers, not extraction logic. + +The solution decomposes into four layers. Discovery finds models. Extraction unwraps +them into flat specifications. Output Layout decides what to generate and where it goes. +Rendering formats the output. Imports flow strictly downward -- no layer references the +one above it. + +Sixteen sections follow, ordered by dependency: each module appears before anything that +imports it. The final section inverts this and traces the full pipeline top-down. 
+Segment threads through as the primary example, since its path through the system -- +union classification, common base discovery, variant field partitioning, discriminator +extraction, tagged rendering -- exercises more of the pipeline than any model feature +does. + +--- + +## 1. Discovery + +The pipeline starts in `overture-schema-core`, not in the codegen package itself. +`discover_models()` calls `importlib.metadata.entry_points(group="overture.models")` and +loads every registered model. Each entry point name encodes identity as a +colon-delimited triple (`overture:buildings:building`); each value encodes the Python +location (`overture.schema.buildings:Building`). The function parses both formats -- +three-part names carry a theme component, two-part names set theme to `None` -- and +returns `dict[ModelKey, type[BaseModel]]`. + +`ModelKey` is a frozen dataclass with four fields: `namespace`, `theme`, `type`, and +`entry_point`. The `entry_point` field preserves the raw `module:Class` string that +downstream modules split to determine output directory structure. + +The return dict includes both concrete `BaseModel` subclasses and type aliases. +`Building` is a concrete class -- `isinstance(Building, type)` returns true. `Segment` +is not. It is an `Annotated` alias wrapping `Union[RoadSegment, RailSegment, +WaterSegment]` with a discriminator field. `isinstance` and `issubclass` cannot inspect +it. The entry point `overture:transportation:segment` maps to +`overture.schema.transportation:Segment`, which loads the alias itself. + +The codegen classifies these at the CLI boundary: `is_model_class` identifies concrete +`BaseModel` subclasses, `is_union_alias` calls `analyze_type` to identify discriminated +unions. From that point forward both model features and union features satisfy the +`FeatureSpec` protocol and flow through the same pipeline. + +## 2. Leaf utilities + +Two modules with no internal dependencies. Both serve multiple layers. 
+ +### extraction/case_conversion.py + +Converts PascalCase to snake_case with two compiled regexes. `_ACRONYM_BOUNDARY` inserts +an underscore between an uppercase run and a capitalized word start: `HTMLParser` +becomes `HTML_Parser` becomes `html_parser`. `_CAMEL_BOUNDARY` inserts between +lowercase-or-digit and uppercase: `buildingPart` becomes `building_part`. +`to_snake_case` applies them in sequence and lowercases. + +`slug_filename` composes the conversion with a file extension. Every output file path in +the system passes through this function. + +```python +>>> slug_filename("HexColor") +'hex_color.md' +``` + +### extraction/docstring.py + +Distinguishes author-written docstrings from auto-generated ones. Both `Enum` and +`NewType` produce default docstrings that vary across Python versions. Rather than +hardcoding version-specific strings, the module creates temporary instances at import +time, captures their `__doc__` attributes, then deletes the instances: + +```python +class _DocstringProbeEnum(Enum): + pass + +_ENUM_DEFAULT_DOCSTRING = _DocstringProbeEnum.__doc__ +del _DocstringProbeEnum +``` + +`is_custom_docstring` compares a given docstring against these captured defaults and an +optional inherited docstring. The enum extractor uses this both at class level and +per-member, since `DocumentedEnum` members carry individual `__doc__` attributes. + +`clean_docstring` delegates to `inspect.cleandoc` and returns `None` for empty results. +`first_docstring_line` takes the first line only -- used by renderers that show +summaries. + +## 3. Type analysis + +This is the module the entire package exists to house. `analyze_type` takes a raw type +annotation and returns `TypeInfo` -- a flat dataclass that fully describes the unwrapped +type without any reference to Python's typing machinery. + +### The loop + +The function runs a single `while True` loop that peels layers in fixed order. 
Each
+iteration handles one wrapper:
+
+**NewType** records names at two levels. The first NewType encountered becomes
+`outermost_newtype_name` (the user-facing identity, e.g. "FeatureVersion") and snapshots
+the current `list_depth` into `newtype_outer_list_depth` -- capturing how many list
+layers appeared before the NewType boundary. Subsequent NewTypes update
+`last_newtype_name` (the innermost, used for constraint provenance and as the terminal
+`base_type`). The loop unwraps via `__supertype__` and continues.
+
+**Annotated** collects every metadata object as a `ConstraintSource`, tagging each with
+whichever NewType was most recently entered. This is how constraint provenance survives:
+when `int32`'s `Annotated` layer contributes `Field(ge=0)`, the constraint records
+`source="int32"`. If a `FieldInfo` carries a description, the function captures it --
+first description wins, so the outermost NewType's documentation takes precedence.
+
+**Union** filters out `NoneType` (marks optional), `Sentinel` instances (Pydantic's
+`PydanticUndefined` marker for undeclared defaults), and `Literal` sentinel arms (like
+`Literal[""]` used alongside `HttpUrl`). If multiple concrete `BaseModel` subclasses
+remain after filtering, the function classifies the type as `UNION` and returns
+immediately with the member tuple. Non-BaseModel multi-type unions raise
+`UnsupportedUnionError`. A single remaining arm continues the loop.
+
+The `Literal` filtering has a guard: when a union contains *only* Literal arms (like
+`Optional[Literal["x"]]`), the function keeps them rather than filtering everything out.
+
+**list/dict** increments `list_depth` for each `list[...]` layer (so `list[list[str]]`
+records depth 2), sets dict flags, and continues into element types. Dict is the one
+case where `analyze_type` recurses -- it calls itself for key and value types, storing
+the results as nested `TypeInfo` objects.
+
+**Terminal** classification in `_classify_terminal` handles what remains after all
+wrappers are peeled: `Any` becomes a PRIMITIVE, `Literal` returns with the literal value
+(single-value only -- multi-value Literals get `literal_value=None`), `Enum` subclasses
+become ENUM, `BaseModel` subclasses become MODEL, everything else becomes PRIMITIVE.
+
+### Concrete walkthroughs
+
+**Segment (union path).** `analyze_type` receives the `Annotated` alias. Iteration 1
+sees `Annotated` -- collects the `FieldInfo` with discriminator metadata as a
+constraint, unwraps to `Union[RoadSegment, RailSegment, WaterSegment]`. Iteration 2 sees
+the union. No `None` arm, no sentinels. Three concrete `BaseModel` subclasses remain --
+the function classifies the type as `UNION` and returns immediately: `kind=UNION`,
+`union_members=(RoadSegment, RailSegment, WaterSegment)`, `base_type="RoadSegment"` (the
+first member). Two iterations, done. The union members are raw type objects, not
+recursively analyzed -- callers that need field details call `extract_model` on each
+member separately.
+
+**FeatureVersion (NewType chain path).** `FeatureVersion = NewType("FeatureVersion",
+int32)` where `int32 = NewType("int32", Annotated[int, Field(ge=0, le=2147483647)])`.
+
+Iteration 1 sees `FeatureVersion`. It's a NewType -- record
+`outermost_newtype_name="FeatureVersion"`, snapshot `newtype_outer_list_depth=0` (no list
+layers yet), unwrap to `int32`, continue. Iteration 2 sees
+`int32`. Also a NewType -- update `last_newtype_name="int32"`, unwrap to `Annotated[int,
+Field(ge=0, ...)]`, continue. Iteration 3 sees `Annotated`. Collect
+`ConstraintSource(source="int32", constraint=Field(ge=0, ...))`, unwrap to `int`. The
+loop breaks on `int` (not a NewType, not Annotated, not a union, not a container).
+`_classify_terminal` returns a `TypeInfo` with `base_type="int32"`,
+`newtype_name="FeatureVersion"`, `kind=PRIMITIVE`, and a constraint tuple recording the
+provenance chain.
+ +The two paths demonstrate the function's range. Segment exits early on the union branch +with member types for downstream extraction. FeatureVersion runs the full loop through +NewType and Annotated layers, accumulating constraint provenance that survives to +rendering. + +### _UnwrapState + +The accumulator dataclass carries state across iterations: optional/dict flags, +`list_depth` (incremented per `list[...]` layer), `newtype_outer_list_depth` (snapshotted +from `list_depth` when the first NewType is entered), the constraint list, both NewType +name slots, and the captured description. Its `build_type_info` method assembles the +final `TypeInfo` from accumulated state, freezing the constraint list into a tuple. + +### walk_type_info + +A shared visitor that recurses into dict key/value `TypeInfo` children. Both type +collection and reverse reference computation use it rather than duplicating the descent +pattern. Union members are raw `type` objects (not `TypeInfo` instances), so callers +handle them directly. + +## 4. Data structures + +`extraction/specs.py` defines the vocabulary shared between extraction and rendering. Every spec is +a dataclass with no methods beyond field access and, in `UnionSpec`'s case, one cached +property. + +**FieldSpec** represents one model field: alias-resolved name, `TypeInfo`, description, +required flag. Two fields populated later by tree expansion: `model` (a reference to the +nested `ModelSpec` for MODEL-kind fields) and `starts_cycle` (true when following this +field's model would create a cycle in the ancestor chain). + +**ModelSpec** represents one Pydantic model: class name, cleaned docstring, fields in +documentation order, source class reference, the entry point string that located it, and +model-level constraints from decorators like `@require_any_of`. + +**UnionSpec** represents a discriminated union type alias. 
Segment's `UnionSpec` carries +`members=[RoadSegment, RailSegment, WaterSegment]`, `discriminator_field="subtype"`, and +`common_base=TransportationSegment`. Its `annotated_fields` list pairs each `FieldSpec` +with `variant_sources` -- a tuple of class names indicating which union members +contribute that field, or `None` for fields from `TransportationSegment` shared across +all members. The `fields` cached property unwraps this for code that doesn't need +provenance. `UnionSpec` uses `eq=False` because it contains mutable lists and a +`cached_property` -- dataclass-generated `__eq__` would be unreliable. + +**FeatureSpec** is a `Protocol` satisfied by both `ModelSpec` and `UnionSpec`. This is +the pipeline's unifying abstraction. Tree expansion, type collection, rendering +dispatch, and example loading all operate on `FeatureSpec` without knowing which +concrete type they hold. + +**EnumSpec** and **EnumMemberSpec** serve enums. **NewTypeSpec** serves NewTypes. +**PrimitiveSpec** serves numeric primitives with an `Interval` for bounds and optional +`float_bits`. + +**SupplementarySpec** is the union type alias `EnumSpec | NewTypeSpec | ModelSpec` -- +the set of non-feature types that need their own output pages. `PrimitiveSpec` and +geometry types are excluded because they render on aggregate pages rather than +individual ones. + +### Classification functions + +Three functions at the bottom of `extraction/specs.py` classify discovery results. `is_model_class` +is a `TypeGuard` that checks `isinstance(obj, type) and issubclass(obj, BaseModel)`. +`is_union_alias` calls `analyze_type` and checks for `UNION` kind -- the only place +outside the type analyzer that touches Python type annotations. `filter_model_classes` +applies the model guard across the discovery dict's values. + +## 5. Type registry + +Maps type names to per-target display strings. 
`PRIMITIVE_TYPES` contains 15 entries: +four signed integer widths, three unsigned, two floats, `str`/`bool`, two Python builtin +aliases (`int` maps to `int64`, `float` maps to `float64`), and two geometry types +(`Geometry`, `BBox`). Each maps to a `TypeMapping` with a `markdown` field. + +`is_semantic_newtype` answers a question: does this NewType deserve its own +documentation page? The function returns true when the outermost name differs from the +base type (`FeatureVersion` wrapping `int32`) or when the base type has no registry +entry (`HexColor` wrapping `str` via constraints). It returns false for registered +primitives (`int32` wrapping `int`) -- those are the type system's building blocks, not +user-facing concepts. + +`resolve_type_name` looks up the registry by `base_type`, tries `source_type.__name__` +when the first lookup fails, and falls back to `base_type` as a last resort. Semantic +NewTypes wrapping unregistered classes (like `Sources` wrapping `SourceItem`) use the +underlying class name rather than the NewType alias -- `source_type.__name__` takes +precedence. + +## 6. Model extraction + +`extract_model` converts a Pydantic `BaseModel` subclass into a `ModelSpec`. + +### Field ordering + +Documentation order differs from Python declaration order. `_class_order` produces the +MRO-aware sequence: for single inheritance, reversed MRO puts base class fields first +and derived fields last. For multiple inheritance, the primary chain (first base) comes +first, then the class's own fields, then mixin fields. This matches how a reader +encounters the model -- shared structure before specialization. + +`_field_order` walks the class hierarchy produced by `_class_order` and collects +`__annotations__` keys, deduplicating as it goes. 
+ +### Field extraction + +For each field, the extractor resolves the alias chain (`validation_alias` > `alias` > +Python name via `resolve_field_alias`), calls `analyze_type` on `field_info.annotation`, +and builds a `FieldSpec`. The extractor uses `field_info.annotation` rather than +`get_type_hints()` because the latter returns unresolved TypeVars for generic base +classes. + +One subtlety: Pydantic strips the `Annotated` wrapper from some fields and moves the +metadata to `field_info.metadata`. When this happens, `analyze_type` sees a bare type +and misses the constraints. `_merge_field_metadata` patches them back in, tagging them +with `source=None` since they came from the field's own annotation rather than a NewType +chain. + +Model-level constraints come from `ModelConstraint.get_model_constraints(model_class)`, +which inspects decorators like `@require_any_of` and `@require_if`. + +### Tree expansion + +`expand_model_tree` is the recursive step that populates `FieldSpec.model` references. +It maintains a shared cache keyed by Python class and an ancestor set for cycle +detection. + +The cache insert happens *before* recursion. Without this ordering, a back-edge +encounter would find no cached entry and infinite-loop instead of marking +`starts_cycle=True`. The sequence: extract the sub-model, insert it into the cache, then +recurse into its fields. Shared references (the same sub-model used in multiple fields) +reuse the cached `ModelSpec` without marking cycles. + +Union-kind fields skip inline expansion -- they appear as a single row in the output, +linking to their members, rather than expanding inline. + +## 7. Other extractors + +### Enum extraction + +`extract_enum` iterates members, checking `is_custom_docstring` for both class-level and +per-member descriptions. `DocumentedEnum` members carry `__doc__` attributes that the +extractor preserves. 
The class-level docstring is passed as `inherited_doc` to the +per-member check, so members that inherit the class docstring verbatim get +`description=None`. + +### NewType extraction + +`extract_newtype` calls `analyze_type` on the NewType callable and extracts the custom +docstring. When the NewType has no explicit docstring, it falls back to +`TypeInfo.description` -- the first `Field.description` found in the `Annotated` +metadata chain. + +### Union extraction + +The most involved extractor. Walk through `Segment` concretely. + +`extract_union("Segment", annotation)` calls `analyze_type` on the +`Annotated[Union[RoadSegment, RailSegment, WaterSegment], ...]` alias. The analyzer +returns `kind=UNION` with the three member types. + +Next, `_find_common_base` intersects each member's filtered MRO (BaseModel subclasses +only, excluding `BaseModel` itself). All three share `TransportationSegment` in their +MRO. The function picks the most-derived class in the intersection -- the one whose +worst-case MRO distance is smallest. `TransportationSegment` wins: it is the direct +parent of all three members. + +The extractor calls `extract_model(TransportationSegment)` to get the shared field set. +Fields like `id`, `geometry`, `version`, `sources`, and `subtype` appear in the common +base. These become shared `AnnotatedField` entries with `variant_sources=None`. + +Then it extracts each member: `RoadSegment`, `RailSegment`, `WaterSegment`. Fields not +in the shared set are variant-specific, deduplicated by `(name, type_identity)` where +`type_identity` captures `base_type`, `kind`, `is_optional`, and `list_depth`. If +`RoadSegment` and `WaterSegment` both define a `width` field with the same type +identity, the `AnnotatedField` accumulates both class names: +`variant_sources=("RoadSegment", "WaterSegment")`. Fields unique to one member get a +single-element tuple. 
+ +`extract_discriminator` inspects the `Annotated` metadata for a `FieldInfo` with a +discriminator attribute. For Segment, it finds `subtype` and builds the mapping: +`{"road": RoadSegment, "rail": RailSegment, "water": WaterSegment}` by checking each +member for single-value `Literal` fields on the discriminator. + +### Primitive extraction + +`partition_primitive_and_geometry_names` reads a module's `__all__` exports. NewType +exports are numeric primitives; non-constraint class exports are geometry types. + +`extract_primitives` builds `PrimitiveSpec` objects. For each primitive name it resolves +the object from the module, calls `extract_newtype` for the type analysis, then extracts +numeric bounds from constraints. `extract_numeric_bounds` scans constraint objects for +`ge`/`gt`/`le`/`lt` attributes and packs them into an `Interval`. + +## 8. Constraint prose + +Two modules convert constraint objects into human-readable text. + +### Field constraints + +`extraction/field_constraints.py` pattern-matches constraint types. `Interval` renders +as `lower <= x <= upper` using Unicode comparison operators. Single-bound constraints +(`Ge`, `Gt`, `Le`, `Lt`) render as `>= value` or `< value`. Length constraints +(`MinLen`, `MaxLen`) render as plain prose (e.g. "Minimum length: 1"). `GeometryTypeConstraint` lists +allowed geometry types by name, converting snake_case values to PascalCase. `Reference` +describes the relationship and target model, using an optional `link_fn` to produce +markdown links. + +Opaque constraints -- classes that inherit `object.__repr__` without customization -- +render as their class name plus docstring. When a regex pattern attribute exists, the +prose includes it. + +`constraint_display_text` is the top-level entry point. It checks whether the constraint +is opaque and has a docstring, and if so, produces a composite description combining the +docstring, class name, and pattern. Otherwise it delegates to +`describe_field_constraint`. 
+ +### Model constraints + +`extraction/model_constraints.py` handles model-level constraints from decorators. +`analyze_model_constraints` returns two things in one pass: a list of section-level +descriptions and a dict mapping field names to the constraint descriptions that +reference them. + +The module consolidates related conditionals. Three `require_if` constraints with the +same target fields but different trigger values merge into "when X is one of: a, b, c" +instead of three separate bullets. `_consolidation_key` groups constraints by `(type, +field_names, condition_field_name)`. Groups with one member render normally; groups with +multiple members produce consolidated prose. + +`NoExtraFieldsConstraint` is silently skipped -- it is a structural validation rule, not +something a documentation reader acts on. + +## 9. Module layout + +Translates Python module paths into output directory paths. `compute_schema_root` finds +the longest common dotted prefix across all entry point module paths. Given paths like +`overture.schema.buildings`, `overture.schema.places`, and +`overture.schema.transportation`, the root is `overture.schema`. For a single unique +path, it drops the last component. + +`compute_output_dir` mirrors the remaining package structure after stripping the root. +Packages (directories with `__path__` per PEP 302) keep all components. File modules +drop their last component, since the `.py` filename adds no useful structure. +`is_package_module` checks `sys.modules` for `__path__` to make this distinction. + +The entry point string `overture.schema.buildings:Building` encodes both module and +class. `entry_point_module` extracts the module path, `entry_point_class` extracts the +class name. `output_dir_for_entry_point` composes these to produce the output directory +for a feature. + +## 10. 
Supplementary type collection + +`collect_all_supplementary_types` walks the expanded field trees of all feature specs to +discover every referenced type that needs its own output page: enums, semantic NewTypes, +and sub-models. + +The walk maintains a visited set for models and a feature name set for skip detection. +Types that are themselves top-level features get skipped. For UNION-kind fields, the +function extracts and walks each member's fields. For semantic NewTypes, it walks the +`__supertype__` chain to collect intermediate NewTypes -- `Id` wraps +`NoWhitespaceString` wraps `str`, and both `Id` and `NoWhitespaceString` get their own +pages. The `walk_type_info` visitor handles dict key/value recursion. + +MODEL-kind fields follow `field_spec.model` references that were populated by +`expand_model_tree`. The function raises `RuntimeError` if it encounters a MODEL-kind +field with `model=None` -- a guard against calling collection before tree expansion. + +A single field matches multiple conditions independently. A semantic NewType wrapping a +MODEL-kind type triggers both NewType extraction and model collection. The checks use +independent `if` statements, not `elif`. + +## 11. Path assignment + +`build_placement_registry` builds the complete mapping from type names to output file +paths. Three tiers: + +Aggregate pages come first. All numeric primitives point to +`system/primitive/primitives.md`. All geometry types point to +`system/primitive/geometry.md`. These are hardcoded paths since the types share a single +reference page. + +Feature specs get individual pages. Output directories derive from +`output_dir_for_entry_point`. Filenames use `slug_filename`. + +Supplementary specs get module-derived paths from `source_type.__module__`. When a +supplementary type's output directory falls under a feature directory, +`_nest_under_types` inserts a `types/` segment. 
Without this insertion, an enum defined +in `overture.schema.buildings` would land alongside the Building feature page. With it, +the enum lands in `buildings/types/` -- preventing supplementary type pages from +cluttering feature directories. + +`_nest_under_types` sorts feature directories by path length (descending) before +checking containment, so the most specific match wins. + +## 12. Links and reverse references + +### Link computation + +`LinkContext` carries the current page's output path and the full type-to-path registry. +When a renderer formats a type reference, it calls `resolve_link` to compute a relative +path from the current page to the target. Types without registry entries return `None`, +telling renderers to show inline code instead of a broken link. `resolve_link_or_slug` +provides a fallback when a link is required regardless. + +`relative_link` computes `../` navigation between any two paths in the output tree. It +finds the common prefix of directory components, counts the levels up from the source +directory, and descends into the target. Both paths must be normalized -- the function +rejects `..` components to prevent path traversal surprises. + +### Reverse references + +`compute_reverse_references` walks all feature fields and supplementary specs to build +`dict[str, list[UsedByEntry]]`. Each entry maps a type name to the list of types that +reference it. Entries sort models before NewTypes, alphabetical within each group. + +The function tracks references with sets for deduplication, then sorts into lists at the +end. It skips self-references and references to types not in the supplementary spec dict +(features don't need "used by" sections since they are the entry points). + +NewType specs register additional references from their constraint sources. 
If `Id`
+inherits a constraint from `NoWhitespaceString`, the reverse reference captures that
+`Id` uses `NoWhitespaceString` -- even though the relationship is through constraint
+provenance rather than direct field reference.
+
+## 13. Markdown type formatting
+
+`markdown/type_format.py` converts `TypeInfo` into display strings for markdown output.
+
+`format_type` handles the full range of field types. Single-value Literals render as
+`"value"` in backticks. Semantic NewTypes and enums/models get markdown links via
+`_resolve_type_link`, which checks the `LinkContext` registry and falls back to plain
+code spans. For types with a linked identity (semantic NewTypes, enums, models), list
+rendering depends on where the list layers sit relative to the NewType boundary.
+`newtype_outer_list_depth > 0` means the list wraps the NewType (`list[PhoneNumber]`) and
+renders as `list<PhoneNumber>`. `is_list` with `newtype_name` set means the NewType
+wraps a list internally (`Sources` wrapping `list[SourceItem]`) and renders with a
+`(list)` qualifier. Non-NewType identities (enums, models) use `list<...>` syntax. Linked
+inner types use broken-backtick syntax (`` `list<` `` ... `` `>` ``) built as a single
+wrapper to avoid adjacent backticks that CommonMark would interpret as multi-backtick
+code span delimiters. Dict types render as `` `map` ``. Qualifiers (optional, list,
+map) append in parentheses.
+
+Union members format independently -- each gets its own link resolution, joined with
+pipe separators escaped for table-cell safety.
+
+`format_underlying_type` handles NewType page headers. It links enums and models that
+have their own pages but skips the outermost NewType name to avoid self-referencing. The
+function uses `source_type.__name__` rather than `base_type` for link resolution, since
+`base_type` may carry the outermost NewType name when only one NewType wraps a class.
+
+## 14. Markdown rendering
+
+`markdown/renderer.py` is the template driver.
+ +### Templates + +Six Jinja2 templates in `markdown/templates/`. `feature.md.jinja2` renders a field table +with Name, Type, and Description columns, an optional Constraints section, an optional +Examples section, and a "Used By" partial. `enum.md.jinja2` renders a bullet list of +values. `newtype.md.jinja2` shows underlying type and constraints with provenance links. +`primitives.md.jinja2` and `geometry.md.jinja2` render aggregate reference pages. +`_used_by.md.jinja2` is an included partial. + +The Jinja2 environment registers `linkify_urls` as a filter, which wraps bare URLs in +markdown link syntax. The filter uses a two-pass approach: extract code spans first (to +avoid modifying URLs inside backticks), linkify the remaining text, then restore code +spans. + +### Field expansion + +`render_feature` dispatches on spec type. `ModelSpec` gets `_expand_model_fields`, which +walks the pre-populated `FieldSpec.model` tree and produces dot-notation rows. +`sources[0].dataset` appears as a single row in the flat field table, with `[]` +appended per nesting level to list-of-model fields (so a doubly-nested list gets +`[][]`). Expansion stops at fields marked with +`starts_cycle`. + +`UnionSpec` gets `_expand_union_fields`, which adds italic variant tags to +variant-specific fields. For Segment, shared fields from `TransportationSegment` (like +`id`, `geometry`, `sources`) render as plain rows. Variant-specific fields get tagged: +`_short_variant_name` strips the union name suffix, so `RoadSegment` becomes `Road`, +`WaterSegment` becomes `Water`. A field present in two of three members renders as `` +`width` *(Road, Water)* ``. Shared fields render without tags. + +### Constraint annotation + +Field-level constraints from the field's own annotation (not inherited from NewType +chains) annotate the field's description cell as italic text. 
The distinction matters: +constraints with `source=None` came from the field itself, while constraints with a +named source live on the NewType's own page. + +Model-level constraints annotate top-level field rows (those without dot-notation +prefixes) using the `field_notes` dict from `analyze_model_constraints`. + +### Example formatting + +Example values render in backticks for monospace consistency. Booleans use +`true`/`false` (not Python's `True`/`False`). `None` renders as `null`. Long values +truncate at 100 characters. Lists and dicts use compact bracket/brace notation. + +### Aggregate pages + +`render_primitives_from_specs` sorts primitives by bit-width key (prefix then numeric +width), groups into signed integers, unsigned integers, and floats, and formats ranges. +Integer ranges show both bounds as a compact "lower to upper" form; `int64`-scale bounds +use `2^63` notation for readability. `render_geometry_from_values` produces a +comma-separated backtick list. + +## 15. Example loader + +Loads example data from theme `pyproject.toml` files and validates it against the +schema. + +`resolve_pyproject_path` walks up from a model's module file to find `pyproject.toml`. +`load_examples_from_toml` reads the `[examples.ModelName]` TOML section. + +Validation requires three preprocessing steps that handle TOML's limitations and +flat-schema conventions. + +TOML has no null literal, so examples use the string `"null"` as a stand-in. `_denull` +replaces these recursively, walking nested dicts and lists. + +Literal fields (like `theme="buildings"`) are omitted from examples since they carry +constant values. `_inject_literal_fields` adds them back before validation by scanning +`model_fields` for single-value `Literal` annotations via `single_literal_value`. + +Discriminated union examples from flat parquet schemas include null fields from +non-selected variant arms. 
`_strip_null_unknown_fields` removes null-valued fields not +in the common base's field set, so the selected arm's validator accepts the data without +choking on fields that belong to sibling variants. + +`collect_dict_paths` walks the `FieldSpec` tree to identify dict-typed fields (like +`tags: dict[str, str]`), returning their dot-paths as a `frozenset`. Schema-notation +paths use empty brackets (`items[].tags`) while runtime paths carry indices +(`items[0].tags`); `_normalize_path` strips indices before membership checks. + +`flatten_example` converts nested dicts to dot-notation. Nested dicts become +`parent.child`, lists of dicts become `parent[0].child`. Dicts at paths in `dict_paths` +are kept as leaf values -- a `tags` field typed as `dict[str, str]` renders as a whole +map rather than being split into `tags.color`, `tags.size`. `order_example_rows` sorts by +field position in the documentation's field order using a stable sort, so sub-fields +maintain their original relative order. + +`load_examples` orchestrates the full flow: find the pyproject.toml, load the TOML +section, validate each example, flatten, and order. Invalid examples log a warning and +skip rather than failing the pipeline. + +## 16. Orchestration and CLI + +### The pipeline + +`generate_markdown_pages` in `markdown/pipeline.py` is the "main" function. It takes +feature specs and a schema root, returns rendered pages without touching the filesystem. +Eight steps: + +1. **Expand model trees** with a shared cache across all features, so sub-models + referenced by multiple features extract once. + +2. **Partition primitive and geometry names** from the system primitive module's + `__all__` exports. + +3. **Collect supplementary types** by walking expanded feature trees. + +4. **Build the placement registry** mapping every type to its output file path. + +5. **Compute reverse references** across all features and supplements. + +6. 
**Render each feature** with its `LinkContext`, loaded examples, and used-by entries. + +7. **Render each supplementary type** -- dispatching to `render_enum`, `render_newtype`, + or `render_feature` (for sub-models) based on spec type. + +8. **Render aggregate pages** for primitives and geometry. + +The return value is `list[RenderedPage]` -- frozen dataclasses carrying content, output +path, and a boolean `is_feature` flag. The caller decides what to do with them. + +### The CLI + +`cli.py` is a thin Click wrapper. The `generate` command discovers models, computes +schema root from *all* entry points (before any theme filtering), classifies each entry +as model or union via `is_model_class` and `is_union_alias`, extracts specs, calls the +pipeline, and writes output. + +Schema root computation uses all entry points deliberately. Theme filtering narrows +which features appear in the output, but the directory structure must remain stable +regardless of which themes are selected. Computing the root from filtered paths would +shift output directories when themes change. + +Feature pages get Docusaurus frontmatter (`sidebar_position: 1`) prepended. The CLI +generates `_category_.json` files for sidebar navigation, assigning positions +alphabetically with feature directories first. + +The `list` command prints sorted model names -- a diagnostic tool for verifying which +models the entry point system discovers. + +--- + +## Top-down trace: Segment through the pipeline + +A reader who reached this point has seen every module in isolation. This section follows +`Segment` from discovery to rendered markdown, showing how the pieces compose. + +**Discovery.** The CLI calls `discover_models()`. The entry point +`overture:transportation:segment` loads `overture.schema.transportation:Segment` -- the +`Annotated[Union[...]]` alias. 
`Segment` lands in the return dict keyed by +`ModelKey(namespace="overture", theme="transportation", type="segment", +entry_point="overture.schema.transportation:Segment")`. + +**Classification.** The CLI tests each entry. `is_model_class(Segment)` returns false -- +`Segment` is not a class. `is_union_alias(Segment)` calls `analyze_type`, which peels +the `Annotated` wrapper and finds three `BaseModel` subclasses in the union. The +analyzer returns `kind=UNION`. The CLI routes Segment to `extract_union`. + +**Extraction.** `extract_union("Segment", annotation)` calls `analyze_type` again (cheap +-- the same two-iteration path), gets the three member types, and finds +`TransportationSegment` as the common base via `_find_common_base`. It extracts the +common base's fields as shared, then extracts each member's fields and partitions the +non-shared ones into `AnnotatedField` entries with variant provenance. +`extract_discriminator` finds `subtype` and builds `{"road": RoadSegment, "rail": +RailSegment, "water": WaterSegment}`. The result is a `UnionSpec` satisfying +`FeatureSpec`. + +Meanwhile, concrete models like `Building` go through `extract_model`, which calls +`analyze_type` on each field annotation. A field typed `FeatureVersion` unwraps through +two NewType layers and an `Annotated` layer, producing a `TypeInfo` with +`base_type="int32"`, `newtype_name="FeatureVersion"`, and constraint provenance linking +`ge=0` back to the `int32` NewType. Both extraction paths produce specs satisfying +`FeatureSpec`. + +**Pipeline entry.** The feature specs enter `generate_markdown_pages`. +`expand_model_tree` walks MODEL-kind fields on Segment's `UnionSpec` and populates +`FieldSpec.model` references. The shared cache ensures sub-models referenced by multiple +features (like `Sources`) extract once. Union-kind fields skip inline expansion. + +**Layout.** `partition_primitive_and_geometry_names` reads the system module's exports. 
+`collect_all_supplementary_types` walks Segment's expanded fields and discovers +referenced enums (like `Subtype`), semantic NewTypes (like `Id`, `Sources`), and +sub-models. The walk follows `FieldSpec.model` references down the tree, and for +UNION-kind fields, extracts and walks each member's fields separately. + +`build_placement_registry` assigns Segment's output path from its entry point: +`entry_point_module` extracts `overture.schema.transportation`, `compute_output_dir` +strips the schema root and mirrors the remaining structure. Supplementary types get +module-derived paths with `types/` inserted under feature directories. + +**Reverse references.** `compute_reverse_references` walks Segment's fields and records +that Segment references `Subtype`, `Id`, `Sources`, and other types. These references +populate "Used By" sections: the `Subtype` enum page shows that Segment uses it. + +**Rendering.** The pipeline builds a `LinkContext` from Segment's output path and the +full registry. `render_feature` dispatches to `_expand_union_fields` because the spec is +a `UnionSpec`. Shared fields from `TransportationSegment` render as plain rows. +Variant-specific fields get italic tags: `` `road_class` *(Road)* ``. The renderer +formats each field's type via `format_type`, which resolves links through the +`LinkContext` -- `Subtype` gets a relative link to its enum page, `Id` links to its +NewType page. Constraints with `source=None` annotate field rows; constraints with named +sources appear on the source NewType's page instead. + +The example loader finds `pyproject.toml` in the transportation theme package, reads +`[examples.Segment]`, validates each example against the union alias (injecting literal +fields, stripping null fields from non-selected arms), computes `dict_paths` from +`spec.fields` to identify dict-typed fields, flattens to dot-notation (keeping dict-typed +fields as leaf values), and orders by field position. 
+ +The Jinja2 template assembles the field table, optional constraints section, examples, +and "Used By" partial into markdown. + +**Output.** The pipeline returns a `RenderedPage` with Segment's content, its output +path, and `is_feature=True`. The CLI prepends Docusaurus frontmatter and writes the +file. `_category_.json` files get generated for sidebar navigation. + +**The layering principle.** At every stage, the modules that do the work never reach +back up the dependency chain. Renderers consume specs and registries but never import +extractors. Extractors consume `analyze_type` but never import renderers. The type +analyzer imports nothing from the codegen package except `clean_docstring`. Any module +can be understood, tested, and modified by reading only the modules below it. diff --git a/packages/overture-schema-codegen/pyproject.toml b/packages/overture-schema-codegen/pyproject.toml new file mode 100644 index 000000000..de42c5fb9 --- /dev/null +++ b/packages/overture-schema-codegen/pyproject.toml @@ -0,0 +1,29 @@ +[build-system] +build-backend = "hatchling.build" +requires = ["hatchling"] + +[project] +dependencies = [ + "click>=8.0", + "jinja2>=3.0", + "overture-schema-core", + "overture-schema-system", + "tomli>=2.0; python_version < '3.11'", +] +description = "Code generator that produces documentation and code from Pydantic models" +dynamic = ["version"] +license = "MIT" +name = "overture-schema-codegen" + +[project.scripts] +overture-codegen = "overture.schema.codegen.cli:main" + +[tool.uv.sources] +overture-schema-core = { workspace = true } +overture-schema-system = { workspace = true } + +[tool.hatch.version] +path = "src/overture/schema/codegen/__about__.py" + +[tool.hatch.build.targets.wheel] +packages = ["src/overture"] diff --git a/packages/overture-schema-codegen/src/overture/__init__.py b/packages/overture-schema-codegen/src/overture/__init__.py new file mode 100644 index 000000000..8db66d3d0 --- /dev/null +++ 
b/packages/overture-schema-codegen/src/overture/__init__.py @@ -0,0 +1 @@ +__path__ = __import__("pkgutil").extend_path(__path__, __name__) diff --git a/packages/overture-schema-codegen/src/overture/schema/__init__.py b/packages/overture-schema-codegen/src/overture/schema/__init__.py new file mode 100644 index 000000000..8db66d3d0 --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/__init__.py @@ -0,0 +1 @@ +__path__ = __import__("pkgutil").extend_path(__path__, __name__) diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/__about__.py b/packages/overture-schema-codegen/src/overture/schema/codegen/__about__.py new file mode 100644 index 000000000..3dc1f76bc --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/__about__.py @@ -0,0 +1 @@ +__version__ = "0.1.0" diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/__init__.py b/packages/overture-schema-codegen/src/overture/schema/codegen/__init__.py new file mode 100644 index 000000000..2de7d7120 --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/__init__.py @@ -0,0 +1 @@ +"""Code generator for Overture Schema Pydantic models.""" diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/cli.py b/packages/overture-schema-codegen/src/overture/schema/codegen/cli.py new file mode 100644 index 000000000..0a24c7348 --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/cli.py @@ -0,0 +1,195 @@ +"""CLI entrypoint for schema code generation.""" + +import json +import logging +from pathlib import Path, PurePosixPath + +import click + +from overture.schema.core.discovery import discover_models + +from .extraction.model_extraction import extract_model +from .extraction.specs import ( + FeatureSpec, + is_model_class, + is_union_alias, +) +from .extraction.union_extraction import extract_union +from .layout.module_layout import ( + OUTPUT_ROOT, + compute_schema_root, + 
entry_point_class, + entry_point_module, +) +from .markdown.pipeline import generate_markdown_pages + +log = logging.getLogger(__name__) + +__all__ = ["cli"] + +_OUTPUT_FORMATS = ("markdown",) + +_FEATURE_FRONTMATTER = "---\nsidebar_position: 1\n---\n\n" + + +def _write_output( + content: str, + output_dir: Path | None, + output_path: PurePosixPath, +) -> None: + """Write content to a file under output_dir, or stdout.""" + if output_dir: + file_path = output_dir / output_path + file_path.parent.mkdir(parents=True, exist_ok=True) + file_path.write_text(content) + else: + click.echo(content) + click.echo() # separate entries with a blank line in stdout mode + + +@click.group() +def cli() -> None: + """Overture Schema code generator. + + Generate documentation and code from Pydantic schema models. + """ + + +@cli.command("list") +def list_models() -> None: + """List all discovered models.""" + models = discover_models() + names = sorted( + model.__name__ if isinstance(model, type) else str(model) + for model in models.values() + ) + for name in names: + click.echo(name) + + +@cli.command() +@click.option( + "--format", + "output_format", + required=True, + type=click.Choice(_OUTPUT_FORMATS), + help="Output format", +) +@click.option( + "--theme", + multiple=True, + help="Filter to specific theme(s); repeatable (e.g., --theme buildings --theme places)", +) +@click.option( + "--output-dir", + type=click.Path(path_type=Path), + default=None, + help="Write output to directory (default: stdout)", +) +def generate( + output_format: str, + theme: tuple[str, ...], + output_dir: Path | None, +) -> None: + """Generate code/docs from discovered models.""" + all_models = discover_models() + + # Schema root from ALL entry points (before theme filter). 
+ module_paths = [entry_point_module(k.entry_point) for k in all_models] + schema_root = compute_schema_root(module_paths) + + models = ( + {k: v for k, v in all_models.items() if k.theme in theme} + if theme + else all_models + ) + + if output_dir: + output_dir.mkdir(parents=True, exist_ok=True) + + feature_specs: list[FeatureSpec] = [] + for key, entry in models.items(): + if is_model_class(entry): + feature_specs.append(extract_model(entry, entry_point=key.entry_point)) + elif is_union_alias(entry): + feature_specs.append( + extract_union( + entry_point_class(key.entry_point), + entry, + entry_point=key.entry_point, + ) + ) + + _generate_markdown(feature_specs, schema_root, output_dir) + + +def _generate_markdown( + feature_specs: list[FeatureSpec], + schema_root: str, + output_dir: Path | None, +) -> None: + """Generate markdown with directory layout and placement-aware links.""" + pages = generate_markdown_pages(feature_specs, schema_root) + + for page in pages: + content = ( + f"{_FEATURE_FRONTMATTER}{page.content}" if page.is_feature else page.content + ) + _write_output(content, output_dir, page.path) + + if output_dir: + feature_paths = {page.path for page in pages if page.is_feature} + all_paths = {page.path for page in pages} + _write_category_files(output_dir, all_paths, feature_paths) + + +def _ancestor_dirs(paths: set[PurePosixPath]) -> set[PurePosixPath]: + """Collect all ancestor directories for a set of file paths.""" + dirs: set[PurePosixPath] = set() + for path in paths: + parent = path.parent + while parent != OUTPUT_ROOT: + dirs.add(parent) + parent = parent.parent + return dirs + + +def _top_level_positions( + dirs: set[PurePosixPath], + feature_paths: set[PurePosixPath], +) -> dict[PurePosixPath, int]: + """Assign sidebar positions: feature dirs first, then non-feature, both alphabetical.""" + feature_dir_names = {p.parts[0] for p in feature_paths} + top_level = sorted(d for d in dirs if d.parent == OUTPUT_ROOT) + feature_dirs = [d for d in 
top_level if d.name in feature_dir_names] + non_feature_dirs = [d for d in top_level if d.name not in feature_dir_names] + return {d: i for i, d in enumerate(feature_dirs + non_feature_dirs, start=1)} + + +def _write_category_files( + output_dir: Path, + all_paths: set[PurePosixPath], + feature_paths: set[PurePosixPath], +) -> None: + """Write _category_.json files for Docusaurus sidebar navigation.""" + dirs = _ancestor_dirs(all_paths) + positions = _top_level_positions(dirs, feature_paths) + + for dir_path in sorted(dirs): + label = dir_path.name.replace("_", " ").title() + category: dict[str, object] = {"label": label} + if dir_path in positions: + category["position"] = positions[dir_path] + + file_path = output_dir / dir_path / "_category_.json" + file_path.parent.mkdir(parents=True, exist_ok=True) + file_path.write_text(json.dumps(category, indent=2) + "\n") + + +def main() -> None: + """Run the CLI entry point.""" + cli() + + +if __name__ == "__main__": + main() diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/__init__.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/case_conversion.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/case_conversion.py new file mode 100644 index 000000000..9d06341fb --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/case_conversion.py @@ -0,0 +1,41 @@ +"""PascalCase to snake_case conversion for code generation.""" + +import re + +__all__ = ["slug_filename", "to_snake_case"] + +# Insert _ between an acronym run and a capitalized word start (HTML|Parser) +_ACRONYM_BOUNDARY = re.compile(r"([A-Z]+)([A-Z][a-z])") +# Insert _ between a lowercase/digit and an uppercase letter (building|Part) +_CAMEL_BOUNDARY = re.compile(r"([a-z0-9])([A-Z])") + + +def 
to_snake_case(name: str) -> str: + """Convert PascalCase to snake_case. + + Handles acronym runs correctly: "HTMLParser" becomes "html_parser", + not "h_t_m_l_parser". + + >>> to_snake_case("HTMLParser") + 'html_parser' + >>> to_snake_case("BuildingPart") + 'building_part' + >>> to_snake_case("simple") + 'simple' + """ + name = _ACRONYM_BOUNDARY.sub(r"\1_\2", name) + name = _CAMEL_BOUNDARY.sub(r"\1_\2", name) + return name.lower() + + +def slug_filename(name: str, ext: str = ".md") -> str: + """Convert a PascalCase type name to a snake_case filename. + + >>> slug_filename("HexColor") + 'hex_color.md' + >>> slug_filename("BuildingPart") + 'building_part.md' + >>> slug_filename("BuildingPart", ext=".json") + 'building_part.json' + """ + return f"{to_snake_case(name)}{ext}" diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/docstring.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/docstring.py new file mode 100644 index 000000000..7dc2e112f --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/docstring.py @@ -0,0 +1,46 @@ +"""Docstring extraction and cleaning utilities.""" + +import inspect +from enum import Enum +from typing import NewType + +__all__ = ["clean_docstring", "first_docstring_line", "is_custom_docstring"] + + +# Probe auto-generated docstrings so we can distinguish them from explicit ones. +# Both Enum and NewType generate default docstrings that vary by Python version; +# capturing at import time adapts automatically if the format changes. 
+class _DocstringProbeEnum(Enum): + pass + + +_ENUM_DEFAULT_DOCSTRING = _DocstringProbeEnum.__doc__ +del _DocstringProbeEnum +_NewtypeProbe = NewType("_NewtypeProbe", int) +_NEWTYPE_DEFAULT_DOCSTRING = _NewtypeProbe.__doc__ +del _NewtypeProbe + + +def clean_docstring(doc: str | None) -> str | None: + """Return cleaned docstring, or None if absent or whitespace-only.""" + if not doc: + return None + cleaned = inspect.cleandoc(doc) + return cleaned or None + + +def first_docstring_line(doc: str | None) -> str | None: + """Return the first line of a docstring, or None if absent.""" + cleaned = clean_docstring(doc) + if not cleaned: + return None + return cleaned.split("\n")[0] + + +def is_custom_docstring(doc: str | None, inherited_doc: str | None = None) -> bool: + """Check if a docstring was explicitly written, not auto-generated or inherited.""" + return bool(doc) and doc not in ( + _ENUM_DEFAULT_DOCSTRING, + _NEWTYPE_DEFAULT_DOCSTRING, + inherited_doc, + ) diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/enum_extraction.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/enum_extraction.py new file mode 100644 index 000000000..545979d66 --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/enum_extraction.py @@ -0,0 +1,40 @@ +"""Enum extraction.""" + +from enum import Enum + +from .docstring import clean_docstring, is_custom_docstring +from .specs import EnumMemberSpec, EnumSpec + +__all__ = ["extract_enum"] + + +def extract_enum(enum_class: type[Enum]) -> EnumSpec: + """Extract enum specification from an Enum class. + + Handles both simple str Enums and DocumentedEnums where members + have per-value descriptions via the __doc__ attribute. 
+ """ + class_doc = enum_class.__doc__ + description = clean_docstring(class_doc) if is_custom_docstring(class_doc) else None + + members: list[EnumMemberSpec] = [] + for member in enum_class: + member_doc = getattr(member, "__doc__", None) + member_description = ( + member_doc if is_custom_docstring(member_doc, class_doc) else None + ) + + members.append( + EnumMemberSpec( + name=member.name, + value=str(member.value), + description=member_description, + ) + ) + + return EnumSpec( + name=enum_class.__name__, + description=description, + members=members, + source_type=enum_class, + ) diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/examples.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/examples.py new file mode 100644 index 000000000..18d71ea8b --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/examples.py @@ -0,0 +1,370 @@ +"""Load and process example data from theme pyproject.toml files.""" + +import logging +import re +import sys +from dataclasses import dataclass +from pathlib import Path +from typing import Any + +from pydantic import BaseModel, TypeAdapter, ValidationError +from pydantic.fields import FieldInfo + +from .model_extraction import resolve_field_alias +from .specs import FieldSpec +from .type_analyzer import single_literal_value + +log = logging.getLogger(__name__) + +__all__ = ["ExampleRecord", "collect_dict_paths", "load_examples", "validate_example"] + +# tomllib is stdlib from 3.11+; tomli is the backport for 3.10. 
+try: + import tomllib # type: ignore[import-not-found] +except ModuleNotFoundError: + import tomli as tomllib # type: ignore[import-not-found] + + +@dataclass +class ExampleRecord: + """A flattened example with field-value pairs in documentation order.""" + + rows: list[tuple[str, Any]] + + +def _inject_literal_fields( + model_fields_dict: dict[str, FieldInfo], data: dict[str, Any] +) -> dict[str, Any]: + """Inject single-value Literal field defaults missing from *data*. + + Inspects *model_fields_dict* for fields with single-value ``Literal`` + annotations. For each field missing from *data*, injects the literal + value using the field's ``validation_alias`` (if set), falling back + to ``alias``, then to the field name. + + Returns a new dict; the original is not mutated. + """ + result = data.copy() + + for field_name, field_info in model_fields_dict.items(): + key = resolve_field_alias(field_name, field_info) + if key in result: + continue + + literal_value = single_literal_value(field_info.annotation) + if literal_value is not None: + result[key] = literal_value + + return result + + +def _denull_value(value: object) -> object: + """Convert a single value, replacing ``"null"`` strings with ``None``.""" + if value == "null": + return None + if isinstance(value, dict): + return _denull(value) + if isinstance(value, list): + return [_denull_value(item) for item in value] + return value + + +def _denull(data: dict[str, Any]) -> dict[str, Any]: + """Convert ``"null"`` sentinel strings to ``None``. + + TOML has no null literal, so example data uses the string ``"null"`` + as a stand-in. This recursively walks *data* (including nested dicts, + lists of dicts, and plain lists) and replaces every ``"null"`` value + with ``None``. + + Returns a new dict; the original is not mutated. 
+ """ + return {key: _denull_value(value) for key, value in data.items()} + + +def _known_field_keys(model_fields_dict: dict[str, FieldInfo]) -> frozenset[str]: + """Alias-resolved field keys from a model_fields dict.""" + return frozenset( + resolve_field_alias(name, info) for name, info in model_fields_dict.items() + ) + + +def _strip_null_unknown_fields( + data: dict[str, Any], known_keys: frozenset[str] +) -> dict[str, Any]: + """Drop null-valued fields not in *known_keys*. + + For discriminated unions, *known_keys* contains only common base + fields. Variant-specific null fields from other arms (present in + flat parquet schemas) are stripped so the selected arm's validator + doesn't reject them as unknown extras. + + Non-null fields are always kept so the arm's own validator can + accept or reject them normally. + """ + return {k: v for k, v in data.items() if v is not None or k in known_keys} + + +def validate_example( + validation_type: object, + raw: dict[str, Any], + *, + model_fields: dict[str, FieldInfo] | None = None, +) -> dict[str, Any]: + """Validate example data against a model or union type. + + Uses TypeAdapter for validation, supporting both concrete models + and discriminated union aliases. + + Preprocesses *raw* data by: + 1. Converting "null" strings to None + 2. Injecting missing Literal fields for validation (if model_fields provided) + 3. Stripping null-valued fields not in *model_fields* (handles + flat-schema examples from discriminated unions where fields from + non-selected arms appear as nulls) + + Returns the denulled dict (not the preprocessed one with injected + literals). Lets ValidationError propagate on validation failure. 
+ """ + denulled = _denull(raw) + + if model_fields is None: + if isinstance(validation_type, type) and issubclass(validation_type, BaseModel): + model_fields = validation_type.model_fields + else: + model_fields = {} + + known_keys = _known_field_keys(model_fields) + preprocessed = _inject_literal_fields(model_fields, denulled) + preprocessed = _strip_null_unknown_fields(preprocessed, known_keys) + TypeAdapter(validation_type).validate_python(preprocessed) + return denulled + + +_DEFAULT_SKIP_KEYS: frozenset[str] = frozenset() +_DEFAULT_DICT_PATHS: frozenset[str] = frozenset() + +_INDEXED_BRACKET = re.compile(r"\[\d+\]") + + +def _normalize_path(path: str) -> str: + """Replace indexed brackets with empty brackets for dict_paths matching. + + ``collect_dict_paths`` produces schema-notation paths like + ``items[].tags``, while ``_flatten_value`` builds runtime paths like + ``items[0].tags``. Normalizing before membership testing makes them + comparable. + """ + return _INDEXED_BRACKET.sub("[]", path) + + +def collect_dict_paths(fields: list[FieldSpec], prefix: str = "") -> frozenset[str]: + """Collect dot-paths of dict-typed fields from a FieldSpec tree. + + Walks the ``FieldSpec.model`` tree (same structure the renderer walks + for inline expansion) and returns paths where ``type_info.is_dict`` + is True. These paths tell ``flatten_example`` which dicts are maps + (keep as leaf) vs. models (recurse into). + + Parameters + ---------- + fields : list[FieldSpec] + Fields to walk. + prefix : str + Dot-notation prefix accumulated from parent fields. 
+ """ + paths: set[str] = set() + for f in fields: + path = f"{prefix}{f.name}" if prefix else f.name + if f.type_info.is_dict: + paths.add(path) + elif f.model and not f.starts_cycle: + suffix = "[]" * f.type_info.list_depth if f.type_info.is_list else "" + paths |= collect_dict_paths(f.model.fields, f"{path}{suffix}.") + return frozenset(paths) + + +def _flatten_value( + prefix: str, value: object, dict_paths: frozenset[str] +) -> list[tuple[str, Any]]: + """Recursively flatten a value into dot/bracket-notation rows.""" + if isinstance(value, dict): + if _normalize_path(prefix) in dict_paths: + return [(prefix, value)] + result: list[tuple[str, Any]] = [] + for k, v in value.items(): + result.extend(_flatten_value(f"{prefix}.{k}", v, dict_paths)) + return result + if isinstance(value, list) and value and isinstance(value[0], (dict, list)): + result = [] + for i, item in enumerate(value): + result.extend(_flatten_value(f"{prefix}[{i}]", item, dict_paths)) + return result + return [(prefix, value)] + + +def flatten_example( + raw: dict[str, Any], + *, + skip_keys: frozenset[str] = _DEFAULT_SKIP_KEYS, + dict_paths: frozenset[str] = _DEFAULT_DICT_PATHS, +) -> list[tuple[str, Any]]: + """Flatten nested example dict to dot-notation key-value pairs. + + Nested dicts become ``"parent.child"``; lists of dicts become + ``"parent[0].child"``; lists of lists of dicts use double-index + notation ``"parent[0][1].child"``. Keys in *skip_keys* are dropped + at the top level only. Plain lists are kept as values. + + Dicts at paths in *dict_paths* are kept as leaf values instead of + being recursed into. Use ``collect_dict_paths`` to compute this set + from a FieldSpec tree. + """ + result: list[tuple[str, Any]] = [] + for key, value in raw.items(): + if key in skip_keys: + continue + result.extend(_flatten_value(key, value, dict_paths)) + return result + + +def extract_base_field(key: str) -> str: + """Extract the top-level field name from a flattened key. 
+ + >>> extract_base_field("sources[0].dataset") + 'sources' + >>> extract_base_field("names.primary") + 'names' + >>> extract_base_field("id") + 'id' + """ + if "[" in key: + return key.split("[")[0] + if "." in key: + return key.split(".")[0] + return key + + +def order_example_rows( + flat_rows: list[tuple[str, Any]], + field_names: list[str], +) -> list[tuple[str, Any]]: + """Order flattened rows by field position in documentation. + + Sorts by position of base field name in *field_names*. + Fields with the same base maintain their original order (stable sort). + Unknown fields sort to end. + """ + position = {name: i for i, name in enumerate(field_names)} + sentinel = len(field_names) + + def sort_key(row: tuple[str, Any]) -> int: + return position.get(extract_base_field(row[0]), sentinel) + + return sorted(flat_rows, key=sort_key) + + +def load_examples_from_toml( + pyproject_path: Path, + model_name: str, +) -> list[dict[str, Any]]: + """Load ``[examples.]`` from a pyproject.toml file.""" + with pyproject_path.open("rb") as f: + data = tomllib.load(f) + + examples: dict[str, list[dict[str, Any]]] = data.get("examples", {}) + return examples.get(model_name, []) + + +def resolve_pyproject_path(model_class: type) -> Path | None: + """Find pyproject.toml by walking up from the model's module location.""" + module_name = getattr(model_class, "__module__", None) + if not module_name: + return None + + module = sys.modules.get(module_name) + if not module: + return None + + module_file = getattr(module, "__file__", None) + if not module_file: + return None + + # Walk up from module directory + current = Path(module_file).parent + while current != current.parent: # Stop at filesystem root + pyproject = current / "pyproject.toml" + if pyproject.exists(): + return pyproject + current = current.parent + + return None + + +def load_examples( + validation_type: object, + model_name: str, + field_names: list[str], + *, + pyproject_source: type | None = None, + 
model_fields: dict[str, FieldInfo] | None = None, + dict_paths: frozenset[str] = _DEFAULT_DICT_PATHS, +) -> list[ExampleRecord]: + """Load examples for a model, flattened and ordered by *field_names*. + + Validates each example against the validation type. Invalid examples + are skipped with a warning logged. Returns an empty list on any failure + (missing file, missing section, parse error). + + Parameters + ---------- + validation_type : type[BaseModel] | object + Model class or union alias to validate against. + model_name : str + Name of the model to load examples for. + field_names : list[str] + List of field names for ordering output. + pyproject_source : type or None + Type to use for finding pyproject.toml. If None, + uses validation_type if it's a class. + model_fields : dict[str, FieldInfo] or None + Field info dict for Literal injection. If None, infers + from validation_type if it's a BaseModel class. + dict_paths : frozenset[str] + Dot-paths of dict-typed fields to keep as leaf values. + Use ``collect_dict_paths`` to compute from a FieldSpec tree. 
+ """ + source_type = pyproject_source if pyproject_source is not None else validation_type + if not isinstance(source_type, type): + return [] + + pyproject_path = resolve_pyproject_path(source_type) + if not pyproject_path: + return [] + + try: + raw_examples = load_examples_from_toml(pyproject_path, model_name) + except (OSError, tomllib.TOMLDecodeError): + log.debug("Failed to load examples for %s", model_name, exc_info=True) + return [] + + if not raw_examples: + return [] + + records = [] + for raw in raw_examples: + try: + denulled = validate_example(validation_type, raw, model_fields=model_fields) + except ValidationError as e: + log.warning( + "Skipping invalid example for %s in %s: %s", + model_name, + pyproject_path, + e, + ) + continue + flat_rows = flatten_example(denulled, dict_paths=dict_paths) + ordered_rows = order_example_rows(flat_rows, field_names) + records.append(ExampleRecord(rows=ordered_rows)) + + return records diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/field_constraints.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/field_constraints.py new file mode 100644 index 000000000..5981528d1 --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/field_constraints.py @@ -0,0 +1,153 @@ +"""Convert field-level constraints to display text. + +Handles constraints from Annotated metadata and NewType wrappers: +Ge, Gt, Interval, Le, Lt, MaxLen, MinLen, GeometryTypeConstraint, +Reference, and custom constraint classes. 
+""" + +from __future__ import annotations + +from collections.abc import Callable + +from annotated_types import Ge, Gt, Interval, Le, Lt, MaxLen, MinLen + +from overture.schema.system.primitive import GeometryTypeConstraint +from overture.schema.system.ref import Reference + +from .docstring import first_docstring_line +from .specs import TypeIdentity +from .type_analyzer import ConstraintSource + +__all__ = [ + "constraint_display_text", + "constraint_pattern", + "describe_field_constraint", +] + +# Bound attribute names paired with display operators. Each entry maps an +# annotated_types constraint attribute (Ge, Gt, Le, Lt, Interval) to its +# mathematical symbol for prose rendering. +# +# primitive_extraction.py has its own _BOUND_ATTRS for numeric extraction. The +# duplication is deliberate: these modules use the same attribute names for +# unrelated purposes (display formatting vs. numeric bound extraction), and +# coupling them for four string literals adds a dependency without value. +_BOUND_OPS: tuple[tuple[str, str], ...] 
= ( + ("ge", "≥"), + ("gt", ">"), + ("le", "≤"), + ("lt", "<"), +) + + +def _first_bound(obj: object) -> str | None: + """Return backticked notation for the first set bound, or None.""" + for attr, op in _BOUND_OPS: + val = getattr(obj, attr, None) + if val is not None: + return f"`{op} {val}`" + return None + + +def _describe_interval(iv: Interval) -> str: + """Format an Interval as readable bound notation.""" + lower_val = iv.ge if iv.ge is not None else iv.gt + lower_op = "≤" if iv.ge is not None else "<" + upper_val = iv.le if iv.le is not None else iv.lt + upper_op = "≤" if iv.le is not None else "<" + + if lower_val is not None and upper_val is not None: + return f"`{lower_val} {lower_op} x {upper_op} {upper_val}`" + + return _first_bound(iv) or "" + + +def _is_opaque_constraint(constraint: object) -> bool: + """Check whether the constraint has no custom __repr__ (renders as just its class name).""" + return type(constraint).__repr__ is object.__repr__ + + +def _geometry_type_label(value: str) -> str: + """Convert a GeometryType value to PascalCase display name. + + >>> _geometry_type_label("line_string") + 'LineString' + """ + return "".join(part.title() for part in value.split("_")) + + +def describe_field_constraint( + constraint: object, + link_fn: Callable[[TypeIdentity], str] | None = None, +) -> str: + """Return a display string for a field-level constraint object. + + *link_fn* resolves a TypeIdentity to a markdown link string (e.g. + `` [`Name`](path) ``). When None, names render as inline code. 
+ """ + if isinstance(constraint, GeometryTypeConstraint): + labels = ", ".join( + _geometry_type_label(gt.value) for gt in constraint.allowed_types + ) + return f"Allowed geometry types: {labels}" + if isinstance(constraint, Reference): + rel_value: str = constraint.relationship.value # type: ignore[assignment] + rel_label = rel_value.replace("_", " ") + target = constraint.relatee + target_id = TypeIdentity.of(target) + target_str = link_fn(target_id) if link_fn else f"`{target.__name__}`" + return f"References {target_str} ({rel_label})" + if isinstance(constraint, Interval): + desc = _describe_interval(constraint) + if desc: + return desc + elif isinstance(constraint, (Ge, Gt, Le, Lt)): + result = _first_bound(constraint) + if result is not None: + return result + if isinstance(constraint, MinLen): + return f"Minimum length: {constraint.min_length}" + if isinstance(constraint, MaxLen): + return f"Maximum length: {constraint.max_length}" + + if _is_opaque_constraint(constraint): + return f"`{type(constraint).__name__}`" + return f"`{constraint}`" + + +def _constraint_class_description(constraint: object) -> str | None: + """Extract the first docstring line from a custom constraint class. + + Returns None for builtins and classes without docstrings. + """ + constraint_type = type(constraint) + if constraint_type.__module__ == "builtins": + return None + line = first_docstring_line(constraint_type.__doc__) + return line or None + + +def constraint_pattern(constraint: object) -> str | None: + """Extract the regex pattern string from a constraint, if present. + + Traverses two levels: constraint.pattern is a compiled re.Pattern + object, and re.Pattern.pattern is the raw string. 
+ """ + compiled = getattr(constraint, "pattern", None) + return getattr(compiled, "pattern", None) + + +def constraint_display_text( + cs: ConstraintSource, + link_fn: Callable[[TypeIdentity], str] | None = None, +) -> str: + """Build display text for a constraint, combining description/pattern when available.""" + description = _constraint_class_description(cs.constraint) + if _is_opaque_constraint(cs.constraint) and description: + cls_name = type(cs.constraint).__name__ + pattern = constraint_pattern(cs.constraint) + if pattern: + return f"{description} (`{cls_name}`, pattern: `{pattern}`)" + return f"{description} (`{cls_name}`)" + + return describe_field_constraint(cs.constraint, link_fn=link_fn) diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/model_constraints.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/model_constraints.py new file mode 100644 index 000000000..76f2934fc --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/model_constraints.py @@ -0,0 +1,228 @@ +"""Convert model-level constraints to human-readable prose. + +Handles RequireAnyOf, RadioGroup, ForbidIf, RequireIf, and other +ModelConstraint types. Produces descriptions and per-field notes for +documentation rendering. 
+""" + +from __future__ import annotations + +from dataclasses import dataclass + +from overture.schema.system.model_constraint import ( + FieldEqCondition, + ForbidIfConstraint, + MinFieldsSetConstraint, + ModelConstraint, + NoExtraFieldsConstraint, + Not, + RadioGroupConstraint, + RequireAnyOfConstraint, + RequireIfConstraint, +) + +__all__ = ["analyze_model_constraints"] + +_ConditionalConstraint = RequireIfConstraint | ForbidIfConstraint + + +@dataclass(frozen=True) +class _ConstraintEntry: + """A constraint description paired with the field names it affects.""" + + description: str + field_names: frozenset[str] + + +def _format_field_list(names: tuple[str, ...]) -> str: + """Format field names as backtick-quoted, comma-separated list.""" + return ", ".join(f"`{n}`" for n in names) + + +def _conditional_verb(constraint: _ConditionalConstraint) -> str: + """Return 'required' or 'forbidden' based on constraint type.""" + return "required" if isinstance(constraint, RequireIfConstraint) else "forbidden" + + +def _plural_verb(names: tuple[str, ...]) -> str: + """Return 'is' or 'are' based on field count.""" + return "are" if len(names) > 1 else "is" + + +def _unwrap_field_eq(condition: object) -> tuple[FieldEqCondition, bool] | None: + """Extract the FieldEqCondition from a condition, with negation flag. + + Returns (field_eq, is_negated) or None for unrecognized conditions. 
+ """ + if isinstance(condition, Not) and isinstance(condition.inner, FieldEqCondition): + return condition.inner, True + if isinstance(condition, FieldEqCondition): + return condition, False + return None + + +def _describe_condition(condition: object) -> str: + """Render a Condition as human-readable text.""" + unwrapped = _unwrap_field_eq(condition) + if unwrapped is not None: + field_eq, negated = unwrapped + op = "≠" if negated else "=" + return f"`{field_eq.field_name}` {op} `{field_eq.value}`" + return str(condition) + + +def _describe_conditional(constraint: _ConditionalConstraint) -> str: + """Describe a require_if or forbid_if constraint.""" + fields = _format_field_list(constraint.field_names) + verb = _conditional_verb(constraint) + cond = _describe_condition(constraint.condition) + return f"{fields} {_plural_verb(constraint.field_names)} {verb} when {cond}" + + +def _consolidation_key( + constraint: _ConditionalConstraint, +) -> tuple[type, tuple[str, ...], str] | None: + """Return a grouping key if the constraint is consolidatable, else None. + + Consolidatable: same type, same field_names, plain FieldEqCondition + (not negated) on the same condition field. + """ + cond = constraint.condition + if not isinstance(cond, FieldEqCondition): + return None + return (type(constraint), constraint.field_names, cond.field_name) + + +def _as_field_eq(constraint: _ConditionalConstraint) -> FieldEqCondition: + """Narrow a conditional constraint's condition to FieldEqCondition. + + Only called on constraints that passed _consolidation_key, which + rejects non-FieldEqCondition conditions. 
+ """ + cond = constraint.condition + if not isinstance(cond, FieldEqCondition): + raise TypeError(f"Expected FieldEqCondition, got {type(cond).__name__}") + return cond + + +def _describe_consolidated( + constraints: list[_ConditionalConstraint], +) -> str: + """Describe a group of consolidated conditional constraints.""" + first = constraints[0] + fields = _format_field_list(first.field_names) + verb = _conditional_verb(first) + cond_field = _as_field_eq(first).field_name + values = ", ".join(f"`{_as_field_eq(c).value}`" for c in constraints) + return ( + f"{fields} {_plural_verb(first.field_names)} {verb} " + f"when `{cond_field}` is one of: {values}" + ) + + +def _condition_field_names(condition: object) -> frozenset[str]: + """Extract field names referenced by a condition.""" + unwrapped = _unwrap_field_eq(condition) + if unwrapped is not None: + return frozenset({unwrapped[0].field_name}) + return frozenset() + + +def _affected_field_names(constraint: ModelConstraint) -> frozenset[str]: + """Return all field names referenced by a constraint. + + Includes both constrained field_names and condition trigger fields. + Returns empty set for constraints that don't reference specific fields + (NoExtraFieldsConstraint, MinFieldsSetConstraint). 
+ """ + if isinstance(constraint, (NoExtraFieldsConstraint, MinFieldsSetConstraint)): + return frozenset() + if isinstance(constraint, (RequireIfConstraint, ForbidIfConstraint)): + return frozenset(constraint.field_names) | _condition_field_names( + constraint.condition + ) + if isinstance(constraint, (RequireAnyOfConstraint, RadioGroupConstraint)): + return frozenset(constraint.field_names) + return frozenset() + + +def _describe_one(constraint: ModelConstraint) -> str | None: + """Describe a single constraint, or None to skip it.""" + if isinstance(constraint, NoExtraFieldsConstraint): + return None + if isinstance(constraint, RequireAnyOfConstraint): + return ( + f"At least one of {_format_field_list(constraint.field_names)} must be set" + ) + if isinstance(constraint, RadioGroupConstraint): + return f"Exactly one of {_format_field_list(constraint.field_names)} must be `true`" + if isinstance(constraint, MinFieldsSetConstraint): + return f"At least {constraint.count} fields must be set" + if isinstance(constraint, (RequireIfConstraint, ForbidIfConstraint)): + return _describe_conditional(constraint) + return f"`{constraint.name}`" + + +def _analyze_constraints( + constraints: tuple[ModelConstraint, ...], +) -> list[_ConstraintEntry]: + """Analyze constraints into descriptions paired with affected fields. + + Handles consolidation and filtering, preserving original declaration order. 
+ """ + groups: dict[ + tuple[type, tuple[str, ...], str], list[tuple[int, _ConditionalConstraint]] + ] = {} + standalone: list[tuple[int, ModelConstraint]] = [] + + for i, c in enumerate(constraints): + if isinstance(c, (RequireIfConstraint, ForbidIfConstraint)): + key = _consolidation_key(c) + if key is not None: + groups.setdefault(key, []).append((i, c)) + continue + standalone.append((i, c)) + + entries: list[tuple[int, _ConstraintEntry]] = [] + + for group_items in groups.values(): + first_idx = group_items[0][0] + group_constraints = [c for _, c in group_items] + all_fields: frozenset[str] = frozenset().union( + *(_affected_field_names(c) for c in group_constraints) + ) + if len(group_constraints) == 1: + desc = _describe_one(group_constraints[0]) + else: + desc = _describe_consolidated(group_constraints) + if desc is not None: + entries.append((first_idx, _ConstraintEntry(desc, all_fields))) + + for idx, c in standalone: + desc = _describe_one(c) + if desc is not None: + entries.append((idx, _ConstraintEntry(desc, _affected_field_names(c)))) + + entries.sort(key=lambda e: e[0]) + return [entry for _, entry in entries] + + +def analyze_model_constraints( + constraints: tuple[ModelConstraint, ...], +) -> tuple[list[str], dict[str, list[str]]]: + """Analyze constraints into descriptions and per-field notes in one pass. + + Returns (descriptions, field_notes) where descriptions is the list of + human-readable constraint strings and field_notes maps field names to + constraint descriptions that reference them. 
+ """ + entries = _analyze_constraints(constraints) + + descriptions = [entry.description for entry in entries] + + field_notes: dict[str, list[str]] = {} + for entry in entries: + for name in entry.field_names: + field_notes.setdefault(name, []).append(entry.description) + + return descriptions, field_notes diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/model_extraction.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/model_extraction.py new file mode 100644 index 000000000..81c360538 --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/model_extraction.py @@ -0,0 +1,204 @@ +"""Model extraction and tree expansion.""" + +from __future__ import annotations + +import dataclasses + +from pydantic import BaseModel +from pydantic.fields import FieldInfo +from pydantic_core import PydanticUndefined + +from overture.schema.system.model_constraint import ModelConstraint + +from .docstring import clean_docstring +from .specs import FeatureSpec, FieldSpec, ModelSpec, is_model_class +from .type_analyzer import ConstraintSource, TypeInfo, TypeKind, analyze_type + +__all__ = [ + "expand_model_tree", + "extract_model", + "resolve_field_alias", +] + + +def resolve_field_alias(field_name: str, field_info: FieldInfo) -> str: + """Return the data-dict key for a Pydantic field. + + Prefers ``validation_alias``, falls back to ``alias``, then the + Python field name. Only string aliases are supported; AliasPath + and AliasChoices are ignored. + """ + validation_alias = field_info.validation_alias + if isinstance(validation_alias, str): + return validation_alias + alias = field_info.alias + if isinstance(alias, str): + return alias + return field_name + + +def _merge_field_metadata(type_info: TypeInfo, field_info: FieldInfo) -> TypeInfo: + """Merge constraints from field_info.metadata into TypeInfo. 
+ + Pydantic strips the Annotated wrapper from some fields (non-optional, + non-union) and moves the metadata to field_info.metadata. When this + happens, analyze_type sees a bare type and misses the constraints. + The two sets never overlap: field_info.metadata is empty when the + Annotated wrapper survives in the annotation. + """ + if not field_info.metadata: + return type_info + extra = tuple(ConstraintSource(None, None, m) for m in field_info.metadata) + return dataclasses.replace(type_info, constraints=type_info.constraints + extra) + + +def _is_field_required(field_info: FieldInfo, type_info: TypeInfo) -> bool: + """Determine whether a field is required (no default and not Optional).""" + has_default = ( + field_info.default is not PydanticUndefined + or field_info.default_factory is not None + ) + return not has_default and not type_info.is_optional + + +def _basemodel_bases(cls: type) -> list[type[BaseModel]]: + """Return direct BaseModel bases, excluding BaseModel itself.""" + return [b for b in cls.__bases__ if is_model_class(b) and b is not BaseModel] + + +def _class_order(model_class: type[BaseModel]) -> list[type]: + """Return MRO classes in documentation order, recursively. + + For single-inheritance: reversed MRO (base first, derived last). + For multiple-inheritance: primary chain → self → mixins, where + primary chain and each mixin are themselves recursively ordered. + """ + bases = _basemodel_bases(model_class) + + if len(bases) <= 1: + return [ + cls + for cls in reversed(model_class.__mro__) + if issubclass(cls, BaseModel) and cls is not BaseModel + ] + + primary = _class_order(bases[0]) + mixins = [cls for base in bases[1:] for cls in _class_order(base)] + return primary + [model_class] + mixins + + +def _field_order(model_class: type[BaseModel]) -> list[str]: + """Return model_fields keys in documentation order. + + Walks the class hierarchy recursively. 
At each level of multiple + inheritance, the first base is the "primary chain" and the rest + are "mixins." Primary chain and own fields come first, then mixin + fields in declaration order. Single-inheritance levels use + Pydantic's default reversed-MRO order. + """ + valid_names = set(model_class.model_fields.keys()) + result: list[str] = [] + seen: set[str] = set() + for cls in _class_order(model_class): + for name in getattr(cls, "__annotations__", {}): + if name not in seen and name in valid_names: + result.append(name) + seen.add(name) + return result + + +def extract_model( + model_class: type[BaseModel], + *, + entry_point: str | None = None, +) -> ModelSpec: + """Extract model specification from a Pydantic model class.""" + field_info_map = model_class.model_fields + ordered_keys = _field_order(model_class) + + fields: list[FieldSpec] = [] + for field_name in ordered_keys: + field_info = field_info_map[field_name] + output_name = resolve_field_alias(field_name, field_info) + + # Use field_info.annotation (resolved TypeVars) not get_type_hints + annotation = field_info.annotation + if annotation is None: + continue + + type_info = _merge_field_metadata(analyze_type(annotation), field_info) + + fields.append( + FieldSpec( + name=output_name, + type_info=type_info, + description=field_info.description or type_info.description, + is_required=_is_field_required(field_info, type_info), + ) + ) + + return ModelSpec( + name=model_class.__name__, + description=clean_docstring(model_class.__doc__), + fields=fields, + source_type=model_class, + entry_point=entry_point, + constraints=ModelConstraint.get_model_constraints(model_class), + ) + + +def expand_model_tree( + spec: FeatureSpec, + cache: dict[type, ModelSpec] | None = None, +) -> FeatureSpec: + """Populate model references on MODEL-kind fields, recursively. + + Walks *spec*'s fields and sets ``field.model`` for fields whose type + is a Pydantic model. 
Uses *cache* to reuse already-extracted ModelSpecs + and detect shared references. Marks fields whose model creates a cycle + in the ancestor chain with ``starts_cycle=True``. + + Mutates *spec* in place and returns it. + """ + if cache is None: + cache = {} + if isinstance(spec, ModelSpec) and spec.source_type is not None: + cache[spec.source_type] = spec + ancestors = frozenset({spec.source_type}) if spec.source_type else frozenset() + _expand_fields(spec.fields, cache, ancestors) + return spec + + +def _expand_fields( + fields: list[FieldSpec], + cache: dict[type, ModelSpec], + ancestors: frozenset[type], +) -> None: + """Recursive helper for expand_model_tree. + + Cache insertion happens before recursion — cycle detection depends + on the ancestor's ModelSpec being in the cache when the back-edge + is encountered. + """ + for field_spec in fields: + ti = field_spec.type_info + source = ti.source_type + if ti.kind == TypeKind.UNION: + # Union fields have no single model to recurse into. + # The field row appears in the output; skip inline expansion. 
+ continue + if ti.kind != TypeKind.MODEL or source is None: + continue + + if source in ancestors: + # Cycle: reuse existing spec, mark the edge + field_spec.model = cache.get(source) + field_spec.starts_cycle = True + elif source in cache: + # Shared reference: reuse, not a cycle + field_spec.model = cache[source] + else: + sub_spec = extract_model(source) + cache[source] = sub_spec # insert BEFORE recursing + field_spec.model = sub_spec + _expand_fields(sub_spec.fields, cache, ancestors | {source}) diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/newtype_extraction.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/newtype_extraction.py new file mode 100644 index 000000000..ff11c770a --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/newtype_extraction.py @@ -0,0 +1,26 @@ +"""NewType extraction.""" + +from .docstring import clean_docstring, is_custom_docstring +from .specs import NewTypeSpec +from .type_analyzer import analyze_type + +__all__ = ["extract_newtype"] + + +def extract_newtype(newtype_callable: object) -> NewTypeSpec: + """Extract NewType specification from a NewType callable.""" + type_info = analyze_type(newtype_callable) + doc = getattr(newtype_callable, "__doc__", None) + name = type_info.newtype_name or getattr(newtype_callable, "__name__", None) + if name is None: + msg = f"Cannot determine name for NewType: {newtype_callable!r}" + raise ValueError(msg) + description = ( + clean_docstring(doc) if is_custom_docstring(doc) else type_info.description + ) + return NewTypeSpec( + name=name, + description=description, + type_info=type_info, + source_type=newtype_callable, + ) diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/primitive_extraction.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/primitive_extraction.py new file mode 100644 index 000000000..ceb8ff7cd --- /dev/null +++ 
b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/primitive_extraction.py @@ -0,0 +1,95 @@ +"""Primitive extraction and partitioning.""" + +from annotated_types import Interval + +from .docstring import first_docstring_line +from .newtype_extraction import extract_newtype +from .specs import PrimitiveSpec, TypeIdentity +from .type_analyzer import TypeInfo, is_newtype + +__all__ = [ + "extract_numeric_bounds", + "extract_primitives", + "partition_primitive_and_geometry_names", +] + + +# Bound attribute names on annotated_types constraint objects (Ge, Gt, Le, +# Lt, Interval) used for numeric bound extraction. +# +# field_constraint_description.py has its own _BOUND_OPS for display formatting. +# The duplication is deliberate: these modules use the same attribute names +# for unrelated purposes (numeric extraction vs. prose rendering), and +# coupling them for four string literals adds a dependency without value. +_BOUND_ATTRS = ("ge", "gt", "le", "lt") + + +def extract_numeric_bounds(type_info: TypeInfo) -> Interval: + """Extract numeric bounds from a TypeInfo's constraints. + + Checks for ge, gt, le, and lt attributes on constraint objects. + Stops at the first constraint defining each bound. 
+ """ + found: dict[str, int | float] = {} + for cs in type_info.constraints: + c = cs.constraint + for attr in _BOUND_ATTRS: + if attr not in found: + val = getattr(c, attr, None) + if val is not None: + found[attr] = val + return Interval(**found) + + +def extract_primitives( + primitive_ids: list[TypeIdentity], +) -> list[PrimitiveSpec]: + """Extract specifications for numeric primitive types.""" + specs: list[PrimitiveSpec] = [] + for tid in primitive_ids: + newtype_spec = extract_newtype(tid.obj) + bounds = extract_numeric_bounds(newtype_spec.type_info) + description = first_docstring_line(getattr(tid.obj, "__doc__", None)) + float_bits = _extract_float_bits(tid.name) + specs.append( + PrimitiveSpec( + name=tid.name, + description=description, + bounds=bounds, + float_bits=float_bits, + ) + ) + return specs + + +_FLOAT_BITS: dict[str, int] = { + "float32": 32, + "float64": 64, +} + + +def _extract_float_bits(name: str) -> int | None: + """Extract bit width from a float type name like 'float32'.""" + return _FLOAT_BITS.get(name) + + +def partition_primitive_and_geometry_names( + primitive_module: object, +) -> tuple[list[TypeIdentity], list[TypeIdentity]]: + """Discover primitive and geometry types from a module's exports. + + NewType exports are numeric primitives. + Non-constraint class/enum exports are geometry types. 
+ """ + module_all: list[str] = getattr(primitive_module, "__all__", []) + primitives: list[TypeIdentity] = [] + geometries: list[TypeIdentity] = [] + + for name in module_all: + obj = getattr(primitive_module, name) + if is_newtype(obj): + primitives.append(TypeIdentity(obj, name)) + elif isinstance(obj, type) and not name.endswith("Constraint"): + geometries.append(TypeIdentity(obj, name)) + + return primitives, geometries diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/pydantic_extraction.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/pydantic_extraction.py new file mode 100644 index 000000000..120f4760d --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/pydantic_extraction.py @@ -0,0 +1,33 @@ +"""Pydantic built-in type extraction.""" + +import re + +from .docstring import first_docstring_line +from .specs import PydanticTypeSpec + +__all__ = ["extract_pydantic_type"] + +# Matches bare admonition labels like "Info:" or "Note:" with no following text. 
+_ADMONITION_LABEL = re.compile(r"^\w+:\s*$") + + +def _usable_description(doc: str | None) -> str | None: + """Return the first docstring line, or None if it's an admonition label.""" + line = first_docstring_line(doc) + if line is None or _ADMONITION_LABEL.match(line): + return None + return line + + +def extract_pydantic_type(cls: type) -> PydanticTypeSpec: + """Extract a PydanticTypeSpec from a Pydantic built-in type class.""" + module = getattr(cls, "__module__", "") + if not module.startswith("pydantic"): + msg = f"Expected a pydantic type, got {cls!r} from {module!r}" + raise ValueError(msg) + return PydanticTypeSpec( + name=cls.__name__, + description=_usable_description(cls.__doc__), + source_type=cls, + source_module=cls.__module__.removeprefix("pydantic."), + ) diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/specs.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/specs.py new file mode 100644 index 000000000..801e69c23 --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/specs.py @@ -0,0 +1,260 @@ +"""Data types for extracted specifications.""" + +from __future__ import annotations + +import functools +from dataclasses import dataclass, field +from typing import Any, Protocol, TypeGuard, runtime_checkable + +from annotated_types import Interval +from pydantic import BaseModel + +from overture.schema.system.model_constraint import ModelConstraint + +from .type_analyzer import TypeInfo, TypeKind, UnsupportedUnionError, analyze_type + +__all__ = [ + "AnnotatedField", + "EnumMemberSpec", + "EnumSpec", + "FeatureSpec", + "FieldSpec", + "ModelSpec", + "NewTypeSpec", + "PrimitiveSpec", + "PydanticTypeSpec", + "SupplementarySpec", + "TypeIdentity", + "filter_model_classes", + "is_model_class", + "is_pydantic_sourced", + "is_pydantic_type", + "is_union_alias", +] + + +@dataclass(frozen=True, eq=False) +class TypeIdentity: + """Unique identity for a type in the 
codegen system. + + Pairs a unique Python object (class, NewType callable, or union + annotation) with its display name. Equality and hashing delegate + to ``obj`` identity so registry lookups work regardless of how + the display name was derived. + """ + + obj: object + name: str + + @classmethod + def of(cls, obj: object) -> TypeIdentity: + """Derive a TypeIdentity from a named object (class, NewType, etc.).""" + assert obj is not None + return cls(obj, obj.__name__) # type: ignore[attr-defined] + + def __eq__(self, other: object) -> bool: + return isinstance(other, TypeIdentity) and self.obj is other.obj + + def __hash__(self) -> int: + return id(self.obj) + + @property + def module(self) -> str: + """Source module of the underlying object, or empty string.""" + return getattr(self.obj, "__module__", "") + + +class _SourceTypeIdentityMixin: + """Mixin providing ``identity`` from ``source_type`` and ``name``. + + Shared by EnumSpec, ModelSpec, NewTypeSpec, and PydanticTypeSpec -- + each has a ``source_type`` (the Python class/callable) and a ``name``. + UnionSpec uses ``source_annotation`` instead, so it defines its + own ``identity``. 
+ """ + + source_type: object | None + name: str + + @property + def identity(self) -> TypeIdentity: + assert self.source_type is not None + return TypeIdentity(self.source_type, self.name) + + +@dataclass +class EnumMemberSpec: + """Specification for an enum member.""" + + name: str + value: str + description: str | None + + +@dataclass +class EnumSpec(_SourceTypeIdentityMixin): + """Specification for an Enum class.""" + + name: str + description: str | None + members: list[EnumMemberSpec] = field(default_factory=list) + source_type: type | None = None + + +@dataclass +class FieldSpec: + """Specification for a model field.""" + + name: str + type_info: TypeInfo + description: str | None + is_required: bool + model: ModelSpec | None = None + starts_cycle: bool = False + + +@runtime_checkable +class FeatureSpec(Protocol): + """Shared interface for feature-level specs (ModelSpec, UnionSpec).""" + + name: str + description: str | None + source_type: type[BaseModel] | None + entry_point: str | None + constraints: tuple[ModelConstraint, ...] + + @property + def fields(self) -> list[FieldSpec]: ... + + @property + def identity(self) -> TypeIdentity: ... + + +@dataclass +class ModelSpec(_SourceTypeIdentityMixin): + """Specification for a Pydantic model.""" + + name: str + description: str | None + fields: list[FieldSpec] = field(default_factory=list) + source_type: type[BaseModel] | None = None + entry_point: str | None = None + constraints: tuple[ModelConstraint, ...] = () + + +@dataclass +class AnnotatedField: + """A FieldSpec paired with union variant provenance.""" + + field_spec: FieldSpec + variant_sources: tuple[str, ...] | None + + +# eq=False: contains mutable lists and a cached_property, so +# dataclass-generated __eq__ would be unreliable. 
+@dataclass(eq=False) +class UnionSpec: + """Specification for a discriminated union type alias.""" + + name: str + description: str | None + annotated_fields: list[AnnotatedField] + members: list[type[BaseModel]] + discriminator_field: str | None + discriminator_mapping: dict[str, type[BaseModel]] | None + source_annotation: object + common_base: type[BaseModel] + source_type: type[BaseModel] | None = field(default=None, init=False) + entry_point: str | None = None + constraints: tuple[ModelConstraint, ...] = () + + @functools.cached_property + def fields(self) -> list[FieldSpec]: + """Plain field list for tree expansion and supplementary collection.""" + return [af.field_spec for af in self.annotated_fields] + + @property + def identity(self) -> TypeIdentity: + return TypeIdentity(self.source_annotation, self.name) + + +@dataclass +class NewTypeSpec(_SourceTypeIdentityMixin): + """Specification for a NewType.""" + + name: str + description: str | None + type_info: TypeInfo + source_type: object | None = None + + +@dataclass +class PrimitiveSpec: + """Extracted specification for a numeric primitive type.""" + + name: str + description: str | None + bounds: Interval = field(default_factory=Interval) + float_bits: int | None = None + + +@dataclass +class PydanticTypeSpec(_SourceTypeIdentityMixin): + """Specification for a Pydantic built-in type (HttpUrl, EmailStr, etc.).""" + + name: str + description: str | None + source_type: type + source_module: str + + @property + def docs_url(self) -> str: + """Pydantic documentation URL for this type.""" + return ( + f"https://docs.pydantic.dev/latest/api/{self.source_module}" + f"/#pydantic.{self.source_module}.{self.name}" + ) + + +SupplementarySpec = EnumSpec | NewTypeSpec | ModelSpec | PydanticTypeSpec +"""Non-feature types referenced by feature models. + +Excludes PrimitiveSpec and geometry types, which are extracted +separately via dedicated functions. 
+""" + + +def is_pydantic_sourced(source_type: type | None) -> bool: + """Check whether *source_type* originates from the ``pydantic`` package.""" + return getattr(source_type, "__module__", "").startswith("pydantic") + + +def is_pydantic_type(ti: TypeInfo) -> bool: + """Check whether a TypeInfo represents a Pydantic built-in type.""" + return ( + ti.kind == TypeKind.PRIMITIVE + and ti.source_type is not None + and is_pydantic_sourced(ti.source_type) + ) + + +def is_model_class(obj: object) -> TypeGuard[type[BaseModel]]: + """Check whether *obj* is a concrete BaseModel subclass (not a type alias).""" + return isinstance(obj, type) and issubclass(obj, BaseModel) + + +def is_union_alias(obj: object) -> bool: + """Check whether *obj* is a discriminated union type alias of BaseModel subclasses.""" + try: + ti = analyze_type(obj) + except (TypeError, UnsupportedUnionError): + return False + return ti.kind == TypeKind.UNION + + +def filter_model_classes(models: dict[Any, Any]) -> list[type[BaseModel]]: + """Filter discovered models to concrete BaseModel subclasses. + + Excludes type aliases (like discriminated unions) and non-class entries. 
+ """ + return [v for v in models.values() if is_model_class(v)] diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/type_analyzer.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/type_analyzer.py new file mode 100644 index 000000000..fd4c9a06b --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/type_analyzer.py @@ -0,0 +1,344 @@ +"""Iterative type unwrapping for Pydantic model annotations.""" + +from __future__ import annotations + +import types +from collections.abc import Callable +from dataclasses import dataclass, field +from enum import Enum, auto +from typing import Annotated, Any, Literal, Union, get_args, get_origin + +from pydantic import BaseModel +from pydantic.fields import FieldInfo +from typing_extensions import Sentinel + +from .docstring import clean_docstring + +__all__ = [ + "ConstraintSource", + "TypeKind", + "TypeInfo", + "UnsupportedUnionError", + "analyze_type", + "is_newtype", + "single_literal_value", + "walk_type_info", +] + + +class UnsupportedUnionError(TypeError): + """Raised when analyze_type encounters a multi-type union it cannot represent.""" + + +class TypeKind(Enum): + """Classification of type kinds.""" + + PRIMITIVE = auto() + LITERAL = auto() + ENUM = auto() + MODEL = auto() + UNION = auto() + + +@dataclass(slots=True) +class ConstraintSource: + """A constraint paired with the NewType that contributed it.""" + + source_ref: object | None + source_name: str | None + constraint: object + + +@dataclass(slots=True) +class TypeInfo: + """Information about a type annotation.""" + + base_type: str + kind: TypeKind + is_optional: bool = False + list_depth: int = 0 + newtype_outer_list_depth: int = 0 + is_dict: bool = False + dict_key_type: TypeInfo | None = None + dict_value_type: TypeInfo | None = None + constraints: tuple[ConstraintSource, ...] = () + literal_values: tuple[object, ...] 
| None = None + source_type: type | None = None + newtype_name: str | None = None + newtype_ref: object | None = None + union_members: tuple[type[BaseModel], ...] | None = None + description: str | None = None + + @property + def is_list(self) -> bool: + """Whether this type has any list wrapping.""" + return self.list_depth > 0 + + +def walk_type_info(ti: TypeInfo, visitor: Callable[[TypeInfo], None]) -> None: + """Call *visitor* on *ti*, then recurse into dict key/value types. + + Captures the shared recursive descent pattern used by type collection + and reverse reference computation. Union members are ``type`` objects + (not ``TypeInfo``), so callers handle them directly. + """ + visitor(ti) + if ti.dict_key_type is not None: + walk_type_info(ti.dict_key_type, visitor) + if ti.dict_value_type is not None: + walk_type_info(ti.dict_value_type, visitor) + + +def is_newtype(annotation: object) -> bool: + """Check if annotation is a typing.NewType. + + NewType creates a callable with a __supertype__ attribute pointing + to the wrapped type. No public API exists for this check. + """ + return callable(annotation) and hasattr(annotation, "__supertype__") + + +def _is_union(origin: object) -> bool: + """Check if an origin represents a union type (X | Y or Union[X, Y]).""" + return origin in (types.UnionType, Union) + + +@dataclass(slots=True) +class _UnwrapState: + """Accumulated state from iterative type unwrapping. + + Tracks NewType names and refs during unwrapping: + - ``outermost_newtype_name`` / ``outermost_newtype_ref``: the first + NewType encountered, exposed as ``TypeInfo.newtype_name`` / ``newtype_ref``. + - ``last_newtype_name``: the most recently entered NewType name, used + as the resolved ``base_type`` for the terminal type. + - ``last_newtype_ref``: the most recently entered NewType callable, + used as constraint provenance (which NewType contributed each constraint). 
@dataclass(slots=True)
class _UnwrapState:
    """Accumulated state from iterative type unwrapping.

    Tracks NewType names and refs during unwrapping:
    - ``outermost_newtype_name`` / ``outermost_newtype_ref``: the first
      NewType encountered, exposed as ``TypeInfo.newtype_name`` / ``newtype_ref``.
    - ``last_newtype_name``: the most recently entered NewType name, used
      as the resolved ``base_type`` for the terminal type.
    - ``last_newtype_ref``: the most recently entered NewType callable,
      used as constraint provenance (which NewType contributed each constraint).
    - ``newtype_outer_list_depth``: list layers accumulated before entering
      the outermost NewType boundary.
    """

    is_optional: bool = False
    list_depth: int = 0
    newtype_outer_list_depth: int = 0
    is_dict: bool = False
    dict_key_type: TypeInfo | None = None
    dict_value_type: TypeInfo | None = None
    constraints: list[ConstraintSource] = field(default_factory=list)
    outermost_newtype_name: str | None = None
    outermost_newtype_ref: object | None = None
    last_newtype_name: str | None = None
    last_newtype_ref: object | None = None
    description: str | None = None

    def add_constraint(self, constraint: object) -> None:
        # Attribute the constraint to the innermost NewType entered so far;
        # both provenance fields are None for constraints found outside any
        # NewType boundary.
        self.constraints.append(
            ConstraintSource(self.last_newtype_ref, self.last_newtype_name, constraint)
        )

    def build_type_info(
        self,
        *,
        base_type: str,
        kind: TypeKind,
        literal_values: tuple[object, ...] | None = None,
        source_type: type | None = None,
        union_members: tuple[type[BaseModel], ...] | None = None,
    ) -> TypeInfo:
        # Snapshot the accumulated wrapper state into an immutable TypeInfo.
        return TypeInfo(
            base_type=base_type,
            kind=kind,
            is_optional=self.is_optional,
            list_depth=self.list_depth,
            newtype_outer_list_depth=self.newtype_outer_list_depth,
            is_dict=self.is_dict,
            dict_key_type=self.dict_key_type,
            dict_value_type=self.dict_value_type,
            constraints=tuple(self.constraints),
            literal_values=literal_values,
            source_type=source_type,
            newtype_name=self.outermost_newtype_name,
            newtype_ref=self.outermost_newtype_ref,
            union_members=union_members,
            description=self.description,
        )


def analyze_type(annotation: object) -> TypeInfo:
    """Analyze a type annotation and return TypeInfo.

    Iteratively unwraps type wrappers (Annotated, Optional, list, NewType) until
    reaching a terminal type.

    Raises
    ------
    UnsupportedUnionError
        For multi-type unions whose members are not all BaseModel
        subclasses, or unions with no concrete (non-None) member.
    TypeError
        For bare ``list``/``dict`` without type arguments (raised from
        this loop or from ``_classify_terminal``).
    """
    state = _UnwrapState()

    while True:
        origin = get_origin(annotation)

        # Handle NewType (e.g., int32 = NewType("int32", Annotated[int, ...]))
        if is_newtype(annotation):
            name = annotation.__name__  # type: ignore[attr-defined]
            state.last_newtype_name = name
            state.last_newtype_ref = annotation
            if state.outermost_newtype_name is None:
                # Record how many list wrappers surrounded the outermost
                # NewType before descending into it.
                state.newtype_outer_list_depth = state.list_depth
                state.outermost_newtype_name = name
                state.outermost_newtype_ref = annotation
            annotation = annotation.__supertype__  # type: ignore[attr-defined]
            continue

        # Handle Annotated types (Annotated[X, metadata...])
        if origin is Annotated:
            args = get_args(annotation)
            annotation = args[0]
            for c in args[1:]:
                if isinstance(c, FieldInfo):
                    # First (outermost) description wins.
                    if c.description is not None and state.description is None:
                        state.description = clean_docstring(c.description)
                    for m in c.metadata:
                        state.add_constraint(m)
                else:
                    state.add_constraint(c)
            continue

        # Handle union types (X | None or Optional[X])
        if _is_union(origin):
            args = get_args(annotation)
            # Filter out None, Sentinel instances (unset-style marker values),
            # and Literal alternatives (e.g., HttpUrl | Literal[""] where the
            # Literal is a special-value sentinel, not the primary type).
            if any(a is types.NoneType for a in args):
                state.is_optional = True

            non_none_args = [
                a
                for a in args
                if a is not types.NoneType and not isinstance(a, Sentinel)
            ]

            # Only filter out Literal arms when a concrete (non-Literal) type
            # exists. Without this guard, Optional[Literal["x"]] would lose
            # all args because the Literal *is* the primary type.
            concrete_args = [a for a in non_none_args if get_origin(a) is not Literal]
            real_args = concrete_args if concrete_args else non_none_args

            if len(real_args) > 1:
                # Check if all real args are BaseModel subclasses
                # (unwrap Annotated wrappers to get the actual class)
                members: list[type[BaseModel]] = []
                for arg in real_args:
                    inner = arg
                    if get_origin(inner) is Annotated:
                        inner = get_args(inner)[0]
                    if isinstance(inner, type) and issubclass(inner, BaseModel):
                        members.append(inner)
                    else:
                        raise UnsupportedUnionError(
                            f"Multi-type unions not supported: {annotation}"
                        )
                return state.build_type_info(
                    base_type=members[0].__name__,
                    kind=TypeKind.UNION,
                    union_members=tuple(members),
                )

            if not real_args:
                raise UnsupportedUnionError(
                    f"Union with no concrete types: {annotation}"
                )

            annotation = real_args[0]
            continue

        # Handle list types (list[X])
        if origin is list:
            args = get_args(annotation)
            if not args:
                raise TypeError("Bare list without type argument is not supported")
            state.list_depth += 1
            annotation = args[0]
            continue

        # Handle dict types (dict[K, V])
        if origin is dict:
            args = get_args(annotation)
            if not args:
                raise TypeError("Bare dict without type arguments is not supported")
            state.is_dict = True
            # Key/value types get their own independent analyses.
            state.dict_key_type = analyze_type(args[0])
            state.dict_value_type = analyze_type(args[1])
            base_type = state.last_newtype_name or "dict"
            return state.build_type_info(
                base_type=base_type,
                kind=TypeKind.PRIMITIVE,
                source_type=dict,
            )

        # No wrapper recognized: *annotation* is terminal.
        break

    return _classify_terminal(annotation, state)
Literal: + args = get_args(annotation) + return state.build_type_info( + base_type="Literal", + kind=TypeKind.LITERAL, + literal_values=tuple(args), + ) + + if not isinstance(annotation, type): + raise TypeError(f"Unsupported annotation type: {type(annotation)}") + + if issubclass(annotation, list): + raise TypeError("Bare list without type argument is not supported") + + if issubclass(annotation, dict): + raise TypeError("Bare dict without type arguments is not supported") + + # Determine kind from type hierarchy + if issubclass(annotation, Enum): + kind = TypeKind.ENUM + elif issubclass(annotation, BaseModel): + kind = TypeKind.MODEL + else: + kind = TypeKind.PRIMITIVE + + base_type = state.last_newtype_name or annotation.__name__ + + return state.build_type_info( + base_type=base_type, + kind=kind, + source_type=annotation, + ) + + +def single_literal_value(annotation: object) -> object | None: + """Extract a single literal value from a type annotation, or None. + + Delegates to analyze_type for all unwrapping, then checks + whether the result is a single-value Literal. Multi-value + Literals return None — callers needing all values should use + ``analyze_type`` and read ``literal_values`` directly. 
+ """ + try: + ti = analyze_type(annotation) + except (TypeError, UnsupportedUnionError): + return None + if ( + ti.kind == TypeKind.LITERAL + and ti.literal_values + and len(ti.literal_values) == 1 + ): + return ti.literal_values[0] + return None diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/type_registry.py b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/type_registry.py new file mode 100644 index 000000000..505657866 --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/extraction/type_registry.py @@ -0,0 +1,113 @@ +"""Type registry mapping Python types to target representations.""" + +from dataclasses import dataclass + +from .type_analyzer import TypeInfo + +__all__ = [ + "TypeMapping", + "PRIMITIVE_TYPES", + "get_type_mapping", + "is_semantic_newtype", + "resolve_type_name", +] + + +@dataclass(frozen=True) +class TypeMapping: + """Maps a type to its representation in different targets.""" + + markdown: str + + def for_target(self, target: str) -> str: + """Get the type representation for a named target.""" + if target != "markdown": + raise ValueError(f"Unknown target {target!r}, expected 'markdown'") + return self.markdown + + +PRIMITIVE_TYPES: dict[str, TypeMapping] = { + # Signed integers + "int8": TypeMapping(markdown="int8"), + "int16": TypeMapping(markdown="int16"), + "int32": TypeMapping(markdown="int32"), + "int64": TypeMapping(markdown="int64"), + # Unsigned integers + "uint8": TypeMapping(markdown="uint8"), + "uint16": TypeMapping(markdown="uint16"), + "uint32": TypeMapping(markdown="uint32"), + # Floating point + "float32": TypeMapping(markdown="float32"), + "float64": TypeMapping(markdown="float64"), + # Basic types + "str": TypeMapping(markdown="string"), + "bool": TypeMapping(markdown="boolean"), + # Python builtins (aliases to their portable equivalents) + "int": TypeMapping(markdown="int64"), + "float": TypeMapping(markdown="float64"), + # Geometry 
types + "Geometry": TypeMapping(markdown="geometry"), + "BBox": TypeMapping(markdown="bbox"), +} + + +def is_semantic_newtype(type_info: TypeInfo) -> bool: + """Whether a type represents a semantic NewType that should be displayed by name. + + Returns True for unregistered NewTypes (HexColor, Sources) and NewTypes + that wrap a different base type (FeatureVersion wrapping int32, Id wrapping + NoWhitespaceString). Returns False for registered primitives (int32, Geometry). + """ + if type_info.newtype_name is None: + return False + if type_info.newtype_name != type_info.base_type: + return True + return get_type_mapping(type_info.base_type) is None + + +def get_type_mapping(type_name: str) -> TypeMapping | None: + """Look up a type mapping by name. + + Parameters + ---------- + type_name : str + The type name to look up (e.g., "int32", "str", "Geometry"). + Also accepts Python builtin names ("int" -> int64, "float" -> float64). + + Returns + ------- + TypeMapping or None + The TypeMapping for the type, or None if not found. + """ + return PRIMITIVE_TYPES.get(type_name) + + +def resolve_type_name(type_info: TypeInfo, target: str) -> str: + """Resolve a TypeInfo to the base type string for a given target. + + Looks up the type in the registry first (trying source_type if base_type + has no mapping). Falls back to the base_type name as-is. + + Parameters + ---------- + type_info : TypeInfo + The analyzed type information. + target : str + The output target ("markdown"). + + Returns + ------- + str + The resolved base type name string for the target. + """ + mapping = get_type_mapping(type_info.base_type) + if mapping is None and type_info.source_type is not None: + mapping = get_type_mapping(type_info.source_type.__name__) + if mapping is not None: + return mapping.for_target(target) + + # Semantic NewType wrapping an unregistered type (e.g., Sources wrapping + # SourceItem): use the underlying class name rather than the NewType alias. 
"""Union extraction and discriminator handling."""

from __future__ import annotations

from typing import Annotated, get_args, get_origin

from pydantic import BaseModel
from pydantic.fields import FieldInfo

from overture.schema.system.feature import resolve_discriminator_field_name

from .model_extraction import extract_model, resolve_field_alias
from .specs import AnnotatedField, UnionSpec, is_model_class
from .type_analyzer import TypeInfo, TypeKind, analyze_type, single_literal_value

__all__ = ["extract_discriminator", "extract_union"]


def _find_common_base(members: list[type[BaseModel]]) -> type[BaseModel]:
    """Find the most-derived common BaseModel ancestor of all members."""
    # For each member keep only model classes in its MRO (BaseModel itself
    # excluded), preserving most-derived-first order.
    filtered_mros = [
        [c for c in cls.__mro__ if is_model_class(c) and c is not BaseModel]
        for cls in members
    ]
    common = set(filtered_mros[0])
    for mro in filtered_mros[1:]:
        common &= set(mro)
    if not common:
        raise ValueError(
            f"No common BaseModel ancestor for {[m.__name__ for m in members]}"
        )

    def max_mro_index(cls: type) -> int:
        # Worst-case (least-derived) position of *cls* across all MROs.
        return max(mro.index(cls) for mro in filtered_mros)

    # The shared ancestor with the smallest worst-case MRO index is the
    # most-derived class common to every member.
    return min(common, key=max_mro_index)


def _find_field_by_alias(model: type[BaseModel], alias: str) -> FieldInfo | None:
    """Find a field in model_fields by alias-resolved name."""
    # Fast path: the Python attribute name matches directly.
    direct = model.model_fields.get(alias)
    if direct is not None:
        return direct
    # Slow path: compare against each field's resolved (aliased) name.
    for py_name, fi in model.model_fields.items():
        if resolve_field_alias(py_name, fi) == alias:
            return fi
    return None


def extract_discriminator(
    annotation: object,
    members: list[type[BaseModel]],
) -> tuple[str | None, dict[str, type[BaseModel]] | None]:
    """Extract discriminator field name and value-to-type mapping.

    Returns ``(None, None)`` when *annotation* is not Annotated or carries
    no discriminator; returns ``(name, None)`` when a discriminator exists
    but no member exposes a single-value Literal for it.
    """
    if get_origin(annotation) is not Annotated:
        return None, None

    # The discriminator rides on a FieldInfo inside the Annotated metadata.
    disc_field_name: str | None = None
    for metadata in get_args(annotation)[1:]:
        if isinstance(metadata, FieldInfo):
            disc_field_name = resolve_discriminator_field_name(metadata.discriminator)
            if disc_field_name is not None:
                break

    if disc_field_name is None:
        return None, None

    # Map each member's literal discriminator value (stringified) to the
    # member class; members without a single-value Literal are skipped.
    mapping: dict[str, type[BaseModel]] = {}
    for member in members:
        field_info = _find_field_by_alias(member, disc_field_name)
        if field_info and field_info.annotation is not None:
            lit_val = single_literal_value(field_info.annotation)
            if lit_val is not None:
                mapping[str(lit_val)] = member

    return disc_field_name, mapping or None


_TypeShape = tuple[str, TypeKind, bool, int]
_FieldKey = tuple[str, _TypeShape]


def _type_shape(ti: TypeInfo) -> _TypeShape:
    """Structural shape for dedup -- excludes source_type which varies across members."""
    return (ti.base_type, ti.kind, ti.is_optional, ti.list_depth)


def extract_union(
    name: str,
    annotation: object,
    *,
    entry_point: str | None = None,
) -> UnionSpec:
    """Extract a UnionSpec from a discriminated union type alias.

    Raises TypeError when *annotation* does not analyze to a UNION kind.
    """
    ti = analyze_type(annotation)
    if ti.kind != TypeKind.UNION or ti.union_members is None:
        raise TypeError(f"{name} is not a union type alias")

    members = list(ti.union_members)
    common_base = _find_common_base(members)

    base_spec = extract_model(common_base)
    shared_field_names = {f.name for f in base_spec.fields}

    member_specs = [(m, extract_model(m)) for m in members]

    annotated_fields: list[AnnotatedField] = []

    # Shared fields first (from common base); variant_sources=None marks
    # a field as common to all members.
    for fs in base_spec.fields:
        annotated_fields.append(AnnotatedField(field_spec=fs, variant_sources=None))

    # Variant-specific fields: collect by (name, type identity) for dedup
    seen: dict[_FieldKey, AnnotatedField] = {}

    for member_cls, member_spec in member_specs:
        for fs in member_spec.fields:
            if fs.name in shared_field_names:
                continue
            key = (fs.name, _type_shape(fs.type_info))
            # Same-shaped field seen in an earlier member: extend its
            # source list (keeping the latest FieldSpec).
            existing = seen.get(key)
            prior_sources = existing.variant_sources or () if existing else ()
            seen[key] = AnnotatedField(
                field_spec=fs,
                variant_sources=(*prior_sources, member_cls.__name__),
            )

    annotated_fields.extend(seen.values())

    disc_field, disc_mapping = extract_discriminator(annotation, members)

    return UnionSpec(
        name=name,
        description=ti.description,
        annotated_fields=annotated_fields,
        members=members,
        discriminator_field=disc_field,
        discriminator_mapping=disc_mapping,
        source_annotation=annotation,
        common_base=common_base,
        entry_point=entry_point,
    )
+""" + +from __future__ import annotations + +import sys +from collections.abc import Iterable, Mapping +from pathlib import PurePosixPath + +__all__ = [ + "OUTPUT_ROOT", + "compute_output_dir", + "compute_schema_root", + "entry_point_class", + "entry_point_module", + "is_package_module", + "module_relpath", + "output_dir_for_entry_point", +] + +OUTPUT_ROOT = PurePosixPath(".") + + +def _split_entry_point(entry_point_path: str) -> tuple[str, str]: + """Split ``"module.path:ClassName"`` into its two parts. + + >>> _split_entry_point("overture.schema.buildings:Building") + ('overture.schema.buildings', 'Building') + """ + if ":" not in entry_point_path: + msg = f"Expected 'module:Class' format, got {entry_point_path!r}" + raise ValueError(msg) + module, cls = entry_point_path.split(":", 1) + return module, cls + + +def entry_point_module(entry_point_path: str) -> str: + """Extract module path from entry-point-style path. + + >>> entry_point_module("overture.schema.buildings:Building") + 'overture.schema.buildings' + """ + return _split_entry_point(entry_point_path)[0] + + +def entry_point_class(entry_point_path: str) -> str: + """Extract class name from entry-point-style path. + + >>> entry_point_class("overture.schema.buildings:Building") + 'Building' + """ + return _split_entry_point(entry_point_path)[1] + + +def compute_schema_root(module_paths: Iterable[str]) -> str: + """Find the longest common dotted prefix of module paths. + + Deduplicates inputs first. For a single unique path, drops the + last component (the module itself). 
+ """ + paths = sorted(set(module_paths)) + if not paths: + msg = "No module paths provided" + raise ValueError(msg) + + segments = [p.split(".") for p in paths] + if len(segments) == 1: + return ".".join(segments[0][:-1]) + + common: list[str] = [] + for parts in zip(*segments, strict=False): + if len(set(parts)) == 1: + common.append(parts[0]) + else: + break + return ".".join(common) + + +def module_relpath(module: str, root: str) -> str: + """Strip the schema root prefix from a dotted module path.""" + if not root: + return module + if module == root: + return "" + prefix = root + "." + if not module.startswith(prefix): + msg = f"Module {module!r} does not start with root {root!r}" + raise ValueError(msg) + return module[len(prefix) :] + + +def is_package_module( + module: str, + module_registry: Mapping[str, object] | None = None, +) -> bool: + """Check whether a module is a package (directory) or a file module. + + Packages have ``__path__``; file modules do not (PEP 302). + """ + registry: Mapping[str, object] = ( + module_registry if module_registry is not None else sys.modules + ) + mod = registry.get(module) + if mod is None: + msg = f"Module {module!r} not found in registry" + raise ValueError(msg) + return hasattr(mod, "__path__") + + +def output_dir_for_entry_point( + entry_point_path: str | None, + schema_root: str, + module_registry: Mapping[str, object] | None = None, +) -> PurePosixPath: + """Compute output directory from an entry-point-style path. + + Raises ValueError if *entry_point_path* is None. + """ + if entry_point_path is None: + msg = "entry_point_path must not be None" + raise ValueError(msg) + module = entry_point_module(entry_point_path) + return compute_output_dir(module, schema_root, module_registry) + + +def compute_output_dir( + module: str, + schema_root: str, + module_registry: Mapping[str, object] | None = None, +) -> PurePosixPath: + """Compute output directory for a module, mirroring package structure. 
"""Supplementary type discovery by walking expanded feature trees.

Walks FieldSpec.model references for sub-models (already extracted),
and extracts enums and NewTypes on first encounter.
"""

from collections.abc import Sequence
from typing import Annotated, get_args, get_origin

from ..extraction.enum_extraction import extract_enum
from ..extraction.model_extraction import expand_model_tree, extract_model
from ..extraction.newtype_extraction import extract_newtype
from ..extraction.pydantic_extraction import extract_pydantic_type
from ..extraction.specs import (
    FeatureSpec,
    FieldSpec,
    ModelSpec,
    SupplementarySpec,
    TypeIdentity,
    is_pydantic_type,
)
from ..extraction.type_analyzer import (
    TypeInfo,
    TypeKind,
    analyze_type,
    is_newtype,
    walk_type_info,
)
from ..extraction.type_registry import is_semantic_newtype

__all__ = ["collect_all_supplementary_types"]


def collect_all_supplementary_types(
    feature_specs: Sequence[FeatureSpec],
) -> dict[TypeIdentity, SupplementarySpec]:
    """Collect supplementary types by walking expanded feature trees.

    Requires that expand_model_tree has been called on all feature specs
    first. Walks FieldSpec.model references for sub-models (already
    extracted), and extracts enums and NewTypes on first encounter.

    Returns a dict mapping TypeIdentity to extracted specs. Two types
    with the same class name from different modules are keyed separately.
    """
    # Top-level feature objects are excluded from the supplementary set;
    # they get their own pages elsewhere.
    feature_objs: set[object] = {spec.identity.obj for spec in feature_specs}
    all_specs: dict[TypeIdentity, SupplementarySpec] = {}
    # Guards against re-walking (and infinite recursion on) shared models.
    visited_models: set[object] = set()

    def _register_newtype(newtype_ref: object, name: str) -> bool:
        """Register a NewType if not already present. Returns True if registered."""
        nt_id = TypeIdentity(newtype_ref, name)
        if nt_id in all_specs:
            return False
        all_specs[nt_id] = extract_newtype(newtype_ref)
        return True

    def _collect_from_model(model_spec: ModelSpec) -> None:
        # Skip models already walked, and models that are themselves
        # top-level features.
        if (
            model_spec.source_type in visited_models
            or model_spec.source_type in feature_objs
        ):
            return
        visited_models.add(model_spec.source_type)
        all_specs[model_spec.identity] = model_spec
        _collect_from_fields(model_spec.fields)

    def _collect_inner_newtypes(newtype_ref: object) -> None:
        """Walk a NewType's __supertype__ chain for intermediate semantic NewTypes."""
        annotation = getattr(newtype_ref, "__supertype__", None)
        while annotation is not None:
            # Annotated layers are transparent for this walk.
            if get_origin(annotation) is Annotated:
                annotation = get_args(annotation)[0]
                continue
            if is_newtype(annotation):
                inner_ti = analyze_type(annotation)
                if (
                    inner_ti.newtype_ref is not None
                    and inner_ti.newtype_name is not None
                    and is_semantic_newtype(inner_ti)
                ):
                    _register_newtype(inner_ti.newtype_ref, inner_ti.newtype_name)
                annotation = getattr(annotation, "__supertype__", None)
                continue
            # Reached a non-NewType, non-Annotated terminal: stop.
            break

    def _collect_from_type_info(ti: TypeInfo) -> None:
        """Collect supplementary types from a single TypeInfo.

        Uses walk_type_info for dict key/value recursion. Handles all
        TypeKind variants without early returns so newtype extraction
        and dict recursion apply regardless of kind.
        """

        def _visit(node: TypeInfo) -> None:
            # UNION, ENUM, and pydantic (PRIMITIVE) are mutually exclusive
            # by TypeKind. NewType extraction is orthogonal -- a node can be
            # a NewType-wrapped ENUM, for instance.
            if node.kind == TypeKind.UNION and node.union_members:
                # Walk each member's fields for supplementary types.
                # Members that are also top-level feature specs are skipped
                # by the feature_objs guard in _collect_from_model.
                for member_cls in node.union_members:
                    member_spec = extract_model(member_cls)
                    expand_model_tree(member_spec)
                    _collect_from_model(member_spec)
            elif node.kind == TypeKind.ENUM and node.source_type is not None:
                enum_id = TypeIdentity.of(node.source_type)
                if enum_id not in all_specs:
                    all_specs[enum_id] = extract_enum(node.source_type)
            elif is_pydantic_type(node):
                assert node.source_type is not None  # guaranteed by is_pydantic_type
                pid = TypeIdentity.of(node.source_type)
                if pid not in all_specs:
                    all_specs[pid] = extract_pydantic_type(node.source_type)

            # Semantic NewTypes always get extracted, including intermediate
            # NewTypes in the wrapping chain (e.g., Id wraps NoWhitespaceString
            # wraps str -- both Id and NoWhitespaceString get pages).
            if (
                node.newtype_ref is not None
                and node.newtype_name is not None
                and is_semantic_newtype(node)
            ):
                newly_registered = _register_newtype(
                    node.newtype_ref, node.newtype_name
                )
                if newly_registered:
                    _collect_inner_newtypes(node.newtype_ref)

        walk_type_info(ti, _visit)

    def _collect_from_fields(fields: list[FieldSpec]) -> None:
        # A single field can match multiple conditions (e.g., Sources is both
        # a semantic NewType and wraps a MODEL-kind type), so checks are
        # independent `if` statements, not `elif`.
        for field_spec in fields:
            ti = field_spec.type_info
            _collect_from_type_info(ti)

            # MODEL-kind fields (whether direct or via NewType wrapper) get expanded
            if ti.kind == TypeKind.MODEL and ti.source_type is not None:
                if field_spec.model is None:
                    msg = (
                        f"MODEL-kind field {field_spec.name!r} has source_type "
                        f"but model=None — call expand_model_tree first"
                    )
                    raise RuntimeError(msg)
                # Cycle-starting fields are not descended into again.
                if not field_spec.starts_cycle:
                    _collect_from_model(field_spec.model)

    for spec in feature_specs:
        _collect_from_fields(spec.fields)

    return all_specs
falling back to a slug filename. + + Always returns a usable link string. Use when the caller needs a + link regardless of whether the type has a registered page. + """ + return self.resolve_link(identity) or slug_filename(identity.name) + + +def _is_normalized(path: PurePosixPath) -> bool: + """Check whether the path contains no '..' or '.' components (except root '.').""" + return ".." not in path.parts and path.parts.count(".") <= 1 + + +def relative_link(source: PurePosixPath, target: PurePosixPath) -> str: + """Compute a relative path from source file to target file. + + Both paths must be normalized (no ``..`` components) and relative + to the same output root. + """ + if not _is_normalized(source): + msg = f"Source path not normalized: {source}" + raise ValueError(msg) + if not _is_normalized(target): + msg = f"Target path not normalized: {target}" + raise ValueError(msg) + source_dir = source.parent + # Count how many levels up from source_dir to common ancestor, + # then descend to target. PurePosixPath doesn't have os.path.relpath, + # so compute manually. + source_parts = source_dir.parts + target_parts = target.parts + + # Find common prefix length + common = 0 + for s, t in zip(source_parts, target_parts, strict=False): + if s != t: + break + common += 1 + + ups = len(source_parts) - common + downs = target_parts[common:] + + parts = [".."] * ups + list(downs) + return "/".join(parts) if parts else "." diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/path_assignment.py b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/path_assignment.py new file mode 100644 index 000000000..2700d5a9e --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/path_assignment.py @@ -0,0 +1,115 @@ +"""Map types to markdown output file paths. + +Uses module-mirrored output directories: output paths derive from +the source Python module path relative to schema_root. 
+""" + +from collections.abc import Sequence +from pathlib import PurePosixPath + +from ..extraction.case_conversion import slug_filename +from ..extraction.specs import ( + FeatureSpec, + PydanticTypeSpec, + SupplementarySpec, + TypeIdentity, +) +from ..layout.module_layout import compute_output_dir, output_dir_for_entry_point + +__all__ = [ + "GEOMETRY_PAGE", + "PRIMITIVES_PAGE", + "build_placement_registry", + "resolve_output_path", +] + +# Aggregate page paths. +PRIMITIVES_PAGE = PurePosixPath("system/primitive/primitives.md") +GEOMETRY_PAGE = PurePosixPath("system/primitive/geometry.md") + + +def build_placement_registry( + feature_specs: Sequence[FeatureSpec], + all_specs: dict[TypeIdentity, SupplementarySpec], + primitive_names: list[TypeIdentity], + geometry_names: list[TypeIdentity], + schema_root: str, +) -> dict[TypeIdentity, PurePosixPath]: + """Build a mapping from TypeIdentity to output file paths. + + Uses module-mirrored output directories: output paths derive from + the source Python module path relative to schema_root. 
+ """ + registry: dict[TypeIdentity, PurePosixPath] = _aggregate_page_entries( + primitive_names, geometry_names + ) + + feature_dirs: set[PurePosixPath] = set() + for spec in feature_specs: + spec_dir = output_dir_for_entry_point(spec.entry_point, schema_root) + registry[spec.identity] = _md_path(spec_dir, spec.name) + feature_dirs.add(spec_dir) + + for tid, supp_spec in all_specs.items(): + if tid in registry: + continue + if isinstance(supp_spec, PydanticTypeSpec): + registry[tid] = ( + PurePosixPath("pydantic") + / supp_spec.source_module + / slug_filename(tid.name) + ) + continue + source_module = getattr(supp_spec.source_type, "__module__", None) + if source_module is None: + continue + output_dir = compute_output_dir(source_module, schema_root) + output_dir = _nest_under_types(output_dir, feature_dirs) + registry[tid] = _md_path(output_dir, tid.name) + + return registry + + +def resolve_output_path( + identity: TypeIdentity, + registry: dict[TypeIdentity, PurePosixPath] | None, +) -> PurePosixPath: + """Look up a type's output path from the registry, with flat-file fallback.""" + if registry is not None and identity in registry: + return registry[identity] + return PurePosixPath(slug_filename(identity.name)) + + +def _aggregate_page_entries( + primitive_names: list[TypeIdentity], + geometry_names: list[TypeIdentity], +) -> dict[TypeIdentity, PurePosixPath]: + """Pre-populate registry entries for types documented on aggregate pages.""" + entries: dict[TypeIdentity, PurePosixPath] = dict.fromkeys( + primitive_names, PRIMITIVES_PAGE + ) + entries.update(dict.fromkeys(geometry_names, GEOMETRY_PAGE)) + return entries + + +def _nest_under_types( + output_dir: PurePosixPath, feature_dirs: set[PurePosixPath] +) -> PurePosixPath: + """Insert ``types/`` after the feature directory portion. + + If *output_dir* equals or is a subdirectory of a feature directory, + returns a path with ``types/`` inserted after the feature directory. 
+ Otherwise returns *output_dir* unchanged. + """ + for fd in sorted(feature_dirs, key=lambda p: len(p.parts), reverse=True): + try: + relative = output_dir.relative_to(fd) + except ValueError: + continue + return fd / "types" / relative + return output_dir + + +def _md_path(directory: PurePosixPath, name: str) -> PurePosixPath: + """Build a .md file path from a directory and a PascalCase type name.""" + return directory / slug_filename(name) diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/pipeline.py b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/pipeline.py new file mode 100644 index 000000000..0bd143c56 --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/pipeline.py @@ -0,0 +1,165 @@ +"""Markdown generation pipeline: render pages without I/O. + +Orchestrates tree expansion, type collection, placement, reverse +references, and rendering into a list of RenderedPage objects. The +caller decides what to do with them (write to disk, add frontmatter, +stream to stdout, etc.). 
+""" + +from collections.abc import Sequence +from dataclasses import dataclass +from pathlib import PurePosixPath + +import overture.schema.system.primitive as _system_primitive +from overture.schema.system.primitive import GeometryType + +from ..extraction.examples import ExampleRecord, collect_dict_paths, load_examples +from ..extraction.model_extraction import expand_model_tree +from ..extraction.primitive_extraction import ( + extract_primitives, + partition_primitive_and_geometry_names, +) +from ..extraction.specs import ( + EnumSpec, + FeatureSpec, + ModelSpec, + NewTypeSpec, + PydanticTypeSpec, + SupplementarySpec, + TypeIdentity, + UnionSpec, +) +from ..layout.type_collection import collect_all_supplementary_types +from .link_computation import LinkContext +from .path_assignment import ( + GEOMETRY_PAGE, + PRIMITIVES_PAGE, + build_placement_registry, + resolve_output_path, +) +from .renderer import ( + render_enum, + render_feature, + render_geometry_from_values, + render_newtype, + render_primitives_from_specs, + render_pydantic_type, +) +from .reverse_references import UsedByEntry, compute_reverse_references + +__all__ = ["RenderedPage", "generate_markdown_pages"] + + +@dataclass(frozen=True, slots=True) +class RenderedPage: + """A rendered page with its content and output path.""" + + content: str + path: PurePosixPath + is_feature: bool = False + + +def _load_model_examples( + spec: FeatureSpec, +) -> list[ExampleRecord] | None: + """Load examples for a feature spec, returning None when absent.""" + if isinstance(spec, UnionSpec): + pyproject_source = spec.members[0] if spec.members else None + validation_type = spec.source_annotation + model_fields = spec.common_base.model_fields + else: + pyproject_source = spec.source_type + validation_type = spec.source_type + model_fields = spec.source_type.model_fields if spec.source_type else {} + if not pyproject_source: + return None + field_names = [f.name for f in spec.fields] + dict_paths = 
collect_dict_paths(spec.fields) + examples = load_examples( + validation_type, + spec.name, + field_names, + pyproject_source=pyproject_source, + model_fields=model_fields, + dict_paths=dict_paths, + ) + return examples or None + + +def _render_supplement( + tid: TypeIdentity, + spec: SupplementarySpec, + registry: dict[TypeIdentity, PurePosixPath], + reverse_refs: dict[TypeIdentity, list[UsedByEntry]], +) -> RenderedPage: + """Render a single supplementary type page.""" + output_path = resolve_output_path(tid, registry) + ctx = LinkContext(output_path, registry) + used_by = reverse_refs.get(tid) + + if isinstance(spec, EnumSpec): + content = render_enum(spec, link_ctx=ctx, used_by=used_by) + elif isinstance(spec, NewTypeSpec): + content = render_newtype(spec, ctx, used_by=used_by) + elif isinstance(spec, ModelSpec): + content = render_feature(spec, ctx, used_by=used_by) + elif isinstance(spec, PydanticTypeSpec): + content = render_pydantic_type(spec, link_ctx=ctx, used_by=used_by) + else: + raise TypeError(f"Unhandled SupplementarySpec variant: {type(spec).__name__}") + + return RenderedPage(content=content, path=output_path) + + +def generate_markdown_pages( + feature_specs: Sequence[FeatureSpec], + schema_root: str, +) -> list[RenderedPage]: + """Generate all markdown pages from feature specs. + + Returns rendered pages without writing to disk. The caller handles + I/O, frontmatter injection, and any output-format-specific concerns + (like Docusaurus category files). 
+ """ + cache: dict[type, ModelSpec] = {} + for spec in feature_specs: + expand_model_tree(spec, cache) + + primitive_names, geometry_names = partition_primitive_and_geometry_names( + _system_primitive + ) + all_specs = collect_all_supplementary_types(feature_specs) + registry = build_placement_registry( + feature_specs, all_specs, primitive_names, geometry_names, schema_root + ) + + reverse_refs = compute_reverse_references(feature_specs, all_specs) + + pages: list[RenderedPage] = [] + + for spec in feature_specs: + output_path = registry[spec.identity] + ctx = LinkContext(output_path, registry) + examples = _load_model_examples(spec) + used_by = reverse_refs.get(spec.identity) + content = render_feature(spec, link_ctx=ctx, examples=examples, used_by=used_by) + pages.append(RenderedPage(content=content, path=output_path, is_feature=True)) + + for tid, supp_spec in all_specs.items(): + pages.append(_render_supplement(tid, supp_spec, registry, reverse_refs)) + + pages.append( + RenderedPage( + content=render_primitives_from_specs(extract_primitives(primitive_names)), + path=PRIMITIVES_PAGE, + ) + ) + + pages.append( + RenderedPage( + content=render_geometry_from_values([m.value for m in GeometryType]), + path=GEOMETRY_PAGE, + ) + ) + + return pages diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/renderer.py b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/renderer.py new file mode 100644 index 000000000..6ef448eb5 --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/renderer.py @@ -0,0 +1,621 @@ +"""Markdown renderer for Pydantic model documentation.""" + +import functools +import json +import re +from collections.abc import Callable +from dataclasses import dataclass +from pathlib import Path +from typing import TypedDict, cast + +from annotated_types import Interval +from jinja2 import Environment, FileSystemLoader +from typing_extensions import NotRequired + +from 
..extraction.examples import ExampleRecord +from ..extraction.field_constraints import constraint_display_text +from ..extraction.model_constraints import analyze_model_constraints +from ..extraction.specs import ( + AnnotatedField, + EnumSpec, + FeatureSpec, + FieldSpec, + ModelSpec, + NewTypeSpec, + PrimitiveSpec, + PydanticTypeSpec, + TypeIdentity, + UnionSpec, +) +from ..extraction.type_analyzer import ( + ConstraintSource, +) +from .link_computation import LinkContext +from .reverse_references import UsedByEntry +from .type_format import ( + format_type, + format_underlying_type, + resolve_type_link, +) + +__all__ = [ + "render_enum", + "render_feature", + "render_geometry_from_values", + "render_newtype", + "render_primitives_from_specs", + "render_pydantic_type", +] + + +_LinkFn = Callable[[TypeIdentity], str] + +_TEMPLATES_DIR = Path(__file__).parent / "templates" + +_BARE_URL_RE = re.compile( + r"(?)]+|www\.[^\s<>)]+)" +) +_TRAILING_PUNCT_RE = re.compile(r"[.,;:!?]+$") +# (.+?) deliberately does not match newlines -- CommonMark code spans are inline. +_CODE_SPAN_RE = re.compile(r"(`+)(.+?)\1") + + +def _linkify_bare_urls(text: str) -> str: + """Wrap bare URLs in Markdown link syntax. + + Turns ``www.example.com`` into ``[www.example.com](https://www.example.com)`` + and ``https://example.com`` into ``[https://example.com](https://example.com)``. + URLs already inside ``[text](url)`` or backtick code spans are left + untouched. Trailing sentence punctuation (``.``, ``,``, etc.) is excluded + from the link. + + Two-pass approach: extract code spans first, linkify the remaining + text, then restore code spans. 
+ """ + # Extract code spans, replacing with placeholders + spans: list[str] = [] + + def _stash_span(m: re.Match[str]) -> str: + spans.append(m.group(0)) + return f"\x00CODESPAN{len(spans) - 1}\x00" + + text = _CODE_SPAN_RE.sub(_stash_span, text) + + # Linkify bare URLs in non-code text + def _to_link(m: re.Match[str]) -> str: + raw = m.group(0) + url = _TRAILING_PUNCT_RE.sub("", raw) + trailing = raw[len(url) :] + href = url if url.startswith("http") else f"https://{url}" + return f"[{url}]({href}){trailing}" + + text = _BARE_URL_RE.sub(_to_link, text) + + # Restore code spans + for i, span in enumerate(spans): + text = text.replace(f"\x00CODESPAN{i}\x00", span) + + return text + + +@functools.lru_cache(maxsize=1) +def _get_jinja_env() -> Environment: + """Return the Jinja2 environment, creating it on first use.""" + env = Environment( + loader=FileSystemLoader(_TEMPLATES_DIR), + trim_blocks=True, + lstrip_blocks=True, + ) + env.filters["linkify_urls"] = _linkify_bare_urls + return env + + +_EXAMPLE_TRUNCATION_LIMIT = 100 + + +class _FieldRow(TypedDict): + """Template context for a single field table row. + + ``pre_formatted`` indicates the ``name`` already contains backticks + and variant tags, so the template should render it verbatim. + """ + + name: str + type_str: str + description: str | None + pre_formatted: NotRequired[bool] + + +_PARAGRAPH_BREAK_RE = re.compile(r"\n(?:[ \t]*\n)+") + + +def _unwrap_paragraphs(text: str) -> str: + r"""Unwrap hard-wrapped lines within paragraphs, preserving paragraph breaks. + + Splits on blank lines (paragraph boundaries), replaces single newlines + within each paragraph with spaces, then rejoins with ``\n\n``. + Matches markdown's treatment of newlines within paragraphs. + """ + paragraphs = _PARAGRAPH_BREAK_RE.split(text) + return "\n\n".join(p.replace("\n", " ") for p in paragraphs) + + +def _sanitize_for_table_cell(text: str) -> str: + """Sanitize text for embedding in a markdown table cell. 
+ + Unwraps within-paragraph newlines to spaces, then converts paragraph + breaks to ``

``. Escapes pipe characters for table safety. + Uses ``
`` (not ``
``) for MDX/Docusaurus compatibility. + """ + text = text.strip() + text = _unwrap_paragraphs(text) + text = text.replace("\n\n", "

") + return text.replace("|", "\\|") + + +def _truncate(text: str) -> str: + """Truncate text to ``_EXAMPLE_TRUNCATION_LIMIT`` chars, adding ellipsis.""" + if len(text) > _EXAMPLE_TRUNCATION_LIMIT: + return text[: _EXAMPLE_TRUNCATION_LIMIT - 3] + "..." + return text + + +def _format_example_value(value: object) -> str: + """Format an example value for display in a markdown Column | Value table. + + All non-empty values render in backticks for consistent monospace + formatting. Long representations are truncated before wrapping. + """ + if value is None: + return "`null`" + + if isinstance(value, bool): + return "`true`" if value else "`false`" + + if isinstance(value, str): + if value == "": + return "" + return f"`{_truncate(value)}`" + + if isinstance(value, list): + items = ", ".join(json.dumps(item) for item in value) + return f"`{_truncate(f'[{items}]')}`" + + if isinstance(value, dict): + pairs = ", ".join(f"{json.dumps(k)}: {json.dumps(v)}" for k, v in value.items()) + return f"`{_truncate(f'{{{pairs}}}')}`" + + return f"`{value}`" + + +def _field_template_context( + field: FieldSpec, + ctx: LinkContext | None = None, +) -> _FieldRow: + """Build template context dict for a field.""" + description = ( + _sanitize_for_table_cell(field.description) if field.description else None + ) + return _FieldRow( + name=field.name, + type_str=format_type(field, ctx), + description=description, + ) + + +def _annotate_constraint_notes( + row: _FieldRow, + notes: list[str], +) -> None: + """Append italic constraint descriptions to a field's description cell.""" + formatted = "
".join(f"*{note}*" for note in notes) + if row["description"]: + row["description"] = f"{row['description']}

{formatted}" + else: + row["description"] = formatted + + +def _link_fn_from_ctx(ctx: LinkContext | None) -> _LinkFn: + r"""Build a TypeIdentity-to-markdown-link resolver from a LinkContext. + + Returns a function that resolves a TypeIdentity to ``[`Name`](href)`` + when the identity has a page in the registry, or plain ``\`Name\``` otherwise. + """ + return functools.partial(resolve_type_link, ctx=ctx) + + +def _annotate_field_constraints( + row: _FieldRow, field: FieldSpec, ctx: LinkContext | None +) -> None: + """Annotate a field row with constraints from the field's own annotation. + + Shows constraints where source is None — those applied directly to + the field, not inherited from NewType chains. NewType-inherited + constraints appear on the NewType's own page instead. + """ + link_fn = _link_fn_from_ctx(ctx) + notes = [ + constraint_display_text(cs, link_fn=link_fn) + for cs in field.type_info.constraints + if cs.source_ref is None + ] + if notes: + _annotate_constraint_notes(row, notes) + + +def _expandable_list_suffix(field_spec: FieldSpec) -> str: + """Return ``"[]"`` per nesting level for list-of-model fields expanded inline.""" + if ( + field_spec.type_info.is_list + and field_spec.model + and not field_spec.starts_cycle + ): + return "[]" * field_spec.type_info.list_depth + return "" + + +def _expand_sub_model( + field_spec: FieldSpec, + name: str, + ctx: LinkContext | None, + result: list[_FieldRow], +) -> None: + """Expand sub-model fields inline, appending child rows to *result*.""" + sub = field_spec.model if not field_spec.starts_cycle else None + if sub is not None: + child_prefix = f"{name}{_expandable_list_suffix(field_spec)}." + result.extend(_expand_model_fields(sub.fields, ctx, prefix=child_prefix)) + + +def _annotate_top_level_constraints( + rows: list[_FieldRow], + constraint_notes: dict[str, list[str]] | None, +) -> None: + """Annotate top-level field rows with model-constraint notes. 
+ + Top-level rows are those without dot-notation prefixes. + """ + if not constraint_notes: + return + for row in rows: + name = row["name"] + if "." in name: + continue + field_name = name.split("[")[0] + if field_name in constraint_notes: + _annotate_constraint_notes(row, constraint_notes[field_name]) + + +def _expand_model_fields( + fields: list[FieldSpec], + ctx: LinkContext | None, + prefix: str = "", +) -> list[_FieldRow]: + """Flatten nested model fields into dot-notation rows for display. + + Walks the pre-populated FieldSpec.model tree. Stops recursion at + fields marked with starts_cycle. + """ + result: list[_FieldRow] = [] + for field_spec in fields: + row = _field_template_context(field_spec, ctx) + name = f"{prefix}{field_spec.name}" if prefix else field_spec.name + row["name"] = f"{name}{_expandable_list_suffix(field_spec)}" + if not prefix: + _annotate_field_constraints(row, field_spec, ctx) + result.append(row) + + _expand_sub_model(field_spec, name, ctx, result) + return result + + +def _short_variant_name(class_name: str, union_name: str) -> str: + """Strip common suffix to produce short variant name. 
+ + Examples + -------- + >>> _short_variant_name("RoadSegment", "Segment") + 'Road' + >>> _short_variant_name("WaterSegment", "Segment") + 'Water' + >>> _short_variant_name("Building", "Building") + 'Building' + """ + if class_name.endswith(union_name): + short = class_name[: -len(union_name)] + if short: + return short + return class_name + + +def _variant_tag(annotated: AnnotatedField, union_name: str) -> str | None: + """Return an italic variant tag like ``*(Road, Water)*``, or None for shared fields.""" + if annotated.variant_sources is None: + return None + short_names = [ + _short_variant_name(v, union_name) for v in annotated.variant_sources + ] + return f" *({', '.join(short_names)})*" + + +def _expand_union_fields( + spec: UnionSpec, + ctx: LinkContext | None, + constraint_notes: dict[str, list[str]] | None = None, +) -> list[_FieldRow]: + """Expand UnionSpec fields with inline variant tags. + + Shared fields (variant_sources=None) render normally. Variant-specific + fields get *(ShortName)* tag after the field name. + """ + result: list[_FieldRow] = [] + for annotated in spec.annotated_fields: + field_spec = annotated.field_spec + row = _field_template_context(field_spec, ctx) + name = field_spec.name + suffix = _expandable_list_suffix(field_spec) + + _annotate_field_constraints(row, field_spec, ctx) + if constraint_notes and field_spec.name in constraint_notes: + _annotate_constraint_notes(row, constraint_notes[field_spec.name]) + + tag = _variant_tag(annotated, spec.name) + if tag is not None: + row["name"] = f"`{name}{suffix}`{tag}" + row["pre_formatted"] = True + else: + row["name"] = f"{name}{suffix}" + + result.append(row) + _expand_sub_model(field_spec, name, ctx, result) + return result + + +def render_feature( + spec: FeatureSpec, + link_ctx: LinkContext | None = None, + examples: list[ExampleRecord] | None = None, + used_by: list[UsedByEntry] | None = None, +) -> str: + """Render a FeatureSpec (ModelSpec or UnionSpec) as Markdown documentation. 
+ + For ModelSpec, requires expand_model_tree to have been called first. + For UnionSpec, adds inline variant tags to variant-specific fields. + """ + template = _get_jinja_env().get_template("feature.md.jinja2") + + constraint_descriptions, field_notes = analyze_model_constraints(spec.constraints) + + if isinstance(spec, UnionSpec): + fields = _expand_union_fields(spec, link_ctx, constraint_notes=field_notes) + elif isinstance(spec, ModelSpec): + fields = _expand_model_fields(spec.fields, link_ctx) + _annotate_top_level_constraints(fields, field_notes) + else: + raise TypeError(f"Unsupported spec type: {type(spec).__name__}") + + formatted_examples: list[list[dict[str, str]]] | None = None + if examples: + formatted_examples = [ + [ + {"column": key, "value": _format_example_value(val)} + for key, val in record.rows + ] + for record in examples + ] + + return template.render( + model=spec, + fields=fields, + constraints=constraint_descriptions, + examples=formatted_examples, + used_by=_build_used_by_context(used_by, link_ctx), + ) + + +def render_enum( + enum_spec: EnumSpec, + link_ctx: LinkContext | None = None, + used_by: list[UsedByEntry] | None = None, +) -> str: + """Render an EnumSpec as Markdown documentation.""" + template = _get_jinja_env().get_template("enum.md.jinja2") + return template.render( + enum=enum_spec, used_by=_build_used_by_context(used_by, link_ctx) + ) + + +@dataclass +class _NewTypeConstraintRow: + """Rendered constraint for template.""" + + display: str + source: str | None = None + source_link: str | None = None + + +def _format_constraint( + cs: ConstraintSource, + newtype_ref: object, + ctx: LinkContext | None = None, +) -> _NewTypeConstraintRow: + """Format a ConstraintSource for display in a NewType page.""" + display = constraint_display_text(cs) + + if cs.source_ref is None or cs.source_ref is newtype_ref: + return _NewTypeConstraintRow(display=display) + + assert cs.source_name is not None # source_ref and source_name are set 
together + source_identity = TypeIdentity(cs.source_ref, cs.source_name) + source_link = ctx.resolve_link(source_identity) if ctx else None + return _NewTypeConstraintRow( + display=display, source=cs.source_name, source_link=source_link + ) + + +class _UsedByContext(TypedDict): + """Template context for a used-by entry.""" + + name: str + link: str | None + + +def _build_used_by_context( + used_by: list[UsedByEntry] | None, + link_ctx: LinkContext | None, +) -> list[_UsedByContext] | None: + """Build template context for used-by entries.""" + if not used_by: + return None + return [ + { + "name": entry.identity.name, + "link": link_ctx.resolve_link(entry.identity) if link_ctx else None, + } + for entry in used_by + ] + + +def render_newtype( + newtype_spec: NewTypeSpec, + link_ctx: LinkContext | None = None, + used_by: list[UsedByEntry] | None = None, +) -> str: + """Render a NewTypeSpec as Markdown documentation.""" + template = _get_jinja_env().get_template("newtype.md.jinja2") + ti = newtype_spec.type_info + underlying = format_underlying_type(ti, link_ctx) + constraints = [ + _format_constraint(cs, newtype_spec.source_type, link_ctx) + for cs in ti.constraints + ] + + return template.render( + newtype=newtype_spec, + underlying_type=underlying, + constraints=constraints, + used_by=_build_used_by_context(used_by, link_ctx), + ) + + +def render_pydantic_type( + spec: PydanticTypeSpec, + link_ctx: LinkContext | None = None, + used_by: list[UsedByEntry] | None = None, +) -> str: + """Render a PydanticTypeSpec as Markdown documentation.""" + template = _get_jinja_env().get_template("pydantic_type.md.jinja2") + return template.render( + pydantic_type=spec, + used_by=_build_used_by_context(used_by, link_ctx), + ) + + +# Matches the ge/le bounds of the int64 NewType in overture.schema.system.primitive. +_INT64_MIN = -(2**63) +_INT64_MAX = 2**63 - 1 + +_NumericBound = int | float | None + +# IEEE 754 precision by bit width — formatting knowledge, not schema data. 
+_FLOAT_PRECISION: dict[int, str] = {32: "~7 decimal digits", 64: "~15 decimal digits"} + + +def _format_bound(value: int | float) -> str: + """Format a numeric bound for display. + + Uses ``2^63`` notation for int64-scale values to avoid unreadable + numbers; otherwise formats with thousands separators for ints. + """ + if value == _INT64_MIN: + return "-2^63" + if value == _INT64_MAX: + return "2^63-1" + if isinstance(value, float): + return str(value) + return f"{value:,}" + + +def _format_interval(bounds: Interval) -> str: + """Format an Interval as a range string, or empty if unconstrained. + + Two inclusive bounds render as ``lower to upper``. All other + combinations use explicit comparison operators so the + inclusivity/exclusivity is unambiguous. + """ + # Interval fields are typed as Supports* protocols; narrow to numeric + # since we only encounter int/float constraints from the schema. + ge = cast(_NumericBound, bounds.ge) + gt = cast(_NumericBound, bounds.gt) + le = cast(_NumericBound, bounds.le) + lt = cast(_NumericBound, bounds.lt) + + # Both bounds inclusive: compact "lower to upper" form + if ge is not None and le is not None: + return f"{_format_bound(ge)} to {_format_bound(le)}" + + # Any other two-bound combination: use explicit operators + parts: list[str] = [] + if ge is not None: + parts.append(f">= {_format_bound(ge)}") + elif gt is not None: + parts.append(f"> {_format_bound(gt)}") + + if le is not None: + parts.append(f"<= {_format_bound(le)}") + elif lt is not None: + parts.append(f"< {_format_bound(lt)}") + + return ", ".join(parts) + + +def _bit_width_key(name: str) -> tuple[str, int]: + """Sort key: prefix then numeric bit width.""" + prefix = name.rstrip("0123456789") + digits = name[len(prefix) :] + return (prefix, int(digits) if digits else 0) + + +def render_primitives_from_specs(specs: list[PrimitiveSpec]) -> str: + """Render the primitives.md page from pre-extracted PrimitiveSpecs.""" + template = 
_get_jinja_env().get_template("primitives.md.jinja2") + + signed_ints: list[dict[str, str | None]] = [] + unsigned_ints: list[dict[str, str | None]] = [] + floats: list[dict[str, str | None]] = [] + + for spec in sorted(specs, key=lambda s: _bit_width_key(s.name)): + if spec.name.startswith(("int", "uint")): + target = signed_ints if spec.name.startswith("int") else unsigned_ints + target.append( + { + "name": spec.name, + "range": _format_interval(spec.bounds), + "description": _sanitize_for_table_cell(spec.description or ""), + } + ) + elif spec.name.startswith("float"): + precision = ( + _FLOAT_PRECISION.get(spec.float_bits, "") if spec.float_bits else "" + ) + floats.append( + { + "name": spec.name, + "precision": precision, + "description": _sanitize_for_table_cell(spec.description or ""), + } + ) + + return template.render( + signed_ints=signed_ints, + unsigned_ints=unsigned_ints, + floats=floats, + ) + + +def render_geometry_from_values(geometry_type_values: list[str]) -> str: + """Render the geometry.md page from pre-extracted geometry type values.""" + template = _get_jinja_env().get_template("geometry.md.jinja2") + geometry_types = ", ".join(f"`{v}`" for v in geometry_type_values) + return template.render(geometry_types=geometry_types) diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/reverse_references.py b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/reverse_references.py new file mode 100644 index 000000000..2ad471fc1 --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/reverse_references.py @@ -0,0 +1,173 @@ +"""Compute reverse references from types to their referrers.""" + +from __future__ import annotations + +from collections.abc import Mapping, Sequence +from dataclasses import dataclass +from enum import Enum + +from ..extraction.specs import ( + FeatureSpec, + FieldSpec, + ModelSpec, + NewTypeSpec, + SupplementarySpec, + TypeIdentity, + UnionSpec, + 
is_pydantic_type, +) +from ..extraction.type_analyzer import TypeInfo, TypeKind, walk_type_info + +__all__ = [ + "UsedByEntry", + "UsedByKind", + "compute_reverse_references", +] + + +class UsedByKind(Enum): + """Kind of referrer in a 'used by' entry.""" + + MODEL = 0 + NEWTYPE = 1 + + +@dataclass(frozen=True, slots=True) +class UsedByEntry: + """A single 'used by' entry pointing to a referrer.""" + + identity: TypeIdentity + kind: UsedByKind + + +def compute_reverse_references( + feature_specs: Sequence[FeatureSpec], + all_specs: Mapping[TypeIdentity, SupplementarySpec], +) -> dict[TypeIdentity, list[UsedByEntry]]: + """Compute reverse references from types to their referrers. + + Returns a dict mapping TypeIdentity to lists of UsedByEntry, sorted with + models before NewTypes, alphabetical within each group. + + Parameters + ---------- + feature_specs : Sequence[FeatureSpec] + Feature-level specs (ModelSpec or UnionSpec). + all_specs : Mapping[TypeIdentity, SupplementarySpec] + Supplementary types (enums, newtypes, sub-models). + + Returns + ------- + dict[TypeIdentity, list[UsedByEntry]] + Dict mapping TypeIdentity to sorted lists of UsedByEntry. 
+ """ + # Track references with sets to deduplicate + references: dict[TypeIdentity, set[UsedByEntry]] = {} + + def add_reference( + target: TypeIdentity, referrer: TypeIdentity, kind: UsedByKind + ) -> None: + """Add a reference from referrer to target, with deduplication.""" + if target == referrer or target not in all_specs: + return + references.setdefault(target, set()).add(UsedByEntry(referrer, kind)) + + def collect_from_type_info( + ti: TypeInfo, referrer: TypeIdentity, referrer_kind: UsedByKind + ) -> None: + """Collect references from a TypeInfo.""" + + def _visit(node: TypeInfo) -> None: + if node.newtype_ref is not None and node.newtype_name is not None: + add_reference( + TypeIdentity(node.newtype_ref, node.newtype_name), + referrer, + referrer_kind, + ) + + # ENUM, MODEL, pydantic (PRIMITIVE), and UNION are mutually + # exclusive by TypeKind. + if ( + node.kind in (TypeKind.ENUM, TypeKind.MODEL) + and node.source_type is not None + ): + add_reference( + TypeIdentity.of(node.source_type), + referrer, + referrer_kind, + ) + elif is_pydantic_type(node): + add_reference( + TypeIdentity.of(node.source_type), referrer, referrer_kind + ) + elif node.union_members is not None: + for member_cls in node.union_members: + add_reference( + TypeIdentity.of(member_cls), + referrer, + referrer_kind, + ) + + walk_type_info(ti, _visit) + + def collect_from_fields( + fields: list[FieldSpec], referrer: TypeIdentity, referrer_kind: UsedByKind + ) -> None: + """Collect references from model fields.""" + for field_spec in fields: + collect_from_type_info(field_spec.type_info, referrer, referrer_kind) + + def collect_from_model_spec(spec: ModelSpec, referrer: TypeIdentity) -> None: + """Collect references from a ModelSpec.""" + collect_from_fields(spec.fields, referrer, UsedByKind.MODEL) + + def collect_from_union_spec(spec: UnionSpec) -> None: + """Collect references from a UnionSpec.""" + referrer = spec.identity + # Union features reference their members + for member_cls 
in spec.members: + add_reference( + TypeIdentity.of(member_cls), + referrer, + UsedByKind.MODEL, + ) + # Also walk fields for other supplementary types + collect_from_fields(spec.fields, referrer, UsedByKind.MODEL) + + def collect_from_newtype_spec(spec: NewTypeSpec, referrer: TypeIdentity) -> None: + """Collect references from a NewTypeSpec.""" + collect_from_type_info(spec.type_info, referrer, UsedByKind.NEWTYPE) + + # Collect inherited NewTypes from constraint sources + for cs in spec.type_info.constraints: + if cs.source_ref is not None and cs.source_name is not None: + ref_id = TypeIdentity(cs.source_ref, cs.source_name) + add_reference(ref_id, referrer, UsedByKind.NEWTYPE) + + # Collect from features + for spec in feature_specs: + if isinstance(spec, ModelSpec): + collect_from_model_spec(spec, spec.identity) + elif isinstance(spec, UnionSpec): + collect_from_union_spec(spec) + + # Collect from supplementary specs (NewTypes and sub-models reference + # other types; enums do not, so they need no processing here) + for tid, supp_spec in all_specs.items(): + if isinstance(supp_spec, NewTypeSpec): + collect_from_newtype_spec(supp_spec, tid) + elif isinstance(supp_spec, ModelSpec): + collect_from_model_spec(supp_spec, tid) + + # Sort into deterministic lists. (kind, name) handles the common case; + # module breaks ties when two referrers share the same display name + # (e.g. identically-named types from different themes/modules). 
+ result: dict[TypeIdentity, list[UsedByEntry]] = {} + for target, ref_set in references.items(): + entries = sorted( + ref_set, + key=lambda e: (e.kind.value, e.identity.name, e.identity.module), + ) + result[target] = entries + + return result diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/templates/_used_by.md.jinja2 b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/templates/_used_by.md.jinja2 new file mode 100644 index 000000000..fcbd9e82b --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/templates/_used_by.md.jinja2 @@ -0,0 +1,10 @@ +{% if used_by %} + +## Used By + +{% for entry in used_by -%} +{% if entry.link %}- [`{{ entry.name }}`]({{ entry.link }}) +{% else %}- `{{ entry.name }}` +{% endif %} +{% endfor %} +{% endif %} diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/templates/enum.md.jinja2 b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/templates/enum.md.jinja2 new file mode 100644 index 000000000..b5b71c254 --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/templates/enum.md.jinja2 @@ -0,0 +1,13 @@ +# {{ enum.name }} +{% if enum.description %} + +{{ enum.description | linkify_urls }} +{% endif %} + +## Values + +{% for member in enum.members -%} +- `{{ member.value }}`{% if member.description %} - {{ member.description }}{% endif %} + +{% endfor %} +{% include '_used_by.md.jinja2' %} diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/templates/feature.md.jinja2 b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/templates/feature.md.jinja2 new file mode 100644 index 000000000..78a183c5e --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/templates/feature.md.jinja2 @@ -0,0 +1,45 @@ +# {{ model.name }} +{% if model.description %} + +{{ model.description | linkify_urls }} 
+{% endif %} + +## Fields + +| Name | Type | Description | +| -----: | :----: | ------------- | +{% for field in fields -%} +| {% if field.pre_formatted %}{{ field.name }}{% else %}`{{ field.name }}`{% endif %} | {{ field.type_str }} | {% if field.description %}{{ field.description }} {% endif %}| +{% endfor %} +{% if constraints %} + +## Constraints + +{% for c in constraints %} +- {{ c }} +{% endfor %} +{% endif %} +{% if examples %} + +## Examples +{% if examples|length == 1 %} + +| Column | Value | +| -------: | ------- | +{% for row in examples[0] -%} +| `{{ row.column }}` | {{ row.value }} | +{% endfor %} +{% else %} +{% for example in examples %} + +### Example {{ loop.index }} + +| Column | Value | +| -------: | ------- | +{% for row in example -%} +| `{{ row.column }}` | {{ row.value }} | +{% endfor %} +{% endfor %} +{% endif %} +{% endif %} +{% include '_used_by.md.jinja2' %} diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/templates/geometry.md.jinja2 b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/templates/geometry.md.jinja2 new file mode 100644 index 000000000..cd6b200de --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/templates/geometry.md.jinja2 @@ -0,0 +1,11 @@ +# Geometry Types + +Spatial types for representing geographic features. + +## Types + +| Type | Description | +| -----: | ------------- | +| `Geometry` | GeoJSON geometry value (Point, LineString, Polygon, etc.) 
| +| `BBox` | Bounding box as 4 or 6 coordinate values: [west, south, east, north] or [west, south, min-altitude, east, north, max-altitude] | +| `GeometryType` | Enumeration of geometry types: {{ geometry_types }} | diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/templates/newtype.md.jinja2 b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/templates/newtype.md.jinja2 new file mode 100644 index 000000000..3d2c58f3a --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/templates/newtype.md.jinja2 @@ -0,0 +1,17 @@ +# {{ newtype.name }} +{% if newtype.description %} + +{{ newtype.description | linkify_urls }} +{% endif %} + +Underlying type: {{ underlying_type }} +{% if constraints %} + +## Constraints + +{% for c in constraints -%} +- {{ c.display }}{% if c.source_link %} (from [`{{ c.source }}`]({{ c.source_link }})){% endif %} + +{% endfor %} +{% endif %} +{% include '_used_by.md.jinja2' %} diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/templates/primitives.md.jinja2 b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/templates/primitives.md.jinja2 new file mode 100644 index 000000000..fd87a1ec0 --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/templates/primitives.md.jinja2 @@ -0,0 +1,27 @@ +# Primitive Types + +Numeric types used for schema field definitions. 
+ +## Integer Types + +| Type | Range | Description | +| -----: | :-----: | ------------- | +{% for t in signed_ints -%} +| `{{ t.name }}` | {{ t.range }} | {{ t.description }} | +{% endfor %} + +## Unsigned Integer Types + +| Type | Range | Description | +| -----: | :-----: | ------------- | +{% for t in unsigned_ints -%} +| `{{ t.name }}` | {{ t.range }} | {{ t.description }} | +{% endfor %} + +## Floating Point Types + +| Type | Precision | Description | +| -----: | :---------: | ------------- | +{% for t in floats -%} +| `{{ t.name }}` | {{ t.precision }} | {{ t.description }} | +{% endfor %} diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/templates/pydantic_type.md.jinja2 b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/templates/pydantic_type.md.jinja2 new file mode 100644 index 000000000..3185acf56 --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/templates/pydantic_type.md.jinja2 @@ -0,0 +1,8 @@ +# {{ pydantic_type.name }} +{% if pydantic_type.description %} + +{{ pydantic_type.description | linkify_urls }} +{% endif %} + +See: [Pydantic docs]({{ pydantic_type.docs_url }}) +{% include '_used_by.md.jinja2' %} diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/type_format.py b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/type_format.py new file mode 100644 index 000000000..0cc047e6e --- /dev/null +++ b/packages/overture-schema-codegen/src/overture/schema/codegen/markdown/type_format.py @@ -0,0 +1,238 @@ +"""Format TypeInfo as markdown type strings with cross-page links.""" + +from __future__ import annotations + +from pydantic import BaseModel + +from ..extraction.specs import FieldSpec, TypeIdentity +from ..extraction.type_analyzer import TypeInfo, TypeKind +from ..extraction.type_registry import is_semantic_newtype, resolve_type_name +from .link_computation import LinkContext + +__all__ = [ + 
"format_dict_type", + "format_type", + "format_underlying_type", + "resolve_type_link", +] + + +def _code_link(name: str, href: str) -> str: + """Format a markdown link with inline-code text: [``name``](href).""" + return f"[`{name}`]({href})" + + +def resolve_type_link(identity: TypeIdentity, ctx: LinkContext | None = None) -> str: + """Resolve a TypeIdentity to a linked code span or plain code span. + + When *ctx* is provided, links only to types in the registry (types + without pages render as inline code). Without context, renders as + inline code -- producing a link requires a placement registry to + compute correct relative paths. + """ + if ctx: + href = ctx.resolve_link(identity) + if href: + return _code_link(identity.name, href) + return f"`{identity.name}`" + + +def _wrap_list_n(inner: str, depth: int) -> str: + """Wrap an inner type string in ``list<...>`` markdown syntax *depth* times. + + Builds a single broken-backtick wrapper rather than nesting iteratively. + Iterative nesting creates adjacent backticks (`````) that CommonMark + interprets as multi-backtick code span delimiters. 
+ """ + return f"`{'list<' * depth}`{inner}`{'>' * depth}`" + + +def _plain_list_type(base: str, depth: int) -> str: + """Format a plain (unlinked) list type string for *depth* nesting levels.""" + return f"`{'list<' * depth}{base}{'>' * depth}`" + + +def _linked_type_identity(ti: TypeInfo) -> TypeIdentity | None: + """Return the TypeIdentity to use for a markdown link, or None for non-linked types.""" + if is_semantic_newtype(ti) and ti.newtype_ref is not None: + assert ti.newtype_name is not None # guaranteed by is_semantic_newtype + return TypeIdentity(ti.newtype_ref, ti.newtype_name) + if ti.kind in (TypeKind.ENUM, TypeKind.MODEL) and ti.source_type is not None: + return TypeIdentity(ti.source_type, ti.base_type) + return None + + +def _try_primitive_link( + ti: TypeInfo, display_name: str, ctx: LinkContext | None +) -> str | None: + """Try to link a PRIMITIVE type to its page via registry lookup. + + Registered primitives (int32, Geometry) and Pydantic types (HttpUrl) + can have pages in the registry. Uses the type registry display name + (e.g. ``geometry`` not ``Geometry``) for the link text. + """ + if ti.kind != TypeKind.PRIMITIVE or not ctx: + return None + candidate = ti.newtype_ref or ti.source_type + if candidate is None: + return None + href = ctx.resolve_link(TypeIdentity(candidate, display_name)) + if href: + return _code_link(display_name, href) + return None + + +def _markdown_type_name(ti: TypeInfo) -> str: + """Return the markdown display name for a type. + + Uses the semantic NewType name when present (e.g. ``LanguageTag``), + otherwise falls back to the resolved markdown type (e.g. ``string``). 
+ """ + name = ti.newtype_name if is_semantic_newtype(ti) else None + return name or resolve_type_name(ti, "markdown") + + +def format_dict_type(ti: TypeInfo) -> str: + """Format a dict TypeInfo as bare ``map`` using resolved markdown names.""" + if ti.dict_key_type is None or ti.dict_value_type is None: + msg = f"format_dict_type requires dict key/value types, got {ti}" + raise ValueError(msg) + key = _markdown_type_name(ti.dict_key_type) + value = _markdown_type_name(ti.dict_value_type) + return f"map<{key}, {value}>" + + +def _format_union_members( + members: tuple[type[BaseModel], ...], + ctx: LinkContext | None, + separator: str = r" \| ", +) -> str: + """Format union members as individually linked/backticked names. + + Each member is resolved independently so members with pages get linked + while others render as plain code spans. *separator* is inserted between + members (default is ``\\|`` for table-cell safety). + """ + return separator.join(resolve_type_link(TypeIdentity.of(m), ctx) for m in members) + + +def format_type( + field: FieldSpec, + ctx: LinkContext | None = None, +) -> str: + """Format a field's type for markdown display, with links and qualifiers.""" + ti = field.type_info + qualifiers: list[str] = [] + + if ti.kind == TypeKind.LITERAL and ti.literal_values: + if len(ti.literal_values) == 1: + return f'`"{ti.literal_values[0]}"`' + return r" \| ".join(f'`"{v}"`' for v in ti.literal_values) + + identity = _linked_type_identity(ti) + + if ti.kind == TypeKind.UNION and ti.union_members: + display = _format_union_members(ti.union_members, ctx) + if ti.is_list: + qualifiers.append("list") + elif ti.is_dict: + if identity: + display = resolve_type_link(identity, ctx) + qualifiers.append("map") + else: + display = f"`{format_dict_type(ti)}`" + elif identity: + display = resolve_type_link(identity, ctx) + # List layers outside a NewType wrap with list<> syntax (e.g., list[PhoneNumber] + # renders as list). 
List layers inside a NewType use a (list) + # qualifier instead (e.g., Sources wrapping list[SourceItem] renders as + # Sources (list)), since the list-ness is an implementation detail of the type. + if ti.newtype_outer_list_depth > 0: + assert ti.is_list # outer list layers are a subset of total list layers + display = _wrap_list_n(display, ti.newtype_outer_list_depth) + elif ti.is_list and ti.newtype_name is not None: # list is inside the NewType + qualifiers.append("list") + elif ti.is_list: + display = _wrap_list_n(display, ti.list_depth) + else: + # Fallback: types without a linked identity. Registered primitives (int32, + # Geometry) and Pydantic types (HttpUrl) may still link to aggregate pages + # via the placement registry. Unregistered primitives render as plain code. + base = resolve_type_name(ti, "markdown") + link = _try_primitive_link(ti, base, ctx) + if link and ti.is_list: + display = _wrap_list_n(link, ti.list_depth) + elif link: + display = link + elif ti.is_list: + display = _plain_list_type(base, ti.list_depth) + else: + display = f"`{base}`" + + if not field.is_required: + qualifiers.append("optional") + + if qualifiers: + return f"{display} ({', '.join(qualifiers)})" + return display + + +def _linked_or_backticked(ti: TypeInfo, ctx: LinkContext | None) -> tuple[str, bool]: + """Return (formatted_string, has_link) for a TypeInfo component. + + Used by format_underlying_type to decide whether container types + need broken-backtick formatting (interleaving backtick runs with + linked text). + + When ``has_link`` is True, ``formatted_string`` is a markdown link + ready for broken-backtick container syntax. When False, it is a raw + name that the caller embeds inside backticks. 
+ """ + identity = _linked_type_identity(ti) + if identity and ctx: + href = ctx.resolve_link(identity) + if href: + return _code_link(identity.name, href), True + return _markdown_type_name(ti), False + + +def format_underlying_type(ti: TypeInfo, ctx: LinkContext | None = None) -> str: + """Format a NewType's underlying type for the page header, with links. + + Links enums and models that have their own pages. Does not link the + outermost NewType (which would self-reference). Dict key/value types + use full link resolution since they reference other types. + """ + if ti.kind == TypeKind.UNION and ti.union_members: + return _format_union_members(ti.union_members, ctx, separator=" | ") + + if ti.is_dict and ti.dict_key_type and ti.dict_value_type: + key_str, key_linked = _linked_or_backticked(ti.dict_key_type, ctx) + val_str, val_linked = _linked_or_backticked(ti.dict_value_type, ctx) + if key_linked or val_linked: + if not key_linked: + key_str = f"`{key_str}`" + if not val_linked: + val_str = f"`{val_str}`" + return f"`map<`{key_str}`,`{val_str}`>`" + return f"`map<{key_str}, {val_str}>`" + + # Only link enums and models -- skip is_semantic_newtype to avoid + # self-linking (this TypeInfo belongs to the NewType being rendered). 
+ identity = ( + TypeIdentity.of(ti.source_type) + if ti.kind in (TypeKind.ENUM, TypeKind.MODEL) and ti.source_type + else None + ) + if identity and ctx: + href = ctx.resolve_link(identity) + if href: + linked = _code_link(identity.name, href) + if ti.is_list: + return _wrap_list_n(linked, ti.list_depth) + return linked + + base = identity.name if identity else resolve_type_name(ti, "markdown") + if ti.is_list: + return _plain_list_type(base, ti.list_depth) + return f"`{base}`" diff --git a/packages/overture-schema-codegen/src/overture/schema/codegen/py.typed b/packages/overture-schema-codegen/src/overture/schema/codegen/py.typed new file mode 100644 index 000000000..e69de29bb diff --git a/packages/overture-schema-codegen/tests/codegen_test_support.py b/packages/overture-schema-codegen/tests/codegen_test_support.py new file mode 100644 index 000000000..64facf5a9 --- /dev/null +++ b/packages/overture-schema-codegen/tests/codegen_test_support.py @@ -0,0 +1,365 @@ +"""Shared test support for overture-schema-codegen tests. + +Provides reusable model fixtures and helpers. Pytest fixtures are in conftest.py. 
+""" + +from __future__ import annotations + +from collections.abc import Mapping +from difflib import unified_diff +from enum import Enum +from pathlib import Path +from typing import Annotated, Generic, Literal, NewType, TypeVar + +import pytest +from overture.schema.codegen.extraction.model_extraction import extract_model +from overture.schema.codegen.extraction.pydantic_extraction import extract_pydantic_type +from overture.schema.codegen.extraction.specs import ( + AnnotatedField, + EnumMemberSpec, + EnumSpec, + FieldSpec, + ModelSpec, + TypeIdentity, + UnionSpec, + is_model_class, +) +from overture.schema.codegen.extraction.type_analyzer import TypeInfo, TypeKind +from overture.schema.core.discovery import discover_models +from overture.schema.system.doc import DocumentedEnum +from overture.schema.system.field_constraint import UniqueItemsConstraint +from overture.schema.system.model_constraint import require_any_of +from overture.schema.system.primitive import ( + Geometry, + GeometryType, + GeometryTypeConstraint, + float64, + int32, +) +from overture.schema.system.ref import Id, Identified, Reference, Relationship +from overture.schema.system.string import HexColor, LanguageTag, StrippedString +from pydantic import BaseModel, EmailStr, Field, HttpUrl + +STR_TYPE = TypeInfo(base_type="str", kind=TypeKind.PRIMITIVE) + +ThemeT = TypeVar("ThemeT") +TypeT = TypeVar("TypeT") + + +class SimpleModel(BaseModel): + """A simple model.""" + + name: str + + +class FeatureBase(BaseModel, Generic[ThemeT, TypeT]): + """Base class mimicking OvertureFeature pattern for tests.""" + + theme: ThemeT + type: TypeT + + +# Separate TypeVars from ThemeT/TypeT: IdentifiedFeature models a +# non-Overture user building on Identified with their own nomenclature. 
+CategoryT = TypeVar("CategoryT") +KindT = TypeVar("KindT") + + +class IdentifiedFeature(Identified, Generic[CategoryT, KindT]): + """Feature with identity and typed category/kind.""" + + category: CategoryT + kind: KindT + + +class InstrumentFamily(str, DocumentedEnum): + """Classification by sound production method.""" + + STRING = "string", "Sound from vibrating strings" + WIND = "wind", "Sound from vibrating air column" + PERCUSSION = "percussion" + + +class SimpleKind(str, Enum): + SMALL = "small" + LARGE = "large" + + +class Instrument( + IdentifiedFeature[Literal["music"], Literal["instrument"]], +): + """A musical instrument. + + Instruments produce sound through vibration. They are classified + by how sound is produced. + """ + + name: str = Field(description="Common name") + tuning: float64 | None = Field( + None, + description=("Concert pitch in Hz.\n\nStandard tuning is 440 Hz."), + ) + num_strings: int32 | None = Field(None) + family: InstrumentFamily | None = None + color: HexColor | None = Field(None, description="Body color") + tags: Annotated[list[str], UniqueItemsConstraint()] | None = None + + +@require_any_of("name", "description") +class Venue( + IdentifiedFeature[Literal["music"], Literal["venue"]], +): + """A concert venue. + + A location where musical performances take place. 
+ """ + + name: str | None = Field(None, description="Venue name") + description: str | None = None + geometry: Annotated[ + Geometry, + GeometryTypeConstraint(GeometryType.POINT, GeometryType.POLYGON), + ] + capacity: Annotated[int, Field(ge=1)] | None = None + resident_ensemble: ( + Annotated[Id, Reference(Relationship.BELONGS_TO, Instrument)] | None + ) = None + + +class SourceItem(BaseModel): + """A source data reference.""" + + dataset: str = Field(description="Source dataset name") + + +Sources = NewType( + "Sources", + Annotated[ + list[SourceItem], + Field(min_length=1, description="Source data references"), + UniqueItemsConstraint(), + ], +) + + +class FeatureWithSources( + FeatureBase[Literal["test"], Literal["sourced"]], +): + """A feature with a Sources field.""" + + name: str = Field(description="Feature name") + sources: Sources | None = None + + +class Address(BaseModel): + """A mailing address.""" + + street: str = Field(description="Street name") + city: str = Field(description="City name") + zip_code: str | None = Field(None, description="Postal code") + + +class FeatureWithAddress( + FeatureBase[Literal["test"], Literal["addressed"]], +): + """A feature with an address field.""" + + title: str = Field(description="Feature title") + address: Address + + +class TreeNode(BaseModel): + """A recursive tree node.""" + + label: str = Field(description="Node label") + parent: TreeNode | None = None + + +class Widget(BaseModel): + active: bool + label: str = Field(description="Display label") + + +CommonNames = NewType("CommonNames", dict[LanguageTag, StrippedString]) + + +class FeatureWithDict( + FeatureBase[Literal["test"], Literal["dictfeat"]], +): + """A feature with dict fields.""" + + name: str = Field(description="Feature name") + names: CommonNames | None = Field(None, description="Localized names") + alt_names: dict[LanguageTag, StrippedString] | None = Field( + None, description="Alternate localized names" + ) + tags: dict[str, str] | None = 
Field(None, description="Arbitrary tags") + metadata: dict[str, int] = Field(description="Numeric metadata") + + +class FeatureWithUrl(FeatureBase[Literal["test"], Literal["linked"]]): + """A feature with Pydantic URL and email fields.""" + + website: HttpUrl | None = None + emails: list[EmailStr] | None = None + + +HTTP_URL_SPEC = extract_pydantic_type(HttpUrl) +EMAIL_STR_SPEC = extract_pydantic_type(EmailStr) + + +class SegmentBase(BaseModel): + """Common base for test segments.""" + + geometry: str + subtype: str + + +class RoadSegment(SegmentBase): + subtype: Literal["road"] + class_: Annotated[str, Field(alias="class")] + speed_limit: int | None = None + + +class RailSegment(SegmentBase): + subtype: Literal["rail"] + class_: Annotated[int, Field(alias="class")] + rail_gauge: float | None = None + + +class WaterSegment(SegmentBase): + subtype: Literal["water"] + + +TestSegment = Annotated[ + RoadSegment | RailSegment | WaterSegment, + Field(description="Test segment union"), +] + + +class ContactInfo(BaseModel): + """Contact information for a venue.""" + + email: str = Field(description="Email address") + phone: str | None = Field(None, description="Phone number") + + +class VenueWithContact(SegmentBase): + """A segment variant with a nested sub-model field.""" + + subtype: Literal["venue"] + contact: ContactInfo + + +TestSegmentWithSubModel = Annotated[ + RoadSegment | VenueWithContact, + Field(description="Test segment union with sub-model member"), +] + + +def make_union_spec( + name: str = "TestUnion", + *, + description: str | None = None, + annotated_fields: list[AnnotatedField] | None = None, + members: list[type[BaseModel]] | None = None, + source_annotation: object = None, + common_base: type[BaseModel] | None = None, + entry_point: str | None = None, +) -> UnionSpec: + """Build a UnionSpec with sensible defaults for tests.""" + return UnionSpec( + name=name, + description=description, + annotated_fields=annotated_fields or [], + members=members or [], 
+ discriminator_field=None, + discriminator_mapping=None, + source_annotation=source_annotation, + common_base=common_base or BaseModel, + entry_point=entry_point, + ) + + +def find_model_class(name: str, models: dict[object, object]) -> type[BaseModel]: + """Find a discovered model class by name.""" + matches = [v for v in models.values() if getattr(v, "__name__", None) == name] + assert matches, f"{name} model not found" + match = matches[0] + assert isinstance(match, type) + assert issubclass(match, BaseModel) + return match + + +def find_field(spec: ModelSpec, name: str) -> FieldSpec: + """Find a field by name in a ModelSpec, raising if missing.""" + return next(f for f in spec.fields if f.name == name) + + +def find_member(spec: EnumSpec, name: str) -> EnumMemberSpec: + """Find a member by name in an EnumSpec, raising if missing.""" + return next(m for m in spec.members if m.name == name) + + +T = TypeVar("T") + + +def lookup_by_name(mapping: dict[TypeIdentity, T], name: str) -> T: + """Look up a value in a TypeIdentity-keyed dict by name, raising KeyError if absent.""" + for tid, value in mapping.items(): + if tid.name == name: + return value + raise KeyError(name) + + +def has_name(mapping: Mapping[TypeIdentity, object], name: str) -> bool: + """Check whether a TypeIdentity-keyed mapping contains a key with the given name.""" + return any(tid.name == name for tid in mapping) + + +def assert_literal_field( + spec: ModelSpec, field_name: str, expected_value: object +) -> None: + """Assert a field is a single-value Literal with the expected value.""" + field = find_field(spec, field_name) + assert field.type_info.kind == TypeKind.LITERAL + assert field.type_info.literal_values == (expected_value,) + + +def flat_specs_from_discovery( + theme: str | None = None, +) -> list[ModelSpec]: + """Build a flat list of ModelSpecs from discovery, with entry_point set.""" + models = discover_models() + if theme: + models = {k: v for k, v in models.items() if k.theme == 
theme} + result = [] + for key, cls in models.items(): + if not is_model_class(cls): + continue + result.append(extract_model(cls, entry_point=key.entry_point)) + return result + + +def assert_golden(actual: str, golden_path: Path, *, update: bool) -> None: + """Compare rendered output against a golden file. + + When update is True, writes actual content to the golden file + instead of comparing. + """ + if update: + golden_path.parent.mkdir(parents=True, exist_ok=True) + golden_path.write_text(actual) + return + expected = golden_path.read_text() + if actual != expected: + diff = "\n".join( + unified_diff( + expected.splitlines(), + actual.splitlines(), + fromfile=str(golden_path), + tofile="actual", + lineterm="", + ) + ) + pytest.fail(f"Golden file mismatch:\n{diff}") diff --git a/packages/overture-schema-codegen/tests/conftest.py b/packages/overture-schema-codegen/tests/conftest.py new file mode 100644 index 000000000..775fc628c --- /dev/null +++ b/packages/overture-schema-codegen/tests/conftest.py @@ -0,0 +1,82 @@ +"""Shared pytest fixtures for overture-schema-codegen tests.""" + +import overture.schema.system.primitive as _system_primitive +import pytest +from click.testing import CliRunner +from codegen_test_support import find_model_class +from overture.schema.codegen.extraction.model_extraction import extract_model +from overture.schema.codegen.extraction.primitive_extraction import ( + extract_primitives, + partition_primitive_and_geometry_names, +) +from overture.schema.codegen.extraction.specs import ModelSpec +from overture.schema.codegen.markdown.renderer import ( + render_geometry_from_values, + render_primitives_from_specs, +) +from overture.schema.core.discovery import discover_models +from overture.schema.system.primitive import GeometryType +from pydantic import BaseModel + + +def pytest_addoption(parser: pytest.Parser) -> None: + parser.addoption( + "--update-golden", + action="store_true", + default=False, + help="Regenerate golden files 
instead of comparing against them", + ) + + +@pytest.fixture +def update_golden(request: pytest.FixtureRequest) -> bool: + return bool(request.config.getoption("--update-golden")) + + +@pytest.fixture +def cli_runner() -> CliRunner: + """Provide a Click CLI test runner.""" + return CliRunner() + + +@pytest.fixture +def all_discovered_models() -> dict: + """Discover and return all registered Overture models.""" + return discover_models() + + +@pytest.fixture +def building_class(all_discovered_models: dict) -> type[BaseModel]: + """Get the Building model class.""" + return find_model_class("Building", all_discovered_models) + + +@pytest.fixture +def building_spec(building_class: type[BaseModel]) -> ModelSpec: + """Extract the Building model spec.""" + return extract_model(building_class) + + +@pytest.fixture +def place_class(all_discovered_models: dict) -> type[BaseModel]: + """Get the Place model class.""" + return find_model_class("Place", all_discovered_models) + + +@pytest.fixture +def division_class(all_discovered_models: dict) -> type[BaseModel]: + """Get the Division model class.""" + return find_model_class("Division", all_discovered_models) + + +@pytest.fixture(scope="module") +def primitives_markdown() -> str: + """Render the primitives.md page from the system primitive module.""" + primitive_names, _ = partition_primitive_and_geometry_names(_system_primitive) + return render_primitives_from_specs(extract_primitives(primitive_names)) + + +@pytest.fixture(scope="module") +def geometry_markdown() -> str: + """Render the geometry.md page from system GeometryType values.""" + return render_geometry_from_values([m.value for m in GeometryType]) diff --git a/packages/overture-schema-codegen/tests/golden/markdown/common_names.md b/packages/overture-schema-codegen/tests/golden/markdown/common_names.md new file mode 100644 index 000000000..c73d708c9 --- /dev/null +++ b/packages/overture-schema-codegen/tests/golden/markdown/common_names.md @@ -0,0 +1,7 @@ +# 
CommonNames + +Underlying type: `map` + +## Used By + +- `FeatureWithDict` diff --git a/packages/overture-schema-codegen/tests/golden/markdown/feature_with_address.md b/packages/overture-schema-codegen/tests/golden/markdown/feature_with_address.md new file mode 100644 index 000000000..fdbfdc7a8 --- /dev/null +++ b/packages/overture-schema-codegen/tests/golden/markdown/feature_with_address.md @@ -0,0 +1,15 @@ +# FeatureWithAddress + +A feature with an address field. + +## Fields + +| Name | Type | Description | +| -----: | :----: | ------------- | +| `theme` | `"test"` | | +| `type` | `"addressed"` | | +| `title` | `string` | Feature title | +| `address` | `Address` | | +| `address.street` | `string` | Street name | +| `address.city` | `string` | City name | +| `address.zip_code` | `string` (optional) | Postal code | diff --git a/packages/overture-schema-codegen/tests/golden/markdown/feature_with_dict.md b/packages/overture-schema-codegen/tests/golden/markdown/feature_with_dict.md new file mode 100644 index 000000000..499787d06 --- /dev/null +++ b/packages/overture-schema-codegen/tests/golden/markdown/feature_with_dict.md @@ -0,0 +1,15 @@ +# FeatureWithDict + +A feature with dict fields. + +## Fields + +| Name | Type | Description | +| -----: | :----: | ------------- | +| `theme` | `"test"` | | +| `type` | `"dictfeat"` | | +| `name` | `string` | Feature name | +| `names` | `CommonNames` (map, optional) | Localized names | +| `alt_names` | `map` (optional) | Alternate localized names | +| `tags` | `map` (optional) | Arbitrary tags | +| `metadata` | `map` | Numeric metadata | diff --git a/packages/overture-schema-codegen/tests/golden/markdown/feature_with_sources.md b/packages/overture-schema-codegen/tests/golden/markdown/feature_with_sources.md new file mode 100644 index 000000000..c3e4bc39b --- /dev/null +++ b/packages/overture-schema-codegen/tests/golden/markdown/feature_with_sources.md @@ -0,0 +1,13 @@ +# FeatureWithSources + +A feature with a Sources field. 
+ +## Fields + +| Name | Type | Description | +| -----: | :----: | ------------- | +| `theme` | `"test"` | | +| `type` | `"sourced"` | | +| `name` | `string` | Feature name | +| `sources[]` | `Sources` (list, optional) | Source data references | +| `sources[].dataset` | `string` | Source dataset name | diff --git a/packages/overture-schema-codegen/tests/golden/markdown/hex_color.md b/packages/overture-schema-codegen/tests/golden/markdown/hex_color.md new file mode 100644 index 000000000..847a1b9a5 --- /dev/null +++ b/packages/overture-schema-codegen/tests/golden/markdown/hex_color.md @@ -0,0 +1,19 @@ +# HexColor + +A color represented as an #RRGGBB or #RGB hexadecimal string. + +For example: + +- `"#ff0000"` or `#f00` for pure red 🟥 +- `"#ffa500"` for bright orange 🟧 +- `"#000000"` or `"#000"` for black ⬛ + +Underlying type: `string` + +## Constraints + +- Allows only hexadecimal color codes (e.g., #FF0000 or #FFF). (`HexColorConstraint`, pattern: `^#[0-9A-Fa-f]{3}([0-9A-Fa-f]{3})?$`) + +## Used By + +- `Instrument` diff --git a/packages/overture-schema-codegen/tests/golden/markdown/id.md b/packages/overture-schema-codegen/tests/golden/markdown/id.md new file mode 100644 index 000000000..b2bfa2995 --- /dev/null +++ b/packages/overture-schema-codegen/tests/golden/markdown/id.md @@ -0,0 +1,15 @@ +# Id + +A unique identifier. + +Underlying type: `string` + +## Constraints + +- Minimum length: 1 +- Allows only strings that contain no whitespace characters. (`NoWhitespaceConstraint`, pattern: `^\S+$`) + +## Used By + +- `Instrument` +- `Venue` diff --git a/packages/overture-schema-codegen/tests/golden/markdown/instrument.md b/packages/overture-schema-codegen/tests/golden/markdown/instrument.md new file mode 100644 index 000000000..727f1b559 --- /dev/null +++ b/packages/overture-schema-codegen/tests/golden/markdown/instrument.md @@ -0,0 +1,20 @@ +# Instrument + +A musical instrument. + +Instruments produce sound through vibration. 
They are classified +by how sound is produced. + +## Fields + +| Name | Type | Description | +| -----: | :----: | ------------- | +| `id` | `Id` | Unique identifier | +| `category` | `"music"` | | +| `kind` | `"instrument"` | | +| `name` | `string` | Common name | +| `tuning` | `float64` (optional) | Concert pitch in Hz.

Standard tuning is 440 Hz. | +| `num_strings` | `int32` (optional) | | +| `family` | `InstrumentFamily` (optional) | | +| `color` | `HexColor` (optional) | Body color | +| `tags` | `list` (optional) | *All items must be unique. (`UniqueItemsConstraint`)* | diff --git a/packages/overture-schema-codegen/tests/golden/markdown/instrument_family.md b/packages/overture-schema-codegen/tests/golden/markdown/instrument_family.md new file mode 100644 index 000000000..d8489cc4f --- /dev/null +++ b/packages/overture-schema-codegen/tests/golden/markdown/instrument_family.md @@ -0,0 +1,13 @@ +# InstrumentFamily + +Classification by sound production method. + +## Values + +- `string` - Sound from vibrating strings +- `wind` - Sound from vibrating air column +- `percussion` + +## Used By + +- `Instrument` diff --git a/packages/overture-schema-codegen/tests/golden/markdown/simple_kind.md b/packages/overture-schema-codegen/tests/golden/markdown/simple_kind.md new file mode 100644 index 000000000..f0aca0300 --- /dev/null +++ b/packages/overture-schema-codegen/tests/golden/markdown/simple_kind.md @@ -0,0 +1,6 @@ +# SimpleKind + +## Values + +- `small` +- `large` diff --git a/packages/overture-schema-codegen/tests/golden/markdown/sources.md b/packages/overture-schema-codegen/tests/golden/markdown/sources.md new file mode 100644 index 000000000..ec0343cb6 --- /dev/null +++ b/packages/overture-schema-codegen/tests/golden/markdown/sources.md @@ -0,0 +1,14 @@ +# Sources + +Source data references + +Underlying type: `list` + +## Constraints + +- Minimum length: 1 +- All items must be unique. (`UniqueItemsConstraint`) + +## Used By + +- `FeatureWithSources` diff --git a/packages/overture-schema-codegen/tests/golden/markdown/venue.md b/packages/overture-schema-codegen/tests/golden/markdown/venue.md new file mode 100644 index 000000000..edb0578ef --- /dev/null +++ b/packages/overture-schema-codegen/tests/golden/markdown/venue.md @@ -0,0 +1,22 @@ +# Venue + +A concert venue. 
+ +A location where musical performances take place. + +## Fields + +| Name | Type | Description | +| -----: | :----: | ------------- | +| `id` | `Id` | Unique identifier | +| `category` | `"music"` | | +| `kind` | `"venue"` | | +| `name` | `string` (optional) | Venue name

*At least one of `name`, `description` must be set* | +| `description` | `string` (optional) | *At least one of `name`, `description` must be set* | +| `geometry` | `geometry` | *Allowed geometry types: Point, Polygon* | +| `capacity` | `int64` (optional) | *`≥ 1`* | +| `resident_ensemble` | `Id` (optional) | A unique identifier

*References `Instrument` (belongs to)* | + +## Constraints + +- At least one of `name`, `description` must be set diff --git a/packages/overture-schema-codegen/tests/golden/markdown/widget.md b/packages/overture-schema-codegen/tests/golden/markdown/widget.md new file mode 100644 index 000000000..c056d27a3 --- /dev/null +++ b/packages/overture-schema-codegen/tests/golden/markdown/widget.md @@ -0,0 +1,8 @@ +# Widget + +## Fields + +| Name | Type | Description | +| -----: | :----: | ------------- | +| `active` | `boolean` | | +| `label` | `string` | Display label | diff --git a/packages/overture-schema-codegen/tests/test_cli.py b/packages/overture-schema-codegen/tests/test_cli.py new file mode 100644 index 000000000..eecd45627 --- /dev/null +++ b/packages/overture-schema-codegen/tests/test_cli.py @@ -0,0 +1,434 @@ +"""Tests for CLI entrypoint.""" + +import json +import re +from pathlib import Path + +import pytest +from click.testing import CliRunner +from overture.schema.codegen.cli import cli +from overture.schema.codegen.extraction.specs import ModelSpec + + +class TestCliList: + """Tests for the list command.""" + + def test_list_command_exists(self, cli_runner: CliRunner) -> None: + """list command should be available.""" + result = cli_runner.invoke(cli, ["list"]) + assert result.exit_code == 0 + + def test_list_shows_discovered_models(self, cli_runner: CliRunner) -> None: + """list command should show discovered models.""" + result = cli_runner.invoke(cli, ["list"]) + + assert "Building" in result.output + assert "Place" in result.output + + +class TestCliGenerate: + """Tests for the generate command.""" + + def test_generate_command_exists(self, cli_runner: CliRunner) -> None: + """generate command should be available.""" + result = cli_runner.invoke(cli, ["generate", "--help"]) + + assert result.exit_code == 0 + assert "Generate" in result.output or "generate" in result.output + + def test_generate_requires_format(self, cli_runner: CliRunner) -> None: + 
"""generate command should require --format.""" + result = cli_runner.invoke(cli, ["generate"]) + assert result.exit_code != 0 + + def test_generate_markdown_to_stdout(self, cli_runner: CliRunner) -> None: + """generate --format markdown should output markdown to stdout.""" + result = cli_runner.invoke(cli, ["generate", "--format", "markdown"]) + + assert result.exit_code == 0 + assert "# Building" in result.output or "# " in result.output + + def test_generate_with_theme_filter(self, cli_runner: CliRunner) -> None: + """generate --theme should filter to specific theme.""" + result = cli_runner.invoke( + cli, ["generate", "--format", "markdown", "--theme", "buildings"] + ) + + assert result.exit_code == 0 + assert "Building" in result.output + assert "Place" not in result.output + + def test_generate_markdown_feature_at_theme_level( + self, cli_runner: CliRunner, tmp_path: Path + ) -> None: + """Markdown features go directly in theme directory.""" + result = cli_runner.invoke( + cli, + [ + "generate", + "--format", + "markdown", + "--theme", + "buildings", + "--output-dir", + str(tmp_path), + ], + ) + assert result.exit_code == 0 + + # Feature models at theme level + assert (tmp_path / "buildings" / "building.md").exists() + assert (tmp_path / "buildings" / "building_part.md").exists() + + # NOT in subdirectories + assert not (tmp_path / "buildings" / "building" / "building.md").exists() + + def test_feature_pages_have_sidebar_position( + self, cli_runner: CliRunner, tmp_path: Path + ) -> None: + """Feature pages include sidebar_position frontmatter.""" + result = cli_runner.invoke( + cli, + [ + "generate", + "--format", + "markdown", + "--theme", + "buildings", + "--output-dir", + str(tmp_path), + ], + ) + assert result.exit_code == 0 + + content = (tmp_path / "buildings" / "building.md").read_text() + assert content.startswith("---\nsidebar_position: 1\n---\n") + + def test_generate_markdown_shared_types_mirror_modules( + self, cli_runner: CliRunner, tmp_path: 
Path + ) -> None: + """Core/system types land in module-mirrored directories.""" + result = cli_runner.invoke( + cli, + ["generate", "--format", "markdown", "--output-dir", str(tmp_path)], + ) + assert result.exit_code == 0 + + core_dir = tmp_path / "core" + assert core_dir.exists(), "core/ directory should exist" + subdirs = [d.name for d in core_dir.iterdir() if d.is_dir()] + assert len(subdirs) > 0, "core/ should have subdirectories" + + def test_generate_multiple_themes_to_output_dir( + self, cli_runner: CliRunner, tmp_path: Path + ) -> None: + """generate all themes should create subdirectories for each theme.""" + result = cli_runner.invoke( + cli, + [ + "generate", + "--format", + "markdown", + "--output-dir", + str(tmp_path), + ], + ) + + assert result.exit_code == 0 + + subdirs = [d.name for d in tmp_path.iterdir() if d.is_dir()] + assert "buildings" in subdirs + assert "places" in subdirs + + def test_generate_no_duplicate_files( + self, cli_runner: CliRunner, tmp_path: Path + ) -> None: + """No type should produce duplicate output files.""" + result = cli_runner.invoke( + cli, + [ + "generate", + "--format", + "markdown", + "--output-dir", + str(tmp_path), + ], + ) + + assert result.exit_code == 0 + + all_files = list(tmp_path.rglob("*.md")) + all_paths = [str(f.relative_to(tmp_path)) for f in all_files] + assert len(all_paths) == len(set(all_paths)), ( + f"Duplicate files: {[p for p in all_paths if all_paths.count(p) > 1]}" + ) + + +class TestCliGenerateLinkIntegrity: + """Verify all markdown links resolve to existing files.""" + + def test_all_links_resolve(self, cli_runner: CliRunner, tmp_path: Path) -> None: + """Every markdown link target should exist as a file.""" + result = cli_runner.invoke( + cli, + [ + "generate", + "--format", + "markdown", + "--output-dir", + str(tmp_path), + ], + ) + + assert result.exit_code == 0 + + link_re = re.compile(r"\[.*?\]\(([^)]+\.md(?:#[^)]*)?)\)") + broken: list[str] = [] + + for md_file in 
tmp_path.rglob("*.md"): + content = md_file.read_text() + for match in link_re.finditer(content): + href = match.group(1).split("#")[0] + # Resolve relative path from the file's directory + target = (md_file.parent / href).resolve() + if not target.exists(): + rel = md_file.relative_to(tmp_path) + broken.append(f"{rel}: {href}") + + assert not broken, "Broken links:\n" + "\n".join(broken) + + +class TestCliGenerateCategoryFiles: + """Tests for _category_.json generation.""" + + def test_generates_category_files( + self, cli_runner: CliRunner, tmp_path: Path + ) -> None: + """Should generate _category_.json files in output directories.""" + result = cli_runner.invoke( + cli, + [ + "generate", + "--format", + "markdown", + "--theme", + "buildings", + "--output-dir", + str(tmp_path), + ], + ) + + assert result.exit_code == 0 + + # Theme directory should have a category file + cat_file = tmp_path / "buildings" / "_category_.json" + assert cat_file.exists() + data = json.loads(cat_file.read_text()) + assert data["label"] == "Buildings" + + def test_core_directory_has_category_file( + self, cli_runner: CliRunner, tmp_path: Path + ) -> None: + """core/ directory should have _category_.json.""" + result = cli_runner.invoke( + cli, + [ + "generate", + "--format", + "markdown", + "--output-dir", + str(tmp_path), + ], + ) + + assert result.exit_code == 0 + + cat_file = tmp_path / "core" / "_category_.json" + assert cat_file.exists() + data = json.loads(cat_file.read_text()) + assert data["label"] == "Core" + + def test_feature_dirs_positioned_before_non_feature_dirs( + self, cli_runner: CliRunner, tmp_path: Path + ) -> None: + """Feature directories should have lower position than non-feature directories.""" + result = cli_runner.invoke( + cli, + [ + "generate", + "--format", + "markdown", + "--output-dir", + str(tmp_path), + ], + ) + assert result.exit_code == 0 + + def pos(dir_name: str) -> int: + data = json.loads((tmp_path / dir_name / "_category_.json").read_text()) + 
result: int = data["position"] + return result + + # Feature directories (contain feature pages) should sort before + # non-feature directories (core, system -- shared types only) + feature_positions = [pos("buildings"), pos("places"), pos("transportation")] + non_feature_positions = [pos("core"), pos("system")] + + assert max(feature_positions) < min(non_feature_positions) + + def test_subdirectories_have_no_position( + self, cli_runner: CliRunner, tmp_path: Path + ) -> None: + """Only top-level directories get position values.""" + result = cli_runner.invoke( + cli, + [ + "generate", + "--format", + "markdown", + "--output-dir", + str(tmp_path), + ], + ) + assert result.exit_code == 0 + + data = json.loads( + (tmp_path / "core" / "scoping" / "_category_.json").read_text() + ) + assert "position" not in data + + +class TestCliGenerateEnums: + """Tests for enum generation in the generate command.""" + + def test_generate_markdown_includes_enum_files( + self, cli_runner: CliRunner, tmp_path: Path + ) -> None: + """generate --format markdown should create enum documentation files.""" + result = cli_runner.invoke( + cli, + [ + "generate", + "--format", + "markdown", + "--theme", + "buildings", + "--output-dir", + str(tmp_path), + ], + ) + + assert result.exit_code == 0 + + # Enum files exist somewhere under the buildings directory + all_md = list((tmp_path / "buildings").rglob("*.md")) + all_names = [f.stem for f in all_md] + + assert "building" in all_names + + # Should have enum files beyond the feature models + non_feature = [n for n in all_names if n not in ("building", "building_part")] + assert len(non_feature) > 0, "Should generate enum documentation files" + + +class TestCliEntryPoint: + """generate populates entry_point from discovery keys.""" + + def test_generate_sets_entry_point_on_specs( + self, cli_runner: CliRunner, monkeypatch: pytest.MonkeyPatch + ) -> None: + captured: list[ModelSpec] = [] + + def spy(feature_specs: list, schema_root: str, 
output_dir: object) -> None: + captured.extend(feature_specs) + + monkeypatch.setattr("overture.schema.codegen.cli._generate_markdown", spy) + result = cli_runner.invoke( + cli, ["generate", "--format", "markdown", "--theme", "buildings"] + ) + + assert result.exit_code == 0 + assert len(captured) > 0 + for spec in captured: + assert spec.entry_point is not None, f"{spec.name} missing entry_point" + assert ":" in spec.entry_point, ( + f"entry_point should be entry-point style: {spec.entry_point!r}" + ) + + +class TestCliHelp: + """Tests for CLI help.""" + + def test_main_help(self, cli_runner: CliRunner) -> None: + """--help should show usage information.""" + result = cli_runner.invoke(cli, ["--help"]) + + assert result.exit_code == 0 + assert "generate" in result.output + assert "list" in result.output + + +class TestGenerateWithSegment: + """Integration test: Segment union produces markdown output.""" + + def test_segment_appears_in_markdown_output( + self, cli_runner: CliRunner, tmp_path: Path + ) -> None: + """Generate markdown and verify Segment page exists.""" + result = cli_runner.invoke( + cli, + [ + "generate", + "--format", + "markdown", + "--theme", + "transportation", + "--output-dir", + str(tmp_path), + ], + ) + assert result.exit_code == 0 + + # Segment page should exist + segment_files = list(tmp_path.rglob("segment.md")) + assert len(segment_files) >= 1, f"No segment.md found in {tmp_path}" + + content = segment_files[0].read_text() + assert "# Segment" in content + assert "subtype" in content + + +class TestReverseReferences: + """Integration test: Reverse references appear in generated markdown.""" + + def test_used_by_sections_appear_in_markdown( + self, cli_runner: CliRunner, tmp_path: Path + ) -> None: + """Generate markdown and verify Used By sections appear.""" + result = cli_runner.invoke( + cli, + [ + "generate", + "--format", + "markdown", + "--theme", + "buildings", + "--output-dir", + str(tmp_path), + ], + ) + assert result.exit_code == 
0 + + # Find a supplementary type that should have Used By section + # For example, if Building references some enum or NewType + all_md = list(tmp_path.rglob("*.md")) + + # At least one supplementary type should have a Used By section + has_used_by = False + for md_file in all_md: + content = md_file.read_text() + if "## Used By" in content: + has_used_by = True + break + + assert has_used_by, "No 'Used By' sections found in any generated markdown" diff --git a/packages/overture-schema-codegen/tests/test_constraint_description.py b/packages/overture-schema-codegen/tests/test_constraint_description.py new file mode 100644 index 000000000..9961ef2b2 --- /dev/null +++ b/packages/overture-schema-codegen/tests/test_constraint_description.py @@ -0,0 +1,458 @@ +"""Tests for constraint description (model-level and field-level).""" + +from annotated_types import Ge, Gt, Interval, Le, Lt, MaxLen, MinLen +from overture.schema.codegen.extraction.field_constraints import ( + constraint_display_text, + describe_field_constraint, +) +from overture.schema.codegen.extraction.model_constraints import ( + analyze_model_constraints, +) +from overture.schema.codegen.extraction.specs import TypeIdentity +from overture.schema.codegen.extraction.type_analyzer import ConstraintSource +from overture.schema.system.model_constraint import ( + FieldEqCondition, + ForbidIfConstraint, + MinFieldsSetConstraint, + ModelConstraint, + NoExtraFieldsConstraint, + Not, + RadioGroupConstraint, + RequireAnyOfConstraint, + RequireIfConstraint, +) +from overture.schema.system.primitive import GeometryType, GeometryTypeConstraint +from overture.schema.system.ref import Reference, Relationship +from overture.schema.system.ref.id import Identified + + +def describe_model_constraints( + constraints: tuple[ModelConstraint, ...], +) -> list[str]: + descriptions, _ = analyze_model_constraints(constraints) + return descriptions + + +def field_constraint_notes( + constraints: tuple[ModelConstraint, ...], +) -> 
dict[str, list[str]]: + _, field_notes = analyze_model_constraints(constraints) + return field_notes + + +class TestDescribeSingleConstraint: + """Each constraint type produces readable prose.""" + + def test_require_any_of(self) -> None: + constraint = RequireAnyOfConstraint._create_internal( + "@require_any_of", "name", "description" + ) + result = describe_model_constraints((constraint,)) + + assert result == ["At least one of `name`, `description` must be set"] + + def test_radio_group(self) -> None: + constraint = RadioGroupConstraint._create_internal( + "@radio_group", "is_land", "is_territorial" + ) + result = describe_model_constraints((constraint,)) + + assert result == ["Exactly one of `is_land`, `is_territorial` must be `true`"] + + def test_min_fields_set(self) -> None: + constraint = MinFieldsSetConstraint._create_internal("@min_fields_set", 3) + result = describe_model_constraints((constraint,)) + + assert result == ["At least 3 fields must be set"] + + def test_require_if_field_eq(self) -> None: + constraint = RequireIfConstraint._create_internal( + "@require_if", ["admin_level"], FieldEqCondition("subtype", "country") + ) + result = describe_model_constraints((constraint,)) + + assert result == ["`admin_level` is required when `subtype` = `country`"] + + def test_require_if_negated_condition(self) -> None: + """Not(FieldEqCondition) uses not-equal sign.""" + constraint = RequireIfConstraint._create_internal( + "@require_if", + ["parent_division_id"], + Not(FieldEqCondition("subtype", "country")), + ) + result = describe_model_constraints((constraint,)) + + assert result == ["`parent_division_id` is required when `subtype` ≠ `country`"] + + def test_forbid_if_field_eq(self) -> None: + constraint = ForbidIfConstraint._create_internal( + "@forbid_if", + ["parent_division_id"], + FieldEqCondition("subtype", "country"), + ) + result = describe_model_constraints((constraint,)) + + assert result == [ + "`parent_division_id` is forbidden when `subtype` = 
`country`" + ] + + def test_multi_field_uses_plural_verb(self) -> None: + """Multiple field names produce 'are required', not 'is required'.""" + constraint = RequireIfConstraint._create_internal( + "@require_if", + ["foo", "bar"], + FieldEqCondition("flag", "on"), + ) + result = describe_model_constraints((constraint,)) + + assert result == ["`foo`, `bar` are required when `flag` = `on`"] + + +class TestDescribeFiltering: + """Filtering and fallback behavior.""" + + def test_no_extra_fields_filtered_out(self) -> None: + """@no_extra_fields produces no output.""" + constraint = NoExtraFieldsConstraint._create_internal("@no_extra_fields") + result = describe_model_constraints((constraint,)) + + assert result == [] + + def test_unknown_constraint_uses_name_fallback(self) -> None: + """Unrecognized constraint type falls back to constraint.name.""" + + class FutureConstraint(ModelConstraint): + pass + + constraint = FutureConstraint("@future_thing") + result = describe_model_constraints((constraint,)) + + assert result == ["`@future_thing`"] + + +class TestConsolidation: + """Consolidation of same-field conditional constraints.""" + + def test_consolidate_require_if_same_field(self) -> None: + """Multiple @require_if with same fields, different FieldEqCondition values, merge.""" + constraints = tuple( + RequireIfConstraint._create_internal( + "@require_if", + ["admin_level"], + FieldEqCondition("subtype", val), + ) + for val in ("country", "dependency", "macroregion") + ) + result = describe_model_constraints(constraints) + + assert result == [ + "`admin_level` is required when `subtype` is one of: " + "`country`, `dependency`, `macroregion`" + ] + + def test_no_consolidation_for_different_fields(self) -> None: + """@require_if with different field_names are not consolidated.""" + c1 = RequireIfConstraint._create_internal( + "@require_if", ["foo"], FieldEqCondition("flag", "a") + ) + c2 = RequireIfConstraint._create_internal( + "@require_if", ["bar"], 
FieldEqCondition("flag", "b") + ) + result = describe_model_constraints((c1, c2)) + + assert len(result) == 2 + + def test_no_consolidation_for_negated_conditions(self) -> None: + """Negated conditions are not consolidated.""" + c1 = RequireIfConstraint._create_internal( + "@require_if", ["foo"], Not(FieldEqCondition("flag", "a")) + ) + c2 = RequireIfConstraint._create_internal( + "@require_if", ["foo"], Not(FieldEqCondition("flag", "b")) + ) + result = describe_model_constraints((c1, c2)) + + assert len(result) == 2 + + def test_consolidate_forbid_if_same_field(self) -> None: + """Multiple @forbid_if with same fields also consolidate.""" + constraints = tuple( + ForbidIfConstraint._create_internal( + "@forbid_if", + ["secret"], + FieldEqCondition("role", val), + ) + for val in ("guest", "anonymous") + ) + result = describe_model_constraints(constraints) + + assert result == [ + "`secret` is forbidden when `role` is one of: `guest`, `anonymous`" + ] + + +class TestMixedConstraints: + """End-to-end with mixed constraint types.""" + + def test_division_like_model(self) -> None: + """Mixed constraints render in declaration order with consolidation.""" + constraints = ( + RequireAnyOfConstraint._create_internal("@require_any_of", "foo", "bar"), + ForbidIfConstraint._create_internal( + "@forbid_if", + ["parent_id"], + FieldEqCondition("subtype", "country"), + ), + RequireIfConstraint._create_internal( + "@require_if", + ["parent_id"], + Not(FieldEqCondition("subtype", "country")), + ), + RequireIfConstraint._create_internal( + "@require_if", + ["level"], + FieldEqCondition("subtype", "country"), + ), + RequireIfConstraint._create_internal( + "@require_if", + ["level"], + FieldEqCondition("subtype", "region"), + ), + RadioGroupConstraint._create_internal("@radio_group", "is_land", "is_sea"), + ) + result = describe_model_constraints(constraints) + + assert result == [ + "At least one of `foo`, `bar` must be set", + "`parent_id` is forbidden when `subtype` = `country`", + 
"`parent_id` is required when `subtype` ≠ `country`", + "`level` is required when `subtype` is one of: `country`, `region`", + "Exactly one of `is_land`, `is_sea` must be `true`", + ] + + +class TestFieldConstraintNotes: + """field_constraint_notes maps field names to their constraint descriptions.""" + + def test_require_any_of_maps_all_fields(self) -> None: + """RequireAnyOfConstraint maps each field name to the description.""" + constraint = RequireAnyOfConstraint._create_internal( + "@require_any_of", "name", "description" + ) + result = field_constraint_notes((constraint,)) + + expected = "At least one of `name`, `description` must be set" + assert result == {"name": [expected], "description": [expected]} + + def test_require_if_includes_condition_field(self) -> None: + """RequireIfConstraint includes both constrained and condition fields.""" + constraint = RequireIfConstraint._create_internal( + "@require_if", ["admin_level"], FieldEqCondition("subtype", "country") + ) + result = field_constraint_notes((constraint,)) + + expected = "`admin_level` is required when `subtype` = `country`" + assert result["admin_level"] == [expected] + assert result["subtype"] == [expected] + + def test_forbid_if_with_negated_condition_includes_condition_field(self) -> None: + """ForbidIfConstraint with Not(FieldEqCondition) includes condition field.""" + constraint = ForbidIfConstraint._create_internal( + "@forbid_if", + ["parent_id"], + Not(FieldEqCondition("subtype", "country")), + ) + result = field_constraint_notes((constraint,)) + + expected = "`parent_id` is forbidden when `subtype` ≠ `country`" + assert result["parent_id"] == [expected] + assert result["subtype"] == [expected] + + def test_consolidated_constraints_map_all_fields(self) -> None: + """Consolidated constraints map to all participating fields.""" + constraints = tuple( + RequireIfConstraint._create_internal( + "@require_if", + ["admin_level"], + FieldEqCondition("subtype", val), + ) + for val in ("country", 
"dependency") + ) + result = field_constraint_notes(constraints) + + expected = ( + "`admin_level` is required when `subtype` is one of: " + "`country`, `dependency`" + ) + assert result["admin_level"] == [expected] + assert result["subtype"] == [expected] + + def test_no_extra_fields_produces_no_annotations(self) -> None: + """NoExtraFieldsConstraint produces no field annotations.""" + constraint = NoExtraFieldsConstraint._create_internal("@no_extra_fields") + result = field_constraint_notes((constraint,)) + + assert result == {} + + def test_min_fields_set_produces_no_annotations(self) -> None: + """MinFieldsSetConstraint produces no field annotations.""" + constraint = MinFieldsSetConstraint._create_internal("@min_fields_set", 3) + result = field_constraint_notes((constraint,)) + + assert result == {} + + def test_radio_group_maps_all_fields(self) -> None: + """RadioGroupConstraint maps each field name to the description.""" + constraint = RadioGroupConstraint._create_internal( + "@radio_group", "is_land", "is_sea" + ) + result = field_constraint_notes((constraint,)) + + expected = "Exactly one of `is_land`, `is_sea` must be `true`" + assert result == {"is_land": [expected], "is_sea": [expected]} + + def test_multiple_constraints_on_one_field(self) -> None: + """Field appearing in multiple constraints gets all descriptions.""" + c1 = RequireAnyOfConstraint._create_internal( + "@require_any_of", "name", "description" + ) + c2 = RequireIfConstraint._create_internal( + "@require_if", ["name"], FieldEqCondition("subtype", "venue") + ) + result = field_constraint_notes((c1, c2)) + + assert len(result["name"]) == 2 + + +class TestDescribeFieldConstraint: + """Tests for describe_field_constraint readable output.""" + + def test_ge(self) -> None: + assert describe_field_constraint(Ge(ge=0)) == "`≥ 0`" + + def test_le(self) -> None: + assert describe_field_constraint(Le(le=100)) == "`≤ 100`" + + def test_gt(self) -> None: + assert describe_field_constraint(Gt(gt=0)) == 
"`> 0`" + + def test_lt(self) -> None: + assert describe_field_constraint(Lt(lt=100)) == "`< 100`" + + def test_min_len(self) -> None: + assert describe_field_constraint(MinLen(min_length=1)) == "Minimum length: 1" + + def test_max_len(self) -> None: + assert describe_field_constraint(MaxLen(max_length=10)) == "Maximum length: 10" + + def test_interval_closed(self) -> None: + assert describe_field_constraint(Interval(ge=0, le=100)) == "`0 ≤ x ≤ 100`" + + def test_interval_open(self) -> None: + assert describe_field_constraint(Interval(gt=0, lt=100)) == "`0 < x < 100`" + + def test_interval_half_open(self) -> None: + assert describe_field_constraint(Interval(ge=0, lt=100)) == "`0 ≤ x < 100`" + + def test_interval_lower_only(self) -> None: + assert describe_field_constraint(Interval(ge=0)) == "`≥ 0`" + + def test_interval_upper_only(self) -> None: + assert describe_field_constraint(Interval(le=100)) == "`≤ 100`" + + def test_geometry_type_single(self) -> None: + constraint = GeometryTypeConstraint(GeometryType.POINT) + assert describe_field_constraint(constraint) == "Allowed geometry types: Point" + + def test_geometry_type_multiple(self) -> None: + constraint = GeometryTypeConstraint(GeometryType.POINT, GeometryType.POLYGON) + assert ( + describe_field_constraint(constraint) + == "Allowed geometry types: Point, Polygon" + ) + + def test_geometry_type_all_types(self) -> None: + constraint = GeometryTypeConstraint( + GeometryType.POINT, + GeometryType.LINE_STRING, + GeometryType.POLYGON, + ) + assert ( + describe_field_constraint(constraint) + == "Allowed geometry types: LineString, Point, Polygon" + ) + + def test_reference_belongs_to(self) -> None: + class Target(Identified): + pass + + constraint = Reference(Relationship.BELONGS_TO, Target) + assert ( + describe_field_constraint(constraint) == "References `Target` (belongs to)" + ) + + def test_reference_connects_to(self) -> None: + class Other(Identified): + pass + + constraint = 
Reference(Relationship.CONNECTS_TO, Other) + assert ( + describe_field_constraint(constraint) == "References `Other` (connects to)" + ) + + def test_reference_link_fn_receives_type_identity(self) -> None: + """link_fn callback receives TypeIdentity wrapping the relatee class.""" + + class Target(Identified): + pass + + received: list[TypeIdentity] = [] + + def link_fn(tid: TypeIdentity) -> str: + received.append(tid) + return f"[`{tid.name}`](link)" + + constraint = Reference(Relationship.BELONGS_TO, Target) + result = describe_field_constraint(constraint, link_fn=link_fn) + + assert len(received) == 1 + assert received[0].obj is Target + assert received[0].name == "Target" + assert result == "References [`Target`](link) (belongs to)" + + def test_reference_link_fn_used_in_output(self) -> None: + """link_fn return value appears verbatim in the description.""" + + class Target(Identified): + pass + + constraint = Reference(Relationship.CONNECTS_TO, Target) + result = describe_field_constraint( + constraint, link_fn=lambda tid: f"[`{tid.name}`](path/to/target)" + ) + assert result == "References [`Target`](path/to/target) (connects to)" + + +class TestConstraintDisplayText: + """constraint_display_text forwards link_fn to describe_field_constraint.""" + + def test_link_fn_forwarded_to_reference_constraint(self) -> None: + """link_fn is forwarded when constraint is a Reference.""" + + class Target(Identified): + pass + + constraint = Reference(Relationship.BELONGS_TO, Target) + cs = ConstraintSource(source_ref=None, source_name=None, constraint=constraint) + + received: list[TypeIdentity] = [] + + def link_fn(tid: TypeIdentity) -> str: + received.append(tid) + return f"[`{tid.name}`](link)" + + result = constraint_display_text(cs, link_fn=link_fn) + + assert len(received) == 1 + assert received[0].obj is Target + assert result == "References [`Target`](link) (belongs to)" diff --git a/packages/overture-schema-codegen/tests/test_enum_extraction.py 
b/packages/overture-schema-codegen/tests/test_enum_extraction.py new file mode 100644 index 000000000..2e5367e3b --- /dev/null +++ b/packages/overture-schema-codegen/tests/test_enum_extraction.py @@ -0,0 +1,149 @@ +"""Tests for enum extraction.""" + +from enum import Enum + +from codegen_test_support import find_member +from overture.schema.codegen.extraction.enum_extraction import extract_enum +from overture.schema.codegen.extraction.specs import EnumMemberSpec, EnumSpec +from overture.schema.system.doc import DocumentedEnum + + +class TestEnumMemberSpec: + """Tests for EnumMemberSpec dataclass.""" + + def test_stores_name_value_description(self) -> None: + """EnumMemberSpec should store name, value, and description.""" + member = EnumMemberSpec( + name="GABLED", value="gabled", description="A gabled roof" + ) + + assert member.name == "GABLED" + assert member.value == "gabled" + assert member.description == "A gabled roof" + + def test_description_can_be_none(self) -> None: + """EnumMemberSpec description should be optional.""" + member = EnumMemberSpec(name="FLAT", value="flat", description=None) + + assert member.description is None + + +class TestEnumSpec: + """Tests for EnumSpec dataclass.""" + + def test_stores_name_description_members(self) -> None: + """EnumSpec should store name, description, and members list.""" + members = [ + EnumMemberSpec(name="A", value="a", description=None), + EnumMemberSpec(name="B", value="b", description="The letter B"), + ] + + spec = EnumSpec( + name="Letters", description="A collection of letters", members=members + ) + + assert spec.name == "Letters" + assert spec.description == "A collection of letters" + assert len(spec.members) == 2 + + +class TestExtractEnumSimple: + """Tests for extract_enum with simple str Enum classes.""" + + def test_extracts_simple_str_enum(self) -> None: + """Should extract name, description, and members from simple str Enum.""" + + class RoofShape(str, Enum): + """The shape of the roof.""" + + 
FLAT = "flat" + GABLED = "gabled" + DOMED = "dome" + + result = extract_enum(RoofShape) + + assert result.name == "RoofShape" + assert result.description == "The shape of the roof." + assert len(result.members) == 3 + + # Check member extraction + flat = find_member(result, "FLAT") + assert flat.value == "flat" + assert flat.description is None + + gabled = find_member(result, "GABLED") + assert gabled.value == "gabled" + + def test_enum_without_docstring(self) -> None: + """Should handle enum without docstring.""" + + class SimpleEnum(str, Enum): + A = "a" + B = "b" + + result = extract_enum(SimpleEnum) + + assert result.name == "SimpleEnum" + assert result.description is None + + +class TestExtractEnumDocumented: + """Tests for extract_enum with DocumentedEnum classes.""" + + def test_extracts_documented_enum_with_member_descriptions(self) -> None: + """Should extract per-member descriptions from DocumentedEnum.""" + + class Side(str, DocumentedEnum): + """The side on which something appears.""" + + LEFT = ("left", "On the left side") + RIGHT = ("right", "On the right side") + + result = extract_enum(Side) + + assert result.name == "Side" + assert result.description == "The side on which something appears." 
+ assert len(result.members) == 2 + + left = find_member(result, "LEFT") + assert left.value == "left" + assert left.description == "On the left side" + + right = find_member(result, "RIGHT") + assert right.value == "right" + assert right.description == "On the right side" + + def test_documented_enum_with_mixed_documentation(self) -> None: + """DocumentedEnum can have some members documented and others not.""" + + class ConnectionState(str, DocumentedEnum): + """Connection states.""" + + CONNECTED = "connected" + DISCONNECTED = "disconnected" + QUIESCING = ("quiescing", "Gracefully shutting down") + + result = extract_enum(ConnectionState) + + connected = find_member(result, "CONNECTED") + assert connected.value == "connected" + assert connected.description is None + + quiescing = find_member(result, "QUIESCING") + assert quiescing.value == "quiescing" + assert quiescing.description == "Gracefully shutting down" + + +class TestEnumSpecSourceType: + """Tests for source_type on EnumSpec.""" + + def test_enum_spec_source_type_defaults_to_none(self) -> None: + spec = EnumSpec(name="Test", description=None) + assert spec.source_type is None + + def test_extract_enum_sets_source_type(self) -> None: + class Color(str, Enum): + RED = "red" + + spec = extract_enum(Color) + assert spec.source_type is Color diff --git a/packages/overture-schema-codegen/tests/test_example_loader.py b/packages/overture-schema-codegen/tests/test_example_loader.py new file mode 100644 index 000000000..19a562676 --- /dev/null +++ b/packages/overture-schema-codegen/tests/test_example_loader.py @@ -0,0 +1,1006 @@ +"""Tests for example_loader module.""" + +import logging +import sys +import types +from collections.abc import Iterator +from pathlib import Path +from textwrap import dedent +from typing import Annotated, Literal + +import pytest +from overture.schema.codegen.extraction.examples import ( + ExampleRecord, + _denull, + _inject_literal_fields, + collect_dict_paths, + flatten_example, + 
load_examples, + load_examples_from_toml, + order_example_rows, + resolve_pyproject_path, + validate_example, +) +from overture.schema.codegen.extraction.specs import FieldSpec, ModelSpec +from overture.schema.codegen.extraction.type_analyzer import TypeInfo, TypeKind +from pydantic import BaseModel, ConfigDict, Field, Tag, ValidationError + + +class TestFlattenExample: + """Tests for flatten_example function.""" + + def test_simple_fields(self) -> None: + """Flatten simple key-value pairs.""" + raw = {"id": "123", "version": 1, "name": "test"} + result = flatten_example(raw) + assert result == [("id", "123"), ("version", 1), ("name", "test")] + + def test_nested_dict(self) -> None: + """Flatten nested dict to dot notation.""" + raw = {"names": {"primary": "foo", "common": {"en": "bar"}}} + result = flatten_example(raw) + assert result == [ + ("names.primary", "foo"), + ("names.common.en", "bar"), + ] + + def test_list_of_dicts(self) -> None: + """Flatten list of dicts with array notation.""" + raw = {"sources": [{"dataset": "OSM", "record_id": "w123"}]} + result = flatten_example(raw) + assert result == [ + ("sources[0].dataset", "OSM"), + ("sources[0].record_id", "w123"), + ] + + def test_bbox_flattened_at_top_level(self) -> None: + """Bbox fields are flattened like any other nested dict.""" + raw = { + "id": "123", + "bbox": {"xmin": -176.6, "xmax": -176.64}, + "version": 1, + } + result = flatten_example(raw) + assert result == [ + ("id", "123"), + ("bbox.xmin", -176.6), + ("bbox.xmax", -176.64), + ("version", 1), + ] + + def test_plain_list_kept_as_value(self) -> None: + """Plain lists (non-dict items) are kept as values.""" + raw = {"phones": ["+1234", "+5678"]} + result = flatten_example(raw) + assert result == [("phones", ["+1234", "+5678"])] + + def test_empty_dict(self) -> None: + """Empty dict produces empty list.""" + raw: dict[str, object] = {} + result = flatten_example(raw) + assert result == [] + + def test_empty_list(self) -> None: + """Empty list 
is kept as value.""" + raw: dict[str, object] = {"tags": []} + result = flatten_example(raw) + assert result == [("tags", [])] + + def test_list_of_list_of_dicts(self) -> None: + """Flatten list[list[dict]] with double-index notation.""" + raw = { + "hierarchies": [ + [ + {"division_id": "aaa", "name": "Country"}, + {"division_id": "bbb", "name": "Region"}, + ], + ] + } + result = flatten_example(raw) + assert result == [ + ("hierarchies[0][0].division_id", "aaa"), + ("hierarchies[0][0].name", "Country"), + ("hierarchies[0][1].division_id", "bbb"), + ("hierarchies[0][1].name", "Region"), + ] + + def test_multiple_list_items(self) -> None: + """Handle multiple items in list of dicts.""" + raw = { + "sources": [ + {"dataset": "OSM", "confidence": 0.9}, + {"dataset": "MSFT", "confidence": 0.8}, + ] + } + result = flatten_example(raw) + assert result == [ + ("sources[0].dataset", "OSM"), + ("sources[0].confidence", 0.9), + ("sources[1].dataset", "MSFT"), + ("sources[1].confidence", 0.8), + ] + + def test_dict_field_kept_as_leaf(self) -> None: + """Dict values at dict_paths are kept as leaf values.""" + raw = { + "name": "test", + "tags": {"color": "red", "size": "large"}, + } + result = flatten_example(raw, dict_paths=frozenset({"tags"})) + assert result == [ + ("name", "test"), + ("tags", {"color": "red", "size": "large"}), + ] + + def test_nested_dict_path_kept_as_leaf(self) -> None: + """Dict values at nested dict_paths are kept as leaf values.""" + raw = { + "names": { + "primary": "Tower", + "common": {"en": "Tower", "fr": "Tour"}, + }, + } + result = flatten_example(raw, dict_paths=frozenset({"names.common"})) + assert result == [ + ("names.primary", "Tower"), + ("names.common", {"en": "Tower", "fr": "Tour"}), + ] + + def test_empty_dict_paths_preserves_behavior(self) -> None: + """Empty dict_paths (default) recurses all dicts as before.""" + raw = {"tags": {"color": "red"}} + result = flatten_example(raw) + assert result == [("tags.color", "red")] + + def 
test_dict_inside_list_kept_as_leaf(self) -> None: + """Dict at indexed path matches schema path in dict_paths.""" + raw = { + "items": [ + {"name": "a", "tags": {"color": "red"}}, + {"name": "b", "tags": {"size": "large"}}, + ], + } + result = flatten_example(raw, dict_paths=frozenset({"items[].tags"})) + assert result == [ + ("items[0].name", "a"), + ("items[0].tags", {"color": "red"}), + ("items[1].name", "b"), + ("items[1].tags", {"size": "large"}), + ] + + +class TestOrderExampleRows: + """Tests for order_example_rows function.""" + + def test_order_by_field_names(self) -> None: + """Order rows by position in field_names.""" + flat_rows = [("version", 1), ("id", "123"), ("name", "test")] + field_names = ["id", "name", "version"] + result = order_example_rows(flat_rows, field_names) + assert result == [("id", "123"), ("name", "test"), ("version", 1)] + + def test_extract_base_field_from_dot_notation(self) -> None: + """Extract base field from dotted keys.""" + flat_rows = [ + ("names.primary", "foo"), + ("id", "123"), + ("names.common.en", "bar"), + ] + field_names = ["id", "names"] + result = order_example_rows(flat_rows, field_names) + assert result == [ + ("id", "123"), + ("names.primary", "foo"), + ("names.common.en", "bar"), + ] + + def test_extract_base_field_from_array_notation(self) -> None: + """Extract base field from array notation.""" + flat_rows = [ + ("sources[0].dataset", "OSM"), + ("id", "123"), + ("sources[0].record_id", "w123"), + ("sources[1].dataset", "MSFT"), + ] + field_names = ["id", "sources"] + result = order_example_rows(flat_rows, field_names) + assert result == [ + ("id", "123"), + ("sources[0].dataset", "OSM"), + ("sources[0].record_id", "w123"), + ("sources[1].dataset", "MSFT"), + ] + + def test_order_with_mixed_notation(self) -> None: + """Order rows with mixed simple, dotted, and array notation.""" + flat_rows = [ + ("version", 1), + ("sources[0].dataset", "OSM"), + ("id", "123"), + ("names.primary", "foo"), + ] + field_names = 
["id", "names", "sources", "version"] + result = order_example_rows(flat_rows, field_names) + assert result == [ + ("id", "123"), + ("names.primary", "foo"), + ("sources[0].dataset", "OSM"), + ("version", 1), + ] + + def test_unknown_fields_sort_to_end(self) -> None: + """Unknown fields sort to end, maintaining relative order.""" + flat_rows = [ + ("unknown2", "b"), + ("id", "123"), + ("unknown1", "a"), + ("version", 1), + ] + field_names = ["id", "version"] + result = order_example_rows(flat_rows, field_names) + assert result == [ + ("id", "123"), + ("version", 1), + ("unknown2", "b"), + ("unknown1", "a"), + ] + + +class TestLoadExamplesFromToml: + """Tests for load_examples_from_toml function.""" + + def test_load_example_list(self, tmp_path: Path) -> None: + """Load examples for a model from TOML.""" + toml_path = tmp_path / "pyproject.toml" + toml_path.write_text( + dedent(""" + [project] + name = "test-package" + + [[examples.Building]] + id = "123" + version = 1 + + [[examples.Building]] + id = "456" + version = 2 + """) + ) + + result = load_examples_from_toml(toml_path, "Building") + assert len(result) == 2 + assert result[0] == {"id": "123", "version": 1} + assert result[1] == {"id": "456", "version": 2} + + def test_model_not_found_returns_empty(self, tmp_path: Path) -> None: + """Return empty list when model has no examples.""" + toml_path = tmp_path / "pyproject.toml" + toml_path.write_text( + dedent(""" + [project] + name = "test-package" + + [[examples.Building]] + id = "123" + """) + ) + + result = load_examples_from_toml(toml_path, "Road") + assert result == [] + + def test_no_examples_section_returns_empty(self, tmp_path: Path) -> None: + """Return empty list when no examples section exists.""" + toml_path = tmp_path / "pyproject.toml" + toml_path.write_text( + dedent(""" + [project] + name = "test-package" + """) + ) + + result = load_examples_from_toml(toml_path, "Building") + assert result == [] + + +class MockProject: + """A temporary project 
directory with registered mock modules.""" + + def __init__(self, root: Path, pyproject: Path, mod_name: str) -> None: + self.root = root + self.pyproject = pyproject + self.mod_name = mod_name + self._registered_modules: list[str] = [mod_name] + + def write_pyproject(self, content: str) -> None: + self.pyproject.write_text(content) + + def add_submodule(self, *subdirs: str) -> str: + """Register a deeper module under this project's src directory. + + Returns the module name for use in __module__ attributes. + """ + pkg_dir = self.root / "src" / Path(*subdirs) + pkg_dir.mkdir(parents=True, exist_ok=True) + module_file = pkg_dir / "module.py" + module_file.write_text("# module") + + sub_mod_name = f"{self.mod_name}_{'_'.join(subdirs)}" + mod = types.ModuleType(sub_mod_name) + mod.__file__ = str(module_file) + sys.modules[sub_mod_name] = mod + self._registered_modules.append(sub_mod_name) + return sub_mod_name + + def cleanup(self) -> None: + for name in self._registered_modules: + sys.modules.pop(name, None) + + +@pytest.fixture +def mock_project(tmp_path: Path) -> Iterator[MockProject]: + """Create a project directory with a mock module registered in sys.modules. + + Yields a MockProject with root, pyproject path, and mod_name. + Writes a minimal pyproject.toml by default; tests can overwrite via + ``project.write_pyproject()``. 
+ """ + root = tmp_path / "project" + root.mkdir() + pyproject = root / "pyproject.toml" + pyproject.write_text("[project]\nname = 'test'") + + src_dir = root / "src" + src_dir.mkdir() + module_file = src_dir / "module.py" + module_file.write_text("# module") + + mod_name = f"_test_mock_{id(tmp_path)}" + mod = types.ModuleType(mod_name) + mod.__file__ = str(module_file) + sys.modules[mod_name] = mod + + project = MockProject(root=root, pyproject=pyproject, mod_name=mod_name) + yield project + project.cleanup() + + +class TestResolvePyprojectPath: + """Tests for resolve_pyproject_path function.""" + + def test_finds_pyproject_in_parent_dirs(self, mock_project: MockProject) -> None: + """Walk up from module location to find pyproject.toml.""" + deeper_mod = mock_project.add_submodule("pkg") + + class MockModel: + __module__ = deeper_mod + + result = resolve_pyproject_path(MockModel) + assert result == mock_project.pyproject + + def test_returns_none_when_not_found(self, tmp_path: Path) -> None: + """Return None when pyproject.toml doesn't exist.""" + module_dir = tmp_path / "src" + module_dir.mkdir() + module_file = module_dir / "module.py" + module_file.write_text("# module") + + mod_name = f"_test_resolve_nf_{id(tmp_path)}" + mod = types.ModuleType(mod_name) + mod.__file__ = str(module_file) + sys.modules[mod_name] = mod + try: + + class MockModel: + __module__ = mod_name + + result = resolve_pyproject_path(MockModel) + assert result is None + finally: + sys.modules.pop(mod_name, None) + + def test_returns_none_when_no_module(self) -> None: + """Return None when model's module is not in sys.modules.""" + + class MockModel: + __module__ = "_nonexistent_module_for_test" + + result = resolve_pyproject_path(MockModel) + assert result is None + + +class TestLoadExamples: + """Tests for load_examples entry point.""" + + def test_end_to_end(self, mock_project: MockProject) -> None: + """Load, flatten, and order examples end-to-end.""" + mock_project.write_pyproject( + 
dedent(""" + [project] + name = "test" + + [[examples.Building]] + version = 1 + names = { primary = "Tower" } + id = "123" + + [examples.Building.bbox] + xmin = 1.0 + xmax = 2.0 + + [[examples.Building.sources]] + dataset = "OSM" + record_id = "w456" + """) + ) + + class MockModel(BaseModel): + __module__ = mock_project.mod_name + id: str + version: int + names: dict[str, object] + sources: list[dict[str, object]] + + field_names = ["id", "bbox", "names", "sources", "version"] + result = load_examples(MockModel, "Building", field_names) + + assert len(result) == 1 + record = result[0] + assert isinstance(record, ExampleRecord) + + assert record.rows == [ + ("id", "123"), + ("bbox.xmin", 1.0), + ("bbox.xmax", 2.0), + ("names.primary", "Tower"), + ("sources[0].dataset", "OSM"), + ("sources[0].record_id", "w456"), + ("version", 1), + ] + + def test_returns_empty_on_missing_pyproject(self) -> None: + """Return empty list when model's module not in sys.modules.""" + + class MockModel(BaseModel): + __module__ = "_nonexistent_module_for_load_test" + + result = load_examples(MockModel, "Building", ["id"]) + assert result == [] + + def test_returns_empty_on_missing_model(self, mock_project: MockProject) -> None: + """Return empty list when model has no examples.""" + + class MockModel(BaseModel): + __module__ = mock_project.mod_name + + result = load_examples(MockModel, "Building", ["id"]) + assert result == [] + + def test_invalid_examples_skipped_with_warning( + self, mock_project: MockProject, caplog: pytest.LogCaptureFixture + ) -> None: + """Invalid examples are skipped and warning logged.""" + mock_project.write_pyproject( + dedent(""" + [project] + name = "test" + + [[examples.MockModel]] + name = "valid" + count = 1 + + [[examples.MockModel]] + name = "invalid" + count = "not_an_int" + + [[examples.MockModel]] + name = "also_valid" + count = 2 + """) + ) + + class MockModel(BaseModel): + __module__ = mock_project.mod_name + name: str + count: int + + 
caplog.set_level(logging.WARNING) + + result = load_examples(MockModel, "MockModel", ["name", "count"]) + + assert len(result) == 2 + assert result[0].rows == [("name", "valid"), ("count", 1)] + assert result[1].rows == [("name", "also_valid"), ("count", 2)] + + assert any( + "MockModel" in record.message + and "validation" in record.message.lower() + and str(mock_project.pyproject) in record.message + for record in caplog.records + ) + + def test_dict_paths_keep_dicts_as_leaves(self, mock_project: MockProject) -> None: + """Dict fields listed in dict_paths stay as leaf values.""" + mock_project.write_pyproject( + dedent(""" + [project] + name = "test" + + [[examples.MockModel]] + name = "Tower" + + [examples.MockModel.tags] + color = "red" + size = "large" + """) + ) + + class MockModel(BaseModel): + __module__ = mock_project.mod_name + name: str + tags: dict[str, str] + + result = load_examples( + MockModel, + "MockModel", + ["name", "tags"], + dict_paths=frozenset({"tags"}), + ) + + assert len(result) == 1 + assert result[0].rows == [ + ("name", "Tower"), + ("tags", {"color": "red", "size": "large"}), + ] + + def test_denulled_values_in_output(self, mock_project: MockProject) -> None: + """Flattened output contains None not "null" strings.""" + mock_project.write_pyproject( + dedent(""" + [project] + name = "test" + + [[examples.MockModel]] + name = "test" + value = "null" + """) + ) + + class MockModel(BaseModel): + __module__ = mock_project.mod_name + name: str + value: int | None + + result = load_examples(MockModel, "MockModel", ["name", "value"]) + + assert len(result) == 1 + assert result[0].rows == [("name", "test"), ("value", None)] + + +class TestDenull: + """Tests for _denull function.""" + + def test_converts_null_string_to_none(self) -> None: + """Top-level "null" strings become None.""" + assert _denull({"a": "null"}) == {"a": None} + + def test_nested_dict(self) -> None: + """Recurse into nested dicts.""" + data = {"a": {"b": "null"}} + assert 
_denull(data) == {"a": {"b": None}} + + def test_list_of_dicts(self) -> None: + """Recurse into dicts inside lists.""" + data = {"items": [{"x": "null"}]} + assert _denull(data) == {"items": [{"x": None}]} + + def test_mixed_types_unchanged(self) -> None: + """Non-"null" strings, ints, bools, and plain lists pass through.""" + data = { + "name": "hello", + "count": 42, + "flag": True, + "tags": ["a", "b"], + "score": 3.14, + } + assert _denull(data) == data + + def test_no_mutation(self) -> None: + """Original dict is not modified.""" + original = {"a": "null", "b": {"c": "null"}} + _denull(original) + assert original == {"a": "null", "b": {"c": "null"}} + + def test_empty_dict(self) -> None: + """Empty dict returns empty dict.""" + assert _denull({}) == {} + + def test_deeply_nested(self) -> None: + """Handle multiple levels of nesting.""" + data = {"a": {"b": {"c": "null"}}} + assert _denull(data) == {"a": {"b": {"c": None}}} + + def test_null_strings_in_plain_list(self) -> None: + """Convert "null" strings inside plain lists.""" + data = {"tags": ["a", "null", "b"]} + assert _denull(data) == {"tags": ["a", None, "b"]} + + +class TestInjectLiteralFields: + """Tests for _inject_literal_fields function.""" + + def test_injects_single_value_literal(self) -> None: + """Inject field with single-value Literal annotation.""" + + class MockModel(BaseModel): + theme: Literal["buildings"] + name: str + + data = {"name": "Tower"} + result = _inject_literal_fields(MockModel.model_fields, data) + assert result == {"name": "Tower", "theme": "buildings"} + + def test_skips_non_literal_field(self) -> None: + """Do not inject fields without Literal annotations.""" + + class MockModel(BaseModel): + name: str + count: int + + data = {"name": "Tower"} + result = _inject_literal_fields(MockModel.model_fields, data) + assert result == {"name": "Tower"} + + def test_skips_already_present_field(self) -> None: + """Do not overwrite fields already in data.""" + + class 
MockModel(BaseModel): + theme: Literal["buildings"] + name: str + + data = {"theme": "custom", "name": "Tower"} + result = _inject_literal_fields(MockModel.model_fields, data) + assert result == {"theme": "custom", "name": "Tower"} + + def test_respects_validation_alias(self) -> None: + """Use validation_alias when injecting.""" + + class MockModel(BaseModel): + class_: Literal["building"] = Field(validation_alias="class") + name: str + + data = {"name": "Tower"} + result = _inject_literal_fields(MockModel.model_fields, data) + assert result == {"name": "Tower", "class": "building"} + + def test_no_mutation(self) -> None: + """Original data dict is not modified.""" + + class MockModel(BaseModel): + theme: Literal["buildings"] + name: str + + data = {"name": "Tower"} + original_data = data.copy() + _inject_literal_fields(MockModel.model_fields, data) + assert data == original_data + + def test_multiple_literal_fields(self) -> None: + """Inject multiple Literal fields.""" + + class MockModel(BaseModel): + theme: Literal["buildings"] + type: Literal["building"] + name: str + + data = {"name": "Tower"} + result = _inject_literal_fields(MockModel.model_fields, data) + assert result == {"name": "Tower", "theme": "buildings", "type": "building"} + + def test_skips_multi_value_literal(self) -> None: + """Do not inject Literal with multiple values.""" + + class MockModel(BaseModel): + status: Literal["active", "inactive"] + name: str + + data = {"name": "Tower"} + result = _inject_literal_fields(MockModel.model_fields, data) + assert result == {"name": "Tower"} + + def test_respects_alias_fallback(self) -> None: + """Fall back to alias if validation_alias not set.""" + + class MockModel(BaseModel): + class_: Literal["building"] = Field(alias="class") + name: str + + data = {"name": "Tower"} + result = _inject_literal_fields(MockModel.model_fields, data) + assert result == {"name": "Tower", "class": "building"} + + def test_unwraps_optional_literal(self) -> None: + """Inject 
Optional[Literal["x"]] fields (union-wrapped by Pydantic).""" + + class MockModel(BaseModel): + theme: Literal["buildings"] | None = None + name: str + + data = {"name": "Tower"} + result = _inject_literal_fields(MockModel.model_fields, data) + assert result == {"name": "Tower", "theme": "buildings"} + + +class TestValidateExample: + """Tests for validate_example function.""" + + def test_valid_data_passes(self) -> None: + """Valid data is validated and denulled dict returned.""" + + class MockModel(BaseModel): + name: str + count: int + + raw = {"name": "test", "count": 42} + result = validate_example(MockModel, raw) + assert result == {"name": "test", "count": 42} + + def test_invalid_data_raises_validation_error(self) -> None: + """Invalid data raises ValidationError.""" + + class MockModel(BaseModel): + count: int + + raw = {"count": "not_an_int"} + with pytest.raises(ValidationError): + validate_example(MockModel, raw) + + def test_denulled_dict_returned(self) -> None: + """Denulled dict is returned, not raw or preprocessed.""" + + class MockModel(BaseModel): + name: str + value: int | None + + raw = {"name": "test", "value": "null"} + result = validate_example(MockModel, raw) + assert result == {"name": "test", "value": None} + + def test_literals_injected_before_validation(self) -> None: + """Missing Literal fields are injected before validation.""" + + class MockModel(BaseModel): + theme: Literal["buildings"] + name: str + + raw = {"name": "Tower"} + result = validate_example(MockModel, raw) + # Returned dict is denulled, NOT preprocessed (no injected literals) + assert result == {"name": "Tower"} + + +class TestValidateExampleWithUnion: + """Tests for validate_example with discriminated unions via TypeAdapter.""" + + def test_validates_union_via_type_adapter(self) -> None: + """TypeAdapter validates against a discriminated union.""" + + class Dog(BaseModel): + kind: Literal["dog"] + bark: str + + class Cat(BaseModel): + kind: Literal["cat"] + purr: bool + 
+ PetUnion = Annotated[ + Annotated[Dog, Tag("dog")] | Annotated[Cat, Tag("cat")], + Field(discriminator="kind"), + ] + + raw = {"kind": "dog", "bark": "woof"} + result = validate_example(PetUnion, raw, model_fields=Dog.model_fields) + assert result == {"kind": "dog", "bark": "woof"} + + def test_invalid_union_example_raises(self) -> None: + """Invalid data against union raises ValidationError.""" + + class Dog(BaseModel): + kind: Literal["dog"] + bark: str + + class Cat(BaseModel): + kind: Literal["cat"] + purr: bool + + PetUnion = Annotated[ + Annotated[Dog, Tag("dog")] | Annotated[Cat, Tag("cat")], + Field(discriminator="kind"), + ] + + raw = {"kind": "dog", "bark": 42} # bark should be str + with pytest.raises(ValidationError): + validate_example(PetUnion, raw, model_fields=Dog.model_fields) + + def test_null_cross_arm_fields_accepted(self) -> None: + """Null fields from other union arms are accepted in flat-schema examples. + + Parquet files have columns for all union arms. A road segment row + includes ``rail_flags=null`` because the column exists in the table. + Validation should accept these cross-arm nulls. 
+ """ + + class _Base(BaseModel): + model_config = ConfigDict(extra="forbid") + kind: str + name: str + + class Dog(_Base): + kind: Literal["dog"] + bark: str | None = None + + class Cat(_Base): + kind: Literal["cat"] + purr: bool | None = None + + PetUnion = Annotated[ + Annotated[Dog, Tag("dog")] | Annotated[Cat, Tag("cat")], + Field(discriminator="kind"), + ] + + # Flat schema: Dog example includes Cat's "purr" field as null + raw = {"kind": "dog", "name": "Rex", "bark": "woof", "purr": "null"} + result = validate_example(PetUnion, raw, model_fields=_Base.model_fields) + # Returned dict preserves the original denulled data + assert result == { + "kind": "dog", + "name": "Rex", + "bark": "woof", + "purr": None, + } + + +class TestIntegration: + """Integration tests with real schema models.""" + + def test_real_building_examples_validate(self) -> None: + """Validate real Building examples from the schema package.""" + pytest.importorskip("overture.schema.buildings.building") + + from overture.schema.buildings.building import Building # noqa: PLC0415 + + # Find the pyproject.toml for the Building model + pyproject_path = resolve_pyproject_path(Building) + assert pyproject_path is not None, "Could not find pyproject.toml for Building" + + # Load raw examples from TOML + raw_examples = load_examples_from_toml(pyproject_path, "Building") + assert len(raw_examples) > 0, "No Building examples found in pyproject.toml" + + # Validate each example + for idx, raw_example in enumerate(raw_examples): + # Should not raise ValidationError + validated = validate_example(Building, raw_example) + assert isinstance(validated, dict), f"Example {idx}: Expected dict result" + + def test_real_segment_examples_validate(self) -> None: + """Validate real Segment examples (discriminated union with cross-arm fields).""" + pytest.importorskip("overture.schema.transportation") + + from overture.schema.transportation import Segment # noqa: PLC0415 + from 
overture.schema.transportation.segment.models import ( # noqa: PLC0415 + RoadSegment, + TransportationSegment, + ) + + pyproject_path = resolve_pyproject_path(RoadSegment) + assert pyproject_path is not None + + raw_examples = load_examples_from_toml(pyproject_path, "Segment") + assert len(raw_examples) > 0, "No Segment examples found" + + for idx, raw_example in enumerate(raw_examples): + validated = validate_example( + Segment, + raw_example, + model_fields=TransportationSegment.model_fields, + ) + assert isinstance(validated, dict), f"Example {idx}: Expected dict result" + + +def _field( + name: str, + *, + kind: TypeKind = TypeKind.PRIMITIVE, + base_type: str = "str", + is_dict: bool = False, + list_depth: int = 0, + is_required: bool = True, + model: ModelSpec | None = None, + starts_cycle: bool = False, +) -> FieldSpec: + """Build a FieldSpec with sensible defaults for testing.""" + return FieldSpec( + name=name, + type_info=TypeInfo( + base_type=base_type, kind=kind, is_dict=is_dict, list_depth=list_depth + ), + description=None, + is_required=is_required, + model=model, + starts_cycle=starts_cycle, + ) + + +class TestCollectDictPaths: + """Tests for collect_dict_paths.""" + + def test_no_dict_fields(self) -> None: + """Model with only primitive fields returns empty set.""" + fields = [_field("name")] + assert collect_dict_paths(fields) == frozenset() + + def test_top_level_dict_field(self) -> None: + """Dict field at top level is collected.""" + fields = [ + _field("name"), + _field("tags", is_dict=True, is_required=False), + ] + assert collect_dict_paths(fields) == frozenset({"tags"}) + + def test_nested_dict_in_sub_model(self) -> None: + """Dict field inside a sub-model produces dotted path.""" + inner_fields = [ + _field("primary"), + _field("common", is_dict=True, is_required=False), + ] + inner_model = ModelSpec(name="Names", description=None, fields=inner_fields) + fields = [ + _field("names", kind=TypeKind.MODEL, base_type="Names", model=inner_model) 
+ ] + assert collect_dict_paths(fields) == frozenset({"names.common"}) + + def test_list_of_model_with_dict(self) -> None: + """Dict inside list-of-model uses [] in path.""" + inner_fields = [_field("tags", is_dict=True, is_required=False)] + inner_model = ModelSpec(name="Item", description=None, fields=inner_fields) + fields = [ + _field( + "items", + kind=TypeKind.MODEL, + base_type="Item", + list_depth=1, + model=inner_model, + ), + ] + assert collect_dict_paths(fields) == frozenset({"items[].tags"}) + + def test_nested_list_depth(self) -> None: + """list[list[Model]] produces [][] in path.""" + inner_fields = [_field("tags", is_dict=True)] + inner_model = ModelSpec(name="Item", description=None, fields=inner_fields) + fields = [ + _field( + "items", + kind=TypeKind.MODEL, + base_type="Item", + list_depth=2, + model=inner_model, + ), + ] + assert collect_dict_paths(fields) == frozenset({"items[][].tags"}) + + def test_cycle_stops_recursion(self) -> None: + """Fields with starts_cycle=True are not recursed into.""" + inner_fields = [_field("data", is_dict=True, is_required=False)] + inner_model = ModelSpec(name="Node", description=None, fields=inner_fields) + fields = [ + _field( + "child", + kind=TypeKind.MODEL, + base_type="Node", + is_required=False, + model=inner_model, + starts_cycle=True, + ), + ] + assert collect_dict_paths(fields) == frozenset() diff --git a/packages/overture-schema-codegen/tests/test_golden_markdown.py b/packages/overture-schema-codegen/tests/test_golden_markdown.py new file mode 100644 index 000000000..42320ee69 --- /dev/null +++ b/packages/overture-schema-codegen/tests/test_golden_markdown.py @@ -0,0 +1,130 @@ +"""Golden-file snapshot tests for Markdown renderer output.""" + +from enum import Enum +from pathlib import Path + +import pytest +from codegen_test_support import ( + CommonNames, + FeatureWithAddress, + FeatureWithDict, + FeatureWithSources, + HexColor, + Id, + Instrument, + InstrumentFamily, + SimpleKind, + Sources, + Venue, 
+ Widget, + assert_golden, +) +from overture.schema.codegen.extraction.enum_extraction import extract_enum +from overture.schema.codegen.extraction.model_extraction import ( + expand_model_tree, + extract_model, +) +from overture.schema.codegen.extraction.newtype_extraction import extract_newtype +from overture.schema.codegen.extraction.specs import TypeIdentity +from overture.schema.codegen.layout.type_collection import ( + collect_all_supplementary_types, +) +from overture.schema.codegen.markdown.renderer import ( + render_enum, + render_feature, + render_newtype, +) +from overture.schema.codegen.markdown.reverse_references import ( + UsedByEntry, + compute_reverse_references, +) +from pydantic import BaseModel + +GOLDEN_DIR = Path(__file__).parent / "golden" / "markdown" + +FEATURE_CASES = [ + (Instrument, "instrument.md"), + (Venue, "venue.md"), + (Widget, "widget.md"), + (FeatureWithSources, "feature_with_sources.md"), + (FeatureWithAddress, "feature_with_address.md"), + (FeatureWithDict, "feature_with_dict.md"), +] + +ENUM_CASES = [ + (InstrumentFamily, "instrument_family.md"), + (SimpleKind, "simple_kind.md"), +] + +NEWTYPE_CASES = [ + (HexColor, "hex_color.md"), + (Id, "id.md"), + (Sources, "sources.md"), + (CommonNames, "common_names.md"), +] + + +@pytest.fixture(scope="module") +def reverse_refs() -> dict[TypeIdentity, list[UsedByEntry]]: + """Compute reverse references for all test models.""" + feature_specs = [] + for model_class, _ in FEATURE_CASES: + assert isinstance(model_class, type) and issubclass(model_class, BaseModel) + spec = extract_model(model_class) + expand_model_tree(spec) + feature_specs.append(spec) + + all_specs = collect_all_supplementary_types(feature_specs) + return compute_reverse_references(feature_specs, all_specs) + + +@pytest.mark.parametrize( + ("model_class", "golden_filename"), + FEATURE_CASES, + ids=[name for _, name in FEATURE_CASES], +) +def test_feature_golden( + model_class: type[BaseModel], + golden_filename: str, + 
update_golden: bool, + reverse_refs: dict[TypeIdentity, list[UsedByEntry]], +) -> None: + spec = extract_model(model_class) + expand_model_tree(spec) + used_by = reverse_refs.get(spec.identity) + actual = render_feature(spec, used_by=used_by) + assert_golden(actual, GOLDEN_DIR / golden_filename, update=update_golden) + + +@pytest.mark.parametrize( + ("enum_class", "golden_filename"), + ENUM_CASES, + ids=[name for _, name in ENUM_CASES], +) +def test_enum_golden( + enum_class: type[Enum], + golden_filename: str, + update_golden: bool, + reverse_refs: dict[TypeIdentity, list[UsedByEntry]], +) -> None: + spec = extract_enum(enum_class) + used_by = reverse_refs.get(spec.identity) + actual = render_enum(spec, used_by=used_by) + assert_golden(actual, GOLDEN_DIR / golden_filename, update=update_golden) + + +@pytest.mark.parametrize( + ("newtype_callable", "golden_filename"), + NEWTYPE_CASES, + ids=[name for _, name in NEWTYPE_CASES], +) +def test_newtype_golden( + newtype_callable: object, + golden_filename: str, + update_golden: bool, + reverse_refs: dict[TypeIdentity, list[UsedByEntry]], +) -> None: + spec = extract_newtype(newtype_callable) + used_by = reverse_refs.get(spec.identity) + actual = render_newtype(spec, used_by=used_by) + assert_golden(actual, GOLDEN_DIR / golden_filename, update=update_golden) diff --git a/packages/overture-schema-codegen/tests/test_integration_real_models.py b/packages/overture-schema-codegen/tests/test_integration_real_models.py new file mode 100644 index 000000000..b4dd9419f --- /dev/null +++ b/packages/overture-schema-codegen/tests/test_integration_real_models.py @@ -0,0 +1,279 @@ +"""Integration tests against real Overture models. + +These tests validate the extraction layer against actual models from +the installed Overture schema packages. 
+""" + +import pytest +from codegen_test_support import assert_literal_field +from overture.schema.codegen.extraction.model_extraction import extract_model +from overture.schema.codegen.extraction.specs import ( + FeatureSpec, + ModelSpec, + UnionSpec, + filter_model_classes, + is_model_class, + is_union_alias, +) +from overture.schema.codegen.extraction.type_analyzer import TypeKind +from overture.schema.codegen.extraction.union_extraction import extract_union +from overture.schema.codegen.layout.module_layout import entry_point_class +from overture.schema.codegen.markdown.pipeline import generate_markdown_pages +from overture.schema.codegen.markdown.renderer import render_feature +from overture.schema.core.discovery import discover_models +from overture.schema.transportation import Segment +from overture.schema.transportation.segment.models import RoadSegment +from pydantic import BaseModel + + +class TestDiscoverModels: + """Tests for model discovery.""" + + def test_discover_models_returns_dict(self) -> None: + """discover_models() should return a dictionary.""" + models = discover_models() + assert isinstance(models, dict) + + def test_discover_models_finds_building( + self, building_class: type[BaseModel] + ) -> None: + """Should discover the Building model.""" + assert issubclass(building_class, BaseModel) + + def test_discover_models_finds_place(self, place_class: type[BaseModel]) -> None: + """Should discover the Place model.""" + assert issubclass(place_class, BaseModel) + + def test_discover_models_returns_multiple_themes(self) -> None: + """Should discover models from multiple themes.""" + models = discover_models() + assert len(models) >= 3, f"Expected at least 3 models, got {len(models)}" + + +class TestExtractBuildingModel: + """Tests for extracting the Building model.""" + + def test_extract_building_has_name(self, building_spec: ModelSpec) -> None: + """Building model spec should have correct name.""" + assert building_spec.name == "Building" + + 
def test_extract_building_has_theme_type(self, building_spec: ModelSpec) -> None: + """Building should have theme='buildings', type='building' as Literal fields.""" + assert_literal_field(building_spec, "theme", "buildings") + assert_literal_field(building_spec, "type", "building") + + def test_extract_building_has_fields(self, building_spec: ModelSpec) -> None: + """Building should have multiple fields.""" + assert len(building_spec.fields) > 0, "Building should have at least one field" + field_names = {f.name for f in building_spec.fields} + assert "id" in field_names + + def test_building_field_types_are_valid(self, building_spec: ModelSpec) -> None: + """All Building fields should have valid TypeInfo.""" + for field in building_spec.fields: + assert field.type_info is not None + assert field.type_info.kind in TypeKind + + +class TestExtractPlaceModel: + """Tests for extracting the Place model.""" + + def test_extract_place_has_theme_type(self, place_class: type[BaseModel]) -> None: + """Place should have theme='places', type='place' as Literal fields.""" + spec = extract_model(place_class) + assert_literal_field(spec, "theme", "places") + assert_literal_field(spec, "type", "place") + + def test_place_has_fields(self, place_class: type[BaseModel]) -> None: + """Place model should have fields.""" + spec = extract_model(place_class) + assert len(spec.fields) > 0 + + +class TestExtractDivisionModel: + """Tests for extracting Division model.""" + + def test_extract_division_theme_type(self, division_class: type[BaseModel]) -> None: + """Division should have theme='divisions', type='division' as Literal fields.""" + spec = extract_model(division_class) + assert_literal_field(spec, "theme", "divisions") + assert_literal_field(spec, "type", "division") + + +class TestFieldTypeAnalysis: + """Tests that analyze_type handles real model field types correctly.""" + + def test_no_analyze_type_crashes(self, all_discovered_models: dict) -> None: + """extract_model should not 
crash on any discovered model.""" + for model_class in filter_model_classes(all_discovered_models): + spec = extract_model(model_class) + assert spec.name == model_class.__name__ + + def test_all_field_types_resolved(self, all_discovered_models: dict) -> None: + """All fields should have resolved TypeInfo.""" + for model_class in filter_model_classes(all_discovered_models): + spec = extract_model(model_class) + for field in spec.fields: + assert field.type_info.base_type, ( + f"No base_type for {spec.name}.{field.name}" + ) + assert field.type_info.kind in TypeKind, ( + f"Invalid kind for {spec.name}.{field.name}" + ) + + +class TestMarkdownRenderingRealModels: + """Tests for markdown rendering with real models.""" + + def test_render_building_content(self, building_class: type[BaseModel]) -> None: + """Building renders with title, field table, and expected fields.""" + markdown = render_feature(extract_model(building_class)) + + assert "# Building" in markdown + assert "| Name |" in markdown + assert "| Type |" in markdown + assert "id" in markdown + assert "geometry" in markdown + + def test_render_all_models_without_crash(self, all_discovered_models: dict) -> None: + """render_feature should not crash on any discovered model.""" + for model_class in filter_model_classes(all_discovered_models): + markdown = render_feature(extract_model(model_class)) + assert isinstance(markdown, str) + assert len(markdown) > 0 + + +class TestDiscriminatedUnions: + """Tests for discriminated union types like Segment. + + Segment is registered as a discriminated union (type alias), not a class. + The extraction layer handles the individual union members (RoadSegment, + RailSegment, WaterSegment) but not the union itself. 
+ """ + + def test_segment_is_not_a_class(self) -> None: + """Segment discovery returns a type alias, not a class.""" + models = discover_models() + segment_entries = [ + (k, v) for k, v in models.items() if "segment" in str(k).lower() + ] + + assert len(segment_entries) == 1 + _key, segment = segment_entries[0] + + assert not isinstance(segment, type) + + def test_individual_segment_types_extractable(self) -> None: + """Individual segment member types have expected theme/type literals.""" + spec = extract_union("Segment", Segment) + for member_cls in spec.members: + member_spec = extract_model(member_cls) + assert_literal_field(member_spec, "theme", "transportation") + assert_literal_field(member_spec, "type", "segment") + + def test_road_segment_has_road_specific_fields(self) -> None: + """RoadSegment should have road-specific fields.""" + spec = extract_model(RoadSegment) + field_names = {f.name for f in spec.fields} + + assert "subtype" in field_names + + +class TestSegmentUnionExtraction: + """Tests for extracting the real Segment discriminated union.""" + + @pytest.fixture + def segment_spec(self) -> UnionSpec: + """Extract Segment union spec.""" + return extract_union("Segment", Segment) + + def test_segment_extract_union_succeeds(self, segment_spec: UnionSpec) -> None: + """extract_union works on the real Segment type alias.""" + assert segment_spec.name == "Segment" + assert len(segment_spec.members) == 3 + + def test_segment_has_shared_fields(self, segment_spec: UnionSpec) -> None: + """Segment UnionSpec has shared fields from TransportationSegment.""" + shared = [ + af for af in segment_spec.annotated_fields if af.variant_sources is None + ] + shared_names = {af.field_spec.name for af in shared} + # All segments share these base fields + assert "geometry" in shared_names + assert "subtype" in shared_names + assert "id" in shared_names + + def test_segment_has_variant_fields(self, segment_spec: UnionSpec) -> None: + """Segment UnionSpec has 
variant-specific fields.""" + variant = [ + af for af in segment_spec.annotated_fields if af.variant_sources is not None + ] + variant_names = {af.field_spec.name for af in variant} + # RoadSegment has these specific fields + assert "road_flags" in variant_names + assert "road_surface" in variant_names + assert len(variant_names) > 0 + + def test_segment_discriminator_extracted_from_callable( + self, segment_spec: UnionSpec + ) -> None: + """Segment callable discriminator is resolved via _field_name.""" + assert segment_spec.discriminator_field == "subtype" + assert segment_spec.discriminator_mapping is not None + assert len(segment_spec.discriminator_mapping) == 3 + # Keys are str(enum_member), e.g. "Subtype.ROAD" + road_key = next(k for k in segment_spec.discriminator_mapping if "ROAD" in k) + assert segment_spec.discriminator_mapping[road_key] is RoadSegment + + def test_segment_common_base_is_base_model(self, segment_spec: UnionSpec) -> None: + """Segment common_base is the shared base class.""" + assert segment_spec.common_base is not None + assert issubclass(segment_spec.common_base, BaseModel) + # Verify common base has expected fields + assert "geometry" in segment_spec.common_base.model_fields + assert "id" in segment_spec.common_base.model_fields + + +class TestPydanticTypePages: + """End-to-end: pipeline produces pages for referenced Pydantic built-in types.""" + + _SCHEMA_ROOT = "overture.schema" + + @pytest.fixture(scope="class") + def pages(self) -> list: + """Generate all pages from real discovered models.""" + models = discover_models() + feature_specs: list[FeatureSpec] = [] + for key, entry in models.items(): + if is_model_class(entry): + feature_specs.append(extract_model(entry, entry_point=key.entry_point)) + elif is_union_alias(entry): + feature_specs.append( + extract_union( + entry_point_class(key.entry_point), + entry, + entry_point=key.entry_point, + ) + ) + return generate_markdown_pages(feature_specs, self._SCHEMA_ROOT) + + def 
test_http_url_page_exists(self, pages: list) -> None: + """Pipeline produces a page for HttpUrl under pydantic/networks/.""" + paths = {str(p.path) for p in pages} + assert any("pydantic/networks/http_url" in path for path in paths) + + def test_email_str_page_exists(self, pages: list) -> None: + """Pipeline produces a page for EmailStr under pydantic/networks/.""" + paths = {str(p.path) for p in pages} + assert any("pydantic/networks/email_str" in path for path in paths) + + def test_http_url_page_content(self, pages: list) -> None: + """HttpUrl page has expected heading and Pydantic docs link.""" + page = next(p for p in pages if "pydantic/networks/http_url" in str(p.path)) + assert "# HttpUrl" in page.content + assert "docs.pydantic.dev" in page.content + + def test_place_links_to_http_url(self, pages: list) -> None: + """Place feature page links to the HttpUrl type page.""" + place_page = next(p for p in pages if p.path.stem == "place" and p.is_feature) + assert "HttpUrl" in place_page.content diff --git a/packages/overture-schema-codegen/tests/test_markdown_renderer.py b/packages/overture-schema-codegen/tests/test_markdown_renderer.py new file mode 100644 index 000000000..e22154196 --- /dev/null +++ b/packages/overture-schema-codegen/tests/test_markdown_renderer.py @@ -0,0 +1,1436 @@ +"""Tests for Markdown renderer.""" + +from collections.abc import Callable +from enum import Enum +from pathlib import PurePosixPath +from typing import Annotated, Literal, NewType + +import pytest +from annotated_types import Ge, Interval +from codegen_test_support import ( + EMAIL_STR_SPEC, + HTTP_URL_SPEC, + STR_TYPE, + CommonNames, + FeatureBase, + FeatureWithAddress, + FeatureWithSources, + Instrument, + SimpleModel, + SourceItem, + Sources, + TreeNode, + Venue, + make_union_spec, +) +from overture.schema.codegen.extraction.examples import ExampleRecord +from overture.schema.codegen.extraction.model_extraction import ( + expand_model_tree, + extract_model, +) +from 
overture.schema.codegen.extraction.newtype_extraction import extract_newtype +from overture.schema.codegen.extraction.specs import ( + AnnotatedField, + EnumMemberSpec, + EnumSpec, + FieldSpec, + PrimitiveSpec, + TypeIdentity, +) +from overture.schema.codegen.extraction.type_analyzer import ConstraintSource +from overture.schema.codegen.markdown.link_computation import LinkContext +from overture.schema.codegen.markdown.renderer import ( + _format_constraint, + _format_example_value, + _linkify_bare_urls, + _sanitize_for_table_cell, + render_enum, + render_feature, + render_newtype, + render_primitives_from_specs, + render_pydantic_type, +) +from overture.schema.codegen.markdown.reverse_references import UsedByEntry, UsedByKind +from overture.schema.system.field_constraint import ( + CountryCodeAlpha2Constraint, + JsonPointerConstraint, + UniqueItemsConstraint, +) +from overture.schema.system.model_constraint import no_extra_fields +from overture.schema.system.primitive import int32 +from overture.schema.system.ref import Id +from overture.schema.system.string import HexColor, NoWhitespaceString +from pydantic import BaseModel, Field + +_FLAT_MEMBER = EnumMemberSpec(name="FLAT", value="flat", description=None) + +_ROOF_SHAPE_SPEC = EnumSpec( + name="RoofShape", + description="The shape of the roof.", + members=[_FLAT_MEMBER], +) + + +class TestSanitizeForTableCell: + """Tests for _sanitize_for_table_cell.""" + + def test_single_line_unchanged(self) -> None: + """Single-line text passes through unchanged.""" + assert ( + _sanitize_for_table_cell("A simple description.") == "A simple description." + ) + + def test_single_newline_becomes_space(self) -> None: + """Single newline within a paragraph becomes a space.""" + assert _sanitize_for_table_cell("Line one.\nLine two.") == "Line one. Line two." + + def test_blank_line_becomes_double_br(self) -> None: + """Blank line (paragraph break) becomes

.""" + assert ( + _sanitize_for_table_cell("Para one.\n\nPara two.") + == "Para one.

Para two." + ) + + def test_blank_line_with_whitespace(self) -> None: + """Blank line containing only whitespace is treated as blank.""" + assert ( + _sanitize_for_table_cell("Para one.\n \nPara two.") + == "Para one.

Para two." + ) + + def test_multiple_blank_lines_collapsed(self) -> None: + """Multiple consecutive blank lines collapse to one

.""" + assert _sanitize_for_table_cell("A.\n\n\nB.") == "A.

B." + + def test_pipe_escaped(self) -> None: + """Pipe characters escaped to avoid breaking table columns.""" + assert _sanitize_for_table_cell("foo | bar") == "foo \\| bar" + + def test_pipe_and_newline_both_handled(self) -> None: + """Pipes and newlines handled together.""" + assert _sanitize_for_table_cell("a | b\nc | d") == "a \\| b c \\| d" + + def test_strips_leading_trailing_whitespace(self) -> None: + """Leading/trailing whitespace stripped.""" + assert _sanitize_for_table_cell(" hello ") == "hello" + + +class TestLinkifyBareUrls: + """Tests for _linkify_bare_urls.""" + + def test_www_url_gets_linked(self) -> None: + """www. URLs become Markdown links with https:// href.""" + assert ( + _linkify_bare_urls("see www.example.com for details") + == "see [www.example.com](https://www.example.com) for details" + ) + + def test_https_url_gets_linked(self) -> None: + """https:// URLs become self-referencing Markdown links.""" + assert ( + _linkify_bare_urls("see https://example.com/path") + == "see [https://example.com/path](https://example.com/path)" + ) + + def test_http_url_gets_linked(self) -> None: + """http:// URLs become self-referencing Markdown links.""" + assert ( + _linkify_bare_urls("see http://example.com") + == "see [http://example.com](http://example.com)" + ) + + def test_existing_markdown_link_unchanged(self) -> None: + """URLs already inside [text](url) are left alone.""" + text = "[example](https://example.com)" + assert _linkify_bare_urls(text) == text + + def test_text_without_urls_unchanged(self) -> None: + """Plain text passes through unchanged.""" + assert _linkify_bare_urls("no urls here") == "no urls here" + + def test_url_in_parentheses(self) -> None: + """URL inside sentence parentheses gets linked.""" + result = _linkify_bare_urls("from the OA (www.openaddresses.io) project") + assert "[www.openaddresses.io](https://www.openaddresses.io)" in result + + def test_trailing_period_excluded(self) -> None: + """Trailing sentence punctuation 
is not part of the URL.""" + assert ( + _linkify_bare_urls("found on https://www.wikidata.org/.") + == "found on [https://www.wikidata.org/](https://www.wikidata.org/)." + ) + + def test_trailing_comma_excluded(self) -> None: + """Trailing comma is not part of the URL.""" + assert ( + _linkify_bare_urls("see https://example.com, and more") + == "see [https://example.com](https://example.com), and more" + ) + + def test_url_in_backtick_code_span_unchanged(self) -> None: + """URLs inside backtick code spans are not linkified.""" + text = "use `https://example.com` as the base" + assert _linkify_bare_urls(text) == text + + def test_url_in_double_backtick_code_span_unchanged(self) -> None: + """URLs inside double-backtick code spans are not linkified.""" + text = "use ``https://example.com/path`` as the base" + assert _linkify_bare_urls(text) == text + + def test_mixed_code_span_and_bare_url(self) -> None: + """Code-span URLs preserved while bare URLs are linkified.""" + text = "see `https://a.com` and https://b.com" + result = _linkify_bare_urls(text) + assert "`https://a.com`" in result + assert "[https://b.com](https://b.com)" in result + + +class TestRenderFeatureBasic: + """Tests for render_feature with basic models.""" + + def test_renders_title_from_model_name(self) -> None: + """Should render model name as H1 title.""" + spec = extract_model(SimpleModel) + result = render_feature(spec) + + assert "# SimpleModel" in result + + def test_renders_description_from_docstring(self) -> None: + """Should render model docstring as description.""" + + class DescribedModel(BaseModel): + """This is the model description.""" + + value: int + + spec = extract_model(DescribedModel) + result = render_feature(spec) + + assert "This is the model description." 
in result + + def test_renders_fields_section(self) -> None: + """Should include Fields section header.""" + + class ModelWithField(BaseModel): + """Model with a field.""" + + name: str + + spec = extract_model(ModelWithField) + result = render_feature(spec) + + assert "## Fields" in result + + def test_renders_field_table_header(self) -> None: + """Should render field table with proper headers.""" + + class ModelWithField(BaseModel): + """Model with a field.""" + + name: str + + spec = extract_model(ModelWithField) + result = render_feature(spec) + + assert "| Name | Type | Description |" in result + assert "| -----: | :----: | ------------- |" in result + + +class TestRenderFeatureFieldTable: + """Tests for field table rendering.""" + + def test_renders_required_field(self) -> None: + """Should render required field without (optional) suffix.""" + + class ModelWithRequired(BaseModel): + """Model with required field.""" + + name: str = Field(description="The name") + + spec = extract_model(ModelWithRequired) + result = render_feature(spec) + + assert "| `name` |" in result + assert "| `string` |" in result + assert "The name" in result + + def test_renders_optional_field(self) -> None: + """Should render optional field with (optional) suffix.""" + + class ModelWithOptional(BaseModel): + """Model with optional field.""" + + nickname: str | None = Field(None, description="Optional nickname") + + spec = extract_model(ModelWithOptional) + result = render_feature(spec) + + assert "| `nickname` |" in result + assert "(optional)" in result + assert "Optional nickname" in result + + def test_renders_typed_fields(self) -> None: + """Should render field types correctly.""" + + class ModelWithTypes(BaseModel): + """Model with various types.""" + + count: int + price: float + active: bool + + spec = extract_model(ModelWithTypes) + result = render_feature(spec) + + # Check that fields are present (exact type format may vary) + assert "`count`" in result + assert "`price`" in 
result + assert "`active`" in result + + def test_multiline_description_sanitized_in_table(self) -> None: + """Multiline field description rendered with
in table cell.""" + + class ModelWithMultilineDesc(BaseModel): + """Model.""" + + name: str = Field(description="First line.\n\nSecond paragraph.") + + spec = extract_model(ModelWithMultilineDesc) + result = render_feature(spec) + + assert "First line.

Second paragraph." in result + # The table should not be broken by a blank line + lines = result.splitlines() + table_start = next(i for i, line in enumerate(lines) if "| Name |" in line) + for i in range(table_start, len(lines)): + if lines[i].strip() == "": + break + assert lines[i].startswith("|"), f"Table broken at line {i}: {lines[i]}" + + +class TestRenderFeatureWithThemeType: + """Tests for rendering Feature-like models with theme/type.""" + + def test_renders_theme_and_type_fields(self) -> None: + """Should render theme and type as Literal fields.""" + + class Place(FeatureBase[Literal["places"], Literal["place"]]): + """A place feature.""" + + name: str + + spec = extract_model(Place) + result = render_feature(spec) + + # Theme and type should appear somewhere in output + assert "places" in result + assert "place" in result + + +class TestRenderFeatureLiteralField: + """Tests for rendering Literal-typed fields.""" + + def test_literal_field_renders_as_quoted_value(self) -> None: + """Literal field should render as quoted string in backticks.""" + + class TestFeature(FeatureBase[Literal["test_theme"], Literal["test_type"]]): + """Test feature.""" + + name: str + + spec = extract_model(TestFeature) + result = render_feature(spec) + + assert '| `"test_theme"` |' in result + assert '| `"test_type"` |' in result + + +class TestRenderFeatureNewTypeDisplay: + """Tests for NewType rendering in Markdown.""" + + def test_newtype_wrapping_list_renders_name_with_list_qualifier( + self, + ) -> None: + """NewType wrapping a list renders as name with (list, optional).""" + + class Item(BaseModel): + value: str + + TestSources = NewType( + "TestSources", Annotated[list[Item], UniqueItemsConstraint()] + ) + + class ModelWithSources(BaseModel): + """Model with sources.""" + + sources: TestSources | None = None + + spec = extract_model(ModelWithSources) + expand_model_tree(spec) + result = render_feature(spec) + + assert "`TestSources`" in result + assert "(list, optional)" 
in result + + def test_hex_color_renders_as_newtype_name(self) -> None: + """HexColor (unregistered NewType) renders as code-formatted name.""" + + class ModelWithColor(BaseModel): + """Model with color.""" + + color: HexColor | None = None + + spec = extract_model(ModelWithColor) + result = render_feature(spec) + + assert "`HexColor`" in result + assert "(optional)" in result + + def test_registered_primitive_renders_through_registry(self) -> None: + """Registered primitive (int32) renders via registry, not as NewType link.""" + + class ModelWithCount(BaseModel): + """Model with count.""" + + count: int32 + + spec = extract_model(ModelWithCount) + result = render_feature(spec) + + assert "| `int32` |" in result + # Should NOT be linked + assert "](int32.md)" not in result + + def test_plain_str_renders_as_string(self) -> None: + """Plain str field renders as 'string'.""" + + class ModelWithName(BaseModel): + """Model with name.""" + + name: str + + spec = extract_model(ModelWithName) + result = render_feature(spec) + + assert "| `string` |" in result + + def test_enum_renders_as_code_without_context(self) -> None: + """Enum fields render as inline code without LinkContext.""" + + class Status(str, Enum): + ACTIVE = "active" + + class ModelWithEnum(BaseModel): + """Model with enum.""" + + status: Status + + spec = extract_model(ModelWithEnum) + result = render_feature(spec) + + assert "| `Status` |" in result + + def test_model_field_renders_as_code_without_context(self) -> None: + """BaseModel field renders as inline code without LinkContext.""" + + class Inner(BaseModel): + value: str + + class Outer(BaseModel): + """Model with nested model.""" + + inner: Inner + + spec = extract_model(Outer) + expand_model_tree(spec) + result = render_feature(spec) + + assert "| `Inner` |" in result + + +class TestRenderFeatureInlineExpansion: + """Tests for inline expansion of nested model fields.""" + + def test_direct_model_fields_expanded_with_dot_prefix(self) -> None: + 
"""Direct model field expands sub-fields with dot notation.""" + spec = extract_model(FeatureWithAddress) + expand_model_tree(spec) + result = render_feature(spec) + + assert "| `address.street` |" in result + assert "| `address.city` |" in result + assert "| `address.zip_code` |" in result + + def test_list_of_model_fields_expanded_with_bracket_dot_prefix(self) -> None: + """List-of-model field expands sub-fields with []. notation.""" + spec = extract_model(FeatureWithSources) + expand_model_tree(spec) + result = render_feature(spec) + + assert "| `sources[]` |" in result + assert "| `sources[].dataset` |" in result + + def test_cycle_detection_prevents_infinite_recursion(self) -> None: + """Recursive model emits parent row but does not recurse.""" + spec = extract_model(TreeNode) + expand_model_tree(spec) + result = render_feature(spec) + + # The parent field row appears + assert "| `parent` |" in result + # But no recursion into parent.label + assert "parent.label" not in result + + def test_primitive_field_unchanged(self) -> None: + """Primitive fields produce a single row without expansion.""" + spec = extract_model(SimpleModel) + result = render_feature(spec) + + lines = [line for line in result.splitlines() if "| `name` |" in line] + assert len(lines) == 1 + + def test_parent_row_preserved_before_expansion(self) -> None: + """The parent field row still appears before expanded sub-fields.""" + spec = extract_model(FeatureWithAddress) + expand_model_tree(spec) + result = render_feature(spec) + + # Parent row for 'address' itself appears + assert "| `address` |" in result + # And it appears before the expanded fields + lines = result.splitlines() + address_line = next( + i for i, line in enumerate(lines) if "| `address` |" in line + ) + street_line = next( + i for i, line in enumerate(lines) if "| `address.street` |" in line + ) + assert address_line < street_line + + +class TestRenderFeatureConstraints: + """Tests for model-level constraint rendering in 
feature pages.""" + + def test_venue_has_constraints_section(self) -> None: + """Venue's @require_any_of renders as a Constraints section.""" + spec = extract_model(Venue) + result = render_feature(spec) + + assert "## Constraints" in result + assert "At least one of `name`, `description` must be set" in result + + def test_constraints_section_between_fields_and_examples(self) -> None: + """Constraints section appears after Fields, before Examples.""" + spec = extract_model(Venue) + examples = [ExampleRecord(rows=[("name", "test")])] + result = render_feature(spec, examples=examples) + + lines = result.splitlines() + fields_line = next(i for i, line in enumerate(lines) if "## Fields" in line) + constraints_line = next( + i for i, line in enumerate(lines) if "## Constraints" in line + ) + examples_line = next(i for i, line in enumerate(lines) if "## Examples" in line) + + assert fields_line < constraints_line < examples_line + + def test_no_constraints_section_without_constraints(self) -> None: + """Models without model-level constraints omit Constraints section.""" + + class Plain(BaseModel): + """Plain model.""" + + name: str + + spec = extract_model(Plain) + result = render_feature(spec) + + assert "## Constraints" not in result + + def test_no_constraints_section_with_only_no_extra_fields(self) -> None: + """Model with only @no_extra_fields omits Constraints section.""" + + @no_extra_fields + class Strict(BaseModel): + """Strict model.""" + + name: str + + spec = extract_model(Strict) + result = render_feature(spec) + + assert "## Constraints" not in result + + +class TestRenderFeatureConstraintNotes: + """Tests for inline constraint notes in field description cells.""" + + def test_venue_name_field_includes_constraint_note(self) -> None: + """Venue's name field description cell includes constraint note in italics.""" + spec = extract_model(Venue) + result = render_feature(spec) + + # Find the row for 'name' field + lines = result.splitlines() + name_line = 
next(line for line in lines if "| `name` |" in line) + assert "Venue name" in name_line + assert "*At least one of `name`, `description` must be set*" in name_line + assert "
" in name_line + + def test_field_with_no_description_gets_constraint_note(self) -> None: + """Field with no existing description still gets the constraint note.""" + spec = extract_model(Venue) + result = render_feature(spec) + + # description field on Venue has no Field(description=...) + lines = result.splitlines() + desc_line = next(line for line in lines if "| `description` |" in line) + assert "*At least one of `name`, `description` must be set*" in desc_line + + +class TestRenderFeatureFieldConstraints: + """Tests for field-level constraint annotation from TypeInfo.""" + + def test_venue_geometry_shows_allowed_types(self) -> None: + """Venue's geometry field shows GeometryTypeConstraint as a note.""" + spec = extract_model(Venue) + expand_model_tree(spec) + result = render_feature(spec) + + lines = result.splitlines() + geo_line = next(line for line in lines if "| `geometry` |" in line) + assert "*Allowed geometry types: Point, Polygon*" in geo_line + + def test_venue_reference_links_when_context_available(self) -> None: + """Reference constraint links the target type when LinkContext has the page.""" + spec = extract_model(Venue) + expand_model_tree(spec) + ctx = LinkContext( + page_path=PurePosixPath("music/venue.md"), + registry={ + TypeIdentity(Instrument, "Instrument"): PurePosixPath( + "music/instrument.md" + ) + }, + ) + result = render_feature(spec, link_ctx=ctx) + + lines = result.splitlines() + ref_line = next(line for line in lines if "| `resident_ensemble` |" in line) + assert "[`Instrument`](instrument.md)" in ref_line + assert "belongs to" in ref_line + + def test_venue_reference_unlinked_without_context(self) -> None: + """Reference constraint renders as plain code when no LinkContext.""" + spec = extract_model(Venue) + expand_model_tree(spec) + result = render_feature(spec) + + lines = result.splitlines() + ref_line = next(line for line in lines if "| `resident_ensemble` |" in line) + assert "References `Instrument`" in ref_line + assert 
"belongs to" in ref_line + + +class TestRenderEnumBasic: + """Tests for render_enum with simple enums.""" + + def test_renders_title_from_enum_name(self) -> None: + """Should render enum name as H1 title.""" + result = render_enum(_ROOF_SHAPE_SPEC) + + assert "# RoofShape" in result + + def test_renders_description_from_docstring(self) -> None: + """Should render enum docstring as description.""" + result = render_enum(_ROOF_SHAPE_SPEC) + + assert "The shape of the roof." in result + + def test_renders_values_section(self) -> None: + """Should include Values section header.""" + result = render_enum(_ROOF_SHAPE_SPEC) + + assert "## Values" in result + + def test_renders_values_as_bullet_list(self) -> None: + """Should render each value as a bullet point.""" + spec = EnumSpec( + name="RoofShape", + description="The shape of the roof.", + members=[ + EnumMemberSpec(name="FLAT", value="flat", description=None), + EnumMemberSpec(name="GABLED", value="gabled", description=None), + EnumMemberSpec(name="DOME", value="dome", description=None), + ], + ) + + result = render_enum(spec) + + assert "- `flat`" in result + assert "- `gabled`" in result + assert "- `dome`" in result + + +class TestRenderEnumDocumented: + """Tests for render_enum with DocumentedEnum (per-value descriptions).""" + + def test_renders_member_descriptions(self) -> None: + """Should render per-value descriptions after the value.""" + spec = EnumSpec( + name="Side", + description="The side on which something appears.", + members=[ + EnumMemberSpec( + name="LEFT", value="left", description="On the left side" + ), + EnumMemberSpec( + name="RIGHT", value="right", description="On the right side" + ), + ], + ) + + result = render_enum(spec) + + assert "- `left` - On the left side" in result + assert "- `right` - On the right side" in result + + def test_renders_mixed_documented_undocumented(self) -> None: + """Should handle mix of documented and undocumented members.""" + spec = EnumSpec( + 
name="ConnectionState", + description="Connection states.", + members=[ + EnumMemberSpec(name="CONNECTED", value="connected", description=None), + EnumMemberSpec( + name="QUIESCING", + value="quiescing", + description="Gracefully shutting down", + ), + ], + ) + + result = render_enum(spec) + + # Undocumented: just the value + assert "- `connected`" in result + # Documented: value + description + assert "- `quiescing` - Gracefully shutting down" in result + + +class TestRenderEnumNoDescription: + """Tests for enums without class docstrings.""" + + def test_enum_without_description(self) -> None: + """Should render enum without description section when None.""" + spec = EnumSpec( + name="SimpleEnum", + description=None, + members=[ + EnumMemberSpec(name="A", value="a", description=None), + EnumMemberSpec(name="B", value="b", description=None), + ], + ) + + result = render_enum(spec) + + # Should still have title and values + assert "# SimpleEnum" in result + assert "## Values" in result + assert "- `a`" in result + assert "- `b`" in result + # Should not have empty lines where description would be + lines = result.strip().split("\n") + # Title should be followed by blank line then Values header + assert lines[0] == "# SimpleEnum" + + +class TestRenderNewType: + """Tests for render_newtype.""" + + def test_renders_title(self) -> None: + """Should render NewType name as H1 title.""" + spec = extract_newtype(HexColor) + result = render_newtype(spec) + + assert "# HexColor" in result + + def test_renders_underlying_type(self) -> None: + """Should show the resolved underlying type below the description.""" + spec = extract_newtype(HexColor) + result = render_newtype(spec) + + assert "# HexColor\n" in result + assert "Underlying type: `string`" in result + + def test_renders_constraints(self) -> None: + """Should render constraints section with description and pattern.""" + spec = extract_newtype(HexColor) + result = render_newtype(spec) + + assert "## Constraints" in 
result + assert "Allows only hexadecimal color codes" in result + assert "`HexColorConstraint`" in result + assert "pattern:" in result + + def test_renders_id_with_provenance_without_link(self) -> None: + """Id page shows constraints without provenance links when no context.""" + spec = extract_newtype(Id) + result = render_newtype(spec) + + assert "# Id" in result + assert "NoWhitespaceConstraint" in result + # No link without LinkContext + assert "no_whitespace_string.md" not in result + + def test_builtin_underlying_type_not_linked(self) -> None: + """Built-in underlying type (string) stays in plain backticks.""" + spec = extract_newtype(HexColor) + result = render_newtype(spec) + + assert "Underlying type: `string`" in result + + def test_list_model_underlying_type_without_context(self) -> None: + """List-of-model underlying type renders without link when no context.""" + spec = extract_newtype(Sources) + result = render_newtype(spec) + + assert "Underlying type: `list`" in result + + def test_dict_underlying_types_without_context(self) -> None: + """Dict key/value NewTypes render without links when no context.""" + spec = extract_newtype(CommonNames) + result = render_newtype(spec) + + assert "map" in result + + +class TestPlacementAwareLinks: + """Tests for rendering with LinkContext for cross-directory links.""" + + def test_feature_links_to_shared_type_via_registry(self) -> None: + """Feature in theme subdir links to shared type in types/ dir.""" + + class ModelWithColor(BaseModel): + """Model with color.""" + + color: HexColor | None = None + + spec = extract_model(ModelWithColor) + page_path = PurePosixPath("buildings/building/building.md") + ctx = LinkContext( + page_path, + { + TypeIdentity(HexColor, "HexColor"): PurePosixPath( + "types/strings/hex_color.md" + ) + }, + ) + + result = render_feature(spec, link_ctx=ctx) + + assert "[`HexColor`](../../types/strings/hex_color.md)" in result + + def test_feature_links_to_theme_level_type(self) -> None: + 
"""Feature in subdir links to type at theme level.""" + + class RoofShape(str, Enum): + FLAT = "flat" + + class ModelWithRoof(BaseModel): + """Model with roof.""" + + roof: RoofShape + + spec = extract_model(ModelWithRoof) + page_path = PurePosixPath("buildings/building/building.md") + ctx = LinkContext( + page_path, + { + TypeIdentity(RoofShape, "RoofShape"): PurePosixPath( + "buildings/roof_shape.md" + ) + }, + ) + + result = render_feature(spec, link_ctx=ctx) + + assert "[`RoofShape`](../roof_shape.md)" in result + + def test_feature_links_to_sibling_in_same_subdir(self) -> None: + """Feature links to type in its own subdirectory.""" + + class BuildingClass(str, Enum): + RESIDENTIAL = "residential" + + class ModelWithClass(BaseModel): + """Model.""" + + building_class: BuildingClass + + spec = extract_model(ModelWithClass) + page_path = PurePosixPath("buildings/building/building.md") + ctx = LinkContext( + page_path, + { + TypeIdentity(BuildingClass, "BuildingClass"): PurePosixPath( + "buildings/building/building_class.md" + ) + }, + ) + + result = render_feature(spec, link_ctx=ctx) + + assert "[`BuildingClass`](building_class.md)" in result + + def test_without_context_renders_as_code(self) -> None: + """Without LinkContext, types render as inline code (no link).""" + + class ModelWithColor(BaseModel): + """Model with color.""" + + color: HexColor | None = None + + spec = extract_model(ModelWithColor) + result = render_feature(spec) + + assert "`HexColor`" in result + assert "hex_color.md" not in result + + def test_newtype_underlying_type_linked_via_registry(self) -> None: + """NewType header links underlying model type through placement registry.""" + spec = extract_newtype(Sources) + page_path = PurePosixPath("types/references/sources.md") + ctx = LinkContext( + page_path, + { + TypeIdentity(SourceItem, "SourceItem"): PurePosixPath( + "types/references/source_item.md" + ) + }, + ) + + result = render_newtype(spec, link_ctx=ctx) + + assert 
"[`SourceItem`](source_item.md)" in result + + def test_newtype_underlying_type_not_linked_when_absent(self) -> None: + """Underlying type stays backtick-only when missing from registry.""" + spec = extract_newtype(Sources) + page_path = PurePosixPath("types/references/sources.md") + ctx = LinkContext(page_path, {}) + + result = render_newtype(spec, link_ctx=ctx) + + assert "`list`" in result + assert "[`SourceItem`]" not in result + + def test_newtype_provenance_link_uses_registry(self) -> None: + """NewType provenance links resolve through placement registry.""" + spec = extract_newtype(Id) + page_path = PurePosixPath("types/references/id.md") + registry = { + TypeIdentity(NoWhitespaceString, "NoWhitespaceString"): PurePosixPath( + "types/strings/no_whitespace_string.md" + ), + } + ctx = LinkContext(page_path, registry) + + result = render_newtype(spec, link_ctx=ctx) + + assert "../strings/no_whitespace_string.md" in result + + +class TestFormatExampleValue: + """Tests for _format_example_value.""" + + def test_none_renders_as_null(self) -> None: + """None renders as backtick-quoted null.""" + + assert _format_example_value(None) == "`null`" + + def test_string_null_renders_with_backticks(self) -> None: + """String 'null' renders as a backtick-wrapped string.""" + + assert _format_example_value("null") == "`null`" + + def test_bool_true_renders_lowercase(self) -> None: + """Boolean True renders as backtick-quoted lowercase true.""" + + assert _format_example_value(True) == "`true`" + + def test_bool_false_renders_lowercase(self) -> None: + """Boolean False renders as backtick-quoted lowercase false.""" + + assert _format_example_value(False) == "`false`" + + def test_empty_string_renders_empty(self) -> None: + """Empty string renders as empty string.""" + + assert _format_example_value("") == "" + + def test_short_string_has_backticks(self) -> None: + """Non-empty strings render with backticks.""" + + assert _format_example_value("OpenStreetMap") == 
"`OpenStreetMap`" + + def test_long_string_truncated(self) -> None: + """Strings longer than 100 chars are truncated with ellipsis.""" + + long = "x" * 150 + result = _format_example_value(long) + assert result == f"`{'x' * 97}...`" + assert len(result) == 100 + 2 # 100 content + 2 backticks + + def test_integer_has_backticks(self) -> None: + """Integers render with backticks.""" + + assert _format_example_value(42) == "`42`" + assert _format_example_value(0) == "`0`" + assert _format_example_value(-17) == "`-17`" + + def test_float_has_backticks(self) -> None: + """Floats render with backticks.""" + + assert _format_example_value(3.14) == "`3.14`" + assert _format_example_value(-2.5) == "`-2.5`" + + def test_list_renders_comma_separated(self) -> None: + """Lists render as backtick-wrapped comma-separated values.""" + + assert _format_example_value([1, 2, 3]) == "`[1, 2, 3]`" + assert _format_example_value(["a", "b"]) == '`["a", "b"]`' + assert _format_example_value([]) == "`[]`" + + def test_long_list_truncated(self) -> None: + """Lists longer than truncation limit are truncated with ellipsis.""" + long_list = list(range(200)) + result = _format_example_value(long_list) + assert result.startswith("`[0, 1, 2,") + assert result.endswith("...`") + inner = result[1:-1] # strip backticks + assert len(inner) <= 100 + + def test_long_dict_truncated(self) -> None: + """Dicts longer than truncation limit are truncated with ellipsis.""" + long_dict = {f"key_{i}": f"value_{i}" for i in range(50)} + result = _format_example_value(long_dict) + assert result.startswith('`{"key_0":') + assert result.endswith("...`") + inner = result[1:-1] + assert len(inner) <= 100 + + def test_pipe_character_not_escaped_in_backticks(self) -> None: + """Pipe characters need no escaping inside backticks.""" + + assert _format_example_value("foo|bar") == "`foo|bar`" + assert _format_example_value("a|b|c") == "`a|b|c`" + + +class TestRenderFeatureWithExamples: + """Tests for render_feature with 
examples support.""" + + def test_accepts_examples_parameter(self) -> None: + """render_feature accepts examples parameter.""" + spec = extract_model(SimpleModel) + examples = [ExampleRecord(rows=[("name", "test")])] + + # Should not raise + result = render_feature(spec, examples=examples) + assert "# SimpleModel" in result + + def test_renders_single_example_without_heading(self) -> None: + """Single example renders without 'Example 1' heading.""" + + class ModelWithCount(BaseModel): + """A simple model.""" + + name: str + count: int + + spec = extract_model(ModelWithCount) + examples = [ExampleRecord(rows=[("name", "test"), ("count", 42)])] + + result = render_feature(spec, examples=examples) + assert "## Examples" in result + assert "| Column | Value |" in result + assert "| `name` | `test` |" in result + assert "| `count` | `42` |" in result + # Should NOT have "Example 1" heading + assert "### Example 1" not in result + + def test_renders_multiple_examples_with_headings(self) -> None: + """Multiple examples render with 'Example N' headings.""" + spec = extract_model(SimpleModel) + examples = [ + ExampleRecord(rows=[("name", "first")]), + ExampleRecord(rows=[("name", "second")]), + ] + + result = render_feature(spec, examples=examples) + assert "## Examples" in result + assert "### Example 1" in result + assert "### Example 2" in result + assert "| `name` | `first` |" in result + assert "| `name` | `second` |" in result + + def test_formats_example_values(self) -> None: + """Example values are formatted using _format_example_value.""" + + class TestModel(BaseModel): + """Test model.""" + + text: str + count: int + active: bool + optional: str | None + + spec = extract_model(TestModel) + examples = [ + ExampleRecord( + rows=[ + ("text", "hello"), + ("count", 42), + ("active", True), + ("optional", None), + ] + ) + ] + + result = render_feature(spec, examples=examples) + # String with backticks + assert "| `text` | `hello` |" in result + # Number with backticks + 
assert "| `count` | `42` |" in result + # Boolean with backticks, lowercase + assert "| `active` | `true` |" in result + # None as null + assert "| `optional` | `null` |" in result + + def test_no_examples_omits_section(self) -> None: + """When examples is None, Examples section is not rendered.""" + spec = extract_model(SimpleModel) + result = render_feature(spec, examples=None) + + assert "## Examples" not in result + + def test_empty_examples_list_omits_section(self) -> None: + """When examples is empty list, Examples section is not rendered.""" + spec = extract_model(SimpleModel) + result = render_feature(spec, examples=[]) + + assert "## Examples" not in result + + +class TestRenderPrimitivesPage: + """Tests for the aggregate primitives page.""" + + def test_contains_title(self, primitives_markdown: str) -> None: + assert "# Primitive Types" in primitives_markdown + + def test_contains_signed_integers(self, primitives_markdown: str) -> None: + assert "| `int8` |" in primitives_markdown + assert "| `int16` |" in primitives_markdown + assert "| `int32` |" in primitives_markdown + assert "| `int64` |" in primitives_markdown + + def test_contains_unsigned_integers(self, primitives_markdown: str) -> None: + assert "| `uint8` |" in primitives_markdown + assert "| `uint16` |" in primitives_markdown + assert "| `uint32` |" in primitives_markdown + + def test_contains_floats(self, primitives_markdown: str) -> None: + assert "| `float32` |" in primitives_markdown + assert "| `float64` |" in primitives_markdown + + def test_ranges_match_schema_constraints(self, primitives_markdown: str) -> None: + """Range strings derive from ge/le constraints in the schema.""" + assert "-128 to 127" in primitives_markdown + assert "-32,768 to 32,767" in primitives_markdown + assert "-2,147,483,648 to 2,147,483,647" in primitives_markdown + assert "-2^63 to 2^63-1" in primitives_markdown + assert "0 to 255" in primitives_markdown + assert "0 to 65,535" in primitives_markdown + assert "0 
to 4,294,967,295" in primitives_markdown + + def test_descriptions_from_docstrings(self, primitives_markdown: str) -> None: + """Descriptions derive from first line of NewType docstrings.""" + assert "Portable 8-bit signed integer." in primitives_markdown + assert "Portable 16-bit unsigned integer." in primitives_markdown + assert "Portable IEEE 32-bit floating point number." in primitives_markdown + + def test_float_precision(self, primitives_markdown: str) -> None: + """Float entries show IEEE 754 precision.""" + assert "~7 decimal digits" in primitives_markdown + assert "~15 decimal digits" in primitives_markdown + + def test_pipe_in_description_escaped(self) -> None: + """Pipe characters in primitive descriptions are escaped.""" + specs = [ + PrimitiveSpec( + name="int8", + description="Range: -128 | 127", + bounds=Interval(ge=-128, le=127), + ), + ] + result = render_primitives_from_specs(specs) + assert "Range: -128 \\| 127" in result + + +class TestRenderGeometryPage: + """Tests for the aggregate geometry page.""" + + def test_contains_title(self, geometry_markdown: str) -> None: + assert "# Geometry Types" in geometry_markdown + + def test_contains_geometry_types(self, geometry_markdown: str) -> None: + assert "Geometry" in geometry_markdown + assert "BBox" in geometry_markdown + assert "GeometryType" in geometry_markdown + + def test_lists_geometry_type_values(self, geometry_markdown: str) -> None: + assert "`point`" in geometry_markdown or "`POINT`" in geometry_markdown + + +class TestRenderUnionTemplate: + """Tests for UnionSpec template rendering with synthetic specs.""" + + def test_shared_fields_have_no_variant_tag(self) -> None: + """Shared fields render without variant annotation.""" + spec = make_union_spec( + description="A test union.", + annotated_fields=[ + AnnotatedField( + field_spec=FieldSpec( + name="id", + type_info=STR_TYPE, + description="ID", + is_required=True, + ), + variant_sources=None, + ), + ], + ) + result = render_feature(spec) 
+ assert "| `id` |" in result + assert "*(" not in result # no variant tag + + def test_variant_fields_have_inline_tag(self) -> None: + """Variant-specific fields get *(Variant)* tag.""" + spec = make_union_spec( + name="Segment", + annotated_fields=[ + AnnotatedField( + field_spec=FieldSpec( + name="speed_limit", + type_info=STR_TYPE, + description=None, + is_required=False, + ), + variant_sources=("RoadSegment",), + ), + ], + ) + result = render_feature(spec) + assert "| `speed_limit` *(Road)* |" in result + + +class TestFormatConstraintDisplay: + """Tests for FieldConstraint display with on-demand description/pattern extraction.""" + + def test_description_and_pattern(self) -> None: + """Constraint with docstring and pattern renders both.""" + cs = ConstraintSource( + source_ref=None, source_name=None, constraint=CountryCodeAlpha2Constraint() + ) + result = _format_constraint(cs, None) + assert "Allows only ISO 3166-1 alpha-2 country codes." in result.display + assert "`CountryCodeAlpha2Constraint`" in result.display + assert "pattern: `^[A-Z]{2}$`" in result.display + + def test_description_without_pattern(self) -> None: + """Constraint with docstring but no pattern renders description only.""" + cs = ConstraintSource( + source_ref=None, source_name=None, constraint=JsonPointerConstraint() + ) + result = _format_constraint(cs, None) + assert "Allows only valid JSON Pointer values (RFC 6901)." 
in result.display + assert "`JsonPointerConstraint`" in result.display + assert "pattern" not in result.display + + def test_no_description_falls_through(self) -> None: + """Plain string metadata has no docstring and falls through.""" + cs = ConstraintSource( + source_ref=None, source_name=None, constraint="plain string metadata" + ) + result = _format_constraint(cs, None) + assert result.display == "`plain string metadata`" + + def test_annotated_types_uses_operator_notation_not_docstring(self) -> None: + """annotated-types constraints use operator notation, not their __doc__.""" + cs = ConstraintSource(source_ref=None, source_name=None, constraint=Ge(ge=0)) + result = _format_constraint(cs, None) + assert result.display == "`≥ 0`" + assert "Ge(ge=x)" not in result.display + + def test_constraint_class_not_linked(self) -> None: + """Constraint class name stays in backticks (no pages generated for constraints).""" + cs = ConstraintSource( + source_ref=None, source_name=None, constraint=CountryCodeAlpha2Constraint() + ) + result = _format_constraint(cs, None) + assert "`CountryCodeAlpha2Constraint`" in result.display + assert "[`CountryCodeAlpha2Constraint`](" not in result.display + + +def _feature_spec() -> object: + return extract_model(SimpleModel) + + +def _enum_spec() -> object: + return _ROOF_SHAPE_SPEC + + +def _newtype_spec() -> object: + return extract_newtype(HexColor) + + +_USED_BY_CASES = [ + pytest.param(_feature_spec, render_feature, id="feature"), + pytest.param(_enum_spec, render_enum, id="enum"), + pytest.param(_newtype_spec, render_newtype, id="newtype"), +] + + +class TestUsedByRendering: + """Tests for rendering 'Used By' section across all render functions.""" + + @pytest.mark.parametrize(("spec_factory", "render_fn"), _USED_BY_CASES) + def test_entries_render_without_links_when_no_context( + self, + spec_factory: Callable[[], object], + render_fn: Callable[..., str], + ) -> None: + """Without LinkContext, 'Used By' entries render as inline 
code.""" + _building = object() + _building_id = object() + used_by = [ + UsedByEntry( + identity=TypeIdentity(_building, "Building"), kind=UsedByKind.MODEL + ), + UsedByEntry( + identity=TypeIdentity(_building_id, "BuildingId"), + kind=UsedByKind.NEWTYPE, + ), + ] + + result = render_fn(spec_factory(), used_by=used_by) + + assert "## Used By" in result + assert "- `Building`" in result + assert "- `BuildingId`" in result + + @pytest.mark.parametrize( + ("spec_factory", "render_fn", "page_path", "expected_link"), + [ + pytest.param( + _feature_spec, + render_feature, + PurePosixPath("types/strings/hex_color.md"), + "../../buildings/building/building.md", + id="feature", + ), + pytest.param( + _enum_spec, + render_enum, + PurePosixPath("buildings/roof_shape.md"), + "building/building.md", + id="enum", + ), + pytest.param( + _newtype_spec, + render_newtype, + PurePosixPath("types/strings/hex_color.md"), + "../../buildings/building/building.md", + id="newtype", + ), + ], + ) + def test_link_context_uses_registry( + self, + spec_factory: Callable[[], object], + render_fn: Callable[..., str], + page_path: PurePosixPath, + expected_link: str, + ) -> None: + """Used-by entries resolve links through placement registry.""" + _building = object() + _building_identity = TypeIdentity(_building, "Building") + registry = { + _building_identity: PurePosixPath("buildings/building/building.md"), + } + ctx = LinkContext(page_path, registry) + used_by = [UsedByEntry(identity=_building_identity, kind=UsedByKind.MODEL)] + + result = render_fn(spec_factory(), link_ctx=ctx, used_by=used_by) + + assert "## Used By" in result + assert f"[`Building`]({expected_link})" in result + + @pytest.mark.parametrize(("spec_factory", "render_fn"), _USED_BY_CASES) + def test_no_used_by_omits_section( + self, + spec_factory: Callable[[], object], + render_fn: Callable[..., str], + ) -> None: + """When used_by is None, 'Used By' section is not rendered.""" + result = render_fn(spec_factory(), 
used_by=None) + + assert "## Used By" not in result + + @pytest.mark.parametrize(("spec_factory", "render_fn"), _USED_BY_CASES) + def test_empty_used_by_omits_section( + self, + spec_factory: Callable[[], object], + render_fn: Callable[..., str], + ) -> None: + """When used_by is empty list, 'Used By' section is not rendered.""" + result = render_fn(spec_factory(), used_by=[]) + + assert "## Used By" not in result + + +class TestRenderPydanticType: + """Tests for render_pydantic_type.""" + + def test_heading_is_pascal_case(self) -> None: + result = render_pydantic_type(HTTP_URL_SPEC) + assert result.startswith("# HttpUrl\n") + + def test_description_rendered(self) -> None: + result = render_pydantic_type(HTTP_URL_SPEC) + assert "A type that will accept any http or https URL." in result + + def test_no_description_omits_paragraph(self) -> None: + result = render_pydantic_type(EMAIL_STR_SPEC) + lines = result.strip().split("\n") + assert lines[0] == "# EmailStr" + + def test_pydantic_docs_link(self) -> None: + result = render_pydantic_type(HTTP_URL_SPEC) + assert ( + "https://docs.pydantic.dev/latest/api/networks/#pydantic.networks.HttpUrl" + in result + ) + + def test_used_by_section(self) -> None: + place_cls = type("Place", (), {}) + place_id = TypeIdentity(place_cls, "Place") + used_by = [UsedByEntry(place_id, UsedByKind.MODEL)] + ctx = LinkContext( + page_path=PurePosixPath("pydantic/networks/http_url.md"), + registry={place_id: PurePosixPath("places/place/place.md")}, + ) + result = render_pydantic_type(HTTP_URL_SPEC, link_ctx=ctx, used_by=used_by) + assert "## Used By" in result + assert "Place" in result diff --git a/packages/overture-schema-codegen/tests/test_markdown_type_format.py b/packages/overture-schema-codegen/tests/test_markdown_type_format.py new file mode 100644 index 000000000..e54426f5f --- /dev/null +++ b/packages/overture-schema-codegen/tests/test_markdown_type_format.py @@ -0,0 +1,317 @@ +"""Tests for markdown type formatting.""" + +from enum 
import Enum +from pathlib import PurePosixPath +from typing import Literal, NewType + +from overture.schema.codegen.extraction.specs import FieldSpec, TypeIdentity +from overture.schema.codegen.extraction.type_analyzer import ( + TypeInfo, + TypeKind, + analyze_type, +) +from overture.schema.codegen.markdown.link_computation import LinkContext +from overture.schema.codegen.markdown.type_format import ( + format_dict_type, + format_type, + format_underlying_type, +) +from overture.schema.system.primitive import int32 +from pydantic import BaseModel, HttpUrl + + +class _ModelA(BaseModel): + x: int + + +class _ModelB(BaseModel): + y: str + + +class TestFormatType: + """Tests for format_type.""" + + def test_plain_str_renders_as_string(self) -> None: + ti = analyze_type(str) + assert format_type(_make_field(ti)) == "`string`" + + def test_optional_adds_qualifier(self) -> None: + ti = analyze_type(str | None) + assert format_type(_make_field(ti, is_required=False)) == "`string` (optional)" + + def test_literal_renders_as_quoted_value(self) -> None: + ti = analyze_type(Literal["places"]) + assert format_type(_make_field(ti)) == '`"places"`' + + def test_multi_value_literal_renders_comma_separated(self) -> None: + ti = analyze_type(Literal["a", "b", "c"]) + assert format_type(_make_field(ti)) == '`"a"` \\| `"b"` \\| `"c"`' + + def test_enum_without_context_renders_as_code(self) -> None: + class Color(str, Enum): + RED = "red" + + ti = analyze_type(Color) + assert format_type(_make_field(ti)) == "`Color`" + + def test_enum_with_link_context(self) -> None: + class Color(str, Enum): + RED = "red" + + ti = analyze_type(Color) + field = _make_field(ti) + ctx = LinkContext( + page_path=PurePosixPath("buildings/building/building.md"), + registry={ + TypeIdentity(Color, "Color"): PurePosixPath("types/enums/color.md") + }, + ) + assert format_type(field, ctx) == "[`Color`](../../types/enums/color.md)" + + def test_list_of_primitives(self) -> None: + ti = analyze_type(list[str]) + 
assert format_type(_make_field(ti)) == "`list<string>`" + + def test_nested_list_of_primitives(self) -> None: + ti = analyze_type(list[list[str]]) + assert format_type(_make_field(ti)) == "`list<list<string>>`" + + def test_registered_primitive_not_linked(self) -> None: + ti = analyze_type(int32) + result = format_type(_make_field(ti)) + assert result == "`int32`" + assert "](int32.md)" not in result + + +class TestFormatDictType: + """Tests for format_dict_type.""" + + def test_simple_dict_renders_as_map(self) -> None: + ti = analyze_type(dict[str, int]) + result = format_dict_type(ti) + assert result == "map" + + def test_dict_with_newtype_shows_semantic_name(self) -> None: + MyKey = NewType("MyKey", str) + ti = analyze_type(dict[MyKey, int]) + result = format_dict_type(ti) + assert result == "map" + + +def _make_field( + ti: TypeInfo, *, name: str = "x", is_required: bool = True +) -> FieldSpec: + """Build a FieldSpec for test convenience.""" + return FieldSpec(name=name, type_info=ti, description=None, is_required=is_required) + + +class TestFormatUnionType: + """Tests for UNION-kind TypeInfo in format_type.""" + + def test_union_renders_all_members(self) -> None: + ti = analyze_type(_ModelA | _ModelB) + result = format_type(_make_field(ti)) + assert "`_ModelA`" in result + assert "`_ModelB`" in result + # Pipe separator escaped for table cells + assert r"\|" in result + + def test_union_with_link_context_links_each_member(self) -> None: + ti = analyze_type(_ModelA | _ModelB) + ctx = LinkContext( + page_path=PurePosixPath("theme/feature/feature.md"), + registry={ + TypeIdentity(_ModelA, "_ModelA"): PurePosixPath( + "theme/feature/types/model_a.md" + ), + TypeIdentity(_ModelB, "_ModelB"): PurePosixPath( + "theme/feature/types/model_b.md" + ), + }, + ) + result = format_type(_make_field(ti), ctx) + assert "[`_ModelA`](types/model_a.md)" in result + assert "[`_ModelB`](types/model_b.md)" in result + + def test_optional_union_adds_qualifier(self) -> None: + ti = analyze_type(_ModelA | 
_ModelB | None) + result = format_type(_make_field(ti, is_required=False)) + assert "(optional)" in result + assert "`_ModelA`" in result + assert "`_ModelB`" in result + + def test_list_of_union_adds_qualifier(self) -> None: + ti = TypeInfo( + base_type="_ModelA", + kind=TypeKind.UNION, + list_depth=1, + union_members=(_ModelA, _ModelB), + ) + result = format_type(_make_field(ti)) + assert "(list)" in result + assert "`_ModelA`" in result + assert "`_ModelB`" in result + + def test_union_members_unlinked_without_context(self) -> None: + ti = analyze_type(_ModelA | _ModelB) + result = format_type(_make_field(ti)) + # No markdown links without context + assert "]()" not in result + assert "[`" not in result + + def test_union_partial_links(self) -> None: + """Members with pages get linked; members without don't.""" + ti = analyze_type(_ModelA | _ModelB) + ctx = LinkContext( + page_path=PurePosixPath("theme/feature/feature.md"), + registry={ + TypeIdentity(_ModelA, "_ModelA"): PurePosixPath( + "theme/feature/types/model_a.md" + ) + }, + ) + result = format_type(_make_field(ti), ctx) + assert "[`_ModelA`](types/model_a.md)" in result + assert "`_ModelB`" in result + # _ModelB should NOT be linked + assert "[`_ModelB`]" not in result + + +class TestPydanticTypeLinking: + """Tests for PRIMITIVE types with pages getting linked.""" + + def test_pydantic_type_linked_when_in_registry(self) -> None: + ti = analyze_type(HttpUrl) + field = _make_field(ti) + ctx = LinkContext( + page_path=PurePosixPath("places/place/place.md"), + registry={ + TypeIdentity(HttpUrl, "HttpUrl"): PurePosixPath( + "pydantic/networks/http_url.md" + ) + }, + ) + result = format_type(field, ctx) + assert "[`HttpUrl`]" in result + assert "pydantic/networks/http_url.md" in result + + def test_pydantic_type_unlinked_without_registry_entry(self) -> None: + ti = analyze_type(HttpUrl) + field = _make_field(ti) + ctx = LinkContext( + page_path=PurePosixPath("places/place/place.md"), + registry={}, + ) + 
result = format_type(field, ctx) + assert result == "`HttpUrl`" + assert "[" not in result + + def test_list_of_pydantic_type_linked(self) -> None: + ti = analyze_type(list[HttpUrl]) + field = _make_field(ti) + ctx = LinkContext( + page_path=PurePosixPath("places/place/place.md"), + registry={ + TypeIdentity(HttpUrl, "HttpUrl"): PurePosixPath( + "pydantic/networks/http_url.md" + ) + }, + ) + result = format_type(field, ctx) + assert "HttpUrl" in result + assert "pydantic/networks/http_url.md" in result + + def test_registered_primitive_links_to_aggregate_page(self) -> None: + """int32 links to the primitives aggregate page when in registry.""" + ti = analyze_type(int32) + field = _make_field(ti) + ctx = LinkContext( + page_path=PurePosixPath("places/place/place.md"), + registry={ + TypeIdentity(int32, "int32"): PurePosixPath( + "system/primitive/primitives.md" + ) + }, + ) + result = format_type(field, ctx) + assert "[`int32`]" in result + assert "system/primitive/primitives.md" in result + + +class TestListOfSemanticNewtype: + """Tests for list[SemanticNewType] rendering. + + When a scalar NewType appears inside list[], the type renders as + list<NewTypeName> rather than NewTypeName (list). The (list) qualifier + is reserved for NewTypes that internally wrap a list. 
+ """ + + def test_list_of_scalar_newtype_renders_list_syntax(self) -> None: + """list[ScalarNewType] renders as list, not Name (list).""" + ScalarNT = NewType("ScalarNT", str) + ti = analyze_type(list[ScalarNT]) + result = format_type(_make_field(ti)) + assert "list<" in result + assert "ScalarNT" in result + assert "(list)" not in result + + def test_newtype_wrapping_list_renders_qualifier(self) -> None: + """NewType wrapping list[X] renders as Name (list).""" + ListNT = NewType("ListNT", list[str]) + ti = analyze_type(ListNT) + result = format_type(_make_field(ti)) + assert "(list)" in result + assert "ListNT" in result + + def test_list_of_scalar_newtype_with_link(self) -> None: + """list[ScalarNewType] with link context renders linked list.""" + ScalarNT = NewType("ScalarNT", str) + ti = analyze_type(list[ScalarNT]) + field = _make_field(ti) + ctx = LinkContext( + page_path=PurePosixPath("places/place/place.md"), + registry={ + TypeIdentity(ScalarNT, "ScalarNT"): PurePosixPath("system/scalar_nt.md") + }, + ) + result = format_type(field, ctx) + assert "list<" in result + assert "ScalarNT" in result + assert "system/scalar_nt.md" in result + assert "(list)" not in result + + def test_nested_list_of_scalar_newtype_renders_nested_list_syntax(self) -> None: + """list[list[ScalarNewType]] renders as list>.""" + ScalarNT = NewType("ScalarNT", str) + ti = analyze_type(list[list[ScalarNT]]) + result = format_type(_make_field(ti)) + assert "list<" in result + assert "list<`" in result or "`list None: + ti = analyze_type(_ModelA | _ModelB) + result = format_underlying_type(ti) + assert result == "`_ModelA` | `_ModelB`" + + def test_union_with_link_context(self) -> None: + ti = analyze_type(_ModelA | _ModelB) + ctx = LinkContext( + page_path=PurePosixPath("types/my_union.md"), + registry={ + TypeIdentity(_ModelA, "_ModelA"): PurePosixPath( + "theme/feature/types/model_a.md" + ), + TypeIdentity(_ModelB, "_ModelB"): PurePosixPath( + "theme/feature/types/model_b.md" + ), + 
}, + ) + result = format_underlying_type(ti, ctx) + assert "[`_ModelA`](../theme/feature/types/model_a.md)" in result + assert "[`_ModelB`](../theme/feature/types/model_b.md)" in result diff --git a/packages/overture-schema-codegen/tests/test_model_extractor.py b/packages/overture-schema-codegen/tests/test_model_extractor.py new file mode 100644 index 000000000..f2b2bd257 --- /dev/null +++ b/packages/overture-schema-codegen/tests/test_model_extractor.py @@ -0,0 +1,549 @@ +"""Tests for model extraction.""" + +from typing import Annotated, Literal + +from codegen_test_support import ( + FeatureBase, + FeatureWithAddress, + Instrument, + SourceItem, + TreeNode, + Venue, + assert_literal_field, + find_field, +) +from overture.schema.codegen.extraction.model_extraction import ( + expand_model_tree, + extract_model, +) +from overture.schema.codegen.extraction.specs import ModelSpec +from overture.schema.system.field_constraint import UniqueItemsConstraint +from overture.schema.system.model_constraint import ( + FieldEqCondition, + FieldGroupConstraint, + require_any_of, + require_if, +) +from overture.schema.system.primitive import ( + Geometry, + GeometryType, + GeometryTypeConstraint, +) +from overture.schema.system.string import HexColor +from pydantic import BaseModel, Field + + +class TestModelConstraints: + """Model-level constraint extraction.""" + + def test_unconstrained_model_has_empty_constraints(self) -> None: + """Models without decorators produce an empty constraints tuple.""" + + class Plain(BaseModel): + name: str + + spec = extract_model(Plain) + + assert spec.constraints == () + + def test_extracts_require_any_of(self) -> None: + """Should extract @require_any_of from a decorated model.""" + spec = extract_model(Venue) + + assert len(spec.constraints) == 1 + (constraint,) = spec.constraints + assert constraint.name == "@require_any_of" + assert isinstance(constraint, FieldGroupConstraint) + assert constraint.field_names == ("name", "description") + + 
def test_stacked_constraints_preserve_order(self) -> None: + """Multiple decorators extracted in stacking order (inner-first).""" + + @require_if(["bar"], FieldEqCondition("baz", "x")) + @require_any_of("foo", "bar") + class Stacked(BaseModel): + foo: str | None = None + bar: str | None = None + baz: str | None = None + + spec = extract_model(Stacked) + + assert len(spec.constraints) == 2 + assert spec.constraints[0].name == "@require_any_of" + assert spec.constraints[1].name == "@require_if" + + +class TestExtractModelSimple: + """Tests for extract_model with simple Pydantic models.""" + + def test_extract_simple_model(self) -> None: + """Should extract basic model information.""" + + class SimpleModel(BaseModel): + """A simple test model.""" + + name: str + + result = extract_model(SimpleModel) + + assert result.name == "SimpleModel" + assert result.description == "A simple test model." + assert len(result.fields) == 1 + assert result.fields[0].name == "name" + assert result.fields[0].type_info.base_type == "str" + assert result.fields[0].is_required is True + + def test_extract_model_does_not_set_entry_point(self) -> None: + class M(BaseModel): + x: int + + result = extract_model(M) + assert result.entry_point is None + + def test_extract_model_with_optional_field(self) -> None: + """Should handle optional fields correctly.""" + + class ModelWithOptional(BaseModel): + """Model with optional field.""" + + name: str + nickname: str | None = None + + result = extract_model(ModelWithOptional) + + assert len(result.fields) == 2 + + name_field = find_field(result, "name") + assert name_field.is_required is True + + nickname_field = find_field(result, "nickname") + assert nickname_field.is_required is False + assert nickname_field.type_info.is_optional is True + + def test_extract_model_with_field_description(self) -> None: + """Should extract field descriptions from Field().""" + + class ModelWithDescription(BaseModel): + """Model with field descriptions.""" + + name: 
str = Field(description="The name of the entity") + + result = extract_model(ModelWithDescription) + + assert result.fields[0].description == "The name of the entity" + + def test_extract_model_with_list_field(self) -> None: + """Should handle list fields correctly.""" + + class ModelWithList(BaseModel): + """Model with list field.""" + + tags: list[str] + + result = extract_model(ModelWithList) + + tags_field = result.fields[0] + assert tags_field.name == "tags" + assert tags_field.type_info.is_list is True + assert tags_field.type_info.base_type == "str" + + +class TestExtractModelWithThemeType: + """Tests for extracting theme/type from Feature-like models.""" + + def test_extract_theme_and_type_from_generic(self) -> None: + """Should extract theme and type as Literal fields.""" + + class Place(FeatureBase[Literal["places"], Literal["place"]]): + """A place feature.""" + + name: str + + result = extract_model(Place) + assert_literal_field(result, "theme", "places") + assert_literal_field(result, "type", "place") + + def test_extract_different_theme_type(self) -> None: + """Should handle different theme/type values as Literal fields.""" + + class Building(FeatureBase[Literal["buildings"], Literal["building"]]): + """A building feature.""" + + height: float | None = None + + result = extract_model(Building) + assert_literal_field(result, "theme", "buildings") + assert_literal_field(result, "type", "building") + + def test_non_feature_model_has_no_theme_type(self) -> None: + """Regular models without Generic base should have no theme/type fields.""" + + class RegularModel(BaseModel): + """A regular model.""" + + value: int + + result = extract_model(RegularModel) + + field_names = [f.name for f in result.fields] + assert "theme" not in field_names + assert "type" not in field_names + + +class TestExtractModelFieldAlias: + """Tests for field alias handling in extract_model.""" + + def test_field_with_alias_uses_alias_name(self) -> None: + """Fields with alias should 
use alias as the field name, not Python attr name.""" + + class ModelWithAlias(BaseModel): + """Model with aliased field.""" + + class_: str | None = Field(default=None, alias="class") + + result = extract_model(ModelWithAlias) + + # Should use alias 'class', not Python name 'class_' + class_field = result.fields[0] + assert class_field.name == "class" + + def test_field_without_alias_uses_python_name(self) -> None: + """Fields without alias should use Python attribute name.""" + + class ModelWithoutAlias(BaseModel): + """Model without alias.""" + + name: str + + result = extract_model(ModelWithoutAlias) + + assert result.fields[0].name == "name" + + +class TestExtractModelDocstring: + """Tests for docstring extraction and cleaning.""" + + def test_multiline_docstring_has_indentation_stripped(self) -> None: + """Multi-line docstrings should have leading whitespace stripped. + + Docstrings defined in classes have leading whitespace on continuation + lines. This should be stripped so they render as normal paragraphs + in Markdown, not as code blocks. + """ + + class ModelWithMultilineDoc(BaseModel): + """A model with multi-line docstring. + + This is a second paragraph that would have leading + whitespace in the raw __doc__ attribute. 
+ """ + + name: str + + result = extract_model(ModelWithMultilineDoc) + + # Description should NOT have leading whitespace on continuation lines + assert result.description is not None + assert "\n " not in result.description + # Should still have the content + assert "second paragraph" in result.description + + +class TestFieldOrderingWithMixins: + """Tests for field ordering when a model has multiple inheritance.""" + + def test_mixin_fields_come_after_primary_chain_and_own(self) -> None: + """Fields from mixin bases should appear after primary chain and own fields.""" + + class PrimaryBase(BaseModel): + base_field: str + + class MixinA(BaseModel): + a_field: str + + class MixinB(BaseModel): + b_field: str + + class Child(PrimaryBase, MixinA, MixinB): + """A child model with mixins.""" + + own_field: str + + result = extract_model(Child) + field_names = [f.name for f in result.fields] + + assert field_names == ["base_field", "own_field", "a_field", "b_field"] + + def test_single_inheritance_order_unchanged(self) -> None: + """Single-inheritance models should keep Pydantic's default order.""" + + class Parent(BaseModel): + parent_field: str + + class Child(Parent): + """A child model.""" + + child_field: str + + result = extract_model(Child) + field_names = [f.name for f in result.fields] + + assert field_names == ["parent_field", "child_field"] + + def test_mixin_fields_in_declaration_order(self) -> None: + """Mixin fields should appear in class declaration order, not reversed MRO.""" + + class Primary(BaseModel): + p: str + + class MixinFirst(BaseModel): + first: str + + class MixinSecond(BaseModel): + second: str + + class MixinThird(BaseModel): + third: str + + class Model(Primary, MixinFirst, MixinSecond, MixinThird): + """Model with three mixins.""" + + own: str + + result = extract_model(Model) + field_names = [f.name for f in result.fields] + + # Mixins in declaration order: First, Second, Third + assert field_names == ["p", "own", "first", "second", 
"third"] + + def test_deep_primary_chain_before_mixins(self) -> None: + """Fields from the entire primary chain should precede mixin fields.""" + + class GrandParent(BaseModel): + gp_field: str + + class Parent(GrandParent): + p_field: str + + class Mixin(BaseModel): + m_field: str + + class Child(Parent, Mixin): + """Child with deep primary chain.""" + + own_field: str + + result = extract_model(Child) + field_names = [f.name for f in result.fields] + + assert field_names == ["gp_field", "p_field", "own_field", "m_field"] + + def test_recursive_mixin_reordering(self) -> None: + """Mixins on primary-chain classes should also be reordered.""" + + class CoreBase(BaseModel): + core: str + + class ParentMixin(BaseModel): + pm: str + + class Parent(CoreBase, ParentMixin): + p: str + + class ChildMixin(BaseModel): + cm: str + + class Child(Parent, ChildMixin): + """Child where primary-chain parent has its own mixin.""" + + own: str + + result = extract_model(Child) + field_names = [f.name for f in result.fields] + + # CoreBase (Parent's primary) -> Parent own -> ParentMixin -> Child own -> ChildMixin + assert field_names == ["core", "p", "pm", "own", "cm"] + + +class TestExpandModelTree: + """Tests for expand_model_tree.""" + + def test_model_without_sub_models_unchanged(self) -> None: + """Fields without MODEL kind get model=None.""" + + class Simple(BaseModel): + name: str + count: int + + spec = extract_model(Simple) + expand_model_tree(spec) + + for f in spec.fields: + assert f.model is None + assert f.starts_cycle is False + + def test_nested_model_gets_expanded(self) -> None: + """MODEL-kind fields get their model populated.""" + spec = extract_model(FeatureWithAddress) + expand_model_tree(spec) + + addr_field = find_field(spec, "address") + assert addr_field.model is not None + assert addr_field.model.name == "Address" + assert addr_field.starts_cycle is False + + # Sub-model fields should exist + sub_names = [f.name for f in addr_field.model.fields] + assert 
"street" in sub_names + assert "city" in sub_names + + def test_cycle_detected_and_marked(self) -> None: + """Self-referential model gets starts_cycle=True.""" + spec = extract_model(TreeNode) + expand_model_tree(spec) + + parent_field = find_field(spec, "parent") + assert parent_field.model is not None + assert parent_field.model is spec # Same object -- cycle + assert parent_field.starts_cycle is True + + def test_shared_reference_not_marked_as_cycle(self) -> None: + """Two models referencing the same sub-model share it without cycle.""" + + class Shared(BaseModel): + value: str + + class ModelA(BaseModel): + ref: Shared + + class ModelB(BaseModel): + ref: Shared + + cache: dict[type, ModelSpec] = {} + spec_a = extract_model(ModelA) + expand_model_tree(spec_a, cache) + + spec_b = extract_model(ModelB) + expand_model_tree(spec_b, cache) + + ref_a = find_field(spec_a, "ref") + ref_b = find_field(spec_b, "ref") + + # Same ModelSpec object, neither is a cycle + assert ref_a.model is ref_b.model + assert ref_a.starts_cycle is False + assert ref_b.starts_cycle is False + + def test_list_of_model_gets_expanded(self) -> None: + """list[Model] fields also get their model populated.""" + + class HasList(BaseModel): + items: list[SourceItem] + + spec = extract_model(HasList) + expand_model_tree(spec) + + items_field = find_field(spec, "items") + assert items_field.model is not None + assert items_field.model.name == "SourceItem" + + +class TestFieldInfoMetadataConstraints: + """Constraints from field_info.metadata are merged into TypeInfo. + + Pydantic strips the Annotated wrapper from some fields and moves the + metadata to field_info.metadata. extract_model merges these back into + TypeInfo.constraints so they aren't silently dropped. 
+ """ + + def test_geometry_type_constraint_extracted(self) -> None: + """GeometryTypeConstraint on geometry field should appear in constraints.""" + spec = extract_model(Venue) + geometry_field = find_field(spec, "geometry") + + constraint_types = [ + type(cs.constraint) for cs in geometry_field.type_info.constraints + ] + assert GeometryTypeConstraint in constraint_types + + def test_geometry_type_constraint_has_null_source(self) -> None: + """Constraints from field_info.metadata have source_ref=None (not from a NewType).""" + spec = extract_model(Venue) + geometry_field = find_field(spec, "geometry") + + geo_constraints = [ + cs + for cs in geometry_field.type_info.constraints + if isinstance(cs.constraint, GeometryTypeConstraint) + ] + assert len(geo_constraints) == 1 + assert geo_constraints[0].source_ref is None + + def test_metadata_constraints_not_duplicated(self) -> None: + """Fields where Pydantic preserves Annotated don't get duplicate constraints. + + When field_info.metadata is empty (Pydantic kept the Annotated wrapper), + no extra constraints are added. 
+ """ + spec = extract_model(Instrument) + tags_field = find_field(spec, "tags") + + unique_constraints = [ + cs + for cs in tags_field.type_info.constraints + if isinstance(cs.constraint, UniqueItemsConstraint) + ] + assert len(unique_constraints) == 1 + + def test_standalone_annotated_field_extracts_metadata(self) -> None: + """Direct Annotated[Type, constraint] fields (non-optional, non-union) + get their constraints from field_info.metadata.""" + + class Model(BaseModel): + geo: Annotated[ + Geometry, + GeometryTypeConstraint(GeometryType.POINT), + ] + + spec = extract_model(Model) + geo_field = find_field(spec, "geo") + + constraint_types = [ + type(cs.constraint) for cs in geo_field.type_info.constraints + ] + assert GeometryTypeConstraint in constraint_types + + +class TestFieldDescriptionFallback: + """Tests for field description fallback from NewType Field metadata.""" + + def test_field_inherits_newtype_description(self) -> None: + """Field with no explicit description gets NewType's Field description.""" + + class TestModel(BaseModel): + color: HexColor + + spec = extract_model(TestModel) + field = find_field(spec, "color") + assert field.description is not None + assert "color" in field.description.lower() + + def test_explicit_description_not_overridden(self) -> None: + """Field with explicit description keeps its own, ignores NewType's.""" + + class TestModel(BaseModel): + color: HexColor = Field(description="Custom color description") + + spec = extract_model(TestModel) + field = find_field(spec, "color") + assert field.description == "Custom color description" + + def test_field_without_newtype_description_stays_none(self) -> None: + """Field typed as plain str (no NewType description) keeps None.""" + + class TestModel(BaseModel): + name: str + + spec = extract_model(TestModel) + field = find_field(spec, "name") + assert field.description is None diff --git a/packages/overture-schema-codegen/tests/test_module_layout.py 
class TestComputeSchemaRoot:
    """Behavior of compute_schema_root for various module-path inputs."""

    def test_multiple_paths_common_prefix(self) -> None:
        """Several sibling theme modules share the dotted prefix as root."""
        modules = [
            "overture.schema.buildings",
            "overture.schema.places",
            "overture.schema.divisions",
        ]
        assert compute_schema_root(modules) == "overture.schema"

    def test_single_path_drops_last_component(self) -> None:
        """A lone path yields its parent package as the root."""
        root = compute_schema_root(["overture.schema.buildings"])
        assert root == "overture.schema"

    def test_mixed_depth_paths(self) -> None:
        """Paths of different depth still reduce to the common prefix."""
        modules = [
            "overture.schema.buildings",
            "overture.schema.core.names.primary_name",
        ]
        assert compute_schema_root(modules) == "overture.schema"

    def test_divergent_namespaces(self) -> None:
        """Paths with no shared prefix produce an empty root."""
        modules = ["overture.schema.buildings", "acme.transit"]
        assert compute_schema_root(modules) == ""

    def test_empty_raises(self) -> None:
        """An empty path list is rejected with ValueError."""
        with pytest.raises(ValueError):
            compute_schema_root([])

    def test_single_component_path(self) -> None:
        """A one-component path has nothing left once the last part is dropped."""
        assert compute_schema_root(["buildings"]) == ""

    def test_identical_paths_deduplicated(self) -> None:
        """Duplicate paths behave the same as a single path."""
        modules = ["overture.schema.buildings", "overture.schema.buildings"]
        assert compute_schema_root(modules) == "overture.schema"
class TestModuleRelpath:
    """module_relpath strips a root prefix from a dotted module path."""

    def test_strips_root_prefix(self) -> None:
        """The root prefix and its trailing dot are removed."""
        rel = module_relpath("overture.schema.buildings", "overture.schema")
        assert rel == "buildings"

    def test_deep_path(self) -> None:
        """Deeply nested modules keep every component past the root."""
        rel = module_relpath(
            "overture.schema.core.names.primary_name", "overture.schema"
        )
        assert rel == "core.names.primary_name"

    def test_module_equals_root(self) -> None:
        """A module identical to the root maps to the empty string."""
        assert module_relpath("overture.schema", "overture.schema") == ""

    def test_empty_root(self) -> None:
        """With an empty root, the module path is returned unchanged."""
        assert module_relpath("buildings", "") == "buildings"

    def test_nonmatching_raises(self) -> None:
        """A module outside the root namespace is rejected with ValueError."""
        with pytest.raises(ValueError):
            module_relpath("acme.transit", "overture.schema")
+ """ + registry: dict[str, object] = {} + for mod_path, is_pkg in entries: + if is_pkg: + registry[mod_path] = type("pkg", (), {"__path__": ["/fake"]})() + else: + registry[mod_path] = type("mod", (), {})() + return registry + + +class TestIsPackageModule: + def test_package_has_path(self) -> None: + registry = _make_registry(("my.package", True)) + assert is_package_module("my.package", registry) is True + + def test_file_module_no_path(self) -> None: + registry = _make_registry(("my.module", False)) + assert is_package_module("my.module", registry) is False + + def test_missing_module_raises(self) -> None: + with pytest.raises(ValueError): + is_package_module("nonexistent", {}) + + +class TestComputeOutputDir: + def test_package_keeps_all_parts(self) -> None: + reg = _make_registry(("overture.schema.buildings", True)) + result = compute_output_dir("overture.schema.buildings", "overture.schema", reg) + assert result == PurePosixPath("buildings") + + def test_file_module_drops_last(self) -> None: + reg = _make_registry(("overture.schema.core.names.primary_name", False)) + result = compute_output_dir( + "overture.schema.core.names.primary_name", "overture.schema", reg + ) + assert result == PurePosixPath("core/names") + + def test_deep_package(self) -> None: + reg = _make_registry(("overture.schema.core.names", True)) + result = compute_output_dir( + "overture.schema.core.names", "overture.schema", reg + ) + assert result == PurePosixPath("core/names") + + def test_file_module_in_theme(self) -> None: + reg = _make_registry(("overture.schema.buildings.enums", False)) + result = compute_output_dir( + "overture.schema.buildings.enums", "overture.schema", reg + ) + assert result == PurePosixPath("buildings") + + def test_file_module_deep(self) -> None: + reg = _make_registry(("overture.schema.divisions.division.models", False)) + result = compute_output_dir( + "overture.schema.divisions.division.models", "overture.schema", reg + ) + assert result == 
PurePosixPath("divisions/division") + + def test_root_module_returns_dot(self) -> None: + reg = _make_registry(("overture.schema", True)) + result = compute_output_dir("overture.schema", "overture.schema", reg) + assert result == PurePosixPath(".") + + def test_file_module_one_level_returns_dot(self) -> None: + reg = _make_registry(("overture.schema.types", False)) + result = compute_output_dir("overture.schema.types", "overture.schema", reg) + assert result == PurePosixPath(".") diff --git a/packages/overture-schema-codegen/tests/test_naming.py b/packages/overture-schema-codegen/tests/test_naming.py new file mode 100644 index 000000000..77e4d5773 --- /dev/null +++ b/packages/overture-schema-codegen/tests/test_naming.py @@ -0,0 +1,23 @@ +"""Tests for PascalCase to snake_case conversion.""" + +import pytest +from overture.schema.codegen.extraction.case_conversion import to_snake_case + + +class TestToSnakeCase: + """Tests for snake_case conversion helper.""" + + @pytest.mark.parametrize( + ("input_name", "expected"), + [ + ("Building", "building"), + ("BuildingPart", "building_part"), + ("RoadSegment", "road_segment"), + ("Place", "place"), + ("simple", "simple"), # Already lowercase + ("HTTPServer", "http_server"), # Consecutive caps + ], + ) + def test_converts_pascal_to_snake(self, input_name: str, expected: str) -> None: + """PascalCase names should convert to snake_case.""" + assert to_snake_case(input_name) == expected diff --git a/packages/overture-schema-codegen/tests/test_newtype_extraction.py b/packages/overture-schema-codegen/tests/test_newtype_extraction.py new file mode 100644 index 000000000..6cd73c5c2 --- /dev/null +++ b/packages/overture-schema-codegen/tests/test_newtype_extraction.py @@ -0,0 +1,74 @@ +"""Tests for NewType extraction.""" + +from typing import Annotated, NewType + +from codegen_test_support import STR_TYPE +from overture.schema.codegen.extraction.newtype_extraction import extract_newtype +from overture.schema.codegen.extraction.specs 
class TestExtractNewType:
    """Tests for the extract_newtype function."""

    def test_extract_hex_color(self) -> None:
        """HexColor extracts with its own name recorded on the TypeInfo."""
        spec = extract_newtype(HexColor)
        assert spec.name == "HexColor"
        assert spec.type_info.newtype_name == "HexColor"

    def test_extract_id(self) -> None:
        """Id resolves through its wrapped NewType chain to NoWhitespaceString."""
        spec = extract_newtype(Id)
        assert spec.name == "Id"
        assert spec.type_info.newtype_name == "Id"
        assert spec.type_info.base_type == "NoWhitespaceString"

    def test_extract_newtype_wrapping_list(self) -> None:
        """A NewType over an annotated list is recognized as a list type."""

        class Item(BaseModel):
            value: str

        TestSources = NewType(
            "TestSources", Annotated[list[Item], UniqueItemsConstraint()]
        )
        spec = extract_newtype(TestSources)
        assert spec.name == "TestSources"
        assert spec.type_info.is_list is True
        assert spec.type_info.newtype_name == "TestSources"

    def test_extract_newtype_without_doc_uses_field_description(self) -> None:
        """With no custom __doc__, the Field(description=...) text is used."""
        TestType = NewType(
            "TestType",
            Annotated[str, Field(description="A test type description")],
        )
        assert extract_newtype(TestType).description == "A test type description"

    def test_extract_newtype_with_doc_ignores_field_description(self) -> None:
        """A custom __doc__ wins over Field(description=...).

        HexColor defines both; is_custom_docstring returns True, so the
        docstring is taken as the description.
        """
        spec = extract_newtype(HexColor)
        assert spec.description is not None
        assert "example" in spec.description.lower() or "#" in spec.description
p.name == "int32") + assert int32_id.obj is _system_primitive.int32 + + +class TestExtractPrimitives: + """Tests for extract_primitives function.""" + + def test_accepts_type_identities(self) -> None: + prims, _ = partition_primitive_and_geometry_names(_system_primitive) + specs = extract_primitives(prims) + assert len(specs) > 0 + names = [s.name for s in specs] + assert "int32" in names + + def test_extracts_bounds(self) -> None: + prims, _ = partition_primitive_and_geometry_names(_system_primitive) + specs = extract_primitives(prims) + int32_spec = next(s for s in specs if s.name == "int32") + assert int32_spec.bounds.ge == -(2**31) + assert int32_spec.bounds.le == 2**31 - 1 + + +class TestExtractNumericBounds: + """Tests for extract_numeric_bounds function.""" + + def test_signed_integer_bounds(self) -> None: + """Should extract ge/le from a constrained integer NewType.""" + spec = extract_newtype(int32) + bounds = extract_numeric_bounds(spec.type_info) + + assert bounds.ge == -(2**31) + assert bounds.le == 2**31 - 1 + + def test_unsigned_integer_bounds(self) -> None: + """Should extract 0-based bounds from unsigned NewType.""" + spec = extract_newtype(uint8) + bounds = extract_numeric_bounds(spec.type_info) + + assert bounds.ge == 0 + assert bounds.le == 255 + + def test_int64_bounds(self) -> None: + """Should extract large bounds from int64.""" + spec = extract_newtype(int64) + bounds = extract_numeric_bounds(spec.type_info) + + assert bounds.ge == -(2**63) + assert bounds.le == 2**63 - 1 + + def test_unconstrained_type(self) -> None: + """Should return empty Interval for types without numeric constraints.""" + spec = extract_newtype(float32) + bounds = extract_numeric_bounds(spec.type_info) + + assert bounds.ge is None + assert bounds.gt is None + assert bounds.le is None + assert bounds.lt is None + + def test_exclusive_bounds(self) -> None: + """Should extract gt/lt from constraints using exclusive bounds.""" + ExclusiveBounded = NewType( + 
"ExclusiveBounded", Annotated[int, Field(gt=0, lt=100)] + ) + type_info = analyze_type(ExclusiveBounded) + bounds = extract_numeric_bounds(type_info) + + assert bounds.gt == 0 + assert bounds.lt == 100 + assert bounds.ge is None + assert bounds.le is None + + def test_mixed_bounds(self) -> None: + """Should extract a mix of inclusive and exclusive bounds.""" + MixedBounded = NewType("MixedBounded", Annotated[int, Field(ge=0, lt=256)]) + type_info = analyze_type(MixedBounded) + bounds = extract_numeric_bounds(type_info) + + assert bounds.ge == 0 + assert bounds.lt == 256 + assert bounds.gt is None + assert bounds.le is None diff --git a/packages/overture-schema-codegen/tests/test_pydantic_extraction.py b/packages/overture-schema-codegen/tests/test_pydantic_extraction.py new file mode 100644 index 000000000..1d8803d16 --- /dev/null +++ b/packages/overture-schema-codegen/tests/test_pydantic_extraction.py @@ -0,0 +1,29 @@ +"""Tests for Pydantic type extraction.""" + +from overture.schema.codegen.extraction.pydantic_extraction import extract_pydantic_type +from overture.schema.codegen.extraction.specs import PydanticTypeSpec +from pydantic import EmailStr, HttpUrl + + +class TestExtractPydanticType: + def test_extracts_http_url(self) -> None: + spec = extract_pydantic_type(HttpUrl) + assert isinstance(spec, PydanticTypeSpec) + assert spec.name == "HttpUrl" + assert spec.source_type is HttpUrl + assert spec.source_module == "networks" + assert spec.description is not None + assert "http" in spec.description.lower() + + def test_extracts_email_str(self) -> None: + spec = extract_pydantic_type(EmailStr) + assert isinstance(spec, PydanticTypeSpec) + assert spec.name == "EmailStr" + assert spec.source_type is EmailStr + assert spec.source_module == "networks" + + def test_admonition_label_filtered_from_description(self) -> None: + spec = extract_pydantic_type(EmailStr) + # EmailStr.__doc__ starts with "Info:" (bare admonition label). 
+ # _usable_description filters this, returning None. + assert spec.description is None diff --git a/packages/overture-schema-codegen/tests/test_reverse_references.py b/packages/overture-schema-codegen/tests/test_reverse_references.py new file mode 100644 index 000000000..fb8e1e41a --- /dev/null +++ b/packages/overture-schema-codegen/tests/test_reverse_references.py @@ -0,0 +1,227 @@ +"""Tests for reverse reference computation.""" + +from enum import Enum as PyEnum +from typing import NewType + +import pytest +from codegen_test_support import ( + FeatureWithAddress, + FeatureWithUrl, + Instrument, + RoadSegment, + TreeNode, + Venue, + has_name, + lookup_by_name, + make_union_spec, +) +from overture.schema.codegen.extraction.enum_extraction import extract_enum +from overture.schema.codegen.extraction.model_extraction import ( + expand_model_tree, + extract_model, +) +from overture.schema.codegen.extraction.newtype_extraction import extract_newtype +from overture.schema.codegen.extraction.specs import PydanticTypeSpec, TypeIdentity +from overture.schema.codegen.layout.type_collection import ( + collect_all_supplementary_types, +) +from overture.schema.codegen.markdown.reverse_references import ( + UsedByKind, + compute_reverse_references, +) +from overture.schema.system.ref import Id +from overture.schema.system.string import NoWhitespaceString +from pydantic import BaseModel + + +@pytest.mark.parametrize( + ("model_class", "model_name", "target_name"), + [ + (Instrument, "Instrument", "InstrumentFamily"), + (Instrument, "Instrument", "HexColor"), + (FeatureWithAddress, "FeatureWithAddress", "Address"), + ], + ids=["enum", "newtype", "sub-model"], +) +def test_model_referencing_type_produces_used_by_entry( + model_class: type, + model_name: str, + target_name: str, +) -> None: + """Model referencing a type produces a 'used by' entry on that type.""" + model_spec = extract_model(model_class, entry_point=model_name) + expand_model_tree(model_spec) + all_specs = 
collect_all_supplementary_types([model_spec]) + + assert has_name(all_specs, target_name) + + result = compute_reverse_references([model_spec], all_specs) + + entries = lookup_by_name(result, target_name) + assert len(entries) == 1 + assert entries[0].identity.name == model_name + assert entries[0].kind == UsedByKind.MODEL + + +def test_newtype_inheriting_from_newtype_produces_used_by_entry() -> None: + """NewType inheriting constraints from another NewType produces a 'used by' entry.""" + # Id wraps NoWhitespaceString, which is also a NewType + # When we extract Id, its constraints include ConstraintSource(source_ref=NoWhitespaceString, ...) + id_spec = extract_newtype(Id) + nws_spec = extract_newtype(NoWhitespaceString) + + all_specs = { + TypeIdentity(Id, "Id"): id_spec, + TypeIdentity(NoWhitespaceString, "NoWhitespaceString"): nws_spec, + } + + result = compute_reverse_references([], all_specs) + + # NoWhitespaceString should have a used_by entry from Id + entries = lookup_by_name(result, "NoWhitespaceString") + assert len(entries) == 1 + assert entries[0].identity.name == "Id" + assert entries[0].kind == UsedByKind.NEWTYPE + + +def test_union_members_have_used_by_entries() -> None: + """Union members have 'used by' entries pointing to the union feature.""" + # Create a union spec with RoadSegment as a member + union_spec = make_union_spec( + name="TestSegment", + description="Test segment union", + members=[RoadSegment], + entry_point="TestSegment", + ) + + # Extract the member + road_spec = extract_model(RoadSegment) + expand_model_tree(road_spec) + all_specs = {TypeIdentity(RoadSegment, "RoadSegment"): road_spec} + + result = compute_reverse_references([union_spec], all_specs) + + entries = lookup_by_name(result, "RoadSegment") + assert len(entries) == 1 + assert entries[0].identity.name == "TestSegment" + assert entries[0].kind == UsedByKind.MODEL + + +def test_self_references_filtered_out() -> None: + """Self-references are filtered out (handles recursive 
types).""" + tree_spec = extract_model(TreeNode, entry_point="TreeNode") + expand_model_tree(tree_spec) + + # Manually add TreeNode to all_specs to test self-reference filtering + all_specs = {TypeIdentity(TreeNode, "TreeNode"): tree_spec} + + result = compute_reverse_references([tree_spec], all_specs) + + # TreeNode should not appear in result since it only references itself + with pytest.raises(KeyError): + lookup_by_name(result, "TreeNode") + + +def test_deduplication_same_type_multiple_fields() -> None: + """Deduplication works when same type is referenced via multiple fields.""" + instrument_spec = extract_model(Instrument, entry_point="Instrument") + venue_spec = extract_model(Venue, entry_point="Venue") + expand_model_tree(instrument_spec) + expand_model_tree(venue_spec) + all_specs = collect_all_supplementary_types([instrument_spec, venue_spec]) + + assert has_name(all_specs, "Id") + + result = compute_reverse_references([instrument_spec, venue_spec], all_specs) + + entries = lookup_by_name(result, "Id") + # Both Instrument and Venue reference Id + assert len(entries) == 2 + names = {e.identity.name for e in entries} + assert names == {"Instrument", "Venue"} + # All should be MODELs + assert all(e.kind == UsedByKind.MODEL for e in entries) + + +def test_pydantic_type_has_used_by_from_feature() -> None: + """Pydantic type in all_specs gets used-by entries from features referencing it.""" + model_spec = extract_model(FeatureWithUrl, entry_point="FeatureWithUrl") + expand_model_tree(model_spec) + all_specs = collect_all_supplementary_types([model_spec]) + + assert has_name(all_specs, "HttpUrl") + assert isinstance(lookup_by_name(all_specs, "HttpUrl"), PydanticTypeSpec) + + result = compute_reverse_references([model_spec], all_specs) + + entries = lookup_by_name(result, "HttpUrl") + assert any(e.identity.name == "FeatureWithUrl" for e in entries) + + +def test_sort_tiebreaker_uses_module_for_same_name_referrers() -> None: + """Referrers with the same name sort 
deterministically by module.""" + + # Two model classes named "Feature" from different modules. + class SharedEnum(PyEnum): + A = "a" + + class FeatureAlpha(BaseModel): + value: SharedEnum + + class FeatureBeta(BaseModel): + value: SharedEnum + + FeatureAlpha.__name__ = "Feature" + FeatureAlpha.__module__ = "alpha.models" + FeatureBeta.__name__ = "Feature" + FeatureBeta.__module__ = "beta.models" + + spec_a = extract_model(FeatureAlpha, entry_point="Feature") + spec_b = extract_model(FeatureBeta, entry_point="Feature") + expand_model_tree(spec_a) + expand_model_tree(spec_b) + + enum_id = TypeIdentity(SharedEnum, "SharedEnum") + all_specs = {enum_id: extract_enum(SharedEnum)} + + result = compute_reverse_references([spec_a, spec_b], all_specs) + + entries = lookup_by_name(result, "SharedEnum") + assert len(entries) == 2 + # Both named "Feature" — module provides the tiebreaker + modules = [e.identity.module for e in entries] + assert modules == ["alpha.models", "beta.models"] + + +def test_sorting_models_before_newtypes() -> None: + """Sorting produces models before NewTypes, alphabetical within groups.""" + # Create a test where the same type (Id) is referenced by: + # - Two models (Instrument and Venue) - both MODEL referrers + # - A NewType wrapper around Id + # Create a synthetic NewType that wraps Id + CustomId = NewType("CustomId", Id) + + instrument_spec = extract_model(Instrument, entry_point="Instrument") + venue_spec = extract_model(Venue, entry_point="Venue") + expand_model_tree(instrument_spec) + expand_model_tree(venue_spec) + all_specs = collect_all_supplementary_types([instrument_spec, venue_spec]) + + # Add the CustomId NewType which references Id + custom_id_spec = extract_newtype(CustomId) + all_specs[TypeIdentity(CustomId, "CustomId")] = custom_id_spec + + result = compute_reverse_references([instrument_spec, venue_spec], all_specs) + + # Id should have entries from both Instrument and Venue (MODELs) and CustomId (NEWTYPE) + entries = 
lookup_by_name(result, "Id") + assert len(entries) == 3 + + # Check sorting: MODELs first, then NEWTYPE + # Within MODELs: alphabetical (Instrument, Venue) + assert entries[0].kind == UsedByKind.MODEL + assert entries[0].identity.name == "Instrument" + assert entries[1].kind == UsedByKind.MODEL + assert entries[1].identity.name == "Venue" + assert entries[2].kind == UsedByKind.NEWTYPE + assert entries[2].identity.name == "CustomId" diff --git a/packages/overture-schema-codegen/tests/test_specs.py b/packages/overture-schema-codegen/tests/test_specs.py new file mode 100644 index 000000000..0780e2fda --- /dev/null +++ b/packages/overture-schema-codegen/tests/test_specs.py @@ -0,0 +1,305 @@ +"""Tests for spec data structures and predicates.""" + +from typing import Annotated + +import pytest +from codegen_test_support import ( + STR_TYPE, + InstrumentFamily, + SimpleModel, + make_union_spec, +) +from overture.schema.codegen.extraction.model_extraction import extract_model +from overture.schema.codegen.extraction.specs import ( + AnnotatedField, + EnumSpec, + FeatureSpec, + FieldSpec, + ModelSpec, + NewTypeSpec, + TypeIdentity, + is_union_alias, +) +from overture.schema.codegen.extraction.type_analyzer import TypeInfo, TypeKind +from pydantic import BaseModel, Field + + +class TestFeatureSpecProtocol: + """Tests for FeatureSpec protocol compliance.""" + + def test_model_spec_satisfies_feature_spec(self) -> None: + """ModelSpec satisfies the FeatureSpec protocol.""" + + class Simple(BaseModel): + name: str + + spec = extract_model(Simple) + # Protocol compliance check + assert isinstance(spec, FeatureSpec) + # Verify protocol attributes + assert spec.name == "Simple" + assert isinstance(spec.fields, list) + assert spec.source_type is Simple + + +class TestFieldSpec: + """Tests for FieldSpec dataclass.""" + + def test_fieldspec_stores_basic_attributes(self) -> None: + """FieldSpec should store name, type_info, description, is_required.""" + field_spec = FieldSpec( + 
name="test_field", + type_info=STR_TYPE, + description="A test field", + is_required=True, + ) + + assert field_spec.name == "test_field" + assert field_spec.type_info == STR_TYPE + assert field_spec.description == "A test field" + assert field_spec.is_required is True + + def test_fieldspec_optional_field(self) -> None: + """FieldSpec should handle optional fields.""" + optional_str = TypeInfo( + base_type="str", kind=TypeKind.PRIMITIVE, is_optional=True + ) + + field_spec = FieldSpec( + name="optional_field", + type_info=optional_str, + description=None, + is_required=False, + ) + + assert field_spec.is_required is False + assert field_spec.description is None + + +class TestModelSpec: + """Tests for ModelSpec dataclass.""" + + def test_modelspec_stores_basic_attributes(self) -> None: + """ModelSpec should store name, description, fields.""" + field = FieldSpec( + name="id", + type_info=STR_TYPE, + description="Unique identifier", + is_required=True, + ) + + model_spec = ModelSpec( + name="TestModel", + description="A test model", + fields=[field], + ) + + assert model_spec.name == "TestModel" + assert model_spec.description == "A test model" + assert len(model_spec.fields) == 1 + assert model_spec.fields[0].name == "id" + + def test_entry_point_defaults_to_none(self) -> None: + spec = ModelSpec(name="M", description=None) + assert spec.entry_point is None + + +class TestAnnotatedField: + """Tests for AnnotatedField wrapper.""" + + def test_stores_field_and_variant_sources(self) -> None: + """AnnotatedField pairs a FieldSpec with variant provenance.""" + fs = FieldSpec(name="x", type_info=STR_TYPE, description=None, is_required=True) + af = AnnotatedField(field_spec=fs, variant_sources=("RoadSegment",)) + assert af.field_spec is fs + assert af.variant_sources == ("RoadSegment",) + + def test_none_variant_sources_means_shared(self) -> None: + """variant_sources=None indicates a shared field.""" + fs = FieldSpec(name="x", type_info=STR_TYPE, description=None, 
is_required=True) + af = AnnotatedField(field_spec=fs, variant_sources=None) + assert af.variant_sources is None + + +class TestFieldSpecModelTree: + """Tests for FieldSpec model and starts_cycle fields.""" + + def test_model_defaults_to_none(self) -> None: + field_spec = FieldSpec( + name="test", type_info=STR_TYPE, description=None, is_required=True + ) + assert field_spec.model is None + + def test_starts_cycle_defaults_to_false(self) -> None: + field_spec = FieldSpec( + name="test", type_info=STR_TYPE, description=None, is_required=True + ) + assert field_spec.starts_cycle is False + + def test_model_can_hold_model_spec(self) -> None: + type_info = TypeInfo(base_type="Address", kind=TypeKind.MODEL) + sub = ModelSpec(name="Address", description=None) + field_spec = FieldSpec( + name="address", + type_info=type_info, + description=None, + is_required=True, + model=sub, + ) + assert field_spec.model is sub + + def test_starts_cycle_can_be_set(self) -> None: + type_info = TypeInfo(base_type="Node", kind=TypeKind.MODEL) + sub = ModelSpec(name="Node", description=None) + field_spec = FieldSpec( + name="parent", + type_info=type_info, + description=None, + is_required=False, + model=sub, + starts_cycle=True, + ) + assert field_spec.starts_cycle is True + assert field_spec.model is sub + + def test_starts_cycle_without_model_is_nonsensical(self) -> None: + """starts_cycle=True with model=None is expressible but invalid. + + expand_model_tree never produces this combination -- starts_cycle + is only set when model points to the cycle-causing ModelSpec. + Document the invariant so violations stand out. 
+ """ + type_info = TypeInfo(base_type="Node", kind=TypeKind.MODEL) + field_spec = FieldSpec( + name="parent", + type_info=type_info, + description=None, + is_required=False, + starts_cycle=True, + ) + # Expressible but meaningless: cycle to nowhere + assert field_spec.starts_cycle is True + assert field_spec.model is None + + +class TestIsUnionAlias: + """Tests for is_union_alias predicate.""" + + def test_annotated_union_of_models_returns_true(self) -> None: + """Annotated[Union of BaseModels] is a union alias.""" + + class A(BaseModel): + x: int + + class B(BaseModel): + y: str + + union_type = Annotated[A | B, Field(description="test")] + assert is_union_alias(union_type) is True + + def test_model_class_returns_false(self) -> None: + """A concrete BaseModel class is not a union alias.""" + + class A(BaseModel): + x: int + + assert is_union_alias(A) is False + + def test_plain_string_returns_false(self) -> None: + """A plain string is not a union alias.""" + assert is_union_alias("not a type") is False + + def test_non_model_union_returns_false(self) -> None: + """A union of non-model types is not a union alias.""" + assert is_union_alias(str | int) is False + + +class TestUnionSpec: + """Tests for UnionSpec data structure.""" + + def test_fields_property_returns_plain_field_specs(self) -> None: + """UnionSpec.fields property returns list[FieldSpec] from annotated_fields.""" + fs1 = FieldSpec( + name="a", type_info=STR_TYPE, description=None, is_required=True + ) + fs2 = FieldSpec( + name="b", type_info=STR_TYPE, description=None, is_required=False + ) + spec = make_union_spec( + annotated_fields=[ + AnnotatedField(field_spec=fs1, variant_sources=None), + AnnotatedField(field_spec=fs2, variant_sources=("X",)), + ], + ) + assert spec.fields == [fs1, fs2] + + +class TestTypeIdentity: + def test_frozen(self) -> None: + ti = TypeIdentity(obj=int, name="int") + with pytest.raises(AttributeError): + ti.obj = str # type: ignore[misc] + + def test_same_obj_equal(self) 
-> None: + a = TypeIdentity(obj=int, name="int") + b = TypeIdentity(obj=int, name="integer") + assert a == b + + def test_same_obj_same_hash(self) -> None: + a = TypeIdentity(obj=int, name="int") + b = TypeIdentity(obj=int, name="integer") + assert hash(a) == hash(b) + + def test_different_obj_not_equal(self) -> None: + a = TypeIdentity(obj=int, name="int") + b = TypeIdentity(obj=str, name="int") + assert a != b + + def test_works_as_dict_key(self) -> None: + ti = TypeIdentity(obj=int, name="int") + d = {ti: "value"} + assert d[TypeIdentity(obj=int, name="different")] == "value" + + def test_not_equal_to_non_identity(self) -> None: + ti = TypeIdentity(obj=int, name="int") + non_identity_type: object = int + non_identity_str: object = "int" + assert ti != non_identity_type + assert ti != non_identity_str + + +class TestSpecIdentity: + def test_model_spec_identity(self) -> None: + spec = ModelSpec(name="Foo", description=None, source_type=SimpleModel) + ident = spec.identity + assert isinstance(ident, TypeIdentity) + assert ident.obj is SimpleModel + assert ident.name == "Foo" + + def test_enum_spec_identity(self) -> None: + spec = EnumSpec(name="Color", description=None, source_type=InstrumentFamily) + ident = spec.identity + assert ident.obj is InstrumentFamily + assert ident.name == "Color" + + def test_newtype_spec_identity(self) -> None: + from overture.schema.system.primitive import int32 + + spec = NewTypeSpec( + name="int32", description=None, type_info=STR_TYPE, source_type=int32 + ) + ident = spec.identity + assert ident.obj is int32 + assert ident.name == "int32" + + def test_union_spec_identity(self) -> None: + sentinel = object() + spec = make_union_spec("TestUnion", source_annotation=sentinel) + ident = spec.identity + assert ident.obj is sentinel + assert ident.name == "TestUnion" + + def test_model_spec_satisfies_feature_protocol_with_identity(self) -> None: + spec = ModelSpec(name="Foo", description=None, source_type=SimpleModel) + feature: 
FeatureSpec = spec + assert feature.identity.obj is SimpleModel diff --git a/packages/overture-schema-codegen/tests/test_type_analyzer.py b/packages/overture-schema-codegen/tests/test_type_analyzer.py new file mode 100644 index 000000000..d48d12211 --- /dev/null +++ b/packages/overture-schema-codegen/tests/test_type_analyzer.py @@ -0,0 +1,676 @@ +"""Tests for type analysis.""" + +from enum import Enum +from typing import Annotated, Any, Literal, NewType, Optional + +import pytest +from annotated_types import Ge +from overture.schema.codegen.extraction.type_analyzer import ( + TypeInfo, + TypeKind, + UnsupportedUnionError, + analyze_type, + single_literal_value, +) +from overture.schema.system.primitive import float64, int32 +from overture.schema.system.ref import Id +from overture.schema.system.string import ( + HexColor, + NoWhitespaceConstraint, + NoWhitespaceString, + SnakeCaseString, +) +from pydantic import BaseModel, Field, Tag +from typing_extensions import Sentinel + + +@pytest.fixture() +def id_type_info() -> TypeInfo: + return analyze_type(Id) + + +@pytest.fixture() +def hex_color_type_info() -> TypeInfo: + return analyze_type(HexColor) + + +class TestAnalyzeTypePrimitives: + """Tests for primitive type analysis.""" + + @pytest.mark.parametrize("annotation", [str, int, float, bool]) + def test_builtin_returns_primitive_type_info(self, annotation: type) -> None: + """Builtin type annotations return PRIMITIVE TypeInfo with matching base_type.""" + result = analyze_type(annotation) + + assert result.base_type == annotation.__name__ + assert result.kind == TypeKind.PRIMITIVE + assert result.is_optional is False + assert result.is_list is False + + +class TestAnalyzeTypeSentinel: + """Tests for Sentinel type filtering in unions. + + Pydantic uses ``typing_extensions.Sentinel`` instances (like ````) + in union types for optional fields. The type analyzer filters these out + alongside ``None`` when processing unions. 
+ """ + + @pytest.fixture() + def missing_sentinel(self) -> object: + return Sentinel("MISSING") + + def test_sentinel_filtered_from_union(self, missing_sentinel: object) -> None: + """Sentinel is filtered out, leaving the concrete type.""" + result = analyze_type(str | missing_sentinel) # type: ignore[arg-type] + + assert result.base_type == "str" + assert result.kind == TypeKind.PRIMITIVE + assert result.is_optional is False + + def test_sentinel_with_none_sets_optional(self, missing_sentinel: object) -> None: + """Sentinel + None both filtered; None triggers is_optional.""" + result = analyze_type(str | missing_sentinel | None) # type: ignore[arg-type] + + assert result.base_type == "str" + assert result.kind == TypeKind.PRIMITIVE + assert result.is_optional is True + + +class TestAnalyzeTypeOptional: + """Tests for Optional type analysis.""" + + def test_pipe_none_sets_is_optional(self) -> None: + """str | None returns TypeInfo with is_optional=True.""" + result = analyze_type(str | None) + + assert result.base_type == "str" + assert result.kind == TypeKind.PRIMITIVE + assert result.is_optional is True + assert result.is_list is False + + def test_type_with_literal_and_none(self) -> None: + """str | Literal[""] | None filters Literal and marks optional.""" + result = analyze_type(str | Literal[""] | None) + + assert result.base_type == "str" + assert result.kind == TypeKind.PRIMITIVE + assert result.is_optional is True + + def test_typing_optional_sets_is_optional(self) -> None: + """Optional[str] from typing module returns TypeInfo with is_optional=True.""" + result = analyze_type(Optional[str]) # noqa: UP045 + + assert result.base_type == "str" + assert result.kind == TypeKind.PRIMITIVE + assert result.is_optional is True + assert result.is_list is False + + +class TestAnalyzeTypeUnionLiteralFiltering: + """Tests for filtering Literal arms out of unions.""" + + def test_type_with_literal_alternative(self) -> None: + """str | Literal[""] filters out the 
Literal and analyzes the concrete type.""" + result = analyze_type(str | Literal[""]) + + assert result.base_type == "str" + assert result.kind == TypeKind.PRIMITIVE + assert result.is_optional is False + + +class TestAnalyzeTypeList: + """Tests for list type analysis.""" + + def test_list_str_sets_is_list(self) -> None: + """list[str] returns TypeInfo with is_list=True.""" + result = analyze_type(list[str]) + + assert result.base_type == "str" + assert result.kind == TypeKind.PRIMITIVE + assert result.is_optional is False + assert result.is_list is True + + def test_nested_list_sets_depth_2(self) -> None: + """list[list[str]] records two levels of nesting.""" + result = analyze_type(list[list[str]]) + + assert result.list_depth == 2 + assert result.base_type == "str" + assert result.kind == TypeKind.PRIMITIVE + + +class TestAnalyzeTypeComposite: + """Tests for composite/nested type analysis.""" + + def test_list_optional_str(self) -> None: + """list[str | None] sets both is_list and is_optional.""" + result = analyze_type(list[str | None]) + + assert result.base_type == "str" + assert result.is_list is True + assert result.is_optional is True + + def test_optional_list_str(self) -> None: + """list[str] | None sets both is_list and is_optional.""" + result = analyze_type(list[str] | None) + + assert result.base_type == "str" + assert result.is_list is True + assert result.is_optional is True + + def test_annotated_optional_str(self) -> None: + """Annotated[str | None, ...] extracts constraints and sets is_optional.""" + result = analyze_type(Annotated[str | None, "description"]) + + assert result.base_type == "str" + assert result.is_optional is True + assert len(result.constraints) == 1 + assert result.constraints[0].source_ref is None + assert result.constraints[0].constraint == "description" + + def test_annotated_list_str(self) -> None: + """Annotated[list[str], ...] 
extracts constraints and sets is_list.""" + result = analyze_type(Annotated[list[str], Field(min_length=1)]) + + assert result.base_type == "str" + assert result.is_list is True + assert len(result.constraints) == 1 + assert result.constraints[0].source_ref is None + + +class TestAnalyzeTypeAnnotated: + """Tests for Annotated type analysis.""" + + def test_annotated_int_with_ge_extracts_constraint(self) -> None: + """Annotated[int, Field(ge=0)] unpacks FieldInfo to extract Ge constraint.""" + result = analyze_type(Annotated[int, Field(ge=0)]) + + assert result.base_type == "int" + assert result.kind == TypeKind.PRIMITIVE + assert len(result.constraints) == 1 + cs = result.constraints[0] + assert cs.source_ref is None + assert isinstance(cs.constraint, Ge) + assert cs.constraint.ge == 0 + + def test_annotated_without_constraints(self) -> None: + """Annotated[str, 'description'] extracts non-Field metadata.""" + result = analyze_type(Annotated[str, "just a description"]) + + assert result.base_type == "str" + assert len(result.constraints) == 1 + assert result.constraints[0].source_ref is None + assert result.constraints[0].constraint == "just a description" + + +class TestAnalyzeTypeLiteral: + """Tests for Literal type analysis.""" + + def test_literal_string_extracts_values(self) -> None: + """Literal["active"] stores the value in literal_values tuple.""" + result = analyze_type(Literal["active"]) + + assert result.kind == TypeKind.LITERAL + assert result.literal_values == ("active",) + + def test_literal_int_extracts_values(self) -> None: + """Literal[42] stores the value in literal_values tuple.""" + result = analyze_type(Literal[42]) + + assert result.kind == TypeKind.LITERAL + assert result.literal_values == (42,) + + def test_multi_value_literal_stores_all_args(self) -> None: + """Literal["a", "b"] stores all args in literal_values tuple.""" + result = analyze_type(Literal["a", "b"]) + + assert result.kind == TypeKind.LITERAL + assert result.literal_values == 
("a", "b") + + def test_optional_literal_extracts_values(self) -> None: + """Optional[Literal["x"]] unwraps to Literal with is_optional set.""" + result = analyze_type(Literal["x"] | None) + + assert result.kind == TypeKind.LITERAL + assert result.literal_values == ("x",) + assert result.is_optional is True + + +class TestAnalyzeTypeEnum: + """Tests for Enum type analysis.""" + + def test_enum_subclass_returns_kind_enum(self) -> None: + """Enum subclass returns TypeInfo with kind=ENUM.""" + + class Color(Enum): + RED = "red" + GREEN = "green" + + result = analyze_type(Color) + + assert result.base_type == "Color" + assert result.kind == TypeKind.ENUM + + +class TestAnalyzeTypeModel: + """Tests for BaseModel type analysis.""" + + def test_basemodel_subclass_returns_kind_model(self) -> None: + """BaseModel subclass returns TypeInfo with kind=MODEL.""" + + class Person(BaseModel): + name: str + + result = analyze_type(Person) + + assert result.base_type == "Person" + assert result.kind == TypeKind.MODEL + + +class TestAnalyzeTypeNewType: + """Tests for NewType primitive analysis.""" + + def test_int32_returns_newtype_name(self) -> None: + """int32 NewType returns TypeInfo with base_type='int32'.""" + result = analyze_type(int32) + + assert result.base_type == "int32" + assert result.kind == TypeKind.PRIMITIVE + + def test_float64_returns_newtype_name(self) -> None: + """float64 NewType returns TypeInfo with base_type='float64'.""" + result = analyze_type(float64) + + assert result.base_type == "float64" + assert result.kind == TypeKind.PRIMITIVE + + def test_optional_int32(self) -> None: + """int32 | None sets is_optional and preserves base_type.""" + result = analyze_type(int32 | None) + + assert result.base_type == "int32" + assert result.is_optional is True + + +class TestNewtypeName: + """Tests for outermost NewType name tracking.""" + + def test_single_layer_newtype(self) -> None: + """Single NewType like int32 sets newtype_name to its name.""" + result = 
analyze_type(int32) + + assert result.newtype_name == "int32" + assert result.base_type == "int32" + + def test_nested_newtype_preserves_outermost(self, id_type_info: TypeInfo) -> None: + """Nested NewType chain uses outermost name for newtype_name.""" + assert id_type_info.newtype_name == "Id" + assert id_type_info.base_type == "NoWhitespaceString" + + def test_plain_type_has_no_newtype_name(self) -> None: + """Plain types without NewType wrapping have newtype_name=None.""" + result = analyze_type(str) + + assert result.newtype_name is None + + def test_newtype_ref_set_for_newtype(self, id_type_info: TypeInfo) -> None: + """newtype_ref points to the outermost NewType callable.""" + assert id_type_info.newtype_ref is Id + + def test_newtype_ref_none_for_plain_type(self) -> None: + """Plain types have newtype_ref=None.""" + result = analyze_type(str) + + assert result.newtype_ref is None + + +class TestNewtypeWrappingList: + """Tests for NewType wrapping a list type.""" + + def test_newtype_wrapping_list(self) -> None: + """NewType wrapping a list sets is_list and preserves newtype_name.""" + TestSources = NewType("TestSources", Annotated[list[str], Field(min_length=1)]) + result = analyze_type(TestSources) + + assert result.is_list is True + assert result.newtype_name == "TestSources" + + def test_scalar_newtype_is_not_list(self) -> None: + """Scalar NewType like int32 has is_list=False.""" + result = analyze_type(int32) + + assert result.is_list is False + + def test_plain_list_has_no_newtype_name(self) -> None: + """Plain list[str] without NewType has newtype_name=None.""" + result = analyze_type(list[str]) + + assert result.newtype_name is None + assert result.is_list is True + + def test_newtype_wrapping_list_of_models(self) -> None: + """list[NewType wrapping list[Model]] records depth 2, outer depth 1.""" + + class _Item(BaseModel): + name: str + + Inner = NewType("Inner", Annotated[list[_Item], Field(min_length=1)]) + result = analyze_type(list[Inner]) + + 
assert result.list_depth == 2 + assert result.newtype_outer_list_depth == 1 + assert result.base_type == "Inner" + assert result.kind == TypeKind.MODEL + assert result.source_type is _Item + + +class TestNewtypeOuterListDepth: + """Tests for newtype_outer_list_depth tracking.""" + + def test_list_of_scalar_newtype_has_outer_depth(self) -> None: + """list[ScalarNewType] records the list layer as outside the NewType.""" + ScalarNT = NewType("ScalarNT", str) + result = analyze_type(list[ScalarNT]) + + assert result.newtype_outer_list_depth == 1 + assert result.list_depth == 1 + + def test_newtype_wrapping_list_has_zero_outer_depth(self) -> None: + """NewType wrapping list[X] records no list layers outside the NewType.""" + ListNT = NewType("ListNT", Annotated[list[str], Field(min_length=1)]) + result = analyze_type(ListNT) + + assert result.newtype_outer_list_depth == 0 + assert result.list_depth == 1 + + @pytest.mark.parametrize( + "annotation", + [ + list[str], # list without NewType + int32, # scalar NewType + str, # plain type + ], + ids=["plain_list", "scalar_newtype", "plain_type"], + ) + def test_zero_outer_depth_without_newtype_boundary( + self, annotation: object + ) -> None: + """Types without a NewType inside a list have newtype_outer_list_depth=0.""" + result = analyze_type(annotation) + + assert result.newtype_outer_list_depth == 0 + + def test_nested_list_of_scalar_newtype_has_outer_depth_2(self) -> None: + """list[list[ScalarNewType]] records two outer list layers.""" + ScalarNT = NewType("ScalarNT", str) + result = analyze_type(list[list[ScalarNT]]) + + assert result.newtype_outer_list_depth == 2 + assert result.list_depth == 2 + + +class TestConstraintProvenance: + """Tests for flattened constraints with provenance tracking.""" + + def test_nested_newtype_flattens_constraints(self, id_type_info: TypeInfo) -> None: + """Id -> NoWhitespaceString -> str flattens all constraints with sources.""" + source_names = { + cs.source_name for cs in 
id_type_info.constraints if cs.source_name + } + assert "Id" in source_names + assert "NoWhitespaceString" in source_names + + def test_nested_newtype_includes_inner_constraints( + self, id_type_info: TypeInfo + ) -> None: + """Inner NewType constraints are collected with provenance.""" + nws_constraints = [ + cs for cs in id_type_info.constraints if cs.source_ref is NoWhitespaceString + ] + constraint_types = {type(cs.constraint) for cs in nws_constraints} + assert NoWhitespaceConstraint in constraint_types + + def test_direct_annotation_has_none_source(self) -> None: + """Constraints from direct Annotated (no NewType) have source_ref=None.""" + result = analyze_type(Annotated[str, "direct"]) + + assert len(result.constraints) == 1 + assert result.constraints[0].source_ref is None + assert result.constraints[0].constraint == "direct" + + def test_single_newtype_constraints_attributed( + self, hex_color_type_info: TypeInfo + ) -> None: + """HexColor constraints are attributed to the HexColor callable.""" + assert all(cs.source_ref is HexColor for cs in hex_color_type_info.constraints) + assert len(hex_color_type_info.constraints) > 0 + + def test_source_ref_is_newtype_callable( + self, hex_color_type_info: TypeInfo + ) -> None: + """source_ref is the actual NewType callable, not a string.""" + cs = hex_color_type_info.constraints[0] + assert cs.source_ref is HexColor + + def test_constraint_preserves_original_object( + self, hex_color_type_info: TypeInfo + ) -> None: + """ConstraintSource.constraint holds the original constraint object.""" + hcc = next( + cs + for cs in hex_color_type_info.constraints + if type(cs.constraint).__name__ == "HexColorConstraint" + ) + assert hcc.constraint.__class__.__name__ == "HexColorConstraint" + + +class TestTypeInfoDescription: + """Tests for TypeInfo.description from Field(description=...) 
metadata.""" + + def test_newtype_with_field_description( + self, hex_color_type_info: TypeInfo + ) -> None: + """Should extract Field description from HexColor.""" + assert hex_color_type_info.description is not None + assert "color" in hex_color_type_info.description.lower() + + def test_newtype_without_field_description(self) -> None: + """Should have None description for types without Field(description=...).""" + result = analyze_type(int) + assert result.description is None + + def test_plain_annotated_with_field_description(self) -> None: + """Should extract description from Annotated with Field(description=...).""" + MyType = Annotated[str, Field(description="A test description")] + result = analyze_type(MyType) + assert result.description == "A test description" + + def test_outermost_description_wins(self, id_type_info: TypeInfo) -> None: + """Outermost FieldInfo.description takes precedence in nested NewTypes.""" + assert id_type_info.description is not None + assert "unique identifier" in id_type_info.description.lower() + + def test_newtype_without_field_has_none_description(self) -> None: + """NewType with constraints but no Field(description=...) 
has None.""" + result = analyze_type(SnakeCaseString) + assert result.description is None + + +class TestAnalyzeTypeAny: + """Tests for typing.Any analysis.""" + + def test_any_returns_primitive(self) -> None: + """Any annotation returns TypeInfo with base_type='Any' and kind=PRIMITIVE.""" + result = analyze_type(Any) + + assert result.base_type == "Any" + assert result.kind == TypeKind.PRIMITIVE + + def test_dict_with_any_value(self) -> None: + """dict[str, Any] analyzes without error.""" + result = analyze_type(dict[str, Any]) + + assert result.is_dict is True + assert result.dict_value_type is not None + assert result.dict_value_type.base_type == "Any" + + +class TestAnalyzeTypeDict: + """Tests for dict type analysis.""" + + @pytest.fixture() + def dict_str_int(self) -> TypeInfo: + return analyze_type(dict[str, int]) + + def test_dict_str_int_sets_is_dict(self, dict_str_int: TypeInfo) -> None: + """dict[str, int] returns TypeInfo with is_dict=True.""" + assert dict_str_int.is_dict is True + assert dict_str_int.is_optional is False + assert dict_str_int.is_list is False + + def test_dict_key_type_analyzed(self, dict_str_int: TypeInfo) -> None: + """dict[str, int] has dict_key_type describing the key.""" + assert dict_str_int.dict_key_type is not None + assert dict_str_int.dict_key_type.base_type == "str" + assert dict_str_int.dict_key_type.kind == TypeKind.PRIMITIVE + + def test_dict_value_type_analyzed(self, dict_str_int: TypeInfo) -> None: + """dict[str, int] has dict_value_type describing the value.""" + assert dict_str_int.dict_value_type is not None + assert dict_str_int.dict_value_type.base_type == "int" + assert dict_str_int.dict_value_type.kind == TypeKind.PRIMITIVE + + def test_optional_dict(self) -> None: + """dict[str, str] | None sets is_dict and is_optional.""" + result = analyze_type(dict[str, str] | None) + + assert result.is_dict is True + assert result.is_optional is True + + def test_newtype_wrapping_dict(self) -> None: + """NewType wrapping 
dict preserves newtype_name and sets is_dict.""" + TestMapping = NewType("TestMapping", dict[str, str]) + result = analyze_type(TestMapping) + + assert result.is_dict is True + assert result.newtype_name == "TestMapping" + + def test_bare_dict_raises_type_error(self) -> None: + """Bare dict without type arguments raises TypeError.""" + with pytest.raises(TypeError, match="Bare dict"): + analyze_type(dict) + + +class TestAnalyzeTypeErrors: + """Tests for error handling.""" + + def test_unsupported_annotation_raises_type_error(self) -> None: + """Unsupported annotation type raises TypeError.""" + with pytest.raises(TypeError, match="Unsupported annotation type"): + analyze_type("not a type") + + def test_multi_type_union_raises_clear_error(self) -> None: + """Multi-type unions like str | int raise UnsupportedUnionError.""" + with pytest.raises( + UnsupportedUnionError, match="Multi-type unions not supported" + ): + analyze_type(str | int) + + def test_multi_type_union_with_none_raises_clear_error(self) -> None: + """Multi-type optional unions like str | int | None raise UnsupportedUnionError.""" + with pytest.raises( + UnsupportedUnionError, match="Multi-type unions not supported" + ): + analyze_type(str | int | None) + + def test_bare_list_raises_type_error(self) -> None: + """Bare list without type argument raises TypeError.""" + with pytest.raises(TypeError, match="Bare list without type argument"): + analyze_type(list) + + +class UnionModelA(BaseModel): + x: int + + +class UnionModelB(BaseModel): + y: str + + +class TestAnalyzeTypeUnion: + """Tests for discriminated union analysis.""" + + def test_all_model_union_returns_union_kind(self) -> None: + """Annotated[Union of BaseModel subclasses] returns TypeKind.UNION.""" + union_type = Annotated[UnionModelA | UnionModelB, Field(description="test")] + result = analyze_type(union_type) + + assert result.kind == TypeKind.UNION + assert result.union_members is not None + assert len(result.union_members) == 2 + assert 
UnionModelA in result.union_members + assert UnionModelB in result.union_members + + def test_annotated_wrapped_members_unwrapped(self) -> None: + """Union members wrapped in Annotated[X, Tag(...)] are unwrapped.""" + union_type = Annotated[ + Annotated[UnionModelA, Tag("a")] | Annotated[UnionModelB, Tag("b")], + Field(description="disc"), + ] + result = analyze_type(union_type) + + assert result.kind == TypeKind.UNION + assert result.union_members is not None + assert len(result.union_members) == 2 + assert UnionModelA in result.union_members + assert UnionModelB in result.union_members + + def test_mixed_model_nonmodel_union_still_raises(self) -> None: + """Union of model + non-model types still raises UnsupportedUnionError.""" + with pytest.raises(UnsupportedUnionError): + analyze_type(UnionModelA | str) + + def test_non_model_multi_union_still_raises(self) -> None: + """Multi-type union of non-models still raises UnsupportedUnionError.""" + with pytest.raises(UnsupportedUnionError): + analyze_type(str | int) + + def test_union_base_type_is_first_member_name(self) -> None: + """UNION TypeInfo base_type is the first member's class name.""" + result = analyze_type( + Annotated[UnionModelA | UnionModelB, Field(description="test")] + ) + assert result.base_type == "UnionModelA" + + def test_optional_union_sets_is_optional(self) -> None: + """Union with None among model members sets is_optional.""" + result = analyze_type( + Annotated[UnionModelA | UnionModelB, Field(description="test")] | None + ) + assert result.kind == TypeKind.UNION + assert result.is_optional is True + + +class TestSingleLiteralValue: + """Tests for single_literal_value convenience accessor.""" + + def test_single_value_literal(self) -> None: + """Literal["x"] returns the literal value.""" + assert single_literal_value(Literal["x"]) == "x" + + def test_single_int_literal(self) -> None: + """Literal[42] returns the integer value.""" + assert single_literal_value(Literal[42]) == 42 + + def 
test_multi_value_literal_returns_none(self) -> None: + """Multi-value Literal returns None (no single default).""" + assert single_literal_value(Literal["a", "b"]) is None + + def test_non_literal_returns_none(self) -> None: + """Non-Literal types return None.""" + assert single_literal_value(str) is None + + def test_unsupported_type_returns_none(self) -> None: + """Types that raise during analysis return None.""" + assert single_literal_value("not a type") is None diff --git a/packages/overture-schema-codegen/tests/test_type_collection.py b/packages/overture-schema-codegen/tests/test_type_collection.py new file mode 100644 index 000000000..154b39e2c --- /dev/null +++ b/packages/overture-schema-codegen/tests/test_type_collection.py @@ -0,0 +1,128 @@ +"""Tests for type collection module.""" + +from codegen_test_support import ( + FeatureWithAddress, + FeatureWithSources, + FeatureWithUrl, + Instrument, + TestSegmentWithSubModel, + has_name, + lookup_by_name, +) +from overture.schema.codegen.extraction.model_extraction import ( + expand_model_tree, + extract_model, +) +from overture.schema.codegen.extraction.specs import ( + EnumSpec, + ModelSpec, + NewTypeSpec, + PydanticTypeSpec, + SupplementarySpec, + TypeIdentity, +) +from overture.schema.codegen.layout.type_collection import ( + collect_all_supplementary_types, +) +from pydantic import BaseModel + + +def _make_feature_with_sub_model(sub_model: type) -> type[BaseModel]: + """Build a feature class whose only field references sub_model.""" + return type( + f"FeatureWith{sub_model.__name__}", + (BaseModel,), + {"__annotations__": {"sub": sub_model}, "sub": None}, + ) + + +def _expanded_supplementary(model_class: type) -> dict[TypeIdentity, SupplementarySpec]: + spec = extract_model(model_class) + expand_model_tree(spec) + return collect_all_supplementary_types([spec]) + + +class TestCollectAllSupplementarySpecs: + """Tests for collect_all_supplementary_types returning specs from expanded trees.""" + + def 
test_returns_enum_specs(self) -> None: + result = _expanded_supplementary(Instrument) + + assert has_name(result, "InstrumentFamily") + assert isinstance(lookup_by_name(result, "InstrumentFamily"), EnumSpec) + + def test_returns_newtype_specs(self) -> None: + result = _expanded_supplementary(Instrument) + + assert has_name(result, "HexColor") + assert isinstance(lookup_by_name(result, "HexColor"), NewTypeSpec) + + def test_returns_model_specs_from_expanded_tree(self) -> None: + result = _expanded_supplementary(FeatureWithAddress) + + assert has_name(result, "Address") + assert isinstance(lookup_by_name(result, "Address"), ModelSpec) + + def test_collects_transitive_types(self) -> None: + """Types referenced by sub-models are also collected.""" + result = _expanded_supplementary(FeatureWithSources) + + # Sources is a semantic NewType; SourceItem is a sub-model + # referenced transitively via the expanded tree + assert has_name(result, "Sources") + assert has_name(result, "SourceItem") + + def test_same_name_different_types_both_collected(self) -> None: + """Two types with the same __name__ from different modules are both collected.""" + ModelA = type("Address", (BaseModel,), {"__annotations__": {"x": str}}) + ModelB = type("Address", (BaseModel,), {"__annotations__": {"y": int}}) + + outer_a = extract_model(_make_feature_with_sub_model(ModelA)) + expand_model_tree(outer_a) + + outer_b = extract_model(_make_feature_with_sub_model(ModelB)) + expand_model_tree(outer_b) + + result = collect_all_supplementary_types([outer_a, outer_b]) + + address_entries = [ + spec for tid, spec in result.items() if tid.name == "Address" + ] + assert len(address_entries) == 2 + + +class TestCollectUnionMemberSubModels: + """Tests for union members with nested sub-model fields.""" + + def test_union_member_with_sub_model_collects_sub_model(self) -> None: + """Sub-models inside union members are collected without RuntimeError.""" + + class FeatureWithUnionSubModel(BaseModel): + segment: 
TestSegmentWithSubModel + + result = _expanded_supplementary(FeatureWithUnionSubModel) + + assert has_name(result, "ContactInfo") + assert isinstance(lookup_by_name(result, "ContactInfo"), ModelSpec) + + +class TestCollectPydanticTypes: + """Tests for Pydantic built-in type collection.""" + + def test_collects_pydantic_type_from_field(self) -> None: + """Pydantic types referenced in fields are collected.""" + result = _expanded_supplementary(FeatureWithUrl) + assert has_name(result, "HttpUrl") + assert isinstance(lookup_by_name(result, "HttpUrl"), PydanticTypeSpec) + + def test_collects_pydantic_type_inside_list(self) -> None: + """Pydantic types wrapped in list[] are collected.""" + result = _expanded_supplementary(FeatureWithUrl) + assert has_name(result, "EmailStr") + assert isinstance(lookup_by_name(result, "EmailStr"), PydanticTypeSpec) + + def test_does_not_collect_builtin_primitives(self) -> None: + """Plain primitives like str are not collected as PydanticTypeSpec.""" + result = _expanded_supplementary(FeatureWithUrl) + assert not has_name(result, "str") + assert not has_name(result, "int") diff --git a/packages/overture-schema-codegen/tests/test_type_placement.py b/packages/overture-schema-codegen/tests/test_type_placement.py new file mode 100644 index 000000000..62ef7449a --- /dev/null +++ b/packages/overture-schema-codegen/tests/test_type_placement.py @@ -0,0 +1,247 @@ +"""Tests for type placement module.""" + +from pathlib import PurePosixPath + +import overture.schema.system.primitive as _system_primitive +from codegen_test_support import ( + EMAIL_STR_SPEC, + HTTP_URL_SPEC, + STR_TYPE, + flat_specs_from_discovery, + lookup_by_name, + make_union_spec, +) +from overture.schema.codegen.extraction.model_extraction import expand_model_tree +from overture.schema.codegen.extraction.primitive_extraction import ( + partition_primitive_and_geometry_names, +) +from overture.schema.codegen.extraction.specs import ( + AnnotatedField, + FeatureSpec, + FieldSpec, + 
ModelSpec, + SupplementarySpec, + TypeIdentity, +) +from overture.schema.codegen.layout.type_collection import ( + collect_all_supplementary_types, +) +from overture.schema.codegen.markdown.link_computation import LinkContext, relative_link +from overture.schema.codegen.markdown.path_assignment import ( + GEOMETRY_PAGE, + PRIMITIVES_PAGE, + build_placement_registry, +) +from pydantic import BaseModel + +_PRIMITIVE_NAMES, _GEOMETRY_NAMES = partition_primitive_and_geometry_names( + _system_primitive +) + +_SCHEMA_ROOT = "overture.schema" + + +def _build_registry( + feature_specs: list[ModelSpec], +) -> tuple[dict[TypeIdentity, PurePosixPath], dict[TypeIdentity, SupplementarySpec]]: + """Build placement registry with standard aggregate names.""" + cache: dict[type, ModelSpec] = {} + for spec in feature_specs: + expand_model_tree(spec, cache) + all_specs = collect_all_supplementary_types(feature_specs) + registry = build_placement_registry( + feature_specs, all_specs, _PRIMITIVE_NAMES, _GEOMETRY_NAMES, _SCHEMA_ROOT + ) + return registry, all_specs + + +class TestRelativeLink: + """Test relative path computation between pages.""" + + def test_same_directory(self) -> None: + source = PurePosixPath("buildings/building.md") + target = PurePosixPath("buildings/facade_material.md") + assert relative_link(source, target) == "facade_material.md" + + def test_sibling_directory(self) -> None: + source = PurePosixPath("buildings/building.md") + target = PurePosixPath("core/names/names.md") + assert relative_link(source, target) == "../core/names/names.md" + + def test_within_core(self) -> None: + source = PurePosixPath("core/names/names.md") + target = PurePosixPath("core/sources/sources.md") + assert relative_link(source, target) == "../sources/sources.md" + + def test_to_aggregate_page(self) -> None: + source = PurePosixPath("core/names/names.md") + target = PurePosixPath("system/primitive/primitives.md") + assert relative_link(source, target) == 
"../../system/primitive/primitives.md" + + +class TestBuildPlacementRegistry: + """Test the full placement registry builder with module-mirrored paths.""" + + def test_features_at_theme_level(self) -> None: + """Features land directly in their theme directory.""" + specs = flat_specs_from_discovery("buildings") + registry, _ = _build_registry(specs) + + assert lookup_by_name(registry, "Building") == PurePosixPath( + "buildings/building.md" + ) + assert lookup_by_name(registry, "BuildingPart") == PurePosixPath( + "buildings/building_part.md" + ) + + def test_shared_types_mirror_source_modules(self) -> None: + """Core/system types land in directories matching their module path.""" + specs = flat_specs_from_discovery("buildings") + registry, _ = _build_registry(specs) + + names = {tid.name for tid in registry} + if "Names" in names: + assert str(lookup_by_name(registry, "Names")).startswith("core/") + + def test_no_duplicate_paths(self) -> None: + """No two individual types share an output path.""" + specs = flat_specs_from_discovery() + registry, _ = _build_registry(specs) + + aggregate_pages = { + PurePosixPath("system/primitive/primitives.md"), + PurePosixPath("system/primitive/geometry.md"), + } + individual = [p for p in registry.values() if p not in aggregate_pages] + assert len(individual) == len(set(individual)), ( + "Duplicate output paths detected" + ) + + def test_aggregate_pages_at_system_primitive(self) -> None: + """Primitive and geometry aggregate pages under system/primitive/.""" + assert PRIMITIVES_PAGE == PurePosixPath("system/primitive/primitives.md") + assert GEOMETRY_PAGE == PurePosixPath("system/primitive/geometry.md") + + def test_supplementary_types_nested_under_types(self) -> None: + """Supplementary types in a feature directory go under types/.""" + specs = flat_specs_from_discovery("buildings") + registry, _ = _build_registry(specs) + + # BuildingClass is a supplementary type from the buildings module + assert lookup_by_name(registry, 
"BuildingClass") == PurePosixPath( + "buildings/types/building_class.md" + ) + + def test_submodule_supplementary_types_nested_under_types(self) -> None: + """Supplementary types in a feature subdirectory go under types/.""" + specs = flat_specs_from_discovery("divisions") + registry, _ = _build_registry(specs) + + # AreaClass is from overture.schema.divisions.division_area.enums, + # a subdirectory of the divisions feature directory. + assert lookup_by_name(registry, "AreaClass") == PurePosixPath( + "divisions/types/division_area/area_class.md" + ) + + def test_shared_types_not_nested(self) -> None: + """Core/system supplementary types stay at their module-mirrored path.""" + specs = flat_specs_from_discovery("buildings") + registry, _ = _build_registry(specs) + + # Names is from overture.schema.core -- no features there, no nesting + names = {tid.name for tid in registry} + if "Names" in names: + path = str(lookup_by_name(registry, "Names")) + assert path.startswith("core/") + assert "/types/" not in path + + +class TestPlacementWithUnionSpec: + """Tests for placement registry with UnionSpec.""" + + def test_union_spec_gets_placement(self) -> None: + """UnionSpec is placed alongside ModelSpec in the registry.""" + + class Base(BaseModel): + name: str + + class A(Base): + x: int + + union_spec = make_union_spec( + annotated_fields=[ + AnnotatedField( + field_spec=FieldSpec( + name="name", + type_info=STR_TYPE, + description=None, + is_required=True, + ), + variant_sources=None, + ), + ], + members=[A], + common_base=Base, + entry_point="test.package:TestUnion", + ) + + feature_specs: list[FeatureSpec] = [union_spec] + all_specs = collect_all_supplementary_types(feature_specs) + registry = build_placement_registry( + feature_specs, all_specs, [], [], "test.package" + ) + assert any(tid.name == "TestUnion" for tid in registry) + + +class TestLinkContextWithTypeIdentity: + """Tests for LinkContext using TypeIdentity keys.""" + + def 
test_same_name_different_identity_separate_paths(self) -> None: + """Two types with the same name but different objects resolve to different paths.""" + obj_a = type("Address", (), {}) + obj_b = type("Address", (), {}) + registry = { + TypeIdentity(obj_a, "Address"): PurePosixPath("places/types/address.md"), + TypeIdentity(obj_b, "Address"): PurePosixPath("addresses/address.md"), + } + ctx = LinkContext(page_path=PurePosixPath("places/place.md"), registry=registry) + assert ctx.resolve_link(TypeIdentity(obj_a, "Address")) == "types/address.md" + assert ( + ctx.resolve_link(TypeIdentity(obj_b, "Address")) + == "../addresses/address.md" + ) + + +class TestPydanticTypePlacement: + """Tests for placement of Pydantic built-in types.""" + + def test_pydantic_type_placed_under_module_dir(self) -> None: + registry = build_placement_registry( + feature_specs=[], + all_specs={HTTP_URL_SPEC.identity: HTTP_URL_SPEC}, + primitive_names=[], + geometry_names=[], + schema_root="overture.schema", + ) + assert lookup_by_name(registry, "HttpUrl") == PurePosixPath( + "pydantic/networks/http_url.md" + ) + + def test_multiple_pydantic_types_same_module(self) -> None: + specs: dict[TypeIdentity, SupplementarySpec] = { + HTTP_URL_SPEC.identity: HTTP_URL_SPEC, + EMAIL_STR_SPEC.identity: EMAIL_STR_SPEC, + } + registry = build_placement_registry( + feature_specs=[], + all_specs=specs, + primitive_names=[], + geometry_names=[], + schema_root="overture.schema", + ) + assert lookup_by_name(registry, "HttpUrl") == PurePosixPath( + "pydantic/networks/http_url.md" + ) + assert lookup_by_name(registry, "EmailStr") == PurePosixPath( + "pydantic/networks/email_str.md" + ) diff --git a/packages/overture-schema-codegen/tests/test_type_registry.py b/packages/overture-schema-codegen/tests/test_type_registry.py new file mode 100644 index 000000000..b9d02d2ac --- /dev/null +++ b/packages/overture-schema-codegen/tests/test_type_registry.py @@ -0,0 +1,143 @@ +"""Tests for type registry.""" + +import pytest 
+from overture.schema.codegen.extraction.type_analyzer import TypeInfo, TypeKind +from overture.schema.codegen.extraction.type_registry import ( + PRIMITIVE_TYPES, + TypeMapping, + get_type_mapping, + resolve_type_name, +) + + +class TestTypeMapping: + """Tests for TypeMapping dataclass.""" + + def test_typemapping_accepts_markdown(self) -> None: + """TypeMapping should construct with markdown field.""" + mapping = TypeMapping(markdown="int32") + + assert mapping.markdown == "int32" + + def test_for_target_returns_markdown(self) -> None: + """for_target should return markdown representation for markdown target.""" + mapping = TypeMapping(markdown="int32") + + assert mapping.for_target("markdown") == "int32" + + def test_for_target_rejects_unknown_target(self) -> None: + """for_target should raise ValueError for unknown targets.""" + mapping = TypeMapping(markdown="int32") + + with pytest.raises(ValueError, match="Unknown target 'scala'"): + mapping.for_target("scala") + + +class TestPrimitiveTypes: + """Tests for PRIMITIVE_TYPES registry.""" + + def test_registry_contains_expected_types(self) -> None: + """Registry should contain all expected primitive types.""" + expected_types = { + "int8", + "int16", + "int32", + "int64", + "uint8", + "uint16", + "uint32", + "float32", + "float64", + "str", + "bool", + "int", + "float", + "Geometry", + "BBox", + } + + assert set(PRIMITIVE_TYPES.keys()) == expected_types + + def test_bbox_mapping(self) -> None: + """BBox should map to bbox.""" + bbox = PRIMITIVE_TYPES["BBox"] + + assert bbox.markdown == "bbox" + + +class TestGetTypeMapping: + """Tests for get_type_mapping function.""" + + def test_returns_mapping_for_known_type(self) -> None: + """Should return TypeMapping for known primitive type.""" + result = get_type_mapping("int32") + + assert result is not None + assert result.markdown == "int32" + + def test_returns_none_for_unknown_type(self) -> None: + """Should return None for unknown type names.""" + result = 
get_type_mapping("unknown_type") + + assert result is None + + def test_returns_mapping_for_builtin_int(self) -> None: + """Should map Python int to int64.""" + result = get_type_mapping("int") + + assert result is not None + assert result.markdown == "int64" + + def test_returns_mapping_for_builtin_float(self) -> None: + """Should map Python float to float64.""" + result = get_type_mapping("float") + + assert result is not None + assert result.markdown == "float64" + + +class TestResolveTypeNameNewTypeFallback: + """Tests for resolve_type_name with unregistered NewTypes.""" + + def test_unregistered_newtype_falls_back_to_source_type(self) -> None: + """Unregistered NewType resolves to source_type name.""" + ti = TypeInfo( + base_type="Sources", + kind=TypeKind.MODEL, + newtype_name="Sources", + source_type=type("SourceItem", (), {}), + ) + result = resolve_type_name(ti, "markdown") + + assert result == "SourceItem" + + def test_registered_newtype_unaffected(self) -> None: + """Registered NewType (int32) still resolves through the registry.""" + ti = TypeInfo( + base_type="int32", + kind=TypeKind.PRIMITIVE, + newtype_name="int32", + source_type=int, + ) + result = resolve_type_name(ti, "markdown") + + assert result == "int32" + + +class TestResolveTypeName: + """Tests for resolve_type_name with list/optional flags.""" + + def _make_type_info(self, **kwargs: object) -> TypeInfo: + defaults = {"base_type": "str", "kind": TypeKind.PRIMITIVE} + defaults.update(kwargs) + return TypeInfo(**defaults) # type: ignore[arg-type] + + def test_ignores_list_depth(self) -> None: + """resolve_type_name returns the base type regardless of list_depth.""" + ti = self._make_type_info(list_depth=1) + assert resolve_type_name(ti, "markdown") == "string" + + def test_ignores_is_optional(self) -> None: + """resolve_type_name returns the base type regardless of is_optional.""" + ti = self._make_type_info(is_optional=True) + assert resolve_type_name(ti, "markdown") == "string" diff --git 
a/packages/overture-schema-codegen/tests/test_union_extraction.py b/packages/overture-schema-codegen/tests/test_union_extraction.py new file mode 100644 index 000000000..a8b685c48 --- /dev/null +++ b/packages/overture-schema-codegen/tests/test_union_extraction.py @@ -0,0 +1,91 @@ +"""Tests for union extraction.""" + +import pytest +from codegen_test_support import ( + RailSegment, + RoadSegment, + SegmentBase, + TestSegment, + WaterSegment, +) +from overture.schema.codegen.extraction.specs import FieldSpec, UnionSpec +from overture.schema.codegen.extraction.union_extraction import extract_union + + +class TestExtractUnion: + """Tests for extract_union function.""" + + @pytest.fixture + def segment_spec(self) -> UnionSpec: + return extract_union("TestSegment", TestSegment) + + def test_extracts_name_and_description(self, segment_spec: UnionSpec) -> None: + """UnionSpec captures the union name and docstring.""" + assert segment_spec.name == "TestSegment" + assert segment_spec.description == "Test segment union" + + def test_finds_common_base(self, segment_spec: UnionSpec) -> None: + """Identifies SegmentBase as the common base class.""" + assert segment_spec.common_base is SegmentBase + + def test_shared_fields_first(self, segment_spec: UnionSpec) -> None: + """Shared fields from common base come first with variant_sources=None.""" + shared = [ + af for af in segment_spec.annotated_fields if af.variant_sources is None + ] + shared_names = [af.field_spec.name for af in shared] + assert "geometry" in shared_names + assert "subtype" in shared_names + # Shared fields are at the start + first_variant_idx = next( + ( + i + for i, af in enumerate(segment_spec.annotated_fields) + if af.variant_sources is not None + ), + len(segment_spec.annotated_fields), + ) + for af in segment_spec.annotated_fields[:first_variant_idx]: + assert af.variant_sources is None + + def test_variant_specific_fields_have_sources( + self, segment_spec: UnionSpec + ) -> None: + """Variant-only fields 
carry their source class names.""" + speed = next( + af + for af in segment_spec.annotated_fields + if af.field_spec.name == "speed_limit" + ) + assert speed.variant_sources == ("RoadSegment",) + gauge = next( + af + for af in segment_spec.annotated_fields + if af.field_spec.name == "rail_gauge" + ) + assert gauge.variant_sources == ("RailSegment",) + + def test_heterogeneous_same_name_produces_separate_rows( + self, segment_spec: UnionSpec + ) -> None: + """class_ in Road (str) vs Rail (int): separate rows, not merged.""" + class_fields = [ + af for af in segment_spec.annotated_fields if af.field_spec.name == "class" + ] + assert len(class_fields) == 2 + sources = {af.variant_sources for af in class_fields} + assert ("RoadSegment",) in sources + assert ("RailSegment",) in sources + + def test_members_lists_all_member_classes(self, segment_spec: UnionSpec) -> None: + """UnionSpec.members contains all union member classes.""" + assert set(segment_spec.members) == {RoadSegment, RailSegment, WaterSegment} + + def test_source_annotation_preserved(self, segment_spec: UnionSpec) -> None: + """source_annotation holds the original Annotated[Union[...]].""" + assert segment_spec.source_annotation is TestSegment + + def test_fields_property_returns_plain_list(self, segment_spec: UnionSpec) -> None: + """spec.fields returns list[FieldSpec] without provenance.""" + for f in segment_spec.fields: + assert isinstance(f, FieldSpec) diff --git a/packages/overture-schema-core/src/overture/schema/core/discovery.py b/packages/overture-schema-core/src/overture/schema/core/discovery.py index 15da3abc4..b9290d29a 100644 --- a/packages/overture-schema-core/src/overture/schema/core/discovery.py +++ b/packages/overture-schema-core/src/overture/schema/core/discovery.py @@ -21,15 +21,15 @@ class ModelKey: The theme name (e.g., "buildings", "places"), or None for non-themed models type : str The feature type (e.g., "building", "place") - class_name : str - The fully qualified class name from 
the entry point value + entry_point : str + The entry point value in "module:Class" format """ namespace: str theme: str | None type: str - class_name: str + entry_point: str def discover_models( @@ -87,7 +87,7 @@ def discover_models( namespace=ns, theme=theme, type=feature_type, - class_name=entry_point.value, + entry_point=entry_point.value, ) models[key] = model_class except Exception as e: diff --git a/packages/overture-schema-core/src/overture/schema/core/models.py b/packages/overture-schema-core/src/overture/schema/core/models.py index ed43eabe9..dbb77a281 100644 --- a/packages/overture-schema-core/src/overture/schema/core/models.py +++ b/packages/overture-schema-core/src/overture/schema/core/models.py @@ -65,9 +65,9 @@ def __validate_ext_fields__(self) -> Self: maybe_plural = "s" if len(invalid_extra_fields) > 1 else "" raise ValueError( f"invalid extra field name{maybe_plural}: {', '.join(invalid_extra_fields)} " - "(extra fields are temporarily allowed, but only if their names start with 'ext_', " - "but all extra field name support in {self.__class__.__name__} is on a deprecation path" - "and will be removed)" + f"(extra fields are temporarily allowed, but only if their names start with 'ext_', " + f"but all extra field name support in {self.__class__.__name__} is on a deprecation path " + f"and will be removed)" ) return self diff --git a/packages/overture-schema-core/src/overture/schema/core/names.py b/packages/overture-schema-core/src/overture/schema/core/names.py index e968b24a7..aed77d985 100644 --- a/packages/overture-schema-core/src/overture/schema/core/names.py +++ b/packages/overture-schema-core/src/overture/schema/core/names.py @@ -110,7 +110,7 @@ Field(json_schema_extra={"additionalProperties": False}), ], ) -"""A mapping from language to the most commonly used or recognized name in that language.""" +CommonNames.__doc__ = """A mapping from language to the most commonly used or recognized name in that language.""" class NameVariant(str, 
DocumentedEnum): diff --git a/packages/overture-schema-core/src/overture/schema/core/scoping/opening_hours.py b/packages/overture-schema-core/src/overture/schema/core/scoping/opening_hours.py index a8efc8c16..8f8acbe6a 100644 --- a/packages/overture-schema-core/src/overture/schema/core/scoping/opening_hours.py +++ b/packages/overture-schema-core/src/overture/schema/core/scoping/opening_hours.py @@ -15,7 +15,7 @@ ), ], ) -""" +OpeningHours.__doc__ = """ Time span or time spans during which something is open or active, specified in the OpenStreetMap opening hours specification: https://wiki.openstreetmap.org/wiki/Key:opening_hours/specification. """ diff --git a/packages/overture-schema-divisions-theme/pyproject.toml b/packages/overture-schema-divisions-theme/pyproject.toml index 409598de9..21fe72551 100644 --- a/packages/overture-schema-divisions-theme/pyproject.toml +++ b/packages/overture-schema-divisions-theme/pyproject.toml @@ -37,3 +37,138 @@ packages = ["src/overture"] "overture:divisions:division" = "overture.schema.divisions:Division" "overture:divisions:division_area" = "overture.schema.divisions:DivisionArea" "overture:divisions:division_boundary" = "overture.schema.divisions:DivisionBoundary" + +[[examples.Division]] +id = "350e85f6-68ba-4114-9906-c2844815988b" +geometry = "POINT (-175.2551522 -21.1353686)" +country = "TO" +version = 1 +subtype = "locality" +class = "village" +wikidata = "null" +region = "TO-04" +perspectives = "null" +hierarchies = [ + [ + {division_id = "fef8748b-0c91-46ad-9f2d-976d8d2de3e9", subtype = "country", name = "Tonga"}, + {division_id = "4d67561a-2292-41bd-8996-7853d276a42c", subtype = "region", name = "Tongatapu"}, + {division_id = "8730f0cc-d436-4f11-a7d3-49085813ef44", subtype = "county", name = "Vahe Kolomotu'a"}, + {division_id = "350e85f6-68ba-4114-9906-c2844815988b", subtype = "locality", name = "Sia'atoutai"}, + ], +] +parent_division_id = "8730f0cc-d436-4f11-a7d3-49085813ef44" +norms = "null" +population = 534 
+capital_division_ids = "null" +capital_of_divisions = "null" +theme = "divisions" +type = "division" + +[examples.Division.bbox] +xmin = -175.25515747070312 +xmax = -175.255126953125 +ymin = -21.1353702545166 +ymax = -21.13536834716797 + +[[examples.Division.sources]] +property = "" +dataset = "OpenStreetMap" +record_id = "n3173231082@4" +update_time = "2014-12-18T09:17:03Z" +confidence = "null" +between = "null" + +[examples.Division.cartography] +prominence = 29 +min_zoom = "null" +max_zoom = "null" +sort_key = "null" + +[examples.Division.names] +primary = "Sia'atoutai" +common = "null" + +[[examples.Division.names.rules]] +variant = "alternate" +language = "null" +perspectives = "null" +value = "Nafualu" +between = "null" +side = "null" + +[examples.Division.local_type] +en = "village" + +[[examples.DivisionArea]] +id = "eb9b112f-ec3c-47f7-b519-6f9f2e6fc2bd" +geometry = "MULTIPOLYGON (((-174.9553949 -21.4730179, -174.9514163 -21.4719978, -174.9520108 -21.4681253, -174.9566122 -21.4687535, -174.9553949 -21.4730179)), ((-174.9634398 -21.3476807, -174.9753507 -21.3833656, -174.9702168 -21.4037277, -174.950488 -21.4269887, -174.9082983 -21.4577763, -174.9004303 -21.4398142, -174.9048159 -21.3698688, -174.9165467 -21.3035402, -174.9126977 -21.2903268, -174.9199765 -21.2834922, -174.9634398 -21.3476807)))" +country = "TO" +version = 2 +subtype = "region" +class = "land" +is_land = true +is_territorial = false +region = "TO-01" +division_id = "21597af0-b564-463c-a356-42c29e712b7d" +theme = "divisions" +type = "division_area" + +[examples.DivisionArea.bbox] +xmin = -174.97535705566406 +xmax = -174.90040588378906 +ymin = -21.473018646240234 +ymax = -21.283489227294922 + +[[examples.DivisionArea.sources]] +property = "" +dataset = "OpenStreetMap" +record_id = "r7247527@3" +update_time = "2020-12-30T18:41:56Z" +confidence = "null" +between = "null" + +[examples.DivisionArea.names] +primary = "ʻEua" +common = "null" +rules = "null" + +[[examples.DivisionBoundary]] +id = 
"2bdf68e4-860d-3d8c-a472-ccf439a5302a" +geometry = "LINESTRING (-147.064823 -15.4231537, -147.0519131 -15.2885069, -147.048482 -15.1511701)" +country = "PF" +version = 1 +subtype = "county" +class = "maritime" +is_land = false +is_territorial = true +division_ids = [ + "ae266459-63a4-4508-8295-0101e27d039b", + "d4a6873d-885a-4f2a-bc0f-37e9d9e874e4" +] +region = "null" +is_disputed = false +perspectives = "null" +theme = "divisions" +type = "division_boundary" + +[examples.DivisionBoundary.bbox] +xmin = -147.06483459472656 +xmax = -147.04847717285156 +ymin = -15.4231538772583 +ymax = -15.151169776916504 + +[[examples.DivisionBoundary.sources]] +property = "" +dataset = "OpenStreetMap" +record_id = "r6063055@9" +update_time = "2023-07-20T00:28:40Z" +confidence = "null" +between = "null" + +[[examples.DivisionBoundary.sources]] +property = "" +dataset = "OpenStreetMap" +record_id = "r6063063@12" +update_time = "2023-07-20T00:28:40Z" +confidence = "null" +between = "null" diff --git a/packages/overture-schema-places-theme/pyproject.toml b/packages/overture-schema-places-theme/pyproject.toml index afbf1aee4..1d851b356 100644 --- a/packages/overture-schema-places-theme/pyproject.toml +++ b/packages/overture-schema-places-theme/pyproject.toml @@ -36,3 +36,60 @@ packages = ["src/overture"] [project.entry-points."overture.models"] "overture:places:place" = "overture.schema.places:Place" + +[[examples.Place]] +id = "99003ee6-e75b-4dd6-8a8a-53a5a716c50d" +geometry = "POINT (-150.46875 -79.1713346)" +version = 1 +confidence = 0.7337175792507205 +websites = [ + "https://www.superhotel.co.jp/s_hotels/beppu/" +] +socials = [ + "https://www.facebook.com/107663894904826" +] +emails = "null" +phones = [ + "+81977009000" +] +operating_status = "open" +theme = "places" +type = "place" + +[examples.Place.bbox] +xmin = -150.46875 +xmax = -150.46875 +ymin = -79.17134094238281 +ymax = -79.17133331298828 + +[[examples.Place.sources]] +property = "" +dataset = "meta" +record_id = 
"107663894904826" +update_time = "2025-06-30T07:00:00.000Z" +confidence = 0.7337175792507205 +between = "null" + +[examples.Place.names] +primary = "スーパーホテル別府駅前" +common = "null" +rules = "null" + +[examples.Place.categories] +primary = "hotel" +alternate = "null" + +[examples.Place.brand] +wikidata = "null" + +[examples.Place.brand.names] +primary = "SUPER HOTEL" +common = "null" +rules = "null" + +[[examples.Place.addresses]] +freeform = "秋田県横手市駅前町13−8" +locality = "横手市" +postcode = "013-0036" +region = "null" +country = "JP" diff --git a/packages/overture-schema-system/src/overture/schema/system/feature.py b/packages/overture-schema-system/src/overture/schema/system/feature.py index de3f14df6..fd067df54 100644 --- a/packages/overture-schema-system/src/overture/schema/system/feature.py +++ b/packages/overture-schema-system/src/overture/schema/system/feature.py @@ -30,6 +30,33 @@ from overture.schema.system.ref import Id +def resolve_discriminator_field_name(discriminator: object) -> str | None: + """Resolve a Pydantic discriminator value to its field name string. + + Handles the three forms a discriminator can take: + - A plain string (used directly as the field name). + - A ``pydantic.Discriminator`` whose ``.discriminator`` attribute is a string. + - A ``pydantic.Discriminator`` whose ``.discriminator`` is a callable + produced by ``Feature.field_discriminator``, which stores the field name + as ``_field_name`` on the callable. + + Returns None if *discriminator* is None or its field name cannot be + determined. 
+ """ + if discriminator is None: + return None + if isinstance(discriminator, str): + return discriminator + inner = getattr(discriminator, "discriminator", None) + if isinstance(inner, str): + return inner + if callable(inner): + field_name = getattr(inner, "_field_name", None) + if isinstance(field_name, str): + return field_name + return None + + class Feature(BaseModel): """ A feature is something you can point to on a map—like a building, road, lake, or park—with the @@ -206,7 +233,10 @@ def field_discriminator( Returns ------- Discriminator - Discriminator that enables discriminated unions that include features + Discriminator that enables discriminated unions that include features. + The inner callable carries a ``_field_name`` attribute set to *field*, + allowing introspection code to recover the discriminator field name + without hardcoding it. Raises ------ @@ -296,6 +326,7 @@ def get_discriminator_value(data: object) -> Any: else getattr(data, field, None) ) + get_discriminator_value._field_name = field # type: ignore[attr-defined] return Discriminator(get_discriminator_value) @model_serializer(mode="wrap") diff --git a/packages/overture-schema-system/src/overture/schema/system/field_constraint/collection.py b/packages/overture-schema-system/src/overture/schema/system/field_constraint/collection.py index a39159217..f8a699d91 100644 --- a/packages/overture-schema-system/src/overture/schema/system/field_constraint/collection.py +++ b/packages/overture-schema-system/src/overture/schema/system/field_constraint/collection.py @@ -43,7 +43,7 @@ def _is_collection_type(source: type[Any]) -> bool: class UniqueItemsConstraint(CollectionConstraint): - """Ensures all items in a collection are unique.""" + """All items must be unique.""" def validate(self, value: list[Any] | None, info: ValidationInfo) -> None: # Skip validation for None values (used with optional fields) diff --git 
a/packages/overture-schema-system/src/overture/schema/system/field_constraint/string.py b/packages/overture-schema-system/src/overture/schema/system/field_constraint/string.py index 8c2d90415..68737f5db 100644 --- a/packages/overture-schema-system/src/overture/schema/system/field_constraint/string.py +++ b/packages/overture-schema-system/src/overture/schema/system/field_constraint/string.py @@ -324,7 +324,7 @@ def __get_pydantic_json_schema__( class PhoneNumberConstraint(StringConstraint): - """Constraint for international phone numbers.""" + """Allows only international phone numbers.""" def __init__(self) -> None: self.pattern = re.compile(r"^\+\d{1,3}[\s\-\(\)0-9]+$") @@ -359,7 +359,7 @@ def __get_pydantic_json_schema__( class RegionCodeConstraint(StringConstraint): - """ISO 3166-2 principal subdivision code constraint.""" + """Allows only ISO 3166-2 principal subdivision codes.""" def __init__(self) -> None: self.pattern = re.compile(r"^[A-Z]{2}-[A-Z0-9]{1,3}$") @@ -392,7 +392,7 @@ def __get_pydantic_json_schema__( class WikidataIdConstraint(StringConstraint): - """Constraint for Wikidata identifiers (Q followed by digits).""" + """Allows only Wikidata identifiers (Q followed by digits).""" def __init__(self) -> None: self.pattern = re.compile(r"^Q\d+$") diff --git a/packages/overture-schema-system/src/overture/schema/system/primitive/__init__.py b/packages/overture-schema-system/src/overture/schema/system/primitive/__init__.py index 96bfd0250..2b27ef89e 100644 --- a/packages/overture-schema-system/src/overture/schema/system/primitive/__init__.py +++ b/packages/overture-schema-system/src/overture/schema/system/primitive/__init__.py @@ -24,7 +24,7 @@ ) uint8 = NewType("uint8", Annotated[int, Field(ge=0, le=255)]) # type: ignore [type-arg] -""" +uint8.__doc__ = """ Portable 8-bit unsigned integer. 
This is an `int` at runtime, but using `uint8` for Pydantic model fields instead of `int` makes them @@ -32,7 +32,7 @@ """ uint16 = NewType("uint16", Annotated[int, Field(ge=0, le=65535)]) # type: ignore[type-arg] -""" +uint16.__doc__ = """ Portable 16-bit unsigned integer. This is an `int` at runtime, but using `uint16` for Pydantic model fields instead of `int` makes @@ -40,7 +40,7 @@ """ uint32 = NewType("uint32", Annotated[int, Field(ge=0, le=4294967295)]) # type: ignore[type-arg] -""" +uint32.__doc__ = """ Portable 32-bit unsigned integer. This is an `int` at runtime, but using `uint32` for Pydantic model fields instead of `int` makes @@ -48,7 +48,7 @@ """ int8 = NewType("int8", Annotated[int, Field(ge=-128, le=127)]) # type: ignore[type-arg] -""" +int8.__doc__ = """ Portable 8-bit signed integer. This is an `int` at runtime, but using `int8` for Pydantic model fields instead of `int` makes them @@ -56,7 +56,7 @@ """ int16 = NewType("int16", Annotated[int, Field(ge=-32768, le=32767)]) # type: ignore[type-arg] -""" +int16.__doc__ = """ Portable 16-bit signed integer. This is an `int` at runtime, but using `int16` for Pydantic model fields instead of `int` makes them @@ -64,7 +64,7 @@ """ int32 = NewType("int32", Annotated[int, Field(ge=-(2**31), le=2**31 - 1)]) # type: ignore[type-arg] -""" +int32.__doc__ = """ Portable 32-bit signed integer. This is an `int` at runtime, but using `int32` for Pydantic model fields instead of `int` makes them @@ -72,7 +72,7 @@ """ int64 = NewType("int64", Annotated[int, Field(ge=-(2**63), le=2**63 - 1)]) # type: ignore[type-arg] -""" +int64.__doc__ = """ Portable 64-bit signed integer. This is an `int` at runtime, but using `int64` for Pydantic model fields instead of `int` makes them @@ -80,7 +80,7 @@ """ float32 = NewType("float32", float) # type: ignore[type-arg] -""" +float32.__doc__ = """ Portable IEEE 32-bit floating point number. 
This is a `float` at runtime, but using `float32` for Pydantic model fields instead of `float` makes @@ -88,7 +88,7 @@ """ float64 = NewType("float64", float) # type: ignore[type-arg] -""" +float64.__doc__ = """ Portable IEEE 64-bit floating point number. This is a `float` at runtime, but using `float64` for Pydantic model fields instead of `float` makes diff --git a/packages/overture-schema-system/src/overture/schema/system/ref/id.py b/packages/overture-schema-system/src/overture/schema/system/ref/id.py index eb467f142..2ffa5dad8 100644 --- a/packages/overture-schema-system/src/overture/schema/system/ref/id.py +++ b/packages/overture-schema-system/src/overture/schema/system/ref/id.py @@ -18,7 +18,7 @@ ), ], ) -""" +Id.__doc__ = """ A unique identifier. """ diff --git a/packages/overture-schema-system/src/overture/schema/system/string.py b/packages/overture-schema-system/src/overture/schema/system/string.py index cac9112ca..6533a192a 100644 --- a/packages/overture-schema-system/src/overture/schema/system/string.py +++ b/packages/overture-schema-system/src/overture/schema/system/string.py @@ -35,7 +35,7 @@ Field(description="An ISO 3166-1 alpha-2 country code"), ], ) # type: ignore [type-arg] -""" +CountryCodeAlpha2.__doc__ = """ An ISO-3166-1 alpha-2 country code. """ @@ -49,7 +49,7 @@ ), ], ) # type: ignore [type-arg] -""" +HexColor.__doc__ = """ A color represented as an #RRGGBB or #RGB hexadecimal string. For example: @@ -67,7 +67,7 @@ Field(description="A JSON Pointer (as described in RFC-6901)"), ], ) # type: ignore [type-arg] -""" +JsonPointer.__doc__ = """ A JSON Pointer As described in `the JSON Pointer specification, RFC-6901`_. @@ -91,7 +91,7 @@ ), ], ) # type: ignore [type-arg] -""" +LanguageTag.__doc__ = """ A BCP-47 language tag. As described in `Tags for Identifying Languages, BCP-47`_. 
@@ -114,7 +114,7 @@ Field(description="A string that contains no whitespace characters"), ], ) # type: ignore [type-arg] -""" +NoWhitespaceString.__doc__ = """ A string that contains no whitespace characters. """ @@ -124,7 +124,7 @@ str, PhoneNumberConstraint(), Field(description="An international phone number") ], ) # type: ignore [type-arg] -""" +PhoneNumber.__doc__ = """ An international phone number. """ @@ -136,12 +136,12 @@ Field(description="An ISO 3166-2 principal subdivision code"), ], ) # type: ignore [type-arg] -""" +RegionCode.__doc__ = """ An ISO 3166-2 principal subdivision code. """ SnakeCaseString = NewType("SnakeCaseString", Annotated[str, SnakeCaseConstraint()]) -""" +SnakeCaseString.__doc__ = """ A string that looks like a snake case identifier, like a Python variable name (*e.g.*, `foo_bar`). """ @@ -155,7 +155,7 @@ ), ], ) # type: ignore [type-arg] -""" +StrippedString.__doc__ = """ A string without leading or trailing whitespace. """ @@ -167,7 +167,7 @@ Field(description="A wikidata ID, as found on https://www.wikidata.org/"), ], ) # type: ignore [type-arg] -""" +WikidataId.__doc__ = """ A wikidata ID, as found on https://www.wikidata.org/. 
- `"Q42"` diff --git a/packages/overture-schema-system/tests/test_feature.py b/packages/overture-schema-system/tests/test_feature.py index ec0dfc795..834cd7881 100644 --- a/packages/overture-schema-system/tests/test_feature.py +++ b/packages/overture-schema-system/tests/test_feature.py @@ -332,6 +332,18 @@ class BarModel(BaseModel): ): tap.validate_json(json.dumps(data)) + def test_field_discriminator_attaches_field_name(self) -> None: + """The callable returned by field_discriminator carries _field_name for introspection.""" + + class A(Feature): + kind: Literal["a"] + + class B(Feature): + kind: Literal["b"] + + disc = Feature.field_discriminator("kind", A, B) + assert disc.discriminator._field_name == "kind" # type: ignore[union-attr] + def test_error_field_not_str(self) -> None: with pytest.raises( TypeError, match="`field` must be a `str`, but 42 has type `int`" diff --git a/packages/overture-schema-system/tests/util.py b/packages/overture-schema-system/tests/util.py index 0ce2e4932..f9725f5ba 100644 --- a/packages/overture-schema-system/tests/util.py +++ b/packages/overture-schema-system/tests/util.py @@ -10,10 +10,10 @@ def subset_conflicts( Parameters ---------- - a : dict[str, object] + a : Mapping[str, object] Candidate subset of `b` - b : dict[str, object] - Candidate supserset of `a` + b : Mapping[str, object] + Candidate superset of `a` Returns ------- diff --git a/packages/overture-schema-transportation-theme/pyproject.toml b/packages/overture-schema-transportation-theme/pyproject.toml index 702a8aea8..3302845b2 100644 --- a/packages/overture-schema-transportation-theme/pyproject.toml +++ b/packages/overture-schema-transportation-theme/pyproject.toml @@ -37,3 +37,86 @@ packages = ["src/overture"] [project.entry-points."overture.models"] "overture:transportation:connector" = "overture.schema.transportation:Connector" "overture:transportation:segment" = "overture.schema.transportation:Segment" + +[[examples.Connector]] +id = 
"39542bee-230f-4b91-b7e5-a9b58e0c59b1" +geometry = "POINT (-176.5472979 -43.9679472)" +version = 1 +theme = "transportation" +type = "connector" + +[examples.Connector.bbox] +xmin = -176.54730224609375 +xmax = -176.54727172851562 +ymin = -43.96794891357422 +ymax = -43.96794128417969 + +[[examples.Connector.sources]] +property = "" +dataset = "OpenStreetMap" +record_id = "null" +update_time = "null" +confidence = "null" +between = "null" + +[[examples.Segment]] +id = "1bc62f3b-08b5-42b8-89fe-36f685f60455" +geometry = "LINESTRING (-176.5636191 -43.954404, -176.5643637 -43.9538145, -176.5647264 -43.9535274, -176.5649947 -43.953251)" +version = 1 +subtype = "road" +class = "residential" +routes = "null" +subclass_rules = "null" +access_restrictions = "null" +level_rules = "null" +destinations = "null" +prohibited_transitions = "null" +road_flags = "null" +speed_limits = "null" +width_rules = "null" +subclass = "null" +rail_flags = "null" +theme = "transportation" +type = "segment" + +[examples.Segment.bbox] +xmin = -176.5650177001953 +xmax = -176.56361389160156 +ymin = -43.954410552978516 +ymax = -43.953250885009766 + +[[examples.Segment.sources]] +property = "" +dataset = "OpenStreetMap" +record_id = "w53435546@6" +update_time = "2021-05-03T06:37:03Z" +confidence = "null" +between = "null" + +[examples.Segment.names] +primary = "Meteorological Lane" +common = "null" + +[[examples.Segment.names.rules]] +variant = "common" +language = "null" +perspectives = "null" +value = "Meteorological Lane" +between = "null" +side = "null" + +[[examples.Segment.connectors]] +connector_id = "15b2c131-9137-4add-88c6-2acd3fa61355" +at = 0.0 + +[[examples.Segment.connectors]] +connector_id = "23ae2702-ef77-4d2e-b39d-77360b696d20" +at = 0.523536154 + +[[examples.Segment.connectors]] +connector_id = "8e944ce1-4b81-49eb-a823-7d98779c855c" +at = 1.0 + +[[examples.Segment.road_surface]] +value = "gravel" +between = "null" diff --git a/pyproject.toml b/pyproject.toml index 
c51f94221..1679e49a3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -56,12 +56,14 @@ dev = [ ] [tool.pytest.ini_options] +verbosity_subtests = 0 pythonpath = [ "packages/overture-schema-addresses-theme/tests", "packages/overture-schema-annex/tests", "packages/overture-schema-base-theme/tests", "packages/overture-schema-buildings-theme/tests", "packages/overture-schema-cli/tests", + "packages/overture-schema-codegen/tests", "packages/overture-schema-core/tests", "packages/overture-schema-divisions-theme/tests", "packages/overture-schema-places-theme/tests", diff --git a/uv.lock b/uv.lock index d35d4080e..08122cf2a 100644 --- a/uv.lock +++ b/uv.lock @@ -14,6 +14,7 @@ members = [ "overture-schema-base-theme", "overture-schema-buildings-theme", "overture-schema-cli", + "overture-schema-codegen", "overture-schema-core", "overture-schema-divisions-theme", "overture-schema-places-theme", @@ -785,6 +786,26 @@ dev = [ { name = "ruff" }, ] +[[package]] +name = "overture-schema-codegen" +source = { editable = "packages/overture-schema-codegen" } +dependencies = [ + { name = "click" }, + { name = "jinja2" }, + { name = "overture-schema-core" }, + { name = "overture-schema-system" }, + { name = "tomli", marker = "python_full_version < '3.11'" }, +] + +[package.metadata] +requires-dist = [ + { name = "click", specifier = ">=8.0" }, + { name = "jinja2", specifier = ">=3.0" }, + { name = "overture-schema-core", editable = "packages/overture-schema-core" }, + { name = "overture-schema-system", editable = "packages/overture-schema-system" }, + { name = "tomli", marker = "python_full_version < '3.11'", specifier = ">=2.0" }, +] + [[package]] name = "overture-schema-core" source = { editable = "packages/overture-schema-core" }