From f840f69694be03c34dbc1edf4c3556665b2bb8f7 Mon Sep 17 00:00:00 2001 From: Songshen Huang Date: Wed, 18 Mar 2026 16:23:27 -0400 Subject: [PATCH 1/6] Set up YData profiling project and generate initial report --- .../README.md | 0 .../data/baltim.csv | 212 +++++++++ .../notes.txt | 0 .../outputs/baltim_profile_report.html | 435 ++++++++++++++++++ .../src/run_profiling.py | 25 + 5 files changed, 672 insertions(+) create mode 100644 class_project/DATA605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/README.md create mode 100755 class_project/DATA605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/data/baltim.csv create mode 100644 class_project/DATA605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/notes.txt create mode 100644 class_project/DATA605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/outputs/baltim_profile_report.html create mode 100644 class_project/DATA605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/src/run_profiling.py diff --git a/class_project/DATA605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/README.md b/class_project/DATA605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/README.md new file mode 100644 index 000000000..e69de29bb diff --git a/class_project/DATA605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/data/baltim.csv b/class_project/DATA605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/data/baltim.csv new file mode 100755 index 000000000..92570862d --- /dev/null +++ b/class_project/DATA605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/data/baltim.csv @@ -0,0 +1,212 @@ +STATION,PRICE,NROOM,DWELL,NBATH,PATIO,FIREPL,AC,BMENT,NSTOR,GAR,AGE,CITCOU,LOTSZ,SQFT,X,Y +1,47.000000,4.000000,0.000000,1.000000,0.000000,0.000000,0.000000,2.000000,3.000000,0.000000,148.000000,0.000000,5.700000,11.250000,907.000000,534.000000 
+2,113.000000,7.000000,1.000000,2.500000,1.000000,1.000000,1.000000,2.000000,2.000000,2.000000,9.000000,1.000000,279.510000,28.920000,922.000000,574.000000 +3,165.000000,7.000000,1.000000,2.500000,1.000000,1.000000,0.000000,3.000000,2.000000,2.000000,23.000000,1.000000,70.640000,30.620000,920.000000,581.000000 +4,104.300000,7.000000,1.000000,2.500000,1.000000,1.000000,1.000000,2.000000,2.000000,2.000000,5.000000,1.000000,174.630000,26.120000,923.000000,578.000000 +5,62.500000,7.000000,1.000000,1.500000,1.000000,1.000000,0.000000,2.000000,2.000000,0.000000,19.000000,1.000000,107.800000,22.040000,918.000000,574.000000 +6,70.000000,6.000000,1.000000,2.500000,1.000000,1.000000,0.000000,3.000000,3.000000,1.000000,20.000000,1.000000,139.640000,39.420000,900.000000,577.000000 +7,127.500000,6.000000,1.000000,2.500000,1.000000,1.000000,1.000000,3.000000,1.000000,2.000000,20.000000,1.000000,250.000000,21.880000,918.000000,576.000000 +8,53.000000,8.000000,1.000000,1.500000,1.000000,0.000000,0.000000,0.000000,3.000000,0.000000,22.000000,1.000000,100.000000,36.720000,907.000000,576.000000 +9,64.500000,6.000000,1.000000,1.000000,1.000000,1.000000,1.000000,3.000000,2.000000,0.000000,22.000000,1.000000,115.900000,25.600000,918.000000,562.000000 +10,145.000000,7.000000,1.000000,2.500000,1.000000,1.000000,1.000000,3.000000,2.000000,2.000000,4.000000,1.000000,365.070000,44.120000,897.000000,576.000000 +11,63.500000,6.000000,1.000000,2.000000,0.000000,1.000000,0.000000,2.000000,2.000000,0.000000,23.000000,1.000000,81.100000,19.880000,916.000000,569.000000 +12,58.900000,5.000000,1.000000,2.000000,0.000000,1.000000,1.000000,0.000000,1.000000,0.000000,20.000000,1.000000,91.000000,12.080000,908.000000,573.000000 +13,65.000000,4.000000,1.000000,2.000000,0.000000,0.000000,0.000000,3.000000,1.000000,0.000000,30.000000,1.000000,74.350000,10.990000,913.000000,566.000000 
+14,52.000000,5.000000,0.000000,1.000000,1.000000,0.000000,1.000000,3.000000,2.000000,0.000000,20.000000,1.000000,46.170000,13.600000,910.000000,574.000000 +15,48.000000,5.000000,0.000000,1.500000,0.000000,0.000000,0.000000,2.000000,2.000000,0.000000,18.000000,1.000000,23.100000,12.800000,922.000000,569.000000 +16,3.500000,9.000000,0.000000,3.000000,0.000000,0.000000,0.000000,2.000000,3.000000,0.000000,75.000000,0.000000,14.400000,29.790000,913.000000,536.000000 +17,12.800000,5.000000,0.000000,1.000000,1.000000,0.000000,0.000000,2.000000,2.000000,0.000000,60.000000,0.000000,8.970000,14.300000,919.000000,533.500000 +18,17.500000,5.000000,0.000000,1.000000,0.000000,0.000000,0.000000,3.000000,2.000000,0.000000,65.000000,0.000000,10.220000,13.720000,917.500000,535.000000 +19,36.000000,5.000000,0.000000,1.500000,0.000000,0.000000,0.000000,2.000000,2.000000,0.000000,14.000000,1.000000,38.890000,11.840000,933.000000,548.500000 +20,41.900000,6.000000,1.000000,1.000000,0.000000,0.000000,0.000000,3.000000,2.000000,0.000000,45.000000,0.000000,70.000000,18.060000,932.500000,552.500000 +21,53.500000,5.000000,1.000000,1.500000,0.000000,0.000000,0.000000,3.000000,1.000000,0.000000,14.000000,1.000000,70.820000,10.720000,936.500000,548.500000 +22,24.500000,4.000000,0.000000,1.000000,0.000000,0.000000,0.000000,3.000000,2.000000,0.000000,22.000000,0.000000,18.390000,8.960000,930.000000,542.500000 +23,24.500000,5.000000,1.000000,1.000000,0.000000,0.000000,0.000000,2.000000,2.000000,0.000000,35.000000,0.000000,73.250000,14.380000,925.000000,545.000000 +24,55.500000,5.000000,1.000000,2.500000,0.000000,0.000000,1.000000,3.000000,3.000000,0.000000,5.000000,0.000000,56.120000,36.750000,927.000000,552.000000 +25,60.000000,6.000000,1.000000,2.000000,1.000000,0.000000,1.000000,2.000000,2.000000,2.000000,60.000000,1.000000,400.370000,20.000000,936.000000,554.500000 
+26,51.000000,7.000000,1.000000,1.500000,0.000000,1.000000,1.000000,2.000000,2.000000,0.000000,14.000000,1.000000,87.960000,22.820000,860.000000,554.000000 +27,46.000000,6.000000,1.000000,2.500000,1.000000,0.000000,1.000000,0.000000,2.000000,0.000000,19.000000,1.000000,70.400000,24.860000,868.000000,550.500000 +28,46.000000,5.000000,1.000000,1.500000,0.000000,0.000000,0.000000,2.000000,2.000000,0.000000,11.000000,1.000000,84.000000,19.200000,872.500000,543.000000 +29,44.000000,5.000000,1.000000,1.500000,0.000000,0.000000,1.000000,2.000000,1.000000,0.000000,16.000000,1.000000,52.550000,11.580000,880.500000,544.500000 +30,54.900000,5.000000,1.000000,2.000000,0.000000,0.000000,1.000000,1.000000,2.000000,0.000000,19.000000,1.000000,77.760000,26.000000,869.000000,551.500000 +31,42.500000,6.000000,1.000000,2.000000,1.000000,0.000000,0.000000,2.000000,2.000000,0.000000,17.000000,0.000000,105.300000,14.400000,883.000000,538.000000 +32,44.000000,6.000000,1.000000,1.500000,0.000000,1.000000,0.000000,2.000000,1.000000,0.000000,24.000000,1.000000,70.000000,11.620000,876.000000,541.000000 +33,44.900000,5.000000,1.000000,1.500000,0.000000,0.000000,0.000000,2.000000,2.000000,0.000000,22.000000,1.000000,65.000000,23.080000,875.500000,549.000000 +34,37.900000,6.000000,1.000000,1.000000,0.000000,0.000000,0.000000,2.000000,3.000000,0.000000,27.000000,1.000000,62.640000,23.760000,875.000000,550.000000 +35,33.000000,5.000000,0.000000,1.500000,0.000000,0.000000,1.000000,0.000000,2.000000,0.000000,3.000000,1.000000,175.460000,15.600000,868.000000,545.000000 +36,43.900000,5.000000,1.000000,1.500000,0.000000,0.000000,1.000000,3.000000,1.000000,0.000000,21.000000,1.000000,268.000000,10.000000,879.000000,552.000000 +37,49.600000,6.000000,1.000000,1.500000,1.000000,0.000000,0.000000,1.000000,2.000000,0.000000,20.000000,1.000000,96.850000,22.800000,860.000000,555.500000 
+38,52.000000,5.000000,0.000000,2.500000,0.000000,1.000000,1.000000,2.000000,2.000000,0.000000,4.000000,1.000000,16.940000,16.760000,868.000000,556.500000 +39,45.500000,6.000000,1.000000,2.500000,0.000000,0.000000,0.000000,2.000000,2.000000,0.000000,24.000000,1.000000,75.000000,18.600000,873.000000,549.000000 +40,37.500000,7.000000,1.000000,2.000000,0.000000,1.000000,0.000000,2.000000,2.000000,1.000000,40.000000,0.000000,84.000000,22.100000,888.500000,545.000000 +41,50.000000,5.000000,0.000000,2.000000,0.000000,1.000000,0.000000,2.000000,2.000000,0.000000,23.000000,1.000000,36.300000,14.280000,878.000000,532.000000 +42,35.900000,5.000000,1.000000,1.500000,0.000000,1.000000,0.000000,2.000000,2.000000,0.000000,35.000000,0.000000,67.760000,15.360000,883.000000,545.500000 +43,42.900000,6.000000,1.000000,2.000000,0.000000,0.000000,0.000000,2.000000,2.000000,0.000000,25.000000,1.000000,77.030000,16.000000,873.000000,557.500000 +44,107.000000,6.000000,1.000000,2.500000,0.000000,1.000000,1.000000,2.000000,1.000000,0.000000,17.000000,1.000000,246.620000,23.040000,882.000000,568.000000 +45,112.000000,5.000000,1.000000,3.500000,0.000000,1.000000,1.000000,2.000000,2.000000,1.000000,26.000000,1.000000,91.050000,24.940000,881.500000,562.000000 +46,44.900000,5.000000,1.000000,1.500000,0.000000,0.000000,1.000000,2.000000,1.000000,0.000000,15.000000,1.000000,76.500000,11.820000,867.000000,560.000000 +47,55.000000,5.000000,1.000000,1.500000,0.000000,1.000000,0.000000,2.000000,2.000000,0.000000,29.000000,1.000000,75.000000,12.880000,877.000000,557.000000 +48,102.000000,5.000000,1.000000,2.000000,0.000000,1.000000,1.000000,2.000000,1.000000,0.000000,24.000000,1.000000,362.120000,11.200000,889.000000,571.000000 +49,35.500000,5.000000,1.000000,1.000000,1.000000,0.000000,0.000000,0.000000,2.000000,0.000000,30.000000,1.000000,102.260000,18.120000,876.500000,564.500000 
+50,62.900000,6.000000,1.000000,3.500000,0.000000,0.000000,0.000000,2.000000,3.000000,1.000000,19.000000,1.000000,169.400000,38.250000,870.500000,560.000000 +51,39.000000,6.000000,1.000000,2.500000,0.000000,1.000000,0.000000,2.000000,2.000000,1.000000,50.000000,0.000000,64.500000,17.680000,884.500000,560.000000 +52,110.000000,6.000000,1.000000,2.500000,1.000000,1.000000,1.000000,3.000000,1.000000,0.000000,18.000000,1.000000,315.900000,19.020000,866.000000,567.500000 +53,8.000000,4.000000,0.000000,1.000000,0.000000,1.000000,0.000000,0.000000,2.000000,0.000000,74.000000,0.000000,56.530000,32.800000,899.000000,560.000000 +54,62.000000,5.000000,1.000000,3.000000,0.000000,1.000000,1.000000,2.000000,1.000000,0.000000,22.000000,0.000000,100.000000,15.160000,890.000000,559.000000 +55,60.000000,7.000000,1.000000,1.000000,0.000000,1.000000,0.000000,1.000000,3.000000,0.000000,80.000000,0.000000,119.970000,25.080000,896.000000,560.000000 +56,85.900000,5.000000,1.000000,2.000000,1.000000,0.000000,1.000000,2.000000,1.500000,0.000000,24.000000,1.000000,117.000000,21.975000,892.000000,561.000000 +57,57.000000,5.000000,1.000000,2.500000,0.000000,0.000000,1.000000,3.000000,1.000000,0.000000,20.000000,0.000000,133.660000,12.600000,895.000000,559.000000 +58,110.000000,7.000000,1.000000,3.000000,1.000000,1.000000,1.000000,2.000000,2.000000,0.000000,7.000000,1.000000,144.420000,23.520000,892.000000,565.000000 +59,67.700000,5.000000,1.000000,1.500000,0.000000,1.000000,0.000000,3.000000,2.000000,0.000000,47.000000,0.000000,85.500000,17.520000,902.500000,552.000000 +60,89.500000,10.000000,1.000000,3.500000,1.000000,1.000000,0.000000,3.000000,3.000000,1.000000,50.000000,0.000000,263.500000,47.610000,902.000000,557.000000 +61,70.000000,6.000000,1.000000,2.000000,1.000000,1.000000,0.000000,3.000000,2.500000,0.000000,45.000000,0.000000,52.000000,20.550000,905.000000,550.000000 
+62,74.000000,8.000000,0.000000,2.500000,1.000000,1.000000,1.000000,2.000000,3.000000,2.000000,48.000000,0.000000,70.400000,35.520000,905.000000,548.000000 +63,22.900000,5.000000,0.000000,1.000000,0.000000,0.000000,0.000000,1.000000,2.000000,0.000000,50.000000,0.000000,12.960000,14.400000,904.500000,543.000000 +64,13.000000,4.000000,0.000000,1.000000,0.000000,0.000000,0.000000,2.000000,2.000000,0.000000,50.000000,0.000000,7.500000,8.400000,903.000000,547.000000 +65,48.000000,5.000000,1.000000,1.000000,0.000000,0.000000,0.000000,2.000000,2.000000,2.000000,48.000000,1.000000,62.500000,13.680000,910.000000,562.500000 +66,24.000000,5.000000,0.000000,1.500000,0.000000,0.000000,0.000000,2.000000,2.000000,0.000000,55.000000,0.000000,24.910000,14.480000,910.000000,552.000000 +67,53.500000,5.000000,0.000000,1.500000,0.000000,0.000000,0.000000,3.000000,2.000000,0.000000,27.000000,1.000000,29.500000,12.800000,908.500000,565.000000 +68,34.500000,5.000000,0.000000,1.500000,0.000000,1.000000,0.000000,3.000000,2.000000,0.000000,20.000000,0.000000,37.600000,12.800000,913.300000,558.500000 +69,53.000000,5.000000,0.000000,1.500000,0.000000,0.000000,0.000000,3.000000,3.000000,0.000000,33.000000,1.000000,22.000000,18.000000,907.500000,563.000000 +70,87.500000,6.000000,1.000000,1.000000,0.000000,1.000000,0.000000,2.000000,2.000000,3.000000,40.000000,1.000000,108.050000,15.400000,902.000000,572.000000 +71,33.500000,5.000000,0.000000,1.000000,0.000000,0.000000,1.000000,2.000000,2.000000,0.000000,25.000000,0.000000,20.520000,10.080000,908.000000,556.000000 +72,24.000000,5.000000,0.000000,1.000000,0.000000,0.000000,1.000000,3.000000,2.000000,0.000000,25.000000,0.000000,17.600000,8.960000,925.000000,541.500000 +73,9.600000,5.000000,0.000000,1.000000,0.000000,0.000000,0.000000,2.000000,2.000000,0.000000,40.000000,0.000000,11.200000,8.960000,919.000000,540.500000 
+74,30.000000,5.000000,0.000000,2.500000,0.000000,0.000000,0.000000,3.000000,2.500000,0.000000,30.000000,0.000000,19.990000,20.000000,919.500000,537.500000 +75,41.000000,5.000000,1.000000,1.500000,0.000000,0.000000,0.000000,3.000000,2.000000,1.000000,40.000000,0.000000,92.310000,12.880000,922.500000,549.000000 +76,30.000000,3.000000,1.000000,2.000000,0.000000,0.000000,0.000000,2.000000,2.000000,0.000000,22.000000,0.000000,31.500000,12.000000,921.000000,558.000000 +77,38.900000,5.000000,0.000000,3.000000,0.000000,0.000000,0.000000,3.000000,2.000000,0.000000,25.000000,0.000000,28.940000,18.160000,882.000000,557.500000 +78,20.700000,5.000000,0.000000,2.000000,0.000000,0.000000,0.000000,2.000000,2.000000,0.000000,29.000000,0.000000,18.480000,14.280000,889.000000,552.000000 +79,49.900000,9.000000,1.000000,3.000000,0.000000,1.000000,0.000000,2.000000,2.500000,2.000000,49.000000,0.000000,127.100000,26.000000,887.000000,555.000000 +80,18.600000,6.000000,0.000000,1.500000,0.000000,0.000000,0.000000,2.000000,2.000000,0.000000,35.000000,0.000000,14.060000,12.020000,896.000000,548.000000 +81,39.000000,6.000000,1.000000,2.000000,0.000000,0.000000,0.000000,2.000000,2.000000,1.000000,55.000000,0.000000,127.100000,20.800000,887.000000,554.000000 +82,34.000000,5.000000,0.000000,1.500000,0.000000,0.000000,0.000000,2.000000,2.000000,0.000000,30.000000,0.000000,19.000000,11.780000,893.000000,546.500000 +83,16.000000,4.000000,0.000000,1.500000,0.000000,0.000000,0.000000,2.000000,2.000000,0.000000,15.000000,0.000000,16.100000,8.680000,896.000000,550.000000 +84,18.900000,6.000000,0.000000,2.000000,0.000000,0.000000,0.000000,2.000000,2.000000,0.000000,40.000000,0.000000,23.980000,17.600000,890.400000,539.000000 +85,15.200000,5.000000,0.000000,1.000000,0.000000,0.000000,0.000000,2.000000,2.000000,0.000000,35.000000,0.000000,19.000000,11.400000,894.000000,534.000000 
+86,41.500000,9.000000,0.000000,2.000000,0.000000,1.000000,0.000000,2.000000,3.000000,0.000000,70.000000,0.000000,132.210000,44.550000,887.000000,540.400000 +87,53.000000,10.000000,1.000000,5.000000,0.000000,1.000000,0.000000,2.000000,2.000000,2.000000,25.000000,0.000000,122.100000,46.320000,893.600000,543.000000 +88,22.000000,5.000000,0.000000,2.000000,0.000000,0.000000,0.000000,2.000000,2.000000,0.000000,25.000000,0.000000,16.000000,10.240000,896.500000,541.000000 +89,24.900000,5.000000,0.000000,1.500000,0.000000,0.000000,0.000000,2.000000,2.000000,0.000000,30.000000,0.000000,23.780000,9.600000,898.000000,535.000000 +90,6.700000,4.000000,0.000000,1.000000,0.000000,0.000000,0.000000,2.000000,3.000000,0.000000,30.000000,0.000000,12.000000,31.200000,900.500000,535.000000 +91,32.500000,4.000000,0.000000,3.000000,0.000000,0.000000,0.000000,2.000000,2.000000,0.000000,50.000000,0.000000,23.760000,26.400000,903.000000,540.000000 +92,30.000000,5.000000,0.000000,2.000000,0.000000,0.000000,0.000000,3.000000,2.000000,0.000000,25.000000,0.000000,19.900000,13.600000,913.000000,547.500000 +93,59.000000,8.000000,0.000000,2.000000,0.000000,0.000000,0.000000,2.000000,3.000000,1.000000,70.000000,0.000000,20.300000,27.480000,909.000000,542.500000 +94,29.500000,6.000000,0.000000,1.000000,0.000000,0.000000,0.000000,2.000000,2.000000,2.000000,55.000000,0.000000,27.600000,17.860000,915.500000,545.000000 +95,26.000000,6.000000,0.000000,1.000000,0.000000,1.000000,0.000000,2.000000,2.000000,1.000000,40.000000,0.000000,29.690000,18.040000,915.000000,543.500000 +96,16.500000,4.000000,0.000000,2.000000,0.000000,0.000000,0.000000,2.000000,2.000000,0.000000,70.000000,0.000000,14.720000,14.840000,908.000000,539.000000 +97,39.000000,5.000000,1.000000,1.000000,0.000000,0.000000,0.000000,0.000000,1.000000,0.000000,20.000000,1.000000,70.400000,10.460000,957.000000,508.000000 
+98,48.900000,5.000000,1.000000,2.000000,0.000000,0.000000,0.000000,3.000000,2.000000,0.000000,20.000000,1.000000,66.250000,14.560000,955.500000,513.500000 +99,33.500000,3.000000,1.000000,1.000000,0.000000,0.000000,0.000000,2.000000,1.000000,0.000000,25.000000,1.000000,58.500000,6.960000,953.500000,550.500000 +100,46.000000,4.000000,1.000000,1.500000,1.000000,0.000000,0.000000,2.000000,1.000000,0.000000,18.000000,1.000000,91.250000,9.500000,960.000000,550.000000 +101,54.000000,5.000000,1.000000,1.000000,0.000000,0.000000,0.000000,0.000000,1.000000,0.000000,20.000000,1.000000,93.120000,11.860000,971.000000,547.500000 +102,57.900000,4.000000,1.000000,1.500000,0.000000,1.000000,0.000000,2.000000,1.000000,0.000000,2.000000,1.000000,104.500000,12.880000,987.500000,561.000000 +103,37.900000,5.000000,0.000000,1.500000,1.000000,0.000000,1.000000,3.000000,2.000000,0.000000,8.000000,1.000000,42.740000,12.320000,960.500000,542.000000 +104,32.000000,3.000000,1.000000,1.000000,0.000000,0.000000,0.000000,0.000000,1.000000,0.000000,25.000000,1.000000,50.000000,6.720000,953.500000,548.000000 +105,31.000000,5.000000,0.000000,1.500000,0.000000,0.000000,0.000000,3.000000,2.000000,0.000000,18.000000,1.000000,25.190000,10.080000,957.000000,553.000000 +106,34.000000,6.000000,1.000000,1.000000,0.000000,0.000000,0.000000,2.000000,2.000000,0.000000,30.000000,1.000000,75.000000,15.600000,957.000000,545.500000 +107,29.000000,3.000000,1.000000,1.000000,0.000000,0.000000,0.000000,0.000000,1.000000,0.000000,35.000000,1.000000,46.160000,6.720000,964.000000,541.000000 +108,32.500000,5.000000,0.000000,1.500000,0.000000,0.000000,0.000000,3.000000,2.000000,0.000000,21.000000,1.000000,18.000000,11.520000,952.500000,544.500000 +109,51.900000,5.000000,1.000000,1.500000,1.000000,0.000000,0.000000,2.000000,1.000000,0.000000,20.000000,1.000000,169.850000,11.760000,959.000000,537.500000 
+110,31.000000,5.000000,0.000000,1.000000,0.000000,0.000000,0.000000,2.000000,2.000000,0.000000,24.000000,1.000000,28.000000,10.240000,955.000000,543.500000 +111,41.800000,6.000000,0.000000,1.500000,0.000000,0.000000,1.000000,3.000000,2.000000,0.000000,13.000000,1.000000,49.130000,11.520000,955.000000,533.000000 +112,48.000000,4.000000,1.000000,1.000000,0.000000,0.000000,0.000000,2.000000,1.000000,0.000000,25.000000,1.000000,65.250000,9.280000,947.000000,541.500000 +113,28.000000,3.000000,1.000000,1.000000,0.000000,0.000000,0.000000,2.000000,1.000000,0.000000,18.000000,1.000000,100.000000,6.720000,958.000000,529.000000 +114,35.000000,5.000000,1.000000,1.000000,0.000000,0.000000,0.000000,2.000000,2.000000,0.000000,30.000000,1.000000,70.000000,15.600000,952.000000,536.500000 +115,46.500000,5.000000,1.000000,1.000000,0.000000,1.000000,0.000000,2.000000,2.000000,2.000000,20.000000,1.000000,303.830000,15.500000,975.000000,527.500000 +116,51.900000,5.000000,1.000000,2.000000,0.000000,0.000000,0.000000,2.000000,1.000000,0.000000,22.000000,1.000000,300.000000,9.840000,958.500000,537.500000 +117,35.400000,4.000000,1.000000,1.000000,0.000000,0.000000,0.000000,0.000000,2.000000,0.000000,28.000000,1.000000,59.800000,15.600000,951.000000,520.000000 +118,16.000000,3.000000,0.000000,2.000000,0.000000,0.000000,0.000000,2.000000,2.000000,0.000000,50.000000,0.000000,45.000000,13.760000,932.500000,520.500000 +119,35.000000,5.000000,0.000000,1.500000,0.000000,0.000000,0.000000,3.000000,2.000000,0.000000,20.000000,1.000000,51.710000,10.240000,945.000000,520.000000 +120,35.000000,4.000000,1.000000,1.000000,0.000000,0.000000,0.000000,0.000000,1.000000,0.000000,38.000000,1.000000,51.420000,5.760000,936.000000,522.500000 +121,36.500000,4.000000,0.000000,1.000000,1.000000,0.000000,0.000000,2.000000,2.000000,0.000000,17.000000,1.000000,18.020000,10.080000,947.000000,525.000000 
+122,35.900000,5.000000,0.000000,1.500000,0.000000,0.000000,0.000000,3.000000,2.000000,0.000000,22.000000,1.000000,20.690000,11.520000,941.500000,521.000000 +123,45.000000,5.000000,1.000000,1.000000,0.000000,0.000000,0.000000,2.000000,1.500000,0.000000,27.000000,1.000000,79.810000,12.150000,938.000000,516.000000 +124,40.000000,4.000000,1.000000,1.000000,0.000000,0.000000,0.000000,2.000000,1.000000,0.000000,25.000000,0.000000,62.500000,9.770000,932.000000,526.500000 +125,35.000000,5.000000,1.000000,1.000000,0.000000,0.000000,0.000000,0.000000,2.000000,0.000000,25.000000,1.000000,50.000000,15.000000,940.000000,514.000000 +126,38.000000,5.000000,1.000000,1.000000,0.000000,0.000000,0.000000,0.000000,2.000000,1.000000,25.000000,1.000000,55.000000,14.400000,934.500000,526.000000 +127,37.000000,4.000000,1.000000,1.000000,0.000000,0.000000,0.000000,0.000000,2.000000,0.000000,30.000000,1.000000,54.840000,14.500000,940.000000,519.000000 +128,23.000000,7.000000,1.000000,2.000000,0.000000,0.000000,0.000000,1.000000,2.000000,0.000000,60.000000,1.000000,68.540000,22.540000,938.000000,513.500000 +129,25.500000,4.000000,0.000000,1.000000,0.000000,0.000000,0.000000,0.000000,2.000000,0.000000,22.000000,1.000000,16.160000,10.240000,945.000000,519.000000 +130,39.500000,3.000000,1.000000,1.000000,0.000000,0.000000,0.000000,2.000000,1.000000,0.000000,30.000000,1.000000,62.500000,7.800000,940.500000,528.500000 +131,21.500000,4.000000,0.000000,1.000000,0.000000,0.000000,0.000000,2.000000,2.000000,0.000000,28.000000,0.000000,11.980000,8.400000,894.500000,526.500000 +132,9.000000,5.000000,0.000000,1.000000,0.000000,0.000000,0.000000,2.000000,2.000000,1.000000,45.000000,0.000000,9.100000,10.920000,900.000000,527.000000 +133,67.500000,8.000000,0.000000,3.000000,0.000000,1.000000,0.000000,2.000000,3.000000,0.000000,100.000000,0.000000,21.120000,42.900000,901.500000,530.000000 
+134,13.400000,3.000000,0.000000,1.000000,0.000000,0.000000,0.000000,0.000000,2.000000,0.000000,60.000000,0.000000,7.000000,9.000000,920.500000,527.500000 +135,12.500000,5.000000,0.000000,1.000000,0.000000,0.000000,0.000000,0.000000,2.000000,0.000000,50.000000,0.000000,10.130000,10.500000,918.500000,528.500000 +136,28.500000,5.000000,0.000000,1.500000,0.000000,0.000000,0.000000,2.000000,2.000000,1.000000,35.000000,1.000000,21.600000,10.080000,937.000000,531.500000 +137,23.000000,5.000000,0.000000,1.500000,0.000000,0.000000,0.000000,2.000000,2.000000,0.000000,50.000000,0.000000,9.660000,12.600000,925.500000,529.500000 +138,33.500000,4.000000,0.000000,1.000000,1.000000,0.000000,0.000000,3.000000,2.000000,0.000000,24.000000,1.000000,16.000000,8.960000,933.000000,530.500000 +139,9.000000,4.000000,0.000000,1.000000,0.000000,0.000000,0.000000,2.000000,2.000000,0.000000,50.000000,0.000000,8.600000,8.580000,924.500000,531.000000 +140,11.000000,3.000000,0.000000,1.000000,0.000000,0.000000,0.000000,2.000000,2.000000,0.000000,33.000000,0.000000,19.840000,7.560000,907.000000,516.000000 +141,30.900000,5.000000,0.000000,1.000000,0.000000,0.000000,0.000000,2.000000,2.000000,0.000000,40.000000,0.000000,18.000000,10.800000,912.500000,509.500000 +142,31.650000,6.000000,0.000000,2.000000,0.000000,1.000000,0.000000,2.000000,2.000000,0.000000,50.000000,0.000000,18.000000,13.440000,911.000000,511.000000 +143,33.000000,5.000000,0.000000,1.000000,0.000000,0.000000,0.000000,2.000000,2.000000,0.000000,25.000000,1.000000,17.600000,10.240000,885.000000,515.000000 +144,33.400000,5.000000,0.000000,1.000000,0.000000,0.000000,0.000000,0.000000,2.000000,0.000000,48.000000,1.000000,36.440000,14.440000,883.500000,505.500000 +145,47.000000,5.000000,0.000000,1.500000,0.000000,0.000000,1.000000,2.000000,2.000000,0.000000,10.000000,1.000000,23.400000,12.240000,883.000000,512.500000 
+146,40.000000,4.000000,1.000000,1.000000,0.000000,0.000000,0.000000,2.000000,1.500000,0.000000,45.000000,1.000000,70.000000,13.200000,888.000000,511.500000 +147,46.000000,5.000000,1.000000,1.500000,0.000000,0.000000,0.000000,3.000000,1.000000,0.000000,20.000000,1.000000,51.790000,9.600000,893.500000,514.000000 +148,45.500000,5.000000,1.000000,1.000000,0.000000,1.000000,1.000000,0.000000,1.000000,0.000000,25.000000,1.000000,61.740000,15.220000,897.500000,515.000000 +149,57.000000,6.000000,1.000000,1.500000,0.000000,0.000000,0.000000,2.000000,2.000000,0.000000,25.000000,1.000000,60.250000,24.160000,888.000000,521.000000 +150,29.900000,4.000000,0.000000,1.000000,0.000000,0.000000,0.000000,0.000000,2.000000,0.000000,22.000000,1.000000,33.660000,10.240000,897.500000,510.500000 +151,30.000000,4.000000,0.000000,1.000000,0.000000,0.000000,0.000000,0.000000,2.000000,0.000000,21.000000,1.000000,29.340000,10.240000,901.000000,509.500000 +152,34.000000,5.000000,1.000000,1.000000,0.000000,1.000000,0.000000,2.000000,1.000000,0.000000,29.000000,1.000000,56.250000,9.880000,902.500000,513.000000 +153,51.000000,6.000000,1.000000,1.500000,1.000000,1.000000,1.000000,3.000000,2.000000,1.000000,18.000000,1.000000,66.300000,23.200000,873.000000,535.000000 +154,64.500000,6.000000,1.000000,2.500000,0.000000,0.000000,1.000000,0.000000,2.000000,0.000000,2.000000,1.000000,95.930000,17.680000,867.000000,535.500000 +155,57.500000,5.000000,1.000000,1.500000,0.000000,0.000000,0.000000,2.000000,2.000000,0.000000,19.000000,1.000000,104.500000,24.300000,869.000000,526.000000 +156,85.500000,6.000000,1.000000,1.500000,0.000000,1.000000,0.000000,2.000000,2.000000,2.000000,49.000000,1.000000,360.000000,35.940000,873.500000,523.500000 +157,61.000000,6.000000,1.000000,1.500000,0.000000,0.000000,1.000000,0.000000,2.000000,0.000000,10.000000,1.000000,60.000000,21.600000,864.000000,527.500000 
+158,38.000000,5.000000,0.000000,1.000000,0.000000,0.000000,1.000000,2.000000,2.000000,0.000000,25.000000,1.000000,19.000000,11.020000,882.000000,524.500000 +159,56.500000,6.000000,1.000000,1.500000,0.000000,0.000000,0.000000,0.000000,2.000000,0.000000,16.000000,1.000000,90.090000,21.000000,871.000000,531.000000 +160,60.400000,5.000000,1.000000,1.500000,0.000000,1.000000,0.000000,3.000000,2.000000,1.000000,17.000000,1.000000,84.640000,23.920000,867.500000,523.000000 +161,51.500000,5.000000,0.000000,2.000000,0.000000,0.000000,0.000000,2.000000,2.000000,0.000000,27.000000,1.000000,23.300000,14.400000,876.000000,528.000000 +162,54.000000,4.000000,1.000000,1.500000,0.000000,0.000000,0.000000,2.000000,2.000000,1.000000,34.000000,1.000000,253.000000,28.000000,875.000000,521.000000 +163,69.000000,5.000000,1.000000,2.500000,0.000000,1.000000,1.000000,3.000000,1.000000,0.000000,2.000000,1.000000,82.860000,11.440000,867.000000,533.000000 +164,56.000000,5.000000,1.000000,1.000000,0.000000,0.000000,0.000000,3.000000,2.000000,0.000000,24.000000,1.000000,67.000000,21.940000,874.000000,519.500000 +165,27.900000,5.000000,0.000000,1.500000,0.000000,0.000000,0.000000,2.000000,2.000000,0.000000,23.000000,0.000000,17.280000,10.240000,889.000000,515.500000 +166,37.500000,6.000000,0.000000,1.000000,0.000000,0.000000,0.000000,2.000000,2.000000,2.000000,40.000000,0.000000,38.720000,16.860000,884.500000,532.000000 +167,32.900000,5.000000,0.000000,1.500000,0.000000,0.000000,0.000000,3.000000,2.000000,0.000000,25.000000,0.000000,19.040000,9.920000,891.500000,522.000000 +168,22.000000,5.000000,0.000000,1.000000,0.000000,1.000000,0.000000,2.000000,2.000000,0.000000,45.000000,0.000000,14.980000,13.440000,889.000000,526.500000 +169,29.900000,5.000000,0.000000,2.000000,0.000000,0.000000,0.000000,3.000000,2.000000,0.000000,26.000000,0.000000,20.000000,12.000000,890.000000,533.500000 
+170,39.900000,5.000000,0.000000,2.000000,0.000000,0.000000,0.000000,3.000000,2.000000,0.000000,37.000000,0.000000,33.600000,14.760000,883.000000,531.000000 +171,32.600000,4.000000,0.000000,1.000000,0.000000,0.000000,1.000000,2.000000,2.000000,0.000000,15.000000,0.000000,16.000000,8.960000,885.500000,525.000000 +172,38.500000,5.000000,0.000000,1.500000,0.000000,0.000000,0.000000,3.000000,2.000000,0.000000,22.000000,0.000000,34.440000,11.520000,882.500000,528.000000 +173,21.500000,4.000000,0.000000,1.000000,0.000000,0.000000,0.000000,3.000000,2.000000,0.000000,75.000000,0.000000,9.450000,8.640000,911.000000,526.500000 +174,25.900000,4.000000,0.000000,1.000000,0.000000,0.000000,0.000000,2.000000,2.000000,0.000000,28.000000,0.000000,12.320000,8.120000,899.000000,522.000000 +175,27.500000,5.000000,0.000000,1.000000,0.000000,0.000000,0.000000,3.000000,2.000000,0.000000,31.000000,0.000000,23.200000,11.120000,898.000000,520.500000 +176,22.900000,5.000000,0.000000,1.000000,0.000000,0.000000,0.000000,2.000000,2.000000,0.000000,100.000000,0.000000,8.730000,11.280000,913.500000,524.000000 +177,31.500000,4.000000,0.000000,1.500000,0.000000,0.000000,0.000000,2.000000,2.000000,0.000000,15.000000,0.000000,20.000000,10.360000,900.000000,518.000000 +178,8.500000,4.000000,0.000000,1.000000,0.000000,0.000000,0.000000,0.000000,2.000000,0.000000,80.000000,0.000000,9.000000,11.520000,904.000000,527.500000 +179,5.500000,3.000000,0.000000,1.000000,0.000000,0.000000,0.000000,2.000000,3.000000,0.000000,75.000000,0.000000,9.360000,17.100000,916.500000,531.500000 +180,33.000000,4.000000,0.000000,1.500000,0.000000,0.000000,0.000000,3.000000,2.000000,0.000000,23.000000,1.000000,60.000000,17.520000,925.000000,568.500000 +181,57.000000,5.000000,1.000000,1.500000,0.000000,0.000000,1.000000,2.000000,1.000000,0.000000,15.000000,1.000000,82.600000,10.730000,933.000000,573.000000 
+182,47.000000,5.000000,1.000000,1.000000,0.000000,0.000000,1.000000,0.000000,1.000000,0.000000,21.000000,1.000000,75.300000,11.200000,931.500000,567.000000 +183,43.500000,4.000000,0.000000,1.500000,0.000000,0.000000,1.000000,2.000000,2.000000,0.000000,2.000000,1.000000,21.000000,12.800000,935.000000,572.000000 +184,43.900000,5.000000,0.000000,1.500000,1.000000,0.000000,0.000000,3.000000,2.000000,0.000000,25.000000,0.000000,43.750000,12.000000,930.500000,561.000000 +185,68.500000,6.000000,1.000000,2.000000,1.000000,1.000000,0.000000,3.000000,3.000000,0.000000,23.000000,1.000000,239.690000,41.070000,926.500000,572.000000 +186,44.250000,5.000000,0.000000,1.500000,0.000000,0.000000,1.000000,2.000000,2.000000,0.000000,0.000000,1.000000,20.830000,12.800000,946.000000,573.000000 +187,61.000000,5.000000,1.000000,2.500000,1.000000,0.000000,1.000000,3.000000,2.000000,0.000000,4.000000,1.000000,67.640000,22.360000,935.000000,561.500000 +188,40.000000,5.000000,1.000000,1.000000,0.000000,0.000000,0.000000,2.000000,2.000000,1.000000,40.000000,1.000000,172.040000,10.560000,943.500000,572.500000 +189,44.500000,5.000000,1.000000,1.500000,0.000000,0.000000,0.000000,2.000000,2.000000,1.000000,55.000000,1.000000,289.970000,13.440000,936.500000,575.500000 +190,57.000000,5.000000,1.000000,1.000000,0.000000,0.000000,0.000000,2.000000,1.000000,0.000000,21.000000,1.000000,71.050000,11.020000,928.000000,564.000000 +191,35.000000,5.000000,1.000000,1.000000,0.000000,0.000000,0.000000,2.000000,2.000000,0.000000,45.000000,0.000000,59.000000,17.980000,929.000000,559.000000 +192,35.100000,7.000000,1.000000,2.500000,0.000000,0.000000,0.000000,2.000000,2.000000,0.000000,50.000000,0.000000,62.500000,18.880000,927.000000,559.000000 +193,64.500000,5.000000,1.000000,2.000000,0.000000,0.000000,1.000000,3.000000,1.000000,0.000000,5.000000,1.000000,86.250000,11.760000,933.000000,576.000000 
+194,40.000000,4.000000,1.000000,1.500000,0.000000,0.000000,0.000000,2.000000,1.000000,1.000000,50.000000,1.000000,50.200000,9.360000,940.500000,568.000000 +195,42.600000,5.000000,0.000000,1.000000,0.000000,0.000000,0.000000,3.000000,2.000000,0.000000,22.000000,1.000000,21.420000,11.520000,921.000000,563.500000 +196,50.000000,5.000000,1.000000,1.500000,0.000000,0.000000,0.000000,3.000000,3.000000,0.000000,22.000000,1.000000,75.000000,27.300000,936.000000,565.500000 +197,58.000000,6.000000,1.000000,2.000000,0.000000,0.000000,1.000000,3.000000,2.000000,0.000000,6.000000,1.000000,73.920000,23.040000,951.000000,573.000000 +198,58.000000,7.000000,1.000000,2.000000,0.000000,0.000000,1.000000,3.000000,2.000000,0.000000,18.000000,1.000000,63.000000,17.680000,951.500000,568.500000 +199,55.000000,5.000000,1.000000,1.000000,0.000000,0.000000,0.000000,2.000000,1.000000,0.000000,18.000000,1.000000,115.000000,13.360000,951.000000,576.000000 +200,43.000000,5.000000,0.000000,2.000000,0.000000,0.000000,0.000000,3.000000,2.000000,0.000000,23.000000,1.000000,42.860000,11.600000,937.000000,555.000000 +201,54.000000,6.000000,0.000000,1.500000,0.000000,1.000000,1.000000,2.000000,2.000000,0.000000,3.000000,1.000000,47.150000,11.520000,945.000000,566.000000 +202,39.000000,5.000000,0.000000,1.000000,0.000000,0.000000,1.000000,2.000000,2.000000,0.000000,1.000000,1.000000,17.260000,9.980000,939.500000,564.500000 +203,45.000000,5.000000,1.000000,1.000000,0.000000,0.000000,0.000000,2.000000,2.000000,2.000000,47.000000,1.000000,75.000000,12.960000,939.000000,543.500000 +204,42.000000,5.000000,1.000000,1.000000,0.000000,0.000000,0.000000,0.000000,1.000000,0.000000,21.000000,1.000000,60.500000,11.130000,934.000000,540.500000 +205,38.900000,6.000000,1.000000,1.000000,0.000000,0.000000,0.000000,3.000000,2.000000,0.000000,29.000000,1.000000,42.350000,19.600000,933.000000,538.000000 
+206,37.500000,4.000000,0.000000,1.000000,0.000000,1.000000,0.000000,0.000000,2.000000,1.000000,23.000000,1.000000,134.880000,20.660000,938.000000,539.500000 +207,39.000000,5.000000,0.000000,1.500000,0.000000,0.000000,1.000000,3.000000,2.000000,0.000000,2.000000,1.000000,19.240000,12.600000,940.000000,538.500000 +208,43.215000,4.000000,0.000000,1.500000,0.000000,1.000000,1.000000,3.000000,2.000000,0.000000,0.000000,1.000000,13.260000,11.520000,945.500000,553.000000 +209,26.500000,5.000000,0.000000,1.000000,0.000000,0.000000,0.000000,3.000000,2.000000,0.000000,29.000000,0.000000,26.030000,12.160000,914.000000,553.000000 +210,30.000000,6.000000,0.000000,1.500000,0.000000,0.000000,1.000000,2.000000,2.000000,0.000000,24.000000,0.000000,20.000000,12.800000,919.000000,554.000000 +211,29.500000,5.000000,0.000000,1.000000,0.000000,0.000000,0.000000,2.000000,2.000000,0.000000,22.000000,0.000000,35.840000,10.640000,914.000000,558.000000 diff --git a/class_project/DATA605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/notes.txt b/class_project/DATA605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/notes.txt new file mode 100644 index 000000000..e69de29bb diff --git a/class_project/DATA605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/outputs/baltim_profile_report.html b/class_project/DATA605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/outputs/baltim_profile_report.html new file mode 100644 index 000000000..fa10e3c20 --- /dev/null +++ b/class_project/DATA605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/outputs/baltim_profile_report.html @@ -0,0 +1,435 @@ +Baltimore Housing Data Profiling Report

Overview

Dataset statistics
Number of variables17
Number of observations211
Missing cells0
Missing cells (%)0.0%
Duplicate rows0
Duplicate rows (%)0.0%
Total size in memory28.2 KiB
Average record size in memory136.6 B

Variable types
Numeric9
Categorical8

Alerts

AC is highly overall correlated with AGEHigh correlation
AGE is highly overall correlated with AC and 1 other fieldsHigh correlation
CITCOU is highly overall correlated with AGE and 2 other fieldsHigh correlation
DWELL is highly overall correlated with LOTSZ and 1 other fieldsHigh correlation
FIREPL is highly overall correlated with PRICEHigh correlation
LOTSZ is highly overall correlated with DWELL and 1 other fieldsHigh correlation
NROOM is highly overall correlated with SQFTHigh correlation
PRICE is highly overall correlated with CITCOU and 3 other fieldsHigh correlation
SQFT is highly overall correlated with NROOMHigh correlation
X is highly overall correlated with CITCOUHigh correlation
GAR is highly imbalanced (57.3%)Imbalance
STATION is uniformly distributedUniform
STATION has unique valuesUnique

Reproduction
Analysis started2026-03-18 20:13:19.759568
Analysis finished2026-03-18 20:13:24.532147
Duration4.77 seconds
Software versionydata-profiling vv4.18.1
Download configurationconfig.json

Variables

STATION
Real number (ℝ)

Uniform  Unique 

Distinct211
Distinct (%)100.0%
Missing0
Missing (%)0.0%
Infinite0
Infinite (%)0.0%
Mean106
Minimum1
Maximum211
Zeros0
Zeros (%)0.0%
Negative0
Negative (%)0.0%
Memory size1.8 KiB
2026-03-18T16:13:24.578376image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/

Quantile statistics
Minimum1
5-th percentile11.5
Q153.5
median106
Q3158.5
95-th percentile200.5
Maximum211
Range210
Interquartile range (IQR)105

Descriptive statistics
Standard deviation61.05462
Coefficient of variation (CV)0.57598698
Kurtosis-1.2
Mean106
Median Absolute Deviation (MAD)53
Skewness0
Sum22366
Variance3727.6667
MonotonicityStrictly increasing

2026-03-18T16:13:24.666195image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
Histogram with fixed size bins (bins=50)
ValueCountFrequency (%)
11
 
0.5%
21
 
0.5%
31
 
0.5%
41
 
0.5%
51
 
0.5%
61
 
0.5%
71
 
0.5%
81
 
0.5%
91
 
0.5%
101
 
0.5%
Other values (201)201
95.3%
ValueCountFrequency (%)
11
0.5%
21
0.5%
31
0.5%
41
0.5%
51
0.5%
61
0.5%
71
0.5%
81
0.5%
91
0.5%
101
0.5%
ValueCountFrequency (%)
2111
0.5%
2101
0.5%
2091
0.5%
2081
0.5%
2071
0.5%
2061
0.5%
2051
0.5%
2041
0.5%
2031
0.5%
2021
0.5%

PRICE
Real number (ℝ)

High correlation 

Distinct140
Distinct (%)66.4%
Missing0
Missing (%)0.0%
Infinite0
Infinite (%)0.0%
Mean44.30718
Minimum3.5
Maximum165
Zeros0
Zeros (%)0.0%
Negative0
Negative (%)0.0%
Memory size1.8 KiB
2026-03-18T16:13:24.732889image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/

Quantile statistics
Minimum3.5
5-th percentile12.9
Q130.95
median40
Q353.75
95-th percentile88.5
Maximum165
Range161.5
Interquartile range (IQR)22.8

Descriptive statistics
Standard deviation23.606077
Coefficient of variation (CV)0.5327822
Kurtosis5.3208963
Mean44.30718
Median Absolute Deviation (MAD)11
Skewness1.7535862
Sum9348.815
Variance557.24686
MonotonicityNot monotonic

2026-03-18T16:13:24.800492image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
Histogram with fixed size bins (bins=50)
ValueCountFrequency (%)
395
 
2.4%
355
 
2.4%
305
 
2.4%
464
 
1.9%
574
 
1.9%
404
 
1.9%
483
 
1.4%
33.53
 
1.4%
473
 
1.4%
533
 
1.4%
Other values (130)172
81.5%
ValueCountFrequency (%)
3.51
0.5%
5.51
0.5%
6.71
0.5%
81
0.5%
8.51
0.5%
92
0.9%
9.61
0.5%
111
0.5%
12.51
0.5%
12.81
0.5%
ValueCountFrequency (%)
1651
0.5%
1451
0.5%
127.51
0.5%
1131
0.5%
1121
0.5%
1102
0.9%
1071
0.5%
104.31
0.5%
1021
0.5%
89.51
0.5%

NROOM
Real number (ℝ)

High correlation 

Distinct8
Distinct (%)3.8%
Missing0
Missing (%)0.0%
Infinite0
Infinite (%)0.0%
Mean5.1990521
Minimum3
Maximum10
Zeros0
Zeros (%)0.0%
Negative0
Negative (%)0.0%
Memory size1.8 KiB
2026-03-18T16:13:24.854419image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/

Quantile statistics
Minimum3
5-th percentile4
Q15
median5
Q36
95-th percentile7
Maximum10
Range7
Interquartile range (IQR)1

Descriptive statistics
Standard deviation1.1703475
Coefficient of variation (CV)0.22510787
Kurtosis3.181838
Mean5.1990521
Median Absolute Deviation (MAD)0
Skewness1.1893019
Sum1097
Variance1.3697134
MonotonicityNot monotonic

2026-03-18T16:13:24.898928image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
Histogram with fixed size bins (bins=8)
ValueCountFrequency (%)
5106
50.2%
639
 
18.5%
435
 
16.6%
712
 
5.7%
310
 
4.7%
84
 
1.9%
93
 
1.4%
102
 
0.9%
ValueCountFrequency (%)
310
 
4.7%
435
 
16.6%
5106
50.2%
639
 
18.5%
712
 
5.7%
84
 
1.9%
93
 
1.4%
102
 
0.9%
ValueCountFrequency (%)
102
 
0.9%
93
 
1.4%
84
 
1.9%
712
 
5.7%
639
 
18.5%
5106
50.2%
435
 
16.6%
310
 
4.7%

DWELL
Categorical

High correlation 

Distinct2
Distinct (%)0.9%
Missing0
Missing (%)0.0%
Memory size10.8 KiB
1.0
113 
0.0
98 

Length
Max length3
Median length3
Mean length3
Min length3

Characters and Unicode
Total characters633
Distinct characters3
Distinct categories1 ?
Distinct scripts1 ?
Distinct blocks1 ?

The Unicode Standard assigns character properties to each code point, which can be used to analyse textual variables.

Unique
Unique0 ?
Unique (%)0.0%

Sample
1st row0.0
2nd row1.0
3rd row1.0
4th row1.0
5th row1.0

Common Values

ValueCountFrequency (%)
1.0113
53.6%
0.098
46.4%

Length

2026-03-18T16:13:24.953765image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
Histogram of lengths of the category

Common Values (Plot)

2026-03-18T16:13:24.990611image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
ValueCountFrequency (%)
1.0113
53.6%
0.098
46.4%

Most occurring characters

ValueCountFrequency (%)
0309
48.8%
.211
33.3%
1113
 
17.9%

Most occurring categories

ValueCountFrequency (%)
(unknown)633
100.0%

Most frequent character per category

(unknown)
ValueCountFrequency (%)
0309
48.8%
.211
33.3%
1113
 
17.9%

Most occurring scripts

ValueCountFrequency (%)
(unknown)633
100.0%

Most frequent character per script

(unknown)
ValueCountFrequency (%)
0309
48.8%
.211
33.3%
1113
 
17.9%

Most occurring blocks

ValueCountFrequency (%)
(unknown)633
100.0%

Most frequent character per block

(unknown)
ValueCountFrequency (%)
0309
48.8%
.211
33.3%
1113
 
17.9%

NBATH
Real number (ℝ)

Distinct7
Distinct (%)3.3%
Missing0
Missing (%)0.0%
Infinite0
Infinite (%)0.0%
Mean1.5734597
Minimum1
Maximum5
Zeros0
Zeros (%)0.0%
Negative0
Negative (%)0.0%
Memory size1.8 KiB
2026-03-18T16:13:25.022703image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/

Quantile statistics
Minimum1
5-th percentile1
Q11
median1.5
Q32
95-th percentile2.75
Maximum5
Range4
Interquartile range (IQR)1

Descriptive statistics
Standard deviation0.64774839
Coefficient of variation (CV)0.41167141
Kurtosis3.4422107
Mean1.5734597
Median Absolute Deviation (MAD)0.5
Skewness1.5013076
Sum332
Variance0.41957797
MonotonicityNot monotonic

2026-03-18T16:13:25.072021image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
Histogram with fixed size bins (bins=7)
ValueCountFrequency (%)
183
39.3%
1.563
29.9%
234
16.1%
2.520
 
9.5%
37
 
3.3%
3.53
 
1.4%
51
 
0.5%
ValueCountFrequency (%)
183
39.3%
1.563
29.9%
234
16.1%
2.520
 
9.5%
37
 
3.3%
3.53
 
1.4%
51
 
0.5%
ValueCountFrequency (%)
51
 
0.5%
3.53
 
1.4%
37
 
3.3%
2.520
 
9.5%
234
16.1%
1.563
29.9%
183
39.3%

PATIO
Categorical

Distinct2
Distinct (%)0.9%
Missing0
Missing (%)0.0%
Memory size10.8 KiB
0.0
180 
1.0
31 

Length
Max length3
Median length3
Mean length3
Min length3

Characters and Unicode
Total characters633
Distinct characters3
Distinct categories1 ?
Distinct scripts1 ?
Distinct blocks1 ?

The Unicode Standard assigns character properties to each code point, which can be used to analyse textual variables.

Unique
Unique0 ?
Unique (%)0.0%

Sample
1st row0.0
2nd row1.0
3rd row1.0
4th row1.0
5th row1.0

Common Values

ValueCountFrequency (%)
0.0180
85.3%
1.031
 
14.7%

Length

2026-03-18T16:13:25.127129image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
Histogram of lengths of the category

Common Values (Plot)

2026-03-18T16:13:25.164109image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
ValueCountFrequency (%)
0.0180
85.3%
1.031
 
14.7%

Most occurring characters

ValueCountFrequency (%)
0391
61.8%
.211
33.3%
131
 
4.9%

Most occurring categories

ValueCountFrequency (%)
(unknown)633
100.0%

Most frequent character per category

(unknown)
ValueCountFrequency (%)
0391
61.8%
.211
33.3%
131
 
4.9%

Most occurring scripts

ValueCountFrequency (%)
(unknown)633
100.0%

Most frequent character per script

(unknown)
ValueCountFrequency (%)
0391
61.8%
.211
33.3%
131
 
4.9%

Most occurring blocks

ValueCountFrequency (%)
(unknown)633
100.0%

Most frequent character per block

(unknown)
ValueCountFrequency (%)
0391
61.8%
.211
33.3%
131
 
4.9%

FIREPL
Categorical

High correlation 

Distinct2
Distinct (%)0.9%
Missing0
Missing (%)0.0%
Memory size10.8 KiB
0.0
160 
1.0
51 

Length
Max length3
Median length3
Mean length3
Min length3

Characters and Unicode
Total characters633
Distinct characters3
Distinct categories1 ?
Distinct scripts1 ?
Distinct blocks1 ?

The Unicode Standard assigns character properties to each code point, which can be used to analyse textual variables.

Unique
Unique0 ?
Unique (%)0.0%

Sample
1st row0.0
2nd row1.0
3rd row1.0
4th row1.0
5th row1.0

Common Values

ValueCountFrequency (%)
0.0160
75.8%
1.051
 
24.2%

Length

2026-03-18T16:13:25.207781image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
Histogram of lengths of the category

Common Values (Plot)

2026-03-18T16:13:25.241091image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
ValueCountFrequency (%)
0.0160
75.8%
1.051
 
24.2%

Most occurring characters

ValueCountFrequency (%)
0371
58.6%
.211
33.3%
151
 
8.1%

Most occurring categories

ValueCountFrequency (%)
(unknown)633
100.0%

Most frequent character per category

(unknown)
ValueCountFrequency (%)
0371
58.6%
.211
33.3%
151
 
8.1%

Most occurring scripts

ValueCountFrequency (%)
(unknown)633
100.0%

Most frequent character per script

(unknown)
ValueCountFrequency (%)
0371
58.6%
.211
33.3%
151
 
8.1%

Most occurring blocks

ValueCountFrequency (%)
(unknown)633
100.0%

Most frequent character per block

(unknown)
ValueCountFrequency (%)
0371
58.6%
.211
33.3%
151
 
8.1%

AC
Categorical

High correlation 

Distinct2
Distinct (%)0.9%
Missing0
Missing (%)0.0%
Memory size10.8 KiB
0.0
160 
1.0
51 

Length
Max length3
Median length3
Mean length3
Min length3

Characters and Unicode
Total characters633
Distinct characters3
Distinct categories1 ?
Distinct scripts1 ?
Distinct blocks1 ?

The Unicode Standard assigns character properties to each code point, which can be used to analyse textual variables.

Unique
Unique0 ?
Unique (%)0.0%

Sample
1st row0.0
2nd row1.0
3rd row0.0
4th row1.0
5th row0.0

Common Values

ValueCountFrequency (%)
0.0160
75.8%
1.051
 
24.2%

Length

2026-03-18T16:13:25.283853image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
Histogram of lengths of the category

Common Values (Plot)

2026-03-18T16:13:25.316793image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
ValueCountFrequency (%)
0.0160
75.8%
1.051
 
24.2%

Most occurring characters

ValueCountFrequency (%)
0371
58.6%
.211
33.3%
151
 
8.1%

Most occurring categories

ValueCountFrequency (%)
(unknown)633
100.0%

Most frequent character per category

(unknown)
ValueCountFrequency (%)
0371
58.6%
.211
33.3%
151
 
8.1%

Most occurring scripts

ValueCountFrequency (%)
(unknown)633
100.0%

Most frequent character per script

(unknown)
ValueCountFrequency (%)
0371
58.6%
.211
33.3%
151
 
8.1%

Most occurring blocks

ValueCountFrequency (%)
(unknown)633
100.0%

Most frequent character per block

(unknown)
ValueCountFrequency (%)
0371
58.6%
.211
33.3%
151
 
8.1%

BMENT
Categorical

Distinct4
Distinct (%)1.9%
Missing0
Missing (%)0.0%
Memory size10.8 KiB
2.0
118 
3.0
59 
0.0
29 
1.0
 
5

Length
Max length3
Median length3
Mean length3
Min length3

Characters and Unicode
Total characters633
Distinct characters5
Distinct categories1 ?
Distinct scripts1 ?
Distinct blocks1 ?

The Unicode Standard assigns character properties to each code point, which can be used to analyse textual variables.

Unique
Unique0 ?
Unique (%)0.0%

Sample
1st row2.0
2nd row2.0
3rd row3.0
4th row2.0
5th row2.0

Common Values

ValueCountFrequency (%)
2.0118
55.9%
3.059
28.0%
0.029
 
13.7%
1.05
 
2.4%

Length

2026-03-18T16:13:25.360188image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
Histogram of lengths of the category

Common Values (Plot)

2026-03-18T16:13:25.398024image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
ValueCountFrequency (%)
2.0118
55.9%
3.059
28.0%
0.029
 
13.7%
1.05
 
2.4%

Most occurring characters

ValueCountFrequency (%)
0240
37.9%
.211
33.3%
2118
18.6%
359
 
9.3%
15
 
0.8%

Most occurring categories

ValueCountFrequency (%)
(unknown)633
100.0%

Most frequent character per category

(unknown)
ValueCountFrequency (%)
0240
37.9%
.211
33.3%
2118
18.6%
359
 
9.3%
15
 
0.8%

Most occurring scripts

ValueCountFrequency (%)
(unknown)633
100.0%

Most frequent character per script

(unknown)
ValueCountFrequency (%)
0240
37.9%
.211
33.3%
2118
18.6%
359
 
9.3%
15
 
0.8%

Most occurring blocks

ValueCountFrequency (%)
(unknown)633
100.0%

Most frequent character per block

(unknown)
ValueCountFrequency (%)
0240
37.9%
.211
33.3%
2118
18.6%
359
 
9.3%
15
 
0.8%

NSTOR
Categorical

Distinct5
Distinct (%)2.4%
Missing0
Missing (%)0.0%
Memory size10.8 KiB
2.0
149 
1.0
38 
3.0
18 
1.5
 
3
2.5
 
3

Length
Max length3
Median length3
Mean length3
Min length3

Characters and Unicode
Total characters633
Distinct characters6
Distinct categories1 ?
Distinct scripts1 ?
Distinct blocks1 ?

The Unicode Standard assigns character properties to each code point, which can be used to analyse textual variables.

Unique
Unique0 ?
Unique (%)0.0%

Sample
1st row3.0
2nd row2.0
3rd row2.0
4th row2.0
5th row2.0

Common Values

ValueCountFrequency (%)
2.0149
70.6%
1.038
 
18.0%
3.018
 
8.5%
1.53
 
1.4%
2.53
 
1.4%

Length

2026-03-18T16:13:25.447471image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
Histogram of lengths of the category

Common Values (Plot)

2026-03-18T16:13:25.489009image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
ValueCountFrequency (%)
2.0149
70.6%
1.038
 
18.0%
3.018
 
8.5%
1.53
 
1.4%
2.53
 
1.4%

Most occurring characters

ValueCountFrequency (%)
.211
33.3%
0205
32.4%
2152
24.0%
141
 
6.5%
318
 
2.8%
56
 
0.9%

Most occurring categories

ValueCountFrequency (%)
(unknown)633
100.0%

Most frequent character per category

(unknown)
ValueCountFrequency (%)
.211
33.3%
0205
32.4%
2152
24.0%
141
 
6.5%
318
 
2.8%
56
 
0.9%

Most occurring scripts

ValueCountFrequency (%)
(unknown)633
100.0%

Most frequent character per script

(unknown)
ValueCountFrequency (%)
.211
33.3%
0205
32.4%
2152
24.0%
141
 
6.5%
318
 
2.8%
56
 
0.9%

Most occurring blocks

ValueCountFrequency (%)
(unknown)633
100.0%

Most frequent character per block

(unknown)
ValueCountFrequency (%)
.211
33.3%
0205
32.4%
2152
24.0%
141
 
6.5%
318
 
2.8%
56
 
0.9%

GAR
Categorical

Imbalance 

Distinct4
Distinct (%)1.9%
Missing0
Missing (%)0.0%
Memory size10.8 KiB
0.0
175 
1.0
20 
2.0
 
15
3.0
 
1

Length
Max length3
Median length3
Mean length3
Min length3

Characters and Unicode
Total characters633
Distinct characters5
Distinct categories1 ?
Distinct scripts1 ?
Distinct blocks1 ?

The Unicode Standard assigns character properties to each code point, which can be used to analyse textual variables.

Unique
Unique1 ?
Unique (%)0.5%

Sample
1st row0.0
2nd row2.0
3rd row2.0
4th row2.0
5th row0.0

Common Values

ValueCountFrequency (%)
0.0175
82.9%
1.020
 
9.5%
2.015
 
7.1%
3.01
 
0.5%

Length

2026-03-18T16:13:25.541708image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
Histogram of lengths of the category

Common Values (Plot)

2026-03-18T16:13:25.581418image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
ValueCountFrequency (%)
0.0175
82.9%
1.020
 
9.5%
2.015
 
7.1%
3.01
 
0.5%

Most occurring characters

ValueCountFrequency (%)
0386
61.0%
.211
33.3%
120
 
3.2%
215
 
2.4%
31
 
0.2%

Most occurring categories

ValueCountFrequency (%)
(unknown)633
100.0%

Most frequent character per category

(unknown)
ValueCountFrequency (%)
0386
61.0%
.211
33.3%
120
 
3.2%
215
 
2.4%
31
 
0.2%

Most occurring scripts

ValueCountFrequency (%)
(unknown)633
100.0%

Most frequent character per script

(unknown)
ValueCountFrequency (%)
0386
61.0%
.211
33.3%
120
 
3.2%
215
 
2.4%
31
 
0.2%

Most occurring blocks

ValueCountFrequency (%)
(unknown)633
100.0%

Most frequent character per block

(unknown)
ValueCountFrequency (%)
0386
61.0%
.211
33.3%
120
 
3.2%
215
 
2.4%
31
 
0.2%

AGE
Real number (ℝ)

High correlation 

Distinct51
Distinct (%)24.2%
Missing0
Missing (%)0.0%
Infinite0
Infinite (%)0.0%
Mean30.104265
Minimum0
Maximum148
Zeros2
Zeros (%)0.9%
Negative0
Negative (%)0.0%
Memory size1.8 KiB
2026-03-18T16:13:25.633132image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/

Quantile statistics
Minimum0
5-th percentile4
Q120
median25
Q340
95-th percentile70
Maximum148
Range148
Interquartile range (IQR)20

Descriptive statistics
Standard deviation19.834999
Coefficient of variation (CV)0.65887669
Kurtosis6.3898021
Mean30.104265
Median Absolute Deviation (MAD)7
Skewness1.8406371
Sum6352
Variance393.42717
MonotonicityNot monotonic

2026-03-18T16:13:25.705303image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
Histogram with fixed size bins (bins=50)
ValueCountFrequency (%)
2519
 
9.0%
2014
 
6.6%
2214
 
6.6%
5012
 
5.7%
3010
 
4.7%
409
 
4.3%
238
 
3.8%
248
 
3.8%
188
 
3.8%
356
 
2.8%
Other values (41)103
48.8%
ValueCountFrequency (%)
02
 
0.9%
11
 
0.5%
25
2.4%
32
 
0.9%
43
1.4%
53
1.4%
61
 
0.5%
71
 
0.5%
81
 
0.5%
91
 
0.5%
ValueCountFrequency (%)
1481
 
0.5%
1002
 
0.9%
802
 
0.9%
753
 
1.4%
741
 
0.5%
703
 
1.4%
651
 
0.5%
604
 
1.9%
554
 
1.9%
5012
5.7%

CITCOU
Categorical

High correlation 

Distinct2
Distinct (%)0.9%
Missing0
Missing (%)0.0%
Memory size10.8 KiB
1.0
128 
0.0
83 

Length
Max length3
Median length3
Mean length3
Min length3

Characters and Unicode
Total characters633
Distinct characters3
Distinct categories1 ?
Distinct scripts1 ?
Distinct blocks1 ?

The Unicode Standard assigns character properties to each code point, which can be used to analyse textual variables.

Unique
Unique0 ?
Unique (%)0.0%

Sample
1st row0.0
2nd row1.0
3rd row1.0
4th row1.0
5th row1.0

Common Values

ValueCountFrequency (%)
1.0128
60.7%
0.083
39.3%

Length

2026-03-18T16:13:25.769344image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
Histogram of lengths of the category

Common Values (Plot)

2026-03-18T16:13:25.812937image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
ValueCountFrequency (%)
1.0128
60.7%
0.083
39.3%

Most occurring characters

ValueCountFrequency (%)
0294
46.4%
.211
33.3%
1128
20.2%

Most occurring categories

ValueCountFrequency (%)
(unknown)633
100.0%

Most frequent character per category

(unknown)
ValueCountFrequency (%)
0294
46.4%
.211
33.3%
1128
20.2%

Most occurring scripts

ValueCountFrequency (%)
(unknown)633
100.0%

Most frequent character per script

(unknown)
ValueCountFrequency (%)
0294
46.4%
.211
33.3%
1128
20.2%

Most occurring blocks

ValueCountFrequency (%)
(unknown)633
100.0%

Most frequent character per block

(unknown)
ValueCountFrequency (%)
0294
46.4%
.211
33.3%
1128
20.2%

LOTSZ
Real number (ℝ)

High correlation 

Distinct183
Distinct (%)86.7%
Missing0
Missing (%)0.0%
Infinite0
Infinite (%)0.0%
Mean72.280616
Minimum5.7
Maximum400.37
Zeros0
Zeros (%)0.0%
Negative0
Negative (%)0.0%
Memory size1.8 KiB
2026-03-18T16:13:25.865472image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/

Quantile statistics
Minimum5.7
5-th percentile9.895
Q120.76
median56.25
Q384.32
95-th percentile258.25
Maximum400.37
Range394.67
Interquartile range (IQR)63.56

Descriptive statistics
Standard deviation74.519463
Coefficient of variation (CV)1.0309744
Kurtosis5.5327196
Mean72.280616
Median Absolute Deviation (MAD)34.65
Skewness2.2854168
Sum15251.21
Variance5553.1504
MonotonicityNot monotonic

2026-03-18T16:13:25.939825image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
Histogram with fixed size bins (bins=50)
ValueCountFrequency (%)
755
 
2.4%
704
 
1.9%
62.54
 
1.9%
70.43
 
1.4%
163
 
1.4%
193
 
1.4%
1003
 
1.4%
183
 
1.4%
203
 
1.4%
127.12
 
0.9%
Other values (173)178
84.4%
ValueCountFrequency (%)
5.71
0.5%
71
0.5%
7.51
0.5%
8.61
0.5%
8.731
0.5%
8.971
0.5%
91
0.5%
9.11
0.5%
9.361
0.5%
9.451
0.5%
ValueCountFrequency (%)
400.371
0.5%
365.071
0.5%
362.121
0.5%
3601
0.5%
315.91
0.5%
303.831
0.5%
3001
0.5%
289.971
0.5%
279.511
0.5%
2681
0.5%

SQFT
Real number (ℝ)

High correlation 

Distinct156
Distinct (%)73.9%
Missing0
Missing (%)0.0%
Infinite0
Infinite (%)0.0%
Mean16.428365
Minimum5.76
Maximum47.61
Zeros0
Zeros (%)0.0%
Negative0
Negative (%)0.0%
Memory size1.8 KiB
2026-03-18T16:13:26.111429image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/

Quantile statistics
Minimum5.76
5-th percentile8.61
Q111.02
median13.44
Q319.94
95-th percentile35.73
Maximum47.61
Range41.85
Interquartile range (IQR)8.92

Descriptive statistics
Standard deviation8.210894
Coefficient of variation (CV)0.49979983
Kurtosis2.8609982
Mean16.428365
Median Absolute Deviation (MAD)3.36
Skewness1.6841355
Sum3466.385
Variance67.41878
MonotonicityNot monotonic

2026-03-18T16:13:26.175550image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
Histogram with fixed size bins (bins=50)
ValueCountFrequency (%)
11.528
 
3.8%
10.248
 
3.8%
12.86
 
2.8%
8.965
 
2.4%
14.44
 
1.9%
10.084
 
1.9%
15.64
 
1.9%
12.63
 
1.4%
123
 
1.4%
17.683
 
1.4%
Other values (146)163
77.3%
ValueCountFrequency (%)
5.761
 
0.5%
6.723
1.4%
6.961
 
0.5%
7.561
 
0.5%
7.81
 
0.5%
8.121
 
0.5%
8.42
0.9%
8.581
 
0.5%
8.641
 
0.5%
8.681
 
0.5%
ValueCountFrequency (%)
47.611
0.5%
46.321
0.5%
44.551
0.5%
44.121
0.5%
42.91
0.5%
41.071
0.5%
39.421
0.5%
38.251
0.5%
36.751
0.5%
36.721
0.5%

X
Real number (ℝ)

High correlation 

Distinct133
Distinct (%)63.0%
Missing0
Missing (%)0.0%
Infinite0
Infinite (%)0.0%
Mean911.64597
Minimum860
Maximum987.5
Zeros0
Zeros (%)0.0%
Negative0
Negative (%)0.0%
Memory size1.8 KiB
2026-03-18T16:13:26.242568image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/

Quantile statistics
Minimum860
5-th percentile868.5
Q1889
median910
Q3933.5
95-th percentile957
Maximum987.5
Range127.5
Interquartile range (IQR)44.5

Descriptive statistics
Standard deviation27.690841
Coefficient of variation (CV)0.030374556
Kurtosis-0.80680222
Mean911.64597
Median Absolute Deviation (MAD)23
Skewness0.17063174
Sum192357.3
Variance766.78269
MonotonicityNot monotonic

2026-03-18T16:13:26.317386image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
Histogram with fixed size bins (bins=50)
ValueCountFrequency (%)
9335
 
2.4%
8834
 
1.9%
8894
 
1.9%
9183
 
1.4%
9073
 
1.4%
9513
 
1.4%
9453
 
1.4%
9383
 
1.4%
9103
 
1.4%
9193
 
1.4%
Other values (123)177
83.9%
ValueCountFrequency (%)
8602
0.9%
8641
 
0.5%
8661
 
0.5%
8673
1.4%
867.51
 
0.5%
8683
1.4%
8692
0.9%
870.51
 
0.5%
8711
 
0.5%
872.51
 
0.5%
ValueCountFrequency (%)
987.51
 
0.5%
9751
 
0.5%
9711
 
0.5%
9641
 
0.5%
960.51
 
0.5%
9601
 
0.5%
9591
 
0.5%
958.51
 
0.5%
9581
 
0.5%
9573
1.4%

Y
Real number (ℝ)

Distinct113
Distinct (%)53.6%
Missing0
Missing (%)0.0%
Infinite0
Infinite (%)0.0%
Mean544.22701
Minimum505.5
Maximum581
Zeros0
Zeros (%)0.0%
Negative0
Negative (%)0.0%
Memory size1.8 KiB
2026-03-18T16:13:26.384983image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/

Quantile statistics
Minimum505.5
5-th percentile513.75
Q1528.75
median544.5
Q3559
95-th percentile574
Maximum581
Range75.5
Interquartile range (IQR)30.25

Descriptive statistics
Standard deviation18.584343
Coefficient of variation (CV)0.034148144
Kurtosis-0.94693005
Mean544.22701
Median Absolute Deviation (MAD)15
Skewness-0.039221408
Sum114831.9
Variance345.37779
MonotonicityNot monotonic

2026-03-18T16:13:26.454417image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
Histogram with fixed size bins (bins=50)
ValueCountFrequency (%)
5765
 
2.4%
5605
 
2.4%
5525
 
2.4%
5454
 
1.9%
5734
 
1.9%
5504
 
1.9%
5594
 
1.9%
5354
 
1.9%
526.54
 
1.9%
527.54
 
1.9%
Other values (103)168
79.6%
ValueCountFrequency (%)
505.51
0.5%
5081
0.5%
509.52
0.9%
510.51
0.5%
5111
0.5%
511.51
0.5%
512.51
0.5%
5131
0.5%
513.52
0.9%
5142
0.9%
ValueCountFrequency (%)
5811
 
0.5%
5781
 
0.5%
5771
 
0.5%
5765
2.4%
575.51
 
0.5%
5743
1.4%
5734
1.9%
572.51
 
0.5%
5723
1.4%
5711
 
0.5%

Interactions

2026-03-18T16:13:23.846597image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:20.238802image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:20.701818image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:21.147835image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:21.583225image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:22.071076image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:22.598403image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:23.053779image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:23.428341image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:23.894025image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:20.286081image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:20.746663image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:21.191373image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:21.631086image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:22.117314image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:22.645982image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:23.091733image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:23.470993image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:23.945069image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:20.331840image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:20.796803image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:21.242293image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:21.684317image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:22.166553image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:22.700007image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:23.134531image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:23.518552image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:23.995365image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:20.375223image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:20.846750image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:21.288907image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:21.744364image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:22.216011image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:22.751446image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:23.177473image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:23.566407image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:24.043703image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:20.421136image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:20.900722image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:21.341429image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:21.815689image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:22.347724image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:22.803807image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:23.220461image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:23.615517image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:24.094941image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:20.527067image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:20.949683image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:21.390846image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:21.869447image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:22.395298image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:22.853620image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:23.263769image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:23.665480image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:24.234328image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:20.571223image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:21.000211image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:21.441897image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:21.927223image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:22.450773image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:22.905584image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:23.306444image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:23.713853image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:24.281392image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:20.610979image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:21.046163image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:21.486271image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:21.970218image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:22.498667image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:22.948889image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:23.343110image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:23.754182image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:24.329936image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:20.656810image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:21.095282image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:21.534436image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:22.021412image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:22.547299image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:22.998556image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:23.384808image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
2026-03-18T16:13:23.797170image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/

Correlations

2026-03-18T16:13:26.515539image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
ACAGEBMENTCITCOUDWELLFIREPLGARLOTSZNBATHNROOMNSTORPATIOPRICESQFTSTATIONXY
AC1.0000.5690.0160.2530.0920.1730.0860.2500.3500.2050.1370.2080.4460.1990.2920.2030.321
AGE0.5691.0000.0950.5110.1520.0000.131-0.284-0.220-0.0320.1940.047-0.4300.044-0.007-0.039-0.249
BMENT0.0160.0951.0000.1910.0570.0000.0000.0000.1530.1320.0000.1520.0630.1650.1530.1570.102
CITCOU0.2530.5110.1911.0000.4420.0000.0000.4720.2130.1750.3340.1400.5040.1810.3570.5540.331
DWELL0.0920.1520.0570.4421.0000.2160.1890.8730.2020.3400.4440.1720.5490.3890.3500.3220.287
FIREPL0.1730.0000.0000.0000.2161.0000.3350.4060.3480.4240.1280.2080.5330.4110.3770.2930.225
GAR0.0860.1310.0000.0000.1890.3351.0000.2800.2760.2020.0000.2230.3710.1940.0000.0000.161
LOTSZ0.250-0.2840.0000.4720.8730.4060.2801.0000.3460.4040.2080.3500.7300.469-0.254-0.0870.420
NBATH0.350-0.2200.1530.2130.2020.3480.2760.3461.0000.4690.1260.2990.4800.498-0.301-0.2910.355
NROOM0.205-0.0320.1320.1750.3400.4240.2020.4040.4691.0000.3010.3390.4560.634-0.271-0.2730.312
NSTOR0.1370.1940.0000.3340.4440.1280.0000.2080.1260.3011.0000.0630.2430.3630.1160.1620.088
PATIO0.2080.0470.1520.1400.1720.2080.2230.3500.2990.3390.0631.0000.4750.3460.3810.0000.408
PRICE0.446-0.4300.0630.5040.5490.5330.3710.7300.4800.4560.2430.4751.0000.480-0.204-0.1470.476
SQFT0.1990.0440.1650.1810.3890.4110.1940.4690.4980.6340.3630.3460.4801.000-0.302-0.3390.302
STATION0.292-0.0070.1530.3570.3500.3770.000-0.254-0.301-0.2710.1160.381-0.204-0.3021.0000.264-0.307
X0.203-0.0390.1570.5540.3220.2930.000-0.087-0.291-0.2730.1620.000-0.147-0.3390.2641.0000.075
Y0.321-0.2490.1020.3310.2870.2250.1610.4200.3550.3120.0880.4080.4760.302-0.3070.0751.000

Missing values

2026-03-18T16:13:24.413630image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
A simple visualization of nullity by column.
2026-03-18T16:13:24.485438image/svg+xmlMatplotlib v3.10.0, https://matplotlib.org/
Nullity matrix is a data-dense display which lets you quickly visually pick out patterns in data completion.

Sample

STATIONPRICENROOMDWELLNBATHPATIOFIREPLACBMENTNSTORGARAGECITCOULOTSZSQFTXY
0147.04.00.01.00.00.00.02.03.00.0148.00.05.7011.25907.0534.0
12113.07.01.02.51.01.01.02.02.02.09.01.0279.5128.92922.0574.0
23165.07.01.02.51.01.00.03.02.02.023.01.070.6430.62920.0581.0
34104.37.01.02.51.01.01.02.02.02.05.01.0174.6326.12923.0578.0
4562.57.01.01.51.01.00.02.02.00.019.01.0107.8022.04918.0574.0
5670.06.01.02.51.01.00.03.03.01.020.01.0139.6439.42900.0577.0
67127.56.01.02.51.01.01.03.01.02.020.01.0250.0021.88918.0576.0
7853.08.01.01.51.00.00.00.03.00.022.01.0100.0036.72907.0576.0
8964.56.01.01.01.01.01.03.02.00.022.01.0115.9025.60918.0562.0
910145.07.01.02.51.01.01.03.02.02.04.01.0365.0744.12897.0576.0
STATIONPRICENROOMDWELLNBATHPATIOFIREPLACBMENTNSTORGARAGECITCOULOTSZSQFTXY
20120239.0005.00.01.00.00.01.02.02.00.01.01.017.269.98939.5564.5
20220345.0005.01.01.00.00.00.02.02.02.047.01.075.0012.96939.0543.5
20320442.0005.01.01.00.00.00.00.01.00.021.01.060.5011.13934.0540.5
20420538.9006.01.01.00.00.00.03.02.00.029.01.042.3519.60933.0538.0
20520637.5004.00.01.00.01.00.00.02.01.023.01.0134.8820.66938.0539.5
20620739.0005.00.01.50.00.01.03.02.00.02.01.019.2412.60940.0538.5
20720843.2154.00.01.50.01.01.03.02.00.00.01.013.2611.52945.5553.0
20820926.5005.00.01.00.00.00.03.02.00.029.00.026.0312.16914.0553.0
20921030.0006.00.01.50.00.01.02.02.00.024.00.020.0012.80919.0554.0
21021129.5005.00.01.00.00.00.02.02.00.022.00.035.8410.64914.0558.0
\ No newline at end of file diff --git a/class_project/DATA605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/src/run_profiling.py b/class_project/DATA605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/src/run_profiling.py new file mode 100644 index 000000000..83d1590ad --- /dev/null +++ b/class_project/DATA605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/src/run_profiling.py @@ -0,0 +1,25 @@ +import pandas as pd +from ydata_profiling import ProfileReport +from pathlib import Path + +project_root = Path(__file__).resolve().parents[1] +data_path = project_root / "data" / "baltim.csv" +output_path = project_root / "outputs" / "baltim_profile_report.html" + +df = pd.read_csv(data_path) + +print("Data shape:", df.shape) +print("\nColumns:") +print(df.columns.tolist()) +print("\nFirst 5 rows:") +print(df.head()) + +profile = ProfileReport( + df, + title="Baltimore Housing Data Profiling Report", + explorative=True +) + +profile.to_file(output_path) + +print(f"\nReport saved to: {output_path}") \ No newline at end of file From b1cbfdd1a316f34946073016817bade52be5a904 Mon Sep 17 00:00:00 2001 From: songshenhuang <56412353+songshenhuang@users.noreply.github.com> Date: Tue, 31 Mar 2026 20:54:48 -0400 Subject: [PATCH 2/6] Create README.md Create the project directory and add the initial README for the YData profiling project. 
--- .../UmdTask391_DATA605_Spring2026_YData_profiling/README.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/README.md diff --git a/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/README.md b/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/README.md new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/README.md @@ -0,0 +1 @@ + From a14b710cdddc42aedf39d37107ddc4a47a9b32f9 Mon Sep 17 00:00:00 2001 From: songshenhuang <56412353+songshenhuang@users.noreply.github.com> Date: Tue, 31 Mar 2026 20:58:56 -0400 Subject: [PATCH 3/6] Update README.md --- .../README.md | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/README.md b/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/README.md index 8b1378917..ba61927c4 100644 --- a/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/README.md +++ b/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/README.md @@ -1 +1,32 @@ +# YData-profiling Project +## Project Title +YData Profiling for Exploratory Data Analysis and Regression Modeling + +## Project Description +This project explores the Python library YData-profiling for automated exploratory data analysis (EDA). The goal is to generate comprehensive data profile reports, identify data quality issues, understand variable distributions, and prepare the dataset for predictive modeling. 
+ +## Objectives +- Load and inspect a dataset using Pandas +- Generate an automated profiling report with YData-profiling +- Identify missing values, outliers, and data quality issues +- Perform data cleaning and feature engineering +- Build a regression model for prediction +- Evaluate model performance using appropriate metrics + +## Tool +- Python +- Pandas +- YData-profiling +- Scikit-learn +- Jupyter Notebook + +## Dataset +A public dataset will be selected for profiling and regression analysis. + +## Expected Output +- Automated profiling report +- Cleaned dataset +- Regression model +- Evaluation results +- Documentation showing how YData-profiling supports the workflow From c70901254a5996df2377fb5f5112528c3f7051a7 Mon Sep 17 00:00:00 2001 From: Songshen Huang Date: Wed, 8 Apr 2026 17:46:35 -0400 Subject: [PATCH 4/6] Move project to correct lowercase data605 path --- .../UmdTask391_DATA605_Spring2026_YData_profiling/README.md | 0 .../UmdTask391_DATA605_Spring2026_YData_profiling/data/baltim.csv | 0 .../UmdTask391_DATA605_Spring2026_YData_profiling/notes.txt | 0 .../outputs/baltim_profile_report.html | 0 .../src/run_profiling.py | 0 5 files changed, 0 insertions(+), 0 deletions(-) delete mode 100644 class_project/DATA605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/README.md rename class_project/{DATA605 => data605}/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/data/baltim.csv (100%) rename class_project/{DATA605 => data605}/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/notes.txt (100%) rename class_project/{DATA605 => data605}/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/outputs/baltim_profile_report.html (100%) rename class_project/{DATA605 => data605}/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/src/run_profiling.py (100%) diff --git a/class_project/DATA605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/README.md 
b/class_project/DATA605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/README.md deleted file mode 100644 index e69de29bb..000000000 diff --git a/class_project/DATA605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/data/baltim.csv b/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/data/baltim.csv similarity index 100% rename from class_project/DATA605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/data/baltim.csv rename to class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/data/baltim.csv diff --git a/class_project/DATA605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/notes.txt b/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/notes.txt similarity index 100% rename from class_project/DATA605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/notes.txt rename to class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/notes.txt diff --git a/class_project/DATA605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/outputs/baltim_profile_report.html b/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/outputs/baltim_profile_report.html similarity index 100% rename from class_project/DATA605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/outputs/baltim_profile_report.html rename to class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/outputs/baltim_profile_report.html diff --git a/class_project/DATA605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/src/run_profiling.py b/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/src/run_profiling.py similarity index 100% rename from 
class_project/DATA605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/src/run_profiling.py rename to class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/src/run_profiling.py From 90f57a21d407ec68ce96c97bdab0a19f4d2ee0a7 Mon Sep 17 00:00:00 2001 From: Songshen Huang Date: Wed, 8 Apr 2026 18:07:44 -0400 Subject: [PATCH 5/6] Add Docker template files for project setup --- .../.dockerignore | 143 ++++++++++++++ .../bashrc | 1 + .../docker_bash.sh | 39 ++++ .../docker_build.sh | 39 ++++ .../docker_clean.sh | 26 +++ .../docker_cmd.sh | 42 ++++ .../docker_exec.sh | 25 +++ .../docker_jupyter.sh | 74 +++++++ .../docker_name.sh | 12 ++ .../docker_push.sh | 25 +++ .../etc_sudoers | 31 +++ .../requirements.txt | 7 + .../run_jupyter.sh | 69 +++++++ .../utils.sh | 183 ++++++++++++++++++ .../version.sh | 28 +++ 15 files changed, 744 insertions(+) create mode 100644 class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/.dockerignore create mode 100644 class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/bashrc create mode 100755 class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/docker_bash.sh create mode 100755 class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/docker_build.sh create mode 100755 class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/docker_clean.sh create mode 100755 class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/docker_cmd.sh create mode 100755 class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/docker_exec.sh create mode 100644 class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/docker_jupyter.sh create mode 100644 class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/docker_name.sh create 
mode 100755 class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/docker_push.sh create mode 100644 class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/etc_sudoers create mode 100644 class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/requirements.txt create mode 100755 class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/run_jupyter.sh create mode 100644 class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/utils.sh create mode 100755 class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/version.sh diff --git a/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/.dockerignore b/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/.dockerignore new file mode 100644 index 000000000..fd85b2584 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/.dockerignore @@ -0,0 +1,143 @@ +# Exclude files from Docker build context. This prevents unnecessary files from +# being sent to Docker daemon, reducing build time and image size. 
+ +# Python artifacts +__pycache__/ +*.pyc +*.pyo +*.pyd +*.egg-info/ + +# Virtual environments +venv/ +.venv/ +env/ +.env +.envrc +client_venv.helpers/ +ENV/ + +# Jupyter +.ipynb_checkpoints/ +.jupyter/ + +# Build artifacts +build/ +dist/ +*.eggs/ +.eggs/ + +# Cache and temporary files +*.log +*.tmp +*.cache +.pytest_cache/ +.mypy_cache/ +.coverage +htmlcov/ + +# Git and version control +.git/ +.gitignore +.gitattributes +.github/ + +# Docker build scripts (not needed at runtime) +docker_build.sh +docker_push.sh +docker_clean.sh +docker_exec.sh +docker_cmd.sh +docker_bash.sh +docker_jupyter.sh +docker_name.sh +run_jupyter.sh +Dockerfile.* +.dockerignore + +# Documentation +README.md +README.admin.md +docs/ +*.md +CHANGELOG.md +LICENSE + +# Configuration and secrets +.env.* +.env.local +.env.development +.env.production +.DS_Store +Thumbs.db + +# Shell configuration +.bashrc +.bash_history +.zshrc + +# Large data files (mount via volume instead) +data/ +*.csv +*.pkl +*.h5 +*.parquet +*.feather +*.arrow +*.npy +*.npz + +# Generated images +*.png +*.jpg +*.jpeg +*.gif +*.svg +*.pdf + +# Test files and examples +tests/ +test_* +*_test.py +tutorials/ +examples/ + +# IDE and editor files +.vscode/ +.idea/ +*.swp +*.swo +*~ +.project +.pydevproject +.settings/ +*.iml +.sublime-project +.sublime-workspace + +# Node and frontend (if applicable) +node_modules/ +npm-debug.log +yarn-error.log +.npm + +# Requirements management +requirements.in +Pipfile +Pipfile.lock +poetry.lock +setup.py +setup.cfg + +# CI/CD configuration +.gitlab-ci.yml +.travis.yml +Jenkinsfile +.circleci/ + +# Miscellaneous +*.bak +.venv.bak/ +*.whl +*.tar.gz +*.zip diff --git a/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/bashrc b/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/bashrc new file mode 100644 index 000000000..4b7ff4c49 --- /dev/null +++ 
b/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/bashrc @@ -0,0 +1 @@ +set -o vi diff --git a/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/docker_bash.sh b/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/docker_bash.sh new file mode 100755 index 000000000..e8af65fbf --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/docker_bash.sh @@ -0,0 +1,39 @@ +#!/bin/bash +# """ +# This script launches a Docker container with an interactive bash shell for +# development. +# """ + +# Exit immediately if any command exits with a non-zero status. +set -e + +# Print each command to stdout before executing it. +set -x + +# Import the utility functions from the project template. +GIT_ROOT=$(git rev-parse --show-toplevel) +source $GIT_ROOT/class_project/project_template/utils.sh + +# Load Docker configuration variables for this script. +get_docker_vars_script ${BASH_SOURCE[0]} +source $DOCKER_NAME +print_docker_vars + +# List the available Docker images matching the expected image name. +run "docker image ls $FULL_IMAGE_NAME" + +# Configure and run the Docker container with interactive bash shell. 
+# - Container is removed automatically on exit (--rm) +# - Interactive mode with TTY allocation (-ti) +# - Port forwarding for Jupyter or other services +# - Current directory mounted to /data inside container +CONTAINER_NAME=${IMAGE_NAME}_bash +PORT=8889 +cmd="docker run --rm -ti \ + --name $CONTAINER_NAME \ + -p $PORT:$PORT \ + -v $(pwd):/data \ + -v $GIT_ROOT:/git_root \ + -e PYTHONPATH=/git_root:/git_root/helpers_root \ + $FULL_IMAGE_NAME" +run $cmd diff --git a/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/docker_build.sh b/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/docker_build.sh new file mode 100755 index 000000000..e36e25824 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/docker_build.sh @@ -0,0 +1,39 @@ +#!/bin/bash +# """ +# Build a Docker container image for the project. +# +# This script sets up the build environment with error handling and command +# tracing, loads Docker configuration from docker_name.sh, and builds the +# Docker image using the build_container_image utility function. It supports +# both single-architecture and multi-architecture builds via the +# DOCKER_BUILD_MULTI_ARCH environment variable. +# """ + +# Exit immediately if any command exits with a non-zero status. +set -e + +# Print each command to stdout before executing it. +set -x + +# Import the utility functions. +GIT_ROOT=$(git rev-parse --show-toplevel) +source $GIT_ROOT/class_project/project_template/utils.sh + +# Load Docker configuration variables (REPO_NAME, IMAGE_NAME, FULL_IMAGE_NAME). +get_docker_vars_script ${BASH_SOURCE[0]} +source $DOCKER_NAME +print_docker_vars + +# Configure Docker build settings. +# Enable BuildKit for improved build performance and features. +export DOCKER_BUILDKIT=1 +#export DOCKER_BUILDKIT=0 + +# Configure single-architecture build (set to 1 for multi-arch build). 
+#export DOCKER_BUILD_MULTI_ARCH=1 +export DOCKER_BUILD_MULTI_ARCH=0 + +# Build the container image. +# Uncomment the line below to build without using Docker cache. +#build_container_image --no-cache +build_container_image diff --git a/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/docker_clean.sh b/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/docker_clean.sh new file mode 100755 index 000000000..4519e6c00 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/docker_clean.sh @@ -0,0 +1,26 @@ +#!/bin/bash +# """ +# Remove Docker container image for the project. +# +# This script cleans up Docker images by removing the container image +# matching the project configuration. Useful for freeing disk space or +# ensuring a fresh build. +# """ + +# Exit immediately if any command exits with a non-zero status. +set -e + +# Print each command to stdout before executing it. +set -x + +# Import the utility functions. +GIT_ROOT=$(git rev-parse --show-toplevel) +source $GIT_ROOT/class_project/project_template/utils.sh + +# Load Docker configuration variables for this script. +get_docker_vars_script ${BASH_SOURCE[0]} +source $DOCKER_NAME +print_docker_vars + +# Remove the container image. +remove_container_image diff --git a/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/docker_cmd.sh b/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/docker_cmd.sh new file mode 100755 index 000000000..3d2550992 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/docker_cmd.sh @@ -0,0 +1,42 @@ +#!/bin/bash -e +# """ +# Execute a command in a Docker container. +# +# This script runs a specified command inside a new Docker container instance. +# The container is removed automatically after the command completes. 
The +# current directory is mounted to /data inside the container. +# """ + +# Exit immediately if any command exits with a non-zero status. +set -e +#set -x + +# Capture the command to execute from command-line arguments. +CMD="$@" +echo "Executing: '$CMD'" + +# Import the utility functions. +GIT_ROOT=$(git rev-parse --show-toplevel) +source $GIT_ROOT/class_project/project_template/utils.sh + +# Load Docker configuration variables for this script. +get_docker_vars_script ${BASH_SOURCE[0]} +source $DOCKER_NAME +print_docker_vars + +# List available Docker images matching the expected image name. +run "docker image ls $FULL_IMAGE_NAME" +#(docker manifest inspect $FULL_IMAGE_NAME | grep arch) || true + +# Configure and run the Docker container with the specified command. +DOCKER_RUN_OPTS="" +CONTAINER_NAME=$IMAGE_NAME +run "docker run \ + --rm -ti \ + --name $CONTAINER_NAME \ + $DOCKER_RUN_OPTS \ + -v $(pwd):/data \ + -v $GIT_ROOT:/git_root \ + -e PYTHONPATH=/git_root:/git_root/helpers_root \ + $FULL_IMAGE_NAME \ + bash -c '$CMD'" diff --git a/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/docker_exec.sh b/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/docker_exec.sh new file mode 100755 index 000000000..843b1c0f8 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/docker_exec.sh @@ -0,0 +1,25 @@ +#!/bin/bash +# """ +# Execute a bash shell in a running Docker container. +# +# This script connects to an already running Docker container and opens an +# interactive bash session for debugging or inspection purposes. +# """ + +# Exit immediately if any command exits with a non-zero status. +set -e + +# Print each command to stdout before executing it. +set -x + +# Import the utility functions. 
+GIT_ROOT=$(git rev-parse --show-toplevel) +source $GIT_ROOT/class_project/project_template/utils.sh + +# Load Docker configuration variables for this script. +get_docker_vars_script ${BASH_SOURCE[0]} +source $DOCKER_NAME +print_docker_vars + +# Execute bash shell in the running container. +exec_container diff --git a/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/docker_jupyter.sh b/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/docker_jupyter.sh new file mode 100644 index 000000000..8e5a6e15d --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/docker_jupyter.sh @@ -0,0 +1,74 @@ +#!/bin/bash +# """ +# Execute Jupyter Lab in a Docker container. +# +# This script launches a Docker container running Jupyter Lab with +# configurable port, directory mounting, and vim bindings. It passes +# command-line options to the run_jupyter.sh script inside the container. +# +# Usage: +# > docker_jupyter.sh -d /path/to/notebooks -v -u -p 8889 +# """ + +# Exit immediately if any command exits with a non-zero status. +set -e +#set -x + +# Initialize default parameter values for Jupyter configuration. +export JUPYTER_HOST_PORT=8888 +export JUPYTER_USE_VIM=0 +export TARGET_DIR="" +TARGET_DIR=. +export VERBOSE=0 + +# Save original command-line options to pass through to run_jupyter.sh. +OLD_CMD_OPTS=$@ + +# Parse command-line options. +while getopts p:d:uv flag +do + case "${flag}" in + p) JUPYTER_HOST_PORT=${OPTARG};; # Port for Jupyter Lab + u) JUPYTER_USE_VIM=1;; # Enable vim bindings + d) TARGET_DIR=${OPTARG};; # Directory to mount as /data + v) VERBOSE=1;; # Enable verbose output + esac +done + +# Enable command tracing if verbose mode is requested. +if [[ $VERBOSE == 1 ]]; then + set -x +fi; + +# Import the utility functions. 
+GIT_ROOT=$(git rev-parse --show-toplevel) +source $GIT_ROOT/class_project/project_template/utils.sh + +# Load Docker configuration variables for this script. +get_docker_vars_script ${BASH_SOURCE[0]} +source $DOCKER_NAME +print_docker_vars + +# Configure Docker run options with port forwarding and optional volume mount. +DOCKER_RUN_OPTS="-p $JUPYTER_HOST_PORT:8888" +if [[ $TARGET_DIR != "" ]]; then + DOCKER_RUN_OPTS="$DOCKER_RUN_OPTS -v $TARGET_DIR:/data" +fi; +CMD="/curr_dir/run_jupyter.sh $OLD_CMD_OPTS" + +# List available Docker images and inspect architecture. +run "docker image ls $FULL_IMAGE_NAME" +(docker manifest inspect $FULL_IMAGE_NAME | grep arch) || true + +# Run the Docker container with Jupyter Lab. +CONTAINER_NAME=$IMAGE_NAME +run "docker run \ + --rm -ti \ + --name $CONTAINER_NAME \ + $DOCKER_RUN_OPTS \ + -v $(pwd):/curr_dir \ + -v $GIT_ROOT:/git_root \ + -e PYTHONPATH=/git_root:/git_root/helpers_root \ + -e JUPYTER_USE_VIM=$JUPYTER_USE_VIM \ + $FULL_IMAGE_NAME \ + $CMD" diff --git a/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/docker_name.sh b/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/docker_name.sh new file mode 100644 index 000000000..6bee77db0 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/docker_name.sh @@ -0,0 +1,12 @@ +#!/bin/bash +# """ +# Docker image naming configuration. +# +# This file defines the repository name, image name, and full image name +# variables used by all docker_*.sh scripts in the project template. +# """ + +REPO_NAME=gpsaggese +# The file should be all lower case. 
+IMAGE_NAME=umd_ydata_profiling +FULL_IMAGE_NAME=$REPO_NAME/$IMAGE_NAME diff --git a/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/docker_push.sh b/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/docker_push.sh new file mode 100755 index 000000000..8745194e9 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/docker_push.sh @@ -0,0 +1,25 @@ +#!/bin/bash +# """ +# Push Docker container image to Docker Hub or registry. +# +# This script authenticates with the Docker registry using credentials from +# ~/.docker/passwd.$REPO_NAME.txt and pushes the locally built container +# image to the remote repository. +# """ + +# Exit immediately if any command exits with a non-zero status. +set -e + +# Print each command to stdout before executing it. +set -x + +# Import the utility functions. +GIT_ROOT=$(git rev-parse --show-toplevel) +source $GIT_ROOT/class_project/project_template/utils.sh + +# Load Docker image naming configuration. +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +source $SCRIPT_DIR/docker_name.sh + +# Push the container image to the registry. +push_container_image diff --git a/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/etc_sudoers b/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/etc_sudoers new file mode 100644 index 000000000..ee0816a15 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/etc_sudoers @@ -0,0 +1,31 @@ +# +# This file MUST be edited with the 'visudo' command as root. +# +# Please consider adding local content in /etc/sudoers.d/ instead of +# directly modifying this file. +# +# See the man page for details on how to write a sudoers file. 
+# +Defaults env_reset +Defaults mail_badpass +Defaults secure_path="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/snap/bin" + +# Host alias specification + +# User alias specification + +# Cmnd alias specification + +# User privilege specification +root ALL=(ALL:ALL) ALL + +# Members of the admin group may gain root privileges +%admin ALL=(ALL) ALL + +# Allow members of group sudo to execute any command +%sudo ALL=(ALL:ALL) ALL + +# See sudoers(5) for more information on "#include" directives: +postgres ALL=(ALL) NOPASSWD:ALL + +#includedir /etc/sudoers.d diff --git a/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/requirements.txt b/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/requirements.txt new file mode 100644 index 000000000..7520382dc --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/requirements.txt @@ -0,0 +1,7 @@ +ipywidgets +jupyterlab +jupyterlab-vim +matplotlib +numpy +pandas +seaborn diff --git a/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/run_jupyter.sh b/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/run_jupyter.sh new file mode 100755 index 000000000..0e4e5ab60 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/run_jupyter.sh @@ -0,0 +1,69 @@ +#!/bin/bash -xe +# """ +# Launch Jupyter Lab server. 
+# +# This script starts Jupyter Lab on port 8888 with the following configuration: +# - No browser auto-launch (useful for Docker containers) +# - Accessible from any IP address (0.0.0.0) +# - Root user allowed (required for Docker environments) +# - No authentication token or password (for development convenience) +# - Vim keybindings can be enabled via JUPYTER_USE_VIM environment variable +# """ + +mkdir -p ~/.jupyter/lab/user-settings/@axlair/jupyterlab_vim +if [[ $JUPYTER_USE_VIM == 1 ]]; then + echo "Enabling vim." + cat <<EOF > ~/.jupyter/lab/user-settings/\@axlair/jupyterlab_vim/plugin.jupyterlab-settings +{ + "enabled": true, + "enabledInEditors": true, + "extraKeybindings": [] +} +EOF +else + echo "Disabling vim." + cat <<EOF > ~/.jupyter/lab/user-settings/\@axlair/jupyterlab_vim/plugin.jupyterlab-settings +{ + "enabled": false, + "enabledInEditors": false, + "extraKeybindings": [] +} +EOF +fi; + +mkdir -p ~/.jupyter/lab/user-settings/@jupyterlab/apputils-extension +cat <<EOF > ~/.jupyter/lab/user-settings/\@jupyterlab/apputils-extension/notification.jupyterlab-settings +{ + // Notifications + // @jupyterlab/apputils-extension:notification + // Notifications settings. + + // Fetch official Jupyter news + // Whether to fetch news from the Jupyter news feed. If Always (`true`), it will make a request to a website. + "fetchNews": "false", + "checkForUpdates": false +} +EOF + +# Initialize Jupyter Lab command with base configuration. +JUPYTER_ARGS=( + "--port=8888" + "--no-browser" + "--ip=0.0.0.0" + "--allow-root" + "--ServerApp.token=''" + "--ServerApp.password=''" +) + +# Note: jupyterlab-vim extension can be disabled via JupyterLab settings if needed. + +# Start Jupyter Lab with development-friendly settings. +jupyter lab "${JUPYTER_ARGS[@]}" + +# Alternative: Use classic Jupyter Notebook instead of Jupyter Lab.
+#jupyter-notebook \ +# --port=8888 \ +# --no-browser --ip=0.0.0.0 \ +# --allow-root \ +# --NotebookApp.token='' \ +# --NotebookApp.password='' diff --git a/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/utils.sh b/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/utils.sh new file mode 100644 index 000000000..fe126da15 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/utils.sh @@ -0,0 +1,183 @@ +#!/bin/bash +# """ +# Utility functions for Docker container management. +# """ + +get_docker_vars_script() { + # """ + # Load Docker variables from docker_name.sh script. + # + # :param script_path: Path to the script to determine the Docker configuration directory + # :return: Sources REPO_NAME, IMAGE_NAME, and FULL_IMAGE_NAME variables; exits if docker_name.sh is missing + # """ + local script_path=$1 + # Locate docker_name.sh next to the given script and check the file itself exists. + SCRIPT_DIR=$(dirname $script_path) + DOCKER_NAME="$SCRIPT_DIR/docker_name.sh" + if [[ ! -e $DOCKER_NAME ]]; then + echo "Can't find $DOCKER_NAME" + exit -1 + fi; + source $DOCKER_NAME +} + + +print_docker_vars() { + # """ + # Print current Docker variables to stdout. + # """ + echo "REPO_NAME=$REPO_NAME" + echo "IMAGE_NAME=$IMAGE_NAME" + echo "FULL_IMAGE_NAME=$FULL_IMAGE_NAME" +} + + +run() { + # """ + # Execute a command with echo output. + # + # :param cmd: Command string to execute + # :return: Exit status of the executed command + # """ + cmd="$*" + echo "> $cmd" + eval "$cmd" +} + + +build_container_image() { + # """ + # Build a Docker container image. + # + # Supports both single-architecture and multi-architecture builds. + # Creates temporary build directory, copies files, and builds the image. + # + # :param @: Additional options to pass to docker build/buildx build + # """ + echo "# ${FUNCNAME[0]} ..." + FULL_IMAGE_NAME=$REPO_NAME/$IMAGE_NAME + echo "FULL_IMAGE_NAME=$FULL_IMAGE_NAME" + # Prepare build area. + #tar -czh .
| docker build $OPTS -t $IMAGE_NAME - + DIR="../tmp.build" + if [[ -d $DIR ]]; then + rm -rf $DIR + fi; + cp -Lr . $DIR || true + # Build container. + echo "DOCKER_BUILDKIT=$DOCKER_BUILDKIT" + echo "DOCKER_BUILD_MULTI_ARCH=$DOCKER_BUILD_MULTI_ARCH" + if [[ $DOCKER_BUILD_MULTI_ARCH != 1 ]]; then + # Build for a single architecture. + echo "Building for current architecture..." + OPTS="--progress plain $@" + (cd $DIR; docker build $OPTS -t $FULL_IMAGE_NAME . 2>&1 | tee ../docker_build.log; exit ${PIPESTATUS[0]}) + else + # Build for multiple architectures. + echo "Building for multiple architectures..." + OPTS="$@" + export DOCKER_CLI_EXPERIMENTAL=enabled + # Create a new builder. + #docker buildx rm --all-inactive --force + #docker buildx create --name mybuilder + #docker buildx use mybuilder + # Use the default builder. + docker buildx use multiarch + docker buildx inspect --bootstrap + # Note that one needs to push to the repo since otherwise it is not + # possible to keep multiple. + (cd $DIR; docker buildx build --push --platform linux/arm64,linux/amd64 $OPTS --tag $FULL_IMAGE_NAME . 2>&1 | tee ../docker_build.log; exit ${PIPESTATUS[0]}) + # Report the status. + docker buildx imagetools inspect $FULL_IMAGE_NAME + fi; + # Report build version. + if [ -f docker_build.version.log ]; then + rm docker_build.version.log + fi + (cd $DIR; docker run --rm -it -v $(pwd):/data $FULL_IMAGE_NAME bash -c "/data/version.sh") 2>&1 | tee docker_build.version.log + # + docker image ls $REPO_NAME/$IMAGE_NAME + echo "*****************************" + echo "SUCCESS" + echo "*****************************" +} + + +remove_container_image() { + # """ + # Remove Docker container image(s) matching the current configuration. + # """ + echo "# ${FUNCNAME[0]} ..." 
+ FULL_IMAGE_NAME=$REPO_NAME/$IMAGE_NAME + echo "FULL_IMAGE_NAME=$FULL_IMAGE_NAME" + docker image ls | grep $FULL_IMAGE_NAME + docker image ls | grep $FULL_IMAGE_NAME | awk '{print $1}' | xargs -n 1 -t docker image rm -f + docker image ls + echo "${FUNCNAME[0]} ... done" +} + + +push_container_image() { + # """ + # Push Docker container image to registry. + # + # Authenticates using credentials from ~/.docker/passwd.$REPO_NAME.txt. + # """ + echo "# ${FUNCNAME[0]} ..." + FULL_IMAGE_NAME=$REPO_NAME/$IMAGE_NAME + echo "FULL_IMAGE_NAME=$FULL_IMAGE_NAME" + docker login --username $REPO_NAME --password-stdin <~/.docker/passwd.$REPO_NAME.txt + docker images $FULL_IMAGE_NAME + docker push $FULL_IMAGE_NAME + echo "${FUNCNAME[0]} ... done" +} + + +pull_container_image() { + # """ + # Pull Docker container image from registry. + # """ + echo "# ${FUNCNAME[0]} ..." + FULL_IMAGE_NAME=$REPO_NAME/$IMAGE_NAME + echo "FULL_IMAGE_NAME=$FULL_IMAGE_NAME" + docker pull $FULL_IMAGE_NAME + echo "${FUNCNAME[0]} ... done" +} + + +kill_container() { + # """ + # Kill and remove Docker container(s) matching the current configuration. + # """ + echo "# ${FUNCNAME[0]} ..." + FULL_IMAGE_NAME=$REPO_NAME/$IMAGE_NAME + echo "FULL_IMAGE_NAME=$FULL_IMAGE_NAME" + docker container ls + # + CONTAINER_ID=$(docker container ls -a | grep $FULL_IMAGE_NAME | awk '{print $1}') + echo "CONTAINER_ID=$CONTAINER_ID" + if [[ ! -z $CONTAINER_ID ]]; then + docker container rm -f $CONTAINER_ID + docker container ls + fi; + echo "${FUNCNAME[0]} ... done" +} + + +exec_container() { + # """ + # Execute bash shell in running Docker container. + # + # Opens an interactive bash session in the first container matching the + # current configuration. + # """ + echo "# ${FUNCNAME[0]} ..." 
+ FULL_IMAGE_NAME=$REPO_NAME/$IMAGE_NAME + echo "FULL_IMAGE_NAME=$FULL_IMAGE_NAME" + docker container ls + # + CONTAINER_ID=$(docker container ls -a | grep $FULL_IMAGE_NAME | awk '{print $1}') + echo "CONTAINER_ID=$CONTAINER_ID" + docker exec -it $CONTAINER_ID bash + echo "${FUNCNAME[0]} ... done" +} diff --git a/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/version.sh b/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/version.sh new file mode 100755 index 000000000..c46ed254c --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/version.sh @@ -0,0 +1,28 @@ +#!/bin/bash +# """ +# Display versions of installed tools and packages. +# +# This script prints version information for Python, pip, Jupyter, and all +# installed Python packages. Used for debugging and documentation purposes +# to verify the Docker container environment setup. +# """ + +# Display Python 3 version. +echo "# Python3" +python3 --version + +# Display pip version. +echo "# pip3" +pip3 --version + +# Display Jupyter version. +echo "# jupyter" +jupyter --version + +# List all installed Python packages and their versions. +echo "# Python packages" +pip3 list + +# Template for adding additional tool versions. 
+# echo "# mongo" +# mongod --version From b0f933fe76ab73c6736afcb311250e79794637bb Mon Sep 17 00:00:00 2001 From: Songshen Huang Date: Wed, 22 Apr 2026 18:28:29 -0400 Subject: [PATCH 6/6] Add YData-profiling tutorial notebooks and utils --- .../requirements.txt | 3 + .../src/run_profiling.py | 33 +- .../src/ydata_profiling_utils.py | 84 +++ .../ydata_profiling.API.ipynb | 575 ++++++++++++++++++ .../ydata_profiling.example.ipynb | 0 5 files changed, 675 insertions(+), 20 deletions(-) create mode 100644 class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/src/ydata_profiling_utils.py create mode 100644 class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/ydata_profiling.API.ipynb create mode 100644 class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/ydata_profiling.example.ipynb diff --git a/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/requirements.txt b/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/requirements.txt index 7520382dc..901e063fc 100644 --- a/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/requirements.txt +++ b/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/requirements.txt @@ -5,3 +5,6 @@ matplotlib numpy pandas seaborn +ydata-profiling +scikit-learn +setuptools<82 \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/src/run_profiling.py b/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/src/run_profiling.py index 83d1590ad..7254cdcf6 100644 --- a/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/src/run_profiling.py +++ b/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/src/run_profiling.py @@ 
-1,25 +1,18 @@ -import pandas as pd -from ydata_profiling import ProfileReport -from pathlib import Path +import ydata_profiling_utils as ydputi -project_root = Path(__file__).resolve().parents[1] -data_path = project_root / "data" / "baltim.csv" -output_path = project_root / "outputs" / "baltim_profile_report.html" -df = pd.read_csv(data_path) +def main() -> None: + """ + Run YData-profiling on the Baltimore housing dataset. -print("Data shape:", df.shape) -print("\nColumns:") -print(df.columns.tolist()) -print("\nFirst 5 rows:") -print(df.head()) + :return: None. + """ + df = ydputi.load_baltimore_data() + ydputi.print_basic_info(df) + profile = ydputi.create_profile_report(df) + output_path = ydputi.save_profile_report(profile) + print(f"\nReport saved to: {output_path}") -profile = ProfileReport( - df, - title="Baltimore Housing Data Profiling Report", - explorative=True -) -profile.to_file(output_path) - -print(f"\nReport saved to: {output_path}") \ No newline at end of file +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/src/ydata_profiling_utils.py b/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/src/ydata_profiling_utils.py new file mode 100644 index 000000000..c2688fdd8 --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/src/ydata_profiling_utils.py @@ -0,0 +1,84 @@ +""" +Utility functions for YData-profiling tutorial. + +Import as: + +import src.ydata_profiling_utils as ydputi +""" + +from pathlib import Path + +import pandas as pd +from ydata_profiling import ProfileReport + + +def get_project_root() -> Path: + """ + Return the project root directory. + + :return: Path to the project root. + """ + return Path(__file__).resolve().parents[1] + + +def load_baltimore_data() -> pd.DataFrame: + """ + Load the Baltimore housing dataset. 
+ + :return: Loaded pandas DataFrame. + """ + data_path = get_project_root() / "data" / "baltim.csv" + df = pd.read_csv(data_path) + return df + + +def print_basic_info(df: pd.DataFrame) -> None: + """ + Print basic dataset information. + + :param df: Input DataFrame. + :return: None. + """ + print("Data shape:", df.shape) + print("\nColumns:") + print(df.columns.tolist()) + print("\nFirst 5 rows:") + print(df.head()) + + +def create_profile_report( + df: pd.DataFrame, + title: str = "Baltimore Housing Data Profiling Report", + explorative: bool = True, +) -> ProfileReport: + """ + Create a YData-profiling report. + + :param df: Input DataFrame. + :param title: Report title. + :param explorative: Whether to use explorative mode. + :return: ProfileReport object. + """ + profile = ProfileReport( + df, + title=title, + explorative=explorative, + ) + return profile + + +def save_profile_report( + profile: ProfileReport, + output_filename: str = "baltim_profile_report.html", +) -> Path: + """ + Save the profile report to the outputs directory. + + :param profile: ProfileReport object. + :param output_filename: Output HTML file name. + :return: Path to the saved report.
+ """ + output_path = get_project_root() / "outputs" / output_filename + output_path.parent.mkdir(parents=True, exist_ok=True) + profile.to_file(output_path) + return output_path \ No newline at end of file diff --git a/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/ydata_profiling.API.ipynb b/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/ydata_profiling.API.ipynb new file mode 100644 index 000000000..22e74c30d --- /dev/null +++ b/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/ydata_profiling.API.ipynb @@ -0,0 +1,575 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "81c466e4", + "metadata": {}, + "source": [ + "# YData-profiling API Overview\n", + "\n", + "This notebook introduces the core API of YData-profiling.\n", + "It shows how to create a profiling report from a pandas DataFrame\n", + "and how to export the report as an HTML file." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6bdd6d73", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from ydata_profiling import ProfileReport" + ] + }, + { + "cell_type": "markdown", + "id": "3ef86225", + "metadata": {}, + "source": [ + "## 1. Create a simple DataFrame\n", + "\n", + "We start with a small synthetic dataset.\n", + "This makes it easy to understand the basic workflow of YData-profiling.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "b944d523", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ageincomeowns_house
023500000
125520001
231610001
340730001
429580000
\n", + "
" + ], + "text/plain": [ + " age income owns_house\n", + "0 23 50000 0\n", + "1 25 52000 1\n", + "2 31 61000 1\n", + "3 40 73000 1\n", + "4 29 58000 0" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_demo = pd.DataFrame(\n", + " {\n", + " \"age\": [23, 25, 31, 40, 29],\n", + " \"income\": [50000, 52000, 61000, 73000, 58000],\n", + " \"owns_house\": [0, 1, 1, 1, 0],\n", + " }\n", + ")\n", + "\n", + "df_demo" + ] + }, + { + "cell_type": "markdown", + "id": "42a25a3d", + "metadata": {}, + "source": [ + "## 2. Generate a profiling report\n", + "\n", + "The `ProfileReport` function creates a summary of the dataset.\n", + "It includes variable types, descriptive statistics, missing values,\n", + "and correlations." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "1d681cc8", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "89e49eccdf3a41778f02213df15a4085", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Summarize dataset: 0%| | 0/5 [00:00" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "profile_demo = ProfileReport(\n", + " df_demo,\n", + " title=\"Simple YData-profiling Demo\",\n", + " explorative=True,\n", + ")\n", + "\n", + "profile_demo" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv (3.12.3)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git 
a/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/ydata_profiling.example.ipynb b/class_project/data605/Spring2026/projects/UmdTask391_DATA605_Spring2026_YData_profiling/ydata_profiling.example.ipynb new file mode 100644 index 000000000..e69de29bb