diff --git a/packages/document-issue-data/README.md b/packages/document-issue-data/README.md index 384ce5e5..a5205fba 100644 --- a/packages/document-issue-data/README.md +++ b/packages/document-issue-data/README.md @@ -13,6 +13,7 @@ pixi run mnt-jdrive pixi run get-data # gets raw doc issue data pixi run pull-data # get previously found data pixi run find-files +pixi run find-latest-add-descriptions # find latest files and add descriptions pixi run push-data # push found files back to jobs folder pixi run cp-to-dashboard # copy data to dashboard directory diff --git a/packages/document-issue-data/scripts/_load_dng_data.py b/packages/document-issue-data/scripts/_load_dng_data.py index 1295fb01..808f940b 100644 --- a/packages/document-issue-data/scripts/_load_dng_data.py +++ b/packages/document-issue-data/scripts/_load_dng_data.py @@ -2,7 +2,7 @@ import glob import pandas as pd -FDIR_RAW = pathlib.Path(__file__).parent / "data-raw" / "config" +FDIR_RAW = pathlib.Path(__file__).parent / "data-raw" def get_docs(): @@ -54,7 +54,6 @@ def get_issues(): # Append the dataframe to the list dataframes.append(df) - # Concatenate all dataframes into a single dataframe df_issues = pd.concat(dataframes, ignore_index=True) diff --git a/packages/document-issue-data/scripts/find_latest_add_descriptions.py b/packages/document-issue-data/scripts/find_latest_add_descriptions.py index 567be9e2..b228fac1 100644 --- a/packages/document-issue-data/scripts/find_latest_add_descriptions.py +++ b/packages/document-issue-data/scripts/find_latest_add_descriptions.py @@ -5,6 +5,7 @@ from _load_found_files import FDIR_FIND_FILES, load_found_files FDIR_PROCESSED_1 = pathlib.Path(__file__).parent / "data-processed-1" +FDIR_PROCESSED_1.mkdir(exist_ok=True, parents=True) MAP_COLS_DOCS = { "project_number": "project", "document_code": "document-code", @@ -92,35 +93,32 @@ def merge_and_format(df_docs, df_paths): return df +df_docs = get_docs()[COLS_DOCS] +found, missing = load_found_files(list(FDIR_FIND_FILES.glob("found_files*.txt"))) -if __name__ == "__main__": - df_docs = get_docs()[COLS_DOCS] +# latest +df_latest = get_latest(found | missing).merge(df_docs, on="document_code", how="left") +df_latest.to_csv(FDIR_PROCESSED_1 / "latest.csv", index=False) - found, missing = load_found_files(list(FDIR_FIND_FILES.glob("found_files*.txt"))) +# latest found +df_paths = get_latest(found) +df_latest_found = merge_and_format(df_docs, df_paths) - # latest - df_latest = get_latest(found | missing).merge(df_docs, on="document_code", how="left") - df_latest.to_csv(FDIR_PROCESSED_1 / "latest.csv", index=False) +fpth_tmp_latest_found = pathlib.Path(__file__).parent.parent / "dashboard" / "src" / "data" / "tmp" / "latest_found.csv" +fpth_tmp_latest_found.parent.mkdir(exist_ok=True, parents=True) +df_latest_found.to_csv(FDIR_PROCESSED_1 / "latest_found.csv", index=False) +df_latest_found.to_csv(pathlib.Path(__file__).parent.parent / "dashboard" / "src" / "data" / "latest_found.csv", index=False) +df_latest_found.loc[0:10].to_csv(fpth_tmp_latest_found, index=False) - # latest found - df_paths = get_latest(found) - df_latest_found = merge_and_format(df_docs, df_paths) - +# schematics +df_latest_found_schematics = df_latest_found.query("`drawing-type` == 'Schematic'") +df_latest_found_schematics.to_csv(FDIR_PROCESSED_1 / "latest_found_schematics.csv", index=False) +df_latest_found_schematics.to_csv(pathlib.Path(__file__).parent.parent / "dashboard" / "src" / "data" / "latest_found_schematics.csv", index=False) - df_latest_found.to_csv(FDIR_PROCESSED_1 / "latest_found.csv", index=False) - df_latest_found.to_csv(pathlib.Path(__file__).parent.parent / "dashboard" / "src" / "data" / "latest_found.csv", index=False) - df_latest_found.loc[0:10].to_csv(pathlib.Path(__file__).parent.parent / "dashboard" / "src" / "data" / "tmp" / "latest_found.csv", index=False) - - # schematics - df_latest_found_schematics = df_latest_found.query("`drawing-type` == 'Schematic'") - df_latest_found_schematics.to_csv(FDIR_PROCESSED_1 / "latest_found_schematics.csv", index=False) - df_latest_found_schematics.to_csv(pathlib.Path(__file__).parent.parent / "dashboard" / "src" / "data" / "latest_found_schematics.csv", index=False) - - - # details - df_latest_found_details = df_latest_found.query("`drawing-type` == 'Detail'") - df_latest_found_details.to_csv(FDIR_PROCESSED_1 / "latest_found_details.csv", index=False) - df_latest_found_details.to_csv(pathlib.Path(__file__).parent.parent / "dashboard" / "src" / "data" / "latest_found_details.csv", index=False) - print("done") +# details +df_latest_found_details = df_latest_found.query("`drawing-type` == 'Detail'") +df_latest_found_details.to_csv(FDIR_PROCESSED_1 / "latest_found_details.csv", index=False) +df_latest_found_details.to_csv(pathlib.Path(__file__).parent.parent / "dashboard" / "src" / "data" / "latest_found_details.csv", index=False) +print("done") diff --git a/packages/document-issue-data/scripts/timestamp-get-raw.txt b/packages/document-issue-data/scripts/timestamp-get-raw.txt index 291641cd..bcd069b8 100644 --- a/packages/document-issue-data/scripts/timestamp-get-raw.txt +++ b/packages/document-issue-data/scripts/timestamp-get-raw.txt @@ -1 +1 @@ -Wed Feb 12 12:12:08 UTC 2025 +Thu May 29 12:49:46 UTC 2025 diff --git a/pixi.toml b/pixi.toml index 35aea88b..4257ea7f 100644 --- a/pixi.toml +++ b/pixi.toml @@ -55,7 +55,7 @@ dashboard-preview = { cmd = "npm run dev", cwd = "packages/document-issue-data/d test-document-issue-quarto = { cmd = "pytest", cwd = "packages/document-issue-quarto" } test-document-issue-io = { cmd = "pytest", cwd = "packages/document-issue-io" } test-document-issue-api = { cmd = "pytest", cwd = "packages/document-issue-api", env = { SQLALCHEMY_WARN_20 = "1" } } -test-bep = { cmd = "pytest", cwd = "packages/bep"} +test-bep = { cmd = "pytest", cwd = "packages/bep" } update-examples = { cmd = "pytest --update-examples", cwd = "packages/document-issue-io" } update-quarto-extensions = "tar -czvf packages/document-issue-io/src/document_issue_io/templates/document-issue-quarto.tar.gz -C packages/document-issue-quarto _extensions" build-docs = { cmd = "quarto render .", cwd = "docs" } #, depends-on = ["test-document-issue-io"] @@ -75,14 +75,16 @@ tests = { depends-on = [ "test-document-issue-api", ] } lint = "ruff check --fix" -format = "ruff format" -mk-jdrive = { cmd = "mkdir -p /home/jovyan/jobs"} -chmod-scripts = { cmd = "chmod +x *.sh", cwd = "SECRETS"} -mnt-jdrive = { cmd = "./mnt-jdrive.sh", depends-on = ["mk-jdrive"], cwd = "SECRETS"} -get-data = {cmd = "./get-data.sh", cwd = "SECRETS"} -push-data = {cmd = "./push-data.sh", cwd = "SECRETS"} -pull-data = {cmd = "./pull-data.sh", cwd = "SECRETS"} -cp-to-dashboard ={ cmd = "./cp-to-dashboard.sh", cwd = "SECRETS"} -find-files = { cmd = "python find_files.py", cwd = "packages/document-issue-data/scripts"} -get-project-roles = { cmd = "python get-project-roles.py", cwd = "packages/bep/scripts"} - +format = "ruff format" +mk-jdrive = { cmd = "mkdir -p /home/jovyan/jobs" } +chmod-scripts = { cmd = "chmod +x *.sh", cwd = "SECRETS" } +mnt-jdrive = { cmd = "./mnt-jdrive.sh", depends-on = [ + "mk-jdrive", +], cwd = "SECRETS" } +get-data = { cmd = "./get-data.sh", cwd = "SECRETS" } +push-data = { cmd = "./push-data.sh", cwd = "SECRETS" } +pull-data = { cmd = "./pull-data.sh", cwd = "SECRETS" } +cp-to-dashboard = { cmd = "./cp-to-dashboard.sh", cwd = "SECRETS" } +find-files = { cmd = "python find_files.py", cwd = "packages/document-issue-data/scripts" } +find-latest-add-descriptions = { cmd = "python find_latest_add_descriptions.py", cwd = "packages/document-issue-data/scripts" } +get-project-roles = { cmd = "python get-project-roles.py", cwd = "packages/bep/scripts" }