diggs-schema-processor/transform_workbook.py at main · DIGGSml/diggs-schema-processor · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import pandas as pd
import os
import sys
import warnings
from openpyxl import Workbook
from openpyxl.utils.dataframe import dataframe_to_rows

# Filter the specific warning about Data Validation
warnings.filterwarnings('ignore', category=UserWarning,
                       message='Data Validation extension is not supported and will be removed')

def transform_workbook(input_filename):
    # Create output filename
    filename, ext = os.path.splitext(input_filename)
    output_filename = f"{filename}-new{ext}"

    try:
        # Read the Definitions sheet with all Excel features enabled
        df_definitions = pd.read_excel(
            input_filename,
            sheet_name='Definitions',
            engine='openpyxl',
            keep_default_na=True
        )

        # Create empty lists for new rows
        new_rows = []

        # Process each row in the Definitions sheet
        for _, row in df_definitions.iterrows():
            # Create cobbles row (c)
            cobbles_row = row.copy()
            cobbles_row['ID'] = row['ID'] + 'c'
            if 'with' in str(row['Name']).lower():
                cobbles_row['Name'] = row['Name'] + ' and cobbles'
            else:
                cobbles_row['Name'] = row['Name'] + ' with cobbles'

            desc = str(row['Description'])
            if not desc.endswith('.'):
                desc += '.'
            cobbles_row['Description'] = desc + ' Field sample contains visible cobbles.'

            # Create cobbles and boulders row (cb)
            cb_row = row.copy()
            cb_row['ID'] = row['ID'] + 'cb'
            if 'with' in str(row['Name']).lower():
                cb_row['Name'] = row['Name'] + ', cobbles and boulders'
            else:
                cb_row['Name'] = row['Name'] + ' with cobbles and boulders'

            desc = str(row['Description'])
            if not desc.endswith('.'):
                desc += '.'
            cb_row['Description'] = desc + ' Field sample contains visible cobbles and boulders.'

            # Create boulders row (b)
            boulders_row = row.copy()
            boulders_row['ID'] = row['ID'] + 'b'
            if 'with' in str(row['Name']).lower():
                boulders_row['Name'] = row['Name'] + ' and boulders'
            else:
                boulders_row['Name'] = row['Name'] + ' with boulders'

            desc = str(row['Description'])
            if not desc.endswith('.'):
                desc += '.'
            boulders_row['Description'] = desc + ' Field sample contains visible boulders.'

            # Add all new rows
            new_rows.extend([cobbles_row, cb_row, boulders_row])

        # Create new DataFrame with all rows
        new_df_definitions = pd.DataFrame(new_rows)

        # Create AssociatedElements for new definitions
        associated_elements = []
        for _, row in new_df_definitions.iterrows():
            associated_elements.append({
                'Start': row['Start'],
                'ID': row['ID'],
                'SourceElement': '//diggs:Lithology/diggs:classificationCode',
                'ConditionalElement': ''
            })

        new_df_associated = pd.DataFrame(associated_elements)

        # Create a new Excel writer object with simplified settings
        with pd.ExcelWriter(
            output_filename,
            engine='openpyxl',
            mode='w'
        ) as writer:
            # Write the new Definitions sheet
            new_df_definitions.to_excel(writer, sheet_name='Definitions', index=False)

            # Write the new AssociatedElements sheet
            new_df_associated.to_excel(writer, sheet_name='AssociatedElements', index=False)

        print(f"New workbook created: {output_filename}")

    except Exception as e:
        print(f"Error processing workbook: {str(e)}")
        sys.exit(1)

if __name__ == "__main__":
    if len(sys.argv) != 2:
        print("Usage: python3 transform_workbook.py input_filename.xlsx")
        sys.exit(1)

    input_file = sys.argv[1]
    if not os.path.exists(input_file):
        print(f"Error: File '{input_file}' not found")
        sys.exit(1)

    transform_workbook(input_file)