-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathtransform_workbook.py
More file actions
116 lines (95 loc) · 4.25 KB
/
transform_workbook.py
File metadata and controls
116 lines (95 loc) · 4.25 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import pandas as pd
import os
import sys
import warnings
from openpyxl import Workbook
from openpyxl.utils.dataframe import dataframe_to_rows
# Filter the specific warning about Data Validation
warnings.filterwarnings('ignore', category=UserWarning,
message='Data Validation extension is not supported and will be removed')
def transform_workbook(input_filename):
# Create output filename
filename, ext = os.path.splitext(input_filename)
output_filename = f"{filename}-new{ext}"
try:
# Read the Definitions sheet with all Excel features enabled
df_definitions = pd.read_excel(
input_filename,
sheet_name='Definitions',
engine='openpyxl',
keep_default_na=True
)
# Create empty lists for new rows
new_rows = []
# Process each row in the Definitions sheet
for _, row in df_definitions.iterrows():
# Create cobbles row (c)
cobbles_row = row.copy()
cobbles_row['ID'] = row['ID'] + 'c'
if 'with' in str(row['Name']).lower():
cobbles_row['Name'] = row['Name'] + ' and cobbles'
else:
cobbles_row['Name'] = row['Name'] + ' with cobbles'
desc = str(row['Description'])
if not desc.endswith('.'):
desc += '.'
cobbles_row['Description'] = desc + ' Field sample contains visible cobbles.'
# Create cobbles and boulders row (cb)
cb_row = row.copy()
cb_row['ID'] = row['ID'] + 'cb'
if 'with' in str(row['Name']).lower():
cb_row['Name'] = row['Name'] + ', cobbles and boulders'
else:
cb_row['Name'] = row['Name'] + ' with cobbles and boulders'
desc = str(row['Description'])
if not desc.endswith('.'):
desc += '.'
cb_row['Description'] = desc + ' Field sample contains visible cobbles and boulders.'
# Create boulders row (b)
boulders_row = row.copy()
boulders_row['ID'] = row['ID'] + 'b'
if 'with' in str(row['Name']).lower():
boulders_row['Name'] = row['Name'] + ' and boulders'
else:
boulders_row['Name'] = row['Name'] + ' with boulders'
desc = str(row['Description'])
if not desc.endswith('.'):
desc += '.'
boulders_row['Description'] = desc + ' Field sample contains visible boulders.'
# Add all new rows
new_rows.extend([cobbles_row, cb_row, boulders_row])
# Create new DataFrame with all rows
new_df_definitions = pd.DataFrame(new_rows)
# Create AssociatedElements for new definitions
associated_elements = []
for _, row in new_df_definitions.iterrows():
associated_elements.append({
'Start': row['Start'],
'ID': row['ID'],
'SourceElement': '//diggs:Lithology/diggs:classificationCode',
'ConditionalElement': ''
})
new_df_associated = pd.DataFrame(associated_elements)
# Create a new Excel writer object with simplified settings
with pd.ExcelWriter(
output_filename,
engine='openpyxl',
mode='w'
) as writer:
# Write the new Definitions sheet
new_df_definitions.to_excel(writer, sheet_name='Definitions', index=False)
# Write the new AssociatedElements sheet
new_df_associated.to_excel(writer, sheet_name='AssociatedElements', index=False)
print(f"New workbook created: {output_filename}")
except Exception as e:
print(f"Error processing workbook: {str(e)}")
sys.exit(1)
if __name__ == "__main__":
if len(sys.argv) != 2:
print("Usage: python3 transform_workbook.py input_filename.xlsx")
sys.exit(1)
input_file = sys.argv[1]
if not os.path.exists(input_file):
print(f"Error: File '{input_file}' not found")
sys.exit(1)
transform_workbook(input_file)