From 773844e874fd97064b418e1d915c6814acb19b7d Mon Sep 17 00:00:00 2001 From: Andrew Chiu Date: Fri, 11 Oct 2024 15:25:29 -0400 Subject: [PATCH 1/6] Working Update --- backend/database_main.py | 48 +- backend/plandb_methods.py | 4 +- backend/update_db.py | 412 +++++++++++++++++ backend/update_plandb.py | 356 ++++++++------ backend/update_plandb_main.py | 245 ++++++++++ backend/update_util.py | 744 ++++++++++++++++++++++++++++-- backend/update_util_test_suite.py | 277 +++++++++++ 7 files changed, 1881 insertions(+), 205 deletions(-) create mode 100644 backend/update_db.py create mode 100644 backend/update_plandb_main.py create mode 100644 backend/update_util_test_suite.py diff --git a/backend/database_main.py b/backend/database_main.py index 01e2032..2cf1441 100644 --- a/backend/database_main.py +++ b/backend/database_main.py @@ -5,9 +5,10 @@ from sqlalchemy import create_engine from tqdm import tqdm from plandb_methods import * +from update_util import * def compileContrastCurves(stars, cont_path): - scenarios = pd.read_csv('cache/scenario_angles.csv')['scenario_name'] + scenarios = pd.read_csv('plandb.sioslab.com/cache/scenario_angles.csv')['scenario_name'] cols = ['scenario_name', 'st_id', 'r_lamD', 'r_as', 'r_mas', 'contrast', 'dMag', 'lam', 't_int_hr', 'fpp'] contrastCurves = pd.DataFrame([], columns = cols) @@ -31,12 +32,12 @@ def compileContrastCurves(stars, cont_path): else: indCC["scenario_name"] = scen indCC["st_id"] = list(stars.index[(stars['st_name'] == st_name)])[0] - contrastCurves = contrastCurves.append(indCC, ignore_index=True) + contrastCurves = contrastCurves._append(indCC, ignore_index=True) return contrastCurves def compileCompleteness(): datestr = Time.now().datetime.strftime("%Y-%m") - comp_data = pd.read_pickle(f"cache/comps_data_{datestr}.p") + comp_data = pd.read_pickle(f"plandb.sioslab.com/cache/comps_data_{datestr}.p") col_names = comp_data.columns.values.tolist() scenario_names = [] for x in col_names: @@ -56,29 +57,30 @@ def 
compileCompleteness(): row['compMaxdMag_' + scenario_name]]) singleRow = pd.DataFrame(newRows, columns = ['pl_id', 'completeness', 'scenario_name', 'compMinWA', 'compMaxWA', 'compMindMag', 'compMaxdMag']) - completeness = completeness.append(singleRow, ignore_index=True) + completeness = completeness._append(singleRow, ignore_index=True) # print(completeness) return completeness if __name__ == "__main__": - datestr = Time.now().datetime.strftime("%Y-%m") - plandata_path = Path(f'cache/plandata_{datestr}.p') + datestr = Time.now().datetime.strftime("2022-05") + plandata_path = Path(f'plandb.sioslab.com/cache/plandata_{datestr}.p') planets = pd.read_pickle(plandata_path) - stdata_path = Path(f'cache/stdata_{datestr}.p') + stdata_path = Path(f'plandb.sioslab.com/cache/stdata_{datestr}.p') stars = pd.read_pickle(stdata_path) - orbfits_path = Path(f'cache/table_orbitfits_{datestr}.p') + orbfits_path = Path(f'plandb.sioslab.com/cache/table_orbitfits_{datestr}.p') orbitfits = pd.read_pickle(orbfits_path) - orbdata_path = Path(f'cache/ephemeris_orbdata_{datestr}.p') + orbdata_path = Path(f'plandb.sioslab.com/cache/ephemeris_orbdata_{datestr}.p') orbits = pd.read_pickle(orbdata_path) - comps_path = Path(f'cache/comps_{datestr}.p') + comps_path = Path(f'plandb.sioslab.com/cache/comps_{datestr}.p') pdfs = pd.read_pickle(comps_path) + print(pdfs) - compiled_contr_curvs_path = Path(f'cache/compiled_cont_curvs_{datestr}.p') - contr_curvs_path = Path(f'cache/cont_curvs_{datestr.replace("-", "_")}') + compiled_contr_curvs_path = Path(f'plandb.sioslab.com/cache/compiled_cont_curvs_{datestr}.p') + contr_curvs_path = Path(f'plandb.sioslab.com/cache/cont_curvs_{datestr.replace("-", "_")}') if compiled_contr_curvs_path.exists(): contrast_curves = pd.read_pickle(compiled_contr_curvs_path) @@ -90,17 +92,27 @@ def addId(r): return r newpdfs = pdfs.apply(addId, axis = 1) - scenarios = pd.read_csv("cache/scenario_angles.csv") - compiled_completeness_path = 
Path(f"cache/compiled_completeness_{datestr}.p") + scenarios = pd.read_csv("plandb.sioslab.com/cache/scenario_angles.csv") + + compiled_completeness_path = Path(f"plandb.sioslab.com/cache/compiled_completeness_{datestr}.p") if compiled_completeness_path.exists(): completeness = pd.read_pickle(compiled_completeness_path) else: completeness = compileCompleteness() completeness.to_pickle(compiled_completeness_path) - passwd = input("db password: ") - username = 'plandb_admin' + # passwd = input("db password: ") + # username = 'plandb_admin' - engine = create_engine('mysql+pymysql://'+username+':'+passwd+'@localhost/plandb',echo=False) + engine = create_engine('mysql+pymysql://'+"andrewchiu"+':'+"Password123!"+'@localhost/testSios',echo=True) + #pool_pre_ping=True for remote - writeSQL(engine, plandata=planets, stdata=stars, orbitfits=orbitfits, orbdata=orbits, pdfs=newpdfs, aliases=None,contrastCurves=contrast_curves,scenarios=scenarios, completeness=completeness) + planets.to_excel("main_planets.xlsx") + stars.to_excel("main_stars.xlsx") + # orbitfits.to_excel("main_orbitfits") + # orbits.to_excel('main_orbits.xlsx') + contrast_curves.to_excel('main_contrast.xlsx') + completeness.to_excel('main_completeness.xlsx') + + + final_writeSQL(engine, plandata=planets, stdata=stars, orbitfits=orbitfits, orbdata=orbits, pdfs=newpdfs, aliases=None,contrastCurves=contrast_curves,scenarios=scenarios, completeness=completeness) diff --git a/backend/plandb_methods.py b/backend/plandb_methods.py index b5ffc44..d347026 100644 --- a/backend/plandb_methods.py +++ b/backend/plandb_methods.py @@ -35,7 +35,7 @@ from kep_generator import planet try: - from StringIO import StringIO + from StringIO import StringIO # type: ignore except ImportError: from io import BytesIO as StringIO @@ -2067,7 +2067,7 @@ def writeSQL(engine, plandata=None, stdata=None, orbitfits=None, orbdata=None, p #set indexes result = connection.execute(text("ALTER TABLE Planets ADD INDEX (pl_id)")) result = 
connection.execute(text("ALTER TABLE Planets ADD INDEX (st_id)")) - result = connection.execute(text("ALTER TABLE Planets ADD FOREIGN KEY (st_id) REFERENCES Stars(st_id) ON DELETE NO ACTION ON UPDATE NO ACTION")); + # result = connection.execute(text("ALTER TABLE Planets ADD FOREIGN KEY (st_id) REFERENCES Stars(st_id) ON DELETE NO ACTION ON UPDATE NO ACTION")); #add comments # addSQLcomments(connection,'Planets') diff --git a/backend/update_db.py b/backend/update_db.py new file mode 100644 index 0000000..6daa42c --- /dev/null +++ b/backend/update_db.py @@ -0,0 +1,412 @@ +import pandas as pd +from sqlalchemy import create_engine +import pymysql +from sqlalchemy import text +from plandb_methods import * +from database_main import * + +# Not working, don't use this as this merges the unmade tables with the database + +columns = ['pl_name', 'pl_letter', 'pl_refname', 'pl_orbper', 'pl_orbpererr1', 'pl_orbpererr2', 'pl_orbperlim', 'pl_orbperstr', 'pl_orblpererr1', 'pl_orblper', 'pl_orblpererr2', 'pl_orblperlim', 'pl_orblperstr', 'pl_orbsmax', 'pl_orbsmaxerr1', 'pl_orbsmaxerr2', 'pl_orbsmaxlim', 'pl_orbsmaxstr', 'pl_orbincl', 'pl_orbinclerr1', 'pl_orbinclerr2', 'pl_orbincllim', 'pl_orbinclstr', 'pl_orbtper', 'pl_orbtpererr1', 'pl_orbtpererr2', 'pl_orbtperlim', 'pl_orbtperstr', 'pl_orbeccen', 'pl_orbeccenerr1', 'pl_orbeccenerr2', 'pl_orbeccenlim', 'pl_orbeccenstr', 'pl_eqt', 'pl_eqterr1', 'pl_eqterr2', 'pl_eqtlim', 'pl_eqtstr', 'pl_occdep', 'pl_occdeperr1', 'pl_occdeperr2', 'pl_occdeplim', 'pl_occdepstr', 'pl_insol', 'pl_insolerr1', 'pl_insolerr2', 'pl_insollim', 'pl_insolstr', 'pl_dens', 'pl_denserr1', 'pl_denserr2', 'pl_denslim', 'pl_densstr', 'pl_trandep', 'pl_trandeperr1', 'pl_trandeperr2', 'pl_trandeplim', 'pl_trandepstr', 'pl_tranmid', 'pl_tranmiderr1', 'pl_tranmiderr2', 'pl_tranmidlim', 'pl_tranmidstr', 'pl_trandur', 'pl_trandurerr1', 'pl_trandurerr2', 'pl_trandurlim', 'pl_trandurstr', 'pl_controv_flag', 'pl_tsystemref', 'pl_projobliq', 'pl_projobliqerr1', 
'pl_projobliqerr2', 'pl_projobliqlim', 'pl_projobliqstr', 'pl_rvamp', 'pl_rvamperr1', 'pl_rvamperr2', 'pl_rvamplim', 'pl_rvampstr', 'pl_radj', 'pl_radjerr1', 'pl_radjerr2', 'pl_radjlim', 'pl_radjstr', 'pl_radestr', 'pl_ratror', 'pl_ratrorerr1', 'pl_ratrorerr2', 'pl_ratrorlim', 'pl_ratrorstr', 'pl_ratdor', 'pl_trueobliq', 'pl_trueobliqerr1', 'pl_trueobliqerr2', 'pl_trueobliqlim', 'pl_trueobliqstr', 'pl_pubdate', 'pl_ratdorerr1', 'pl_ratdorerr2', 'pl_ratdorlim', 'pl_ratdorstr', 'pl_imppar', 'pl_impparerr1', 'pl_impparerr2', 'pl_impparlim', 'pl_impparstr', 'pl_cmassj', 'pl_cmassjerr1', 'pl_cmassjerr2', 'pl_cmassjlim', 'pl_cmassjstr', 'pl_cmasse', 'pl_cmasseerr1', 'pl_cmasseerr2', 'pl_cmasselim', 'pl_cmassestr', 'pl_massj', 'pl_massjerr1', 'pl_massjerr2', 'pl_massjlim', 'pl_massjstr', 'pl_massestr', 'pl_bmassj', 'pl_bmassjerr1', 'pl_bmassjerr2', 'pl_bmassjlim', 'pl_bmassjstr', 'pl_bmasse', 'pl_bmasseerr1', 'pl_bmasseerr2', 'pl_bmasselim', 'pl_bmassestr', 'pl_bmassprov', 'pl_msinij', 'pl_msinijerr1', 'pl_msinijerr2', 'pl_msinijlim', 'pl_msinijstr', 'pl_msiniestr', 'pl_nespec', 'pl_ntranspec', 'pl_nnotes', 'pl_def_override', 'pl_calc_sma', 'pl_angsep', 'pl_angseperr1', 'pl_angseperr2', 'pl_radj_forecastermod', 'pl_radj_forecastermoderr1', 'pl_radj_forecastermoderr2', 'pl_radj_fortney', 'pl_radj_fortneyerr1', 'pl_radj_fortneyerr2', 'pl_maxangsep', 'pl_minangsep', 'disc_year', 'disc_refname', 'discoverymethod', 'disc_locale', 'ima_flag', 'disc_instrument', 'disc_telescope', 'disc_facility', 'rv_flag'] +cache = True +datestr = Time.now().datetime.strftime("%Y-%m") + + + +# TODO fix different rows, so that diff rows is base on actual columns to compare, compare the switched values +#look into comparing values more in depth +def get_different_rows(df1, df2): + df_combined = pd.merge(df1, df2, indicator=True, how='outer') + different_rows = df_combined[df_combined['_merge'] != 'both'] + different_rows = different_rows.drop(columns=['_merge']) + + return different_rows + 
+password = input("Password:") +engine = create_engine('mysql+pymysql://'+"andrewchiu"+':'+password+'@localhost/testSios',echo=True) + +with engine.connect() as connection: + + sql = text("SELECT * FROM Planets") + results = connection.execute(sql) + df = pd.DataFrame(results.fetchall(), columns = results.keys()) + + + + data_path = Path(f'cache/data_cache_{datestr}.p') + if cache: + Path('cache/').mkdir(parents=True, exist_ok=True) + if data_path.exists(): + with open(data_path, 'rb') as f: + ipac_data = pickle.load(f) + else: + ipac_data = getIPACdata() + with open(data_path, 'wb') as f: + pickle.dump(data, f) + else: + ipac_data = getIPACdata() + + + changed_rows = [] + + for index, row in df.iterrows(): + name = row['pl_name'] + print(name) + + filter = ipac_data.query(f'pl_name == "{name}"') + filtered_planet = filter.loc(1) + + for col_name in columns: + + if filtered_planet[col_name].count() > 0: + ipac_col = filtered_planet[col_name].values[0] + sios_col = row[col_name] + + print(f"{name} ipac maxangsep {ipac_col}") + print(f"sios maxangsep {sios_col}") + + if ipac_col != None: + if (ipac_col != sios_col): + print(f"different") + changed_rows.append(row["pl_name"]) + else: + changed_rows.append(row['pl_name']) + + set_changed_row = list(set(changed_rows)) + print(f"Changed rows: {set_changed_row}") + + print(set_changed_row) + + filtered_ipac = ipac_data[ipac_data['pl_name'].isin(set_changed_row)] + + filtered_ipac.to_csv('output.csv', index=True) + + + + + #successfully have all planets (set of all different planets) + # now update recalc + # start with + # photo data good + # band info good + # orb data do now + + #orb data + + + with open('debug.txt', 'w') as file: + + file.write("loading photometry data\n") + + photdict_path = Path(f'cache/update_photdict_2022-05.p') + if cache: + Path('cache/').mkdir(parents=True, exist_ok=True) + if photdict_path.exists(): + with open(photdict_path, 'rb') as f: + photdict = pickle.load(f) + else: + photdict = 
loadPhotometryData(infile="plandb.sioslab.com/allphotdata_2015.npz") + with open(photdict_path, 'wb') as f: + pickle.dump(photdict, f) + else: + photdict = loadPhotometryData(infile="plandb.sioslab.com/allphotdata_2015.npz") + + file.write("Bandzip\n") + bandzip_path = Path(f'cache/update_bandzip_{datestr}.p') + if cache: + Path('cache/').mkdir(parents=True, exist_ok=True) + if bandzip_path.exists(): + with open(bandzip_path, 'rb') as f: + bandzip = pickle.load(f) + else: + bandzip = list(genBands()) + with open(bandzip_path, 'wb') as f: + pickle.dump(bandzip, f) + else: + bandzip = list(genBands()) + + + file.write("orbdata/orbfits\n") + orbdata_path = Path(f'cache/update_orbdata_{datestr}.p') + orbfits_path = Path(f'cache/update_orbfits_{datestr}.p') + if cache: + Path('cache/').mkdir(parents=True, exist_ok=True) + if orbdata_path.exists(): + with open(orbdata_path, 'rb') as f: + orbdata = pickle.load(f) + if orbfits_path.exists(): + with open(orbfits_path, 'rb') as f: + orbitfits = pickle.load(f) + else: + orbdata, orbitfits = genOrbitData(filtered_ipac, bandzip, photdict) + with open(orbdata_path, 'wb') as f: + pickle.dump(orbdata, f) + with open(orbfits_path, 'wb') as f: + pickle.dump(orbitfits, f) + else: + orbdata, orbitfits = genOrbitData(filtered_ipac, bandzip, photdict) + + file.write(f"orbdata: {orbdata}\n") + file.write(f"orbfits: {orbitfits}\n") + + + + file.write("ephemeris orbitfits/orbdata") + ephemeris_orbdata_path = Path(f'cache/update_ephemeris_orbdata_{datestr}.p') + ephemeris_orbfits_path = Path(f'cache/update_ephemeris_orbfits_{datestr}.p') + if cache: + Path('cache/').mkdir(parents=True, exist_ok=True) + if ephemeris_orbdata_path.exists(): + with open(ephemeris_orbdata_path, 'rb') as f: + ephemeris_orbdata = pickle.load(f) + if ephemeris_orbfits_path.exists(): + with open(ephemeris_orbfits_path, 'rb') as f: + ephemeris_orbitfits = pickle.load(f) + else: + ephemeris_orbitfits, ephemeris_orbdata = addEphemeris(filtered_ipac, orbitfits, orbdata, 
bandzip, photdict) + with open(ephemeris_orbdata_path, 'wb') as f: + pickle.dump(ephemeris_orbdata, f) + with open(ephemeris_orbfits_path, 'wb') as f: + pickle.dump(ephemeris_orbitfits, f) + else: + ephemeris_orbitfits, ephemeris_orbdata = addEphemeris(filtered_ipac, orbitfits, orbdata, bandzip, photdict) + file.write(f"ephemeris orbitfits: {ephemeris_orbitfits}\n") + file.write(f"ephemeris orbfits: {ephemeris_orbdata}\n") + + + + file.write("quadrature data") + quadrature_data_path = Path(f'cache/update_quadrature_data_{datestr}.p') + if cache: + Path('cache/').mkdir(parents=True, exist_ok=True) + if quadrature_data_path.exists(): + with open(quadrature_data_path, 'rb') as f: + quadrature_data = pickle.load(f) + else: + quadrature_data = calcQuadratureVals(ephemeris_orbitfits, bandzip, photdict) + with open(quadrature_data_path, 'wb') as f: + pickle.dump(quadrature_data, f) + else: + quadrature_data = calcQuadratureVals(ephemeris_orbitfits, bandzip, photdict) + file.write(f"quadrature data: {quadrature_data}\n") + + + file.write("contr data") + contr_data_path = Path(f'cache/update_contr_data_{datestr}.p') + exosims_json = 'plandb.sioslab.com/ci_perf_exosims.json' + if cache: + if contr_data_path.exists(): + contr_data = pd.read_pickle(contr_data_path) + else: + contr_data = calcContrastCurves(quadrature_data, exosims_json=exosims_json) + with open(contr_data_path, 'wb') as f: + pickle.dump(contr_data, f) + else: + contr_data = calcContrastCurves(quadrature_data, exosims_json=exosims_json) + file.write(f"contr data: {contr_data}\n") + + + + print('Doing completeness calculations') + comps_path = Path(f'cache/update_comps_{datestr}.p') + compdict_path = Path(f'cache/update_compdict_{datestr}.p') + comps_data_path = Path(f'cache/update_comps_data_{datestr}.p') + if cache: + Path('cache/').mkdir(parents=True, exist_ok=True) + if comps_path.exists(): + comps = pd.read_pickle(comps_path) + with open(compdict_path, 'rb') as f: + compdict = pickle.load(f) + comps_data = 
pd.read_pickle(comps_data_path) + else: + comps, compdict, comps_data = calcPlanetCompleteness(contr_data, bandzip, photdict, exosims_json=exosims_json) + comps.to_pickle(comps_path) + with open(compdict_path, 'wb') as f: + pickle.dump(compdict, f) + comps_data.to_pickle(comps_data_path) + else: + comps, compdict, comps_data = calcPlanetCompleteness(contr_data, bandzip, photdict, exosims_json=exosims_json) + file.write(f"comps: {comps}\n") + file.write(f"compdict: {compdict}\n") + file.write(f"comps_data: {comps_data}\n") + + + file.write("generateTables") + + plandata_path = Path(f'cache/update_plandata_{datestr}.p') + stdata_path = Path(f'cache/update_stdata_{datestr}.p') + table_orbitfits_path = Path(f'cache/update_table_orbitfits_{datestr}.p') + if cache: + # plandata.to_pickle('plandata_'+datestr+'.pkl') + # stdata.to_pickle('stdata_' + datestr + '.pkl') + # orbitfits.to_pickle('orbitfits_'+datestr+'.pkl') + Path('cache/').mkdir(parents=True, exist_ok=True) + if plandata_path.exists(): + plandata = pd.read_pickle(plandata_path) + stdata = pd.read_pickle(stdata_path) + table_data = pd.read_pickle(table_orbitfits_path) + else: + plandata, stdata, orbitfits = generateTables(filtered_ipac, quadrature_data) + plandata.to_pickle(plandata_path) + stdata.to_pickle(stdata_path) + orbitfits.to_pickle(table_orbitfits_path) + else: + plandata, stdata, orbitfits = generateTables(filtered_ipac, comps_data) + + file.write(f"plandata: {plandata}\n") + file.write(f"stdata: {stdata}\n") + file.write(f"orbitfits: {orbitfits}\n") + + + + def resolve(r): + if pd.notna(r['value_right']): + return r['value_right'] + return r['value_left'] + # Necessary to merge/upsert dataframes, because since indices are recalculated everytime, and multiple keys/indices, must be filtered by scenerio_name, upserting table2 to table1, taking new updated values from table2 and add new records, might have to reindex + def mergeTables( table1, table2): + merged_df = pd.merge(table1, table2, 
on='pl_name',how='outer',suffixes=('_left', '_right')) + + + + + merged_df['value'] = merged_df.apply(resolve, axis=1) + + merged_df.drop(columns=['value_left', 'value_right'], inplace=True) + + return merged_df + + # filtered_planet = filter.loc(1) + + # for col_name in columns: + + # if filtered_planet[col_name].count() > 0: + # ipac_col = filtered_planet[col_name].values[0] + # sios_col = row[col_name] + + # print(f"{name} ipac maxangsep {ipac_col}") + # print(f"sios maxangsep {sios_col}") + + # if ipac_col != None: + # if (ipac_col != sios_col): + # print(f"different") + # changed_rows.append(row["pl_name"]) + # else: + # changed_rows.append(row['pl_name']) + + def compileCompleteness(): + datestr = Time.now().datetime.strftime("%Y-%m") + comp_data = pd.read_pickle(f"cache/comps_data_{datestr}.p") + col_names = comp_data.columns.values.tolist() + scenario_names = [] + for x in col_names: + if x[:8] == 'complete': + scenario_names.append(x[13:]) + #drop contr_curve col + completeness = pd.DataFrame([], columns = ['pl_id', 'completeness', 'scenario_name', 'compMinWA', 'compMaxWA', 'compMindMag', 'compMaxdMag']) + for i, row in tqdm(comp_data.iterrows()): + newRows = [] + for scenario_name in scenario_names: + newRows = [] + if pd.notna(row[('completeness_' + scenario_name)]): + newRows.append([row['pl_id'], row['completeness_' + scenario_name], scenario_name, + row['compMinWA_' + scenario_name], + row['compMaxWA_' + scenario_name], + row['compMindMag_' + scenario_name], + row['compMaxdMag_' + scenario_name]]) + + singleRow = pd.DataFrame(newRows, columns = ['pl_id', 'completeness', 'scenario_name', 'compMinWA', 'compMaxWA', 'compMindMag', 'compMaxdMag']) + completeness = completeness._append(singleRow, ignore_index=True) + # print(completeness) + return completeness + + + + compiled_contr_curvs_path = Path(f'plandb.sioslab.com/cache/update_compiled_cont_curvs_{datestr}.p') + contr_curvs_path = 
Path(f'plandb.sioslab.com/cache/update_cont_curvs_{datestr.replace("-", "_")}') + + + if compiled_contr_curvs_path.exists(): + contrast_curves = pd.read_pickle(compiled_contr_curvs_path) + else: + contrast_curves = compileContrastCurves(stdata, contr_curvs_path) + contrast_curves.to_pickle(compiled_contr_curvs_path) + def addId(r): + r['pl_id']= list(planets.index[(planets['pl_name'] == r['Name'])])[0] + return r + + + scenarios = pd.read_csv("plandb.sioslab.com/cache/scenario_angles.csv") + compiled_completeness_path = Path(f"cache/update_compiled_completeness_{datestr}.p") + # if compiled_completeness_path.exists(): + # completeness = pd.read_pickle(compiled_completeness_path) + # else: + completeness = compileCompleteness() + completeness.to_pickle(compiled_completeness_path) + + sql = text("SELECT * FROM ContrastCurves") + results = connection.execute(sql) + contrast_df = pd.DataFrame(results.fetchall(), columns = results.keys()) + + new_contrast = compileContrastCurves(stdata, contr_curvs_path) + + merged_contrast = mergeTables(contrast_df, contrast_curves) + print(merged_contrast) + + + + # compare scenerio name with what we have here calculated from the newly calculated, and create a new dataframe that appends the old ones, the merged duplciates, and new ones, then just df.to_sql .if_exists == replace, with new + # not able to j do a query, doesnt allow for 1/3 indices, no other query to just upsert, especially since id's are reclaculated everytime + + + + + + + + + + + + # if(filtered_planet.loc[0, "{col_name}"] "{col_name}") + # changed_rows.append + + + + + + + + #What if new planets get added? 
+ + + + # sios_columns = results.keys() + + + + + + + + + # print(df) + + # data = getIPACdata() + + # ipac_columns = data.keys() + + # print(data) + + # diff1 = list(sios_columns - ipac_columns) + # print("sios columns" + sios_columns + "\n\n\n\n\n") + # print(diff1) + # print("ipac columns" +ipac_columns + "\n\n\n\n\n") + # diff2 = list(ipac_columns - sios_columns) + # print(diff2) + + # combined_diff = diff1+ diff2 + # print(combined_diff) + + #WOrks, 3800 rows here as oppose to 4800 + # NOTES branch/changed absolute path to relative path, did this (got changed rows that need to be changed, now just need to run claculations for only these rows, talk about the bad code/syntax errors (.F0, _append, and the engine connection, unequal pass in param, and sql text(), too big to be minor error, but finished database on my system :) ) \ No newline at end of file diff --git a/backend/update_plandb.py b/backend/update_plandb.py index 7ede699..e9624a4 100644 --- a/backend/update_plandb.py +++ b/backend/update_plandb.py @@ -1,164 +1,226 @@ import pandas as pd from sqlalchemy import create_engine import pymysql +import glob from sqlalchemy import text from plandb_methods import * from database_main import * from update_util import * +import os +import sys - -cache = True +#Always keep cache false, because it essentially does nothing if it's on and updates based on month +cache = False datestr = Time.now().datetime.strftime("%Y-%m") -# workflow: get current database -> get ipac database -> compare current database to updated values in ipac database, to create a difference dataframe -> create a database from the difference dataframe (updated database) -> merge/upsert the difference database with the current database -> replace current database, with the merged, keep old values and adding updated values -# create connection with current database -password = input("password: ") -engine = create_engine('mysql+pymysql://'+"andrewchiu"+':'+password+'@localhost/testSios',echo=True) 
-diff_engine = create_engine('mysql+pymysql://'+"andrewchiu"+':'+password+'@localhost/testSiosDiffEngine',echo=True) +# Setup SQL and MySQL engines +password = input("SQL password: ") +sios_engine = create_engine('mysql+pymysql://'+"andrewchiu"+':'+password+'@localhost/testSios',echo=True) +diff_sios_engine = create_engine('mysql+pymysql://'+"andrewchiu"+':'+password+'@localhost/testSiosDiffEngine',echo=True) +new_engine = create_engine('mysql+pymysql://'+"andrewchiu"+':'+password+'@localhost/newEngine',echo=True) -with engine.connect() as connection: - # with diff_engine.connect() as diff_engine_connection: - - # get current database dataframe - current_database_df = get_current_database(connection) +with sios_engine.connect() as connection: + + #get ipac data + print("Getting ipac data") + + old_ipac_data, new_ipac_data = get_store_ipac() + + new_ipac_data.at[2, "pl_letter"] = 'a' + new_ipac_data.at[3, "pl_letter"] = 'a' + new_ipac_data.at[2, "pl_orbper"] = 180 + new_ipac_data.at[3, "pl_orbper"] = 1800 + + print(f"most recent: {old_ipac_data}") + print(f"current: {new_ipac_data}") + + #TODO: be able to insert custom ipac data, for test, possibly using flag + #TODO: Test new row differences, because currently its zero, make test data that is slightly different (similar to ipac data) + print("calculating row differences") + change_ipac_df, log = get_ipac_differences(old_ipac_data, new_ipac_data) + change_ipac_df.to_excel("plandb.sioslab.com/backend/sheets/change_ipac.xlsx") + print(f"Changed: {change_ipac_df}") + + for entry in log: + print(f"Reason: {entry['reason']}") + print(f"Description: {entry['description']}") + print("Details:", entry['details']) + print("-" * 40) + + if change_ipac_df.empty: + print("No changes detected, zero rows have been updated/added") + sys.exit() + + + # TODO: Is it just planets, this actually might not be necessary, good anyways + # Keep track of planets, for later upsert + planets_to_update = [] + + for index, row in 
change_ipac_df.iterrows(): + planets_to_update.append(row['pl_name']) + + print(planets_to_update) + + input2 = input("continue?") + + # get photodict + photdict_path = Path(f'cache/update_photdict_2022-05.p') + infile="plandb.sioslab.com/backend/allphotdata_2015.npz" + photdict = get_photo_data(photdict_path, infile, cache) + + print(photdict) - current_database_df.to_excel("current_database.xlsx", index=False) - - # get ipac database dataframe - data_path = Path(f'cache/data_cache_{datestr}.p') - ipac_data_df = get_ipac_database(data_path, cache) - - ipac_data_df.to_excel("ipac_data.xlsx", index=False) - - #find updates from current to ipac - - changed_df = find_row_differences(current_database_df, ipac_data_df) - changed_df.to_excel("changed.xlsx") - - # get photodict - photdict_path = Path(f'cache/update_photdict_2022-05.p') - infile="plandb.sioslab.com/allphotdata_2015.npz" - photdict = get_photo_data(photdict_path, infile, cache) - + # get bandzip + bandzip_path = Path(f'cache/update_bandzip_{datestr}.p') + bandzip = get_bandzip(bandzip_path, cache) + + print(bandzip) + + # get orbdata, orbfits + print("Generating orbdata and orbfits") + orbdata_path = Path(f'cache/update_orbdata_{datestr}.p') + orbfits_path = Path(f'cache/update_orbfits_{datestr}.p') + orbdata, orbfits = get_orbdata(orbdata_path, orbfits_path, change_ipac_df, bandzip, photdict, cache) + + # orbdata.to_excel("plandb.sioslab.com/backend/sheets/orbata.xlsx") + # orbdata.to_excel("plandb.sioslab.com/backend/sheets/orbfits.xlsx") + + + print(orbdata) + # get ephemeris + ephemeris_orbdata_path = Path(f'cache/update_ephemeris_orbdata_{datestr}.p') + ephemeris_orbfits_path = Path(f'cache/update_ephemeris_orbfits_{datestr}.p') + ephemeris_orbitfits, ephemeris_orbdata = get_ephemerisdata(ephemeris_orbdata_path, ephemeris_orbfits_path, change_ipac_df, orbfits, orbdata, bandzip, photdict, cache) + ephemeris_orbitfits.to_excel("plandb.sioslab.com/backend/sheets/ephemeris_orbfits.xlsx") + + #quadrature + 
print("quadrature") + quadrature_data_path = Path(f'cache/update_quadrature_data_{datestr}.p') + quadrature_data = get_quadrature(quadrature_data_path, ephemeris_orbitfits, bandzip, photdict, cache) + quadrature_data.to_excel("plandb.sioslab.com/backend/sheets/quadrature_data.xlsx") + + + contr_data_path = Path(f'cache/update_contr_data_{datestr}.p') + exosims_json = 'plandb.sioslab.com/ci_perf_exosims.json' + + contr_data = get_contrastness(contr_data_path, exosims_json, quadrature_data, cache) + contr_data.to_excel("plandb.sioslab.com/backend/sheets/contr_data.xlsx") + + comps_path = Path(f'cache/update_comps_{datestr}.p') + compdict_path = Path(f'cache/update_compdict_{datestr}.p') + comps_data_path = Path(f'cache/update_comps_data_{datestr}.p') + + + comps, compdict, comps_data = get_completeness(comps_path, compdict_path, comps_data_path, contr_data, bandzip, photdict, exosims_json, cache) + comps.to_excel("plandb.sioslab.com/backend/sheets/comps.xlsx") + comps_data.to_excel("plandb.sioslab.com/backend/sheets/comps_data.xlsx") + #None for compdict, as its dictionary + + plandata_path = Path(f'cache/update_plandata_{datestr}.p') + stdata_path = Path(f'cache/update_stdata_{datestr}.p') + table_orbitfits_path = Path(f'cache/update_table_orbitfits_{datestr}.p') + + # orbitfits got updated, maybe change to new var + plan_data, stdata, orbitfits = get_generated_tables(plandata_path, stdata_path, table_orbitfits_path, change_ipac_df, quadrature_data, comps_data, cache) + plan_data.to_excel("plandb.sioslab.com/backend/sheets/plandata.xlsx") + stdata.to_excel("plandb.sioslab.com/backend/sheets/stdata.xlsx") + orbitfits.to_excel('plandb.sioslab.com/backend/sheets/later_orbitfits.xlsx') + + + # Do compileContrastness and compile contrastness + # Look into and possibly remove the - to _ + contr_curvs2_path = Path(f'plandb.sioslab.com/cache/cont_curvs2_{datestr.replace("-", "_")}') + compiled_contr_curvs_path = 
Path(f'plandb.sioslab.com/cache/cont_curvs_{datestr.replace("-", "_")}') + compiled_contrast_curves, newpdfs = get_compiled_contrast(compiled_contr_curvs_path, stdata, comps_data, change_ipac_df, contr_curvs2_path) + # compiled_contrast_curves = compileContrastCurves(stdata, compiled_contr_curvs_path) + compiled_contrast_curves.to_excel("plandb.sioslab.com/backend/sheets/compiled_contrast_curves.xlsx") + # With current code, since get_compiled_contrast isnt fully working with new pdfs, new pdfs should be empty + newpdfs.to_excel("plandb.sioslab.com/backend/sheets/newpdfs.xlsx") + + + # compile completeness + compiled_completeness_path = Path(f"plandb.sioslab.com/cache/compiled_completeness_{datestr}.p") + compiled_completeness = get_compiled_completeness(compiled_completeness_path, comps_data) + compiled_completeness.to_excel("plandb.sioslab.com/backend/sheets/compiled_completeness.xlsx") + + diff_completeness_df = pd.DataFrame({ + 'completeness_id': [0], + 'pl_id': [3], + 'completeness': [0.0111], + 'scenario_name' : ['Optimistic_NF_Imager_20000hr'], + 'compMinWA': [None], + 'compMaxWA': [None], + 'compMindMag': [None], + 'compMaxdMag': [None], + }) + + # MakeSQL for the temporary database, creates diff engine, -> upsert diff engine with current engine + scenarios = pd.read_csv("plandb.sioslab.com/cache/scenario_angles.csv") + # writeSQL(engine, plandata=planets, stdata=stars, orbitfits=orbitfits, orbdata=orbits, pdfs=newpdfs, aliases=None,contrastCurves=contrast_curves,scenarios=scenarios, completeness=completeness) + temp_writeSQL(diff_sios_engine, plandata=plan_data, stdata=stdata, orbitfits=orbfits, orbdata=orbdata, pdfs=None, aliases=None, contrastCurves=compiled_contrast_curves, scenarios=scenarios, completeness=compiled_completeness) - # get bandzip - bandzip_path = Path(f'cache/update_bandzip_{datestr}.p') - bandzip = get_bandzip(bandzip_path, cache) - - # get orbdata, orbfits - orbdata_path = Path(f'cache/update_orbdata_{datestr}.p') - orbfits_path = 
Path(f'cache/update_orbfits_{datestr}.p') - orbdata, orbfits = get_orbdata(orbdata_path, orbfits_path, changed_df, bandzip, photdict, cache) - - # Line below, contains too main records to write xlsx, check manually - # orbdata.to_excel("orbdata.xlsx") - orbfits.to_excel("orbfits.xlsx") - - # get ephemeris - ephemeris_orbdata_path = Path(f'cache/update_ephemeris_orbdata_{datestr}.p') - ephemeris_orbfits_path = Path(f'cache/update_ephemeris_orbfits_{datestr}.p') - ephemeris_orbitfits, ephemeris_orbdata = get_ephemerisdata(ephemeris_orbdata_path, ephemeris_orbfits_path, changed_df, orbfits, orbdata, bandzip, photdict, cache) - ephemeris_orbitfits.to_excel("ephemeris_orbfits.xlsx") - - # Line below, contains too main records to write xlsx, check manually - # ephemeris_orbdata.to_excel("ephemeris_orbdata.xlsx") - - quadrature_data_path = Path(f'cache/update_quadrature_data_{datestr}.p') - quadrature_data = get_quadrature(quadrature_data_path, ephemeris_orbitfits, bandzip, photdict, cache) - quadrature_data.to_excel("quadrature_data.xlsx") - - contr_data_path = Path(f'cache/update_contr_data_{datestr}.p') - exosims_json = 'plandb.sioslab.com/ci_perf_exosims.json' - - contr_data = get_contrastness(contr_data_path, exosims_json, quadrature_data, cache) - contr_data.to_excel("contr_data.xlsx") - - comps_path = Path(f'cache/update_comps_{datestr}.p') - compdict_path = Path(f'cache/update_compdict_{datestr}.p') - comps_data_path = Path(f'cache/update_comps_data_{datestr}.p') - - comps, compdict, comps_data = get_completeness(comps_path, compdict_path, comps_data_path, contr_data, bandzip, photdict, exosims_json, cache) - comps.to_excel("comps.xlsx") - comps_data.to_excel("comps_data.xlsx") - #None for compdict, as its dictionary - - plandata_path = Path(f'cache/update_plandata_{datestr}.p') - stdata_path = Path(f'cache/update_stdata_{datestr}.p') - table_orbitfits_path = Path(f'cache/update_table_orbitfits_{datestr}.p') - - # orbitfits got updated, maybe change to new var - 
plan_data, stdata, orbitfits = get_generated_tables(plandata_path, stdata_path, table_orbitfits_path, changed_df, quadrature_data, comps_data, cache) - plan_data.to_excel("plandata.xlsx") - stdata.to_excel("stdata.xlsx") - orbitfits.to_excel('later_orbitfits.xlsx') - - #do compileContrastness and compile contrastness - # Look into and possibly remove the - to _ - contr_curvs2_path = Path(f'plandb.sioslab.com/cache/cont_curvs2_{datestr.replace("-", "_")}') - compiled_contr_curvs_path = Path(f'plandb.sioslab.com/cache/cont_curvs_{datestr.replace("-", "_")}') - compiled_contrast_curves, newpdfs = get_compiled_contrast(compiled_contr_curvs_path, stdata, comps_data, changed_df, contr_curvs2_path) - # compiled_contrast_curves = compileContrastCurves(stdata, compiled_contr_curvs_path) - compiled_contrast_curves.to_excel("compiled_contrast_curves.xlsx") - # With current code, since get_compiled_contrast isnt fully working with new pdfs, new pdfs should be empty - newpdfs.to_excel("newpdfs.xlsx") - - # compile completeness - compiled_completeness_path = Path(f"plandb.sioslab.com/cache/compiled_completeness_{datestr}.p") + # Get from diff_database + diff_engine_connection = diff_sios_engine.connect() + diff_completeness_df, diff_contrast_curves_df, diff_orbitfits_df, diff_orbits_df, diff_pdfs_df, diff_planets_df, diff_scenarios_df, diff_stars_df = get_all_from_db(diff_engine_connection) + + # Get from old_database + old_completeness_df, old_contrast_curves_df, old_orbitfits_df, old_orbits_df, old_pdfs_df, old_planets_df, old_scenarios_df, old_stars_df = get_all_from_db(connection) + + print("Merging Planets") + merged_planets = upsert_df(old_planets_df, diff_planets_df, "pl_name") - compiled_completeness = get_compiled_completeness(compiled_completeness_path, comps_data) - compiled_completeness.to_excel("compiled_completeness.xlsx") - - - - # remember do makesql with this and then get those tables and upsert those shorter new ones with the current - # writeSQL(engine, 
plandata=planets, stdata=stars, orbitfits=orbitfits, orbdata=orbits, pdfs=newpdfs, aliases=None,contrastCurves=contrast_curves,scenarios=scenarios, completeness=completeness) - scenarios = pd.read_csv("plandb.sioslab.com/cache/scenario_angles.csv") + # Upsert completeness + print("Merging Completeness") + + print(old_completeness_df, diff_completeness_df) + ssss = input("continue?") + # Merge based on pl_id, foreign key relation, base on foreign key relation, might do the same for rest + # TODO, this is highly likely to be wrong, forced to used indices here, because only unique, unfortunately incorrect, must base it upsert on the varying indices later + # TODO: Iterate through each pl_id, since these are base off pl_id, correspond with pl_name, and then reset those completeness rows for that pl (remove and then add the new ones that were) + merged_completeness = upsert_general(old_completeness_df, diff_completeness_df, 'pl_id') + + print(merged_completeness) + input3 = input("continue?") - - temp_writeSQL(diff_engine, plandata=plan_data, stdata=stdata, orbitfits=orbfits, orbdata=orbdata, pdfs=None, aliases=None, contrastCurves=compiled_contrast_curves, scenarios=scenarios, completeness=compiled_completeness) - - #get from diff_database - diff_engine_connection = diff_engine.connect() - diff_completeness_df, diff_contrast_curves_df, diff_orbitfits_df, diff_orbits_df, diff_pdfs_df, diff_planets_df, diff_scenarios_df, diff_stars_df = get_all_from_db(diff_engine_connection) - - #get from old_database - old_completeness_df, old_contrast_curves_df, old_orbitfits_df, old_orbits_df, old_pdfs_df, old_planets_df, old_scenarios_df, old_stars_df = get_all_from_db(connection) - - - - #merge with old, compare each - - # upsert planets - # Have to do name, because indices don't match, logic applies down unless otherwise in comment - merged_planets = upsert_dataframe(old_planets_df, diff_planets_df, "pl_name") - - - # upsert completeness - # TODO, this is highly likely to be 
wrong, forced to used indices here, because only unique, unfortunately incorrect, must base it upsert on the varying indices later - # merged_completeness = upsert_dataframe(old_completeness_df, diff_completeness_df, "completeness_id") - - # upsert stars - merged_stars = upsert_dataframe(old_stars_df, diff_stars_df, "st_name") - - # upsert orbitfits - merged_orbitfits = upsert_dataframe(old_orbitfits_df, diff_orbitfits_df, "pl_name") - - # upsert orbits - merged_orbits = upsert_dataframe(old_orbits_df, diff_orbits_df, "pl_name") - - # upsert contrast curves - # TODO, fix the column name, for unique one later? - merged_contrast_curves = upsert_dataframe(old_contrast_curves_df, diff_contrast_curves_df, "r_lamD") - - # upsert pdfs - merged_pdfs = upsert_dataframe(old_pdfs_df, diff_pdfs_df, "Name") - - # No need to upsert scenarios, as it's updated locally - - #write back to original database with new values, - # TODO: optionally, store old database in a different database for archive - temp_writeSQL(engine, merged_planets, merged_stars, merged_orbitfits, merged_orbits, merged_pdfs, aliases=None, contrastCurves=merged_contrast_curves, scenarios= scenarios, completeness=None) - - \ No newline at end of file + # Upsert stars + # TODO Star differences are based off that the planets table has foreign key st_id, therefore, to properly update stars, must go through planets, see what has been updated, and then go down through those planets, and their stars, and then if that planet has changed, update that star + print("Merging stars") + merged_stars = upsert_df(old_stars_df, diff_stars_df, "st_name") + + input4 = input("continue4") + + # For these later upserts, only way to properly upsert is to detect the change from the earlier value, like the difference in planets from ipac, and then categorize the change as a result from upated information or new information. If new information (new planet), just add but if updated information, going to have to track down the previous. 
Maybe it's possible for me to locally store database versions, so it can be quickly updated based on path of changes that happened + # Upsert orbitfits + print("Merging Orbit Fits") + #TODO: should this be pl_name or pl_id + merged_orbitfits = upsert_general(old_orbitfits_df, diff_orbitfits_df, "pl_name") + + # Upsert orbits + print("Merging Orbits") + merged_orbits = upsert_general(old_orbits_df, diff_orbits_df, "pl_name") + + + # TODO: Fix this based on logic above, use st_id, track st_id from st_names, likely need to do same logic for stars + # If stars are already reindexed, so should the st_id, if not track + print("Merging Curves") + merged_contrast_curves = upsert_df(old_contrast_curves_df, diff_contrast_curves_df, "st_id") + + # Might have to compare back to old, dataframe, track down what planet the index is and then use that index to renumber the indexes in the old dataframe for the diff dataframe to properly upsert with matching indices + # TODO: Maybe add pl_name and st_name to pdfs and contrast curves, to make it easier + + + # upsert pdfs + # TODO: Track with pl_id + print("Merging pdfs") + merged_pdfs = upsert_df(old_pdfs_df, diff_pdfs_df, "pl_id") + + # No need to upsert scenarios, as it's updated all at once + + # Write back to original database with new values, + # TODO: optionally, store old database in a different database for archive + print("Writing New Database") + final_writeSQL(new_engine, merged_planets, merged_stars, merged_orbitfits, merged_orbits, None, aliases=None, contrastCurves=None, scenarios = scenarios, completeness = merged_completeness) + + print("Done") diff --git a/backend/update_plandb_main.py b/backend/update_plandb_main.py new file mode 100644 index 0000000..8ab4079 --- /dev/null +++ b/backend/update_plandb_main.py @@ -0,0 +1,245 @@ +import pandas as pd +from sqlalchemy import create_engine +import pymysql +import glob +from sqlalchemy import text +from plandb_methods import * +from database_main import * +from update_util 
import * +import os +import sys + + +#Always keep cache false, because it essentially does nothing if it's on and updates based on month +cache = False +datestr = Time.now().datetime.strftime("%Y-%m") + + +# Setup SQL and MySQL engines +password = input("SQL password: ") +sios_engine = create_engine('mysql+pymysql://'+"andrewchiu"+':'+password+'@localhost/testSios',echo=True) +diff_sios_engine = create_engine('mysql+pymysql://'+"andrewchiu"+':'+password+'@localhost/testSiosDiffEngine',echo=True) +new_engine = create_engine('mysql+pymysql://'+"andrewchiu"+':'+password+'@localhost/newEngine',echo=True) + +with sios_engine.connect() as connection: + + # Get ipac data + print("Getting IPAC Data") + + old_ipac_data, new_ipac_data = get_store_ipac() + + #TODO: Must be atleast 2 changes, FIX the fucntions like getEphermisValues to do hangle this + # TODO: Sample artificial change + new_ipac_data.at[2, "pl_letter"] = 'a' + new_ipac_data.at[3, "pl_letter"] = 'a' + new_ipac_data.at[2, "pl_orbper"] = 180 + new_ipac_data.at[3, "pl_orbper"] = 1800 + + + new_ipac_data.to_excel("plandb.sioslab.com/backend/sheets/new_ipac.xlsx") + old_ipac_data.to_excel("plandb.sioslab.com/backend/sheets/old_ipac_data.xlsx") + + print(f"New IPAC: {new_ipac_data}") + print(f"Old IPAC: {old_ipac_data}") + + + #TODO: be able to insert custom ipac data, for test, possibly using flag + #TODO: Test new row differences, because currently its zero, make test data that is slightly different (similar to ipac data) + print("calculating row differences") + change_ipac_df, log = get_ipac_differences(old_ipac_data, new_ipac_data) + change_ipac_df.to_excel("plandb.sioslab.com/backend/sheets/change_ipac.xlsx") + print(f"Changed: {change_ipac_df}") + + for entry in log: + print(f"Reason: {entry['reason']}") + print(f"Description: {entry['description']}") + print("Details:", entry['details']) + print("-" * 40) + + if change_ipac_df.empty: + print("No changes detected, zero rows have been updated/added") + 
sys.exit() + + input1 = input("continue?") + + # get photodict + photdict_path = Path(f'cache/update_photdict_2022-05.p') + infile="plandb.sioslab.com/backend/allphotdata_2015.npz" + photdict = get_photo_data(photdict_path, infile, cache) + + print(photdict) + + # get bandzip + bandzip_path = Path(f'cache/update_bandzip_{datestr}.p') + bandzip = get_bandzip(bandzip_path, cache) + + print(bandzip) + + # get orbdata, orbfits + print("Generating orbdata and orbfits") + orbdata_path = Path(f'cache/update_orbdata_{datestr}.p') + orbfits_path = Path(f'cache/update_orbfits_{datestr}.p') + orbdata, orbfits = get_orbdata(orbdata_path, orbfits_path, change_ipac_df, bandzip, photdict, cache) + + # orbdata.to_excel("plandb.sioslab.com/backend/sheets/orbata.xlsx") + # orbdata.to_excel("plandb.sioslab.com/backend/sheets/orbfits.xlsx") + + print(orbdata) + + # get ephemeris + ephemeris_orbdata_path = Path(f'cache/update_ephemeris_orbdata_{datestr}.p') + ephemeris_orbfits_path = Path(f'cache/update_ephemeris_orbfits_{datestr}.p') + ephemeris_orbitfits, ephemeris_orbdata = get_ephemerisdata(ephemeris_orbdata_path, ephemeris_orbfits_path, change_ipac_df, orbfits, orbdata, bandzip, photdict, cache) + ephemeris_orbitfits.to_excel("plandb.sioslab.com/backend/sheets/ephemeris_orbfits.xlsx") + + # get quadrature + print("Quadrature") + quadrature_data_path = Path(f'cache/update_quadrature_data_{datestr}.p') + quadrature_data = get_quadrature(quadrature_data_path, ephemeris_orbitfits, bandzip, photdict, cache) + quadrature_data.to_excel("plandb.sioslab.com/backend/sheets/quadrature_data.xlsx") + + contr_data_path = Path(f'cache/update_contr_data_{datestr}.p') + exosims_json = 'plandb.sioslab.com/ci_perf_exosims.json' + + contr_data = get_contrastness(contr_data_path, exosims_json, quadrature_data, cache) + contr_data.to_excel("plandb.sioslab.com/backend/sheets/contr_data.xlsx") + + comps_path = Path(f'cache/update_comps_{datestr}.p') + compdict_path = 
Path(f'cache/update_compdict_{datestr}.p') + comps_data_path = Path(f'cache/update_comps_data_{datestr}.p') + + + comps, compdict, comps_data = get_completeness(comps_path, compdict_path, comps_data_path, contr_data, bandzip, photdict, exosims_json, cache) + comps.to_excel("plandb.sioslab.com/backend/sheets/comps.xlsx") + comps_data.to_excel("plandb.sioslab.com/backend/sheets/comps_data.xlsx") + #None for compdict, as its dictionary + + plandata_path = Path(f'cache/update_plandata_{datestr}.p') + stdata_path = Path(f'cache/update_stdata_{datestr}.p') + table_orbitfits_path = Path(f'cache/update_table_orbitfits_{datestr}.p') + + # Orbitfits got updated, maybe change to new var + plan_data, stdata, orbitfits = get_generated_tables(plandata_path, stdata_path, table_orbitfits_path, change_ipac_df, quadrature_data, comps_data, cache) + plan_data.to_excel("plandb.sioslab.com/backend/sheets/plandata.xlsx") + stdata.to_excel("plandb.sioslab.com/backend/sheets/stdata.xlsx") + orbitfits.to_excel('plandb.sioslab.com/backend/sheets/later_orbitfits.xlsx') + + + # Do compileContrastness and compile contrastness + # Look into and possibly remove the - to _ + contr_curvs2_path = Path(f'plandb.sioslab.com/cache/cont_curvs2_{datestr.replace("-", "_")}') + compiled_contr_curvs_path = Path(f'plandb.sioslab.com/cache/cont_curvs_{datestr.replace("-", "_")}') + compiled_contrast_curves, newpdfs = get_compiled_contrast(compiled_contr_curvs_path, stdata, comps_data, change_ipac_df, contr_curvs2_path) + # compiled_contrast_curves = compileContrastCurves(stdata, compiled_contr_curvs_path) + compiled_contrast_curves.to_excel("plandb.sioslab.com/backend/sheets/compiled_contrast_curves.xlsx") + # With current code, since get_compiled_contrast isnt fully working with new pdfs, new pdfs should be empty + newpdfs.to_excel("plandb.sioslab.com/backend/sheets/newpdfs.xlsx") + + + # compile completeness + compiled_completeness_path = Path(f"plandb.sioslab.com/cache/compiled_completeness_{datestr}.p") 
+ compiled_completeness = get_compiled_completeness(compiled_completeness_path, comps_data) + compiled_completeness.to_excel("plandb.sioslab.com/backend/sheets/compiled_completeness.xlsx") + + # diff_completeness_df = pd.DataFrame({ + # 'completeness_id': [0], + # 'pl_id': [3], + # 'completeness': [0.0111], + # 'scenario_name' : ['Optimistic_NF_Imager_20000hr'], + # 'compMinWA': [None], + # 'compMaxWA': [None], + # 'compMindMag': [None], + # 'compMaxdMag': [None], + # }) + + # MakeSQL for the temporary database, creates diff engine, -> upsert diff engine with current engine + scenarios = pd.read_csv("plandb.sioslab.com/cache/scenario_angles.csv") + # writeSQL(engine, plandata=planets, stdata=stars, orbitfits=orbitfits, orbdata=orbits, pdfs=newpdfs, aliases=None,contrastCurves=contrast_curves,scenarios=scenarios, completeness=completeness) + temp_writeSQL(diff_sios_engine, plandata=plan_data, stdata=stdata, orbitfits=orbfits, orbdata=orbdata, pdfs=None, aliases=None, contrastCurves=compiled_contrast_curves, scenarios=scenarios, completeness=compiled_completeness) + + # Get from diff_database + diff_engine_connection = diff_sios_engine.connect() + diff_completeness_df, diff_contrast_curves_df, diff_orbitfits_df, diff_orbits_df, diff_pdfs_df, diff_planets_df, diff_scenarios_df, diff_stars_df = get_all_from_db(diff_engine_connection) + + # Get from old_database + old_completeness_df, old_contrast_curves_df, old_orbitfits_df, old_orbits_df, old_pdfs_df, old_planets_df, old_scenarios_df, old_stars_df = get_all_from_db(connection) + + # Upsert planets + # Planets don't have pl_id, they only have pl_name which is indexed after + print("Merging planets") + merged_planets = upsert_general(old_planets_df, diff_planets_df, "pl_name") + + + # Upsert completeness + print("Merging completeness") + + #TODO: Test new completeness, manually input change in completeness (Not sure how to artificially complete chane) + # only new value + # diff_completeness_df = pd.DataFrame({ + # 
'completeness_id': [0], + # 'pl_id': [3], + # 'completeness': [0.0111], + # 'scenario_name' : ['Optimistic_NF_Imager_20000hr'], + # 'compMinWA': [None], + # 'compMaxWA': [None], + # 'compMindMag': [None], + # 'compMaxdMag': [None], + # }) + + #TODO: Test new completeness, manually input change in completeness (Not sure how to artificially complete chane) + # Old value updated and new value + # df_merged_modified_new = pd.DataFrame({ + # 'completeness_id': [0, 1, 2, 3, 4], + # 'pl_id': [3, 3, 3, 3, 3], + # 'completeness': [0, 0, 0.011, 0.085041, 0.233326834], + # 'scenario_name': ['Conservative_NF_Imager_25hr', 'Conservative_NF_Imager_100hr', 'Conservative_NF_Imager_10000hr', 'Optimistic_NF_Imager_25hr', 'Optimistic_NF_Imager_1111hr'], + # 'compMinWa': [None, None, None, None, None], + # 'compMaxWa': [None, None, None, None, None], + # 'compMindMag': [None, None, None, None, None], + # 'compMaxdMag': [None, None, None, None, None] + # }) + + print(old_completeness_df, diff_completeness_df) + # Merge based on pl_id, foreign key relation, base on foreign key relation, might do the same for rest + # TODO, this is highly likely to be wrong, forced to used indices here, because only unique, unfortunately incorrect, must base it upsert on the varying indices later + # TODO: Iterate through each pl_id, since these are base off pl_id, correspond with pl_name, and then reset those completeness rows for that pl (remove and then add the new ones that were) + merged_completeness = upsert_general(old_completeness_df, diff_completeness_df, "pl_id") + + # Upsert stars + # TODO Star differences are based off that the planets table has foreign key st_id, therefore, to properly update stars, must go through planets, see what has been updated, and then go down through those planets, and their stars, and then if that planet has changed, update that star + # Same logic as with planets + print("Merging stars") + merged_stars = upsert_general(old_stars_df, diff_stars_df, "st_name") + + # 
For these later upserts, only way to properly upsert is to detect the change from the earlier value, like the difference in planets from ipac, and then categorize the change as a result from upated information or new information. If new information (new planet), just add but if updated information, going to have to track down the previous. Maybe it's possible for me to locally store database versions, so it can be quickly updated based on path of changes that happened + # Upsert orbitfits + print("Merging orbit fits") + merged_orbitfits = upsert_general(old_orbitfits_df, diff_orbitfits_df, "pl_id") + + # TODO: Orbitfits don't have anyway of uniquely upserting other than orbit fit id, so if there's new orbit fits, they must be detected from the other builds, and then added + # Upsert orbits + print("Merging orbits") + merged_orbits = upsert_general(old_orbits_df, diff_orbits_df, "pl_id") + + # Upsert contrast curvess + print("Merging curves") + merged_contrast_curves = upsert_general(old_contrast_curves_df, diff_contrast_curves_df, "st_id") + + + # Upsert pdfs + # TODO: same as orbit fits + # print("Merging pdfs") + # merged_pdfs = upsert_df(old_pdfs_df, diff_pdfs_df, "Name") + + # No need to upsert scenarios, as it's updated locally + + # write back to original database with new values, + # TODO: optionally, store old database in a different database for archive + print("Merging and final write") + final_writeSQL(new_engine, merged_planets, merged_stars, merged_orbitfits, merged_orbits, None, aliases=None, contrastCurves=None, scenarios=scenarios, completeness=merged_completeness) + + print("Done") + + # TODO: Print all total changes + # TODO: Correct the merges/compiles with no changes, ending up outputting an empty table, (account for table upserts with no changes) (Handle no new updates case). For example, final completeness would be empty if there are no changes to completeness in the update. 
+ # TODO: Get "result = connection.execute(text("ALTER TABLE OrbitFits ROW_FORMAT=COMPRESSED"))" removed, line 1047 of update_util.py, operational error when that line is not there, it has to do with database setup \ No newline at end of file diff --git a/backend/update_util.py b/backend/update_util.py index c745df3..e55b10b 100644 --- a/backend/update_util.py +++ b/backend/update_util.py @@ -1,3 +1,4 @@ +import EXOSIMS.util import pandas as pd from sqlalchemy import create_engine import pymysql @@ -5,11 +6,515 @@ from plandb_methods import * from database_main import * import sweetviz as sv -from typing import Tuple +from typing import Any, Dict, Tuple +from EXOSIMS.util.get_dirs import * +from EXOSIMS.util.getExoplanetArchive import * # Util functions for update, also compacts much of the code from gen_plandb and database_main + +def customGetExoplanetArchivePS( + forceNew: bool = False, **specs: Dict[Any, Any] +) -> pd.DataFrame: + """ + Get the contents of the Exoplanet Archive's Planetary Systems table and cache + results. If a previous query has been saved to disk, load that. + + Args: + forceNew (bool): + Run a fresh query even if results exist on disk. + + Returns: + pandas.DataFrame: + Planetary Systems table + """ + + basestr = "updateExoplanetArchivePS" + querystring = r"select+*+from+ps" + + return EXOSIMS.util.getExoplanetArchive.cacheExoplanetArchiveQuery(basestr, querystring, forceNew=forceNew, **specs) + + +def customGetExoplanetArchivePSCP(forceNew: bool = False, **specs: Any) -> pd.DataFrame: + """ + Get the contents of the Exoplanet Archive's Planetary Systems Composite Parameters + table and cache results. If a previous query has been saved to disk, load that. + + Args: + forceNew (bool): + Run a fresh query even if results exist on disk. 
+ + Returns: + pandas.DataFrame: + Planetary Systems composited parameters table + """ + + basestr = "updateExoplanetArchivePSCP" + querystring = r"select+*+from+pscomppars" + + return EXOSIMS.util.getExoplanetArchive.cacheExoplanetArchiveQuery(basestr, querystring, forceNew=forceNew, **specs) + + +def get_store_ipac_helper(pscp_data : pd.DataFrame, ps_data : pd.DataFrame) -> pd.DataFrame: + """ + + Majority of getIpacData() from plandb_methods. This helps modulate code between ipac compilation functionality and caching functionality + + Args: + pscp_data (pd.DataFrame): _description_ + ps_data (pd.DataFrame): _description_ + + Returns: + pd.DataFrame: _description_ + """ + #only keep stuff related to the star from composite + composite_cols = [col for col in pscp_data.columns if (col == 'pl_name') or str.startswith(col, 'st_') or col == 'sy_vmag'] + # composite_cols.extend(['pl_radj', 'pl_radj_reflink']) + pscp_data = pscp_data[composite_cols] + + #sort by planet name + pscp_data = pscp_data.sort_values(by=['pl_name']).reset_index(drop=True) + ps_data = ps_data.sort_values(by=['pl_name']).reset_index(drop=True) + + #merge data sets + merged_data = ps_data.copy() + blank_col = [None]*len(ps_data) + for col in composite_cols: + if col not in merged_data.columns: + merged_data[col] = blank_col + # Writing as a loop for now... 
+ t_bar = trange(len(merged_data), leave=False) + last_pl = None + for i, row in merged_data.iterrows(): + t_bar.update() + pl = row.pl_name + + if pl != last_pl: + # Get the composite row + c_row = pscp_data.loc[pscp_data.pl_name == row.pl_name] + + # Now take all the stellar data from the composite table + for col in composite_cols: + if col != 'pl_name': + if pd.isnull(merged_data.loc[i, col]): + merged_data.at[i, col] = c_row[col].values[0] + + last_pl = pl + t_bar.close() + #create columns for short references and publication years + authregex = re.compile(r"()|()") + merged_data['publication_year'] = merged_data.pl_refname.str.extract('(\d{4})') + merged_data['shortrefs'] = [authregex.sub("", merged_data['pl_refname'][j]).strip() for j in range(len(merged_data))] + merged_data['refs'] = merged_data["pl_refname"].values + merged_data['best_data'] = np.zeros(len(merged_data)) + + #pick best attribute row for each planet + print("Choosing best attributes for all planets.") + # This is used for typesetting the progress bar + max_justification = merged_data.pl_name.str.len().max() + t_bar = trange(len(merged_data), leave=False) + for j,name in enumerate(merged_data['pl_name'].values): + # print("%s: %d/%d"%(name,j+1,len(data))) + t_bar.set_description(name.ljust(max_justification)) + t_bar.update() + + planet_rows = merged_data.loc[merged_data["pl_name"] == name] + + sorted_rows = planet_rows.sort_values(by=["publication_year"], axis=0, ascending=False) + good_idx = sorted_rows.index[0] + good_lvl = 0 + for index, row in sorted_rows.iterrows(): + base_need = (not pd.isnull(row["pl_orbsmax"]) or not pd.isnull(row["pl_orbper"])) and \ + (not pd.isnull(row["pl_bmassj"]) or not pd.isnull(row["pl_radj"])) + + # Has everything + if good_lvl < 8 and (base_need + and not pd.isnull(row["pl_orbeccen"]) and not pd.isnull(row["pl_orbtper"]) + and not pd.isnull(row["pl_orblper"]) and not pd.isnull(row["pl_orbincl"])): + if not pd.isnull(row["pl_radj"]): + good_idx = index + 
good_lvl = 8 + break + elif good_lvl < 7: + good_idx = index + good_lvl = 7 + + # Has everything except inclination + if good_lvl < 6 and (base_need + and not pd.isnull(row["pl_orbeccen"]) and not pd.isnull(row["pl_orbtper"]) + and not pd.isnull(row["pl_orblper"])): + if not pd.isnull(row["pl_radj"]): + good_idx = index + good_lvl = 6 + elif good_lvl < 5: + good_idx = index + good_lvl = 5 + + # Has either periapsis time or argument of pariapsis + elif good_lvl < 4 and (base_need + and not pd.isnull(row["pl_orbeccen"]) and (not pd.isnull(row["pl_orbtper"]) + or not pd.isnull(row["pl_orblper"]))): + if not pd.isnull(row["pl_radj"]): + good_idx = index + good_lvl = 4 + elif good_lvl < 3: + good_idx = index + good_lvl = 3 + + # Has eccentricity + elif good_lvl < 2 and (base_need + and not pd.isnull(row["pl_orbeccen"])): + if not pd.isnull(row["pl_radj"]): + good_idx = index + good_lvl = 2 + elif good_lvl < 1: + good_idx = index + good_lvl = 1 + + # 1st doesn't have basic info + elif index == good_idx and not base_need: + good_idx = -1 + # Previous row needed to be replaced + elif good_idx == -1 and base_need: + good_idx = index + good_lvl = 1 + + if good_idx == -1: + good_idx = sorted_rows.index[0] + + merged_data.at[good_idx, "best_data"] = 1 + + columns_to_null = ["pl_orbper", "pl_orbpererr1", "pl_orbpererr2", "pl_orbperlim", "pl_orbsmax", + "pl_orbsmaxerr1", "pl_orbsmaxerr2", "pl_orbsmaxlim", "pl_orbeccen", + "pl_orbeccenerr1", "pl_orbeccenerr2", "pl_orbeccenlim", "pl_orbtper", + "pl_orbtpererr1", "pl_orbtpererr2", "pl_orbtperlim", "pl_orblper", + "pl_orblpererr1", "pl_orblpererr2", "pl_orblperlim", "pl_bmassj", + "pl_bmassjerr1", "pl_bmassjerr2", "pl_bmassjlim", "pl_orbincl", "pl_orbinclerr1", "pl_orbinclerr2", + "pl_orbincllim", "pl_bmassprov"] + rad_columns = ["pl_radj", "pl_radjerr1", "pl_radjerr2", "pl_radjlim"] + final_replace_columns = list(columns_to_null) + final_replace_columns.extend(rad_columns) + + #update rows as needed + print("Updating planets with 
best attributes.") + max_justification = pscp_data.pl_name.str.len().max() + t_bar = trange(len(pscp_data), leave=False) + merged_data = merged_data.assign(pl_def_override=np.zeros(len(merged_data))) + # return ps_data, pscp_data, merged_data + # for j,name in enumerate(merged_data['pl_name'].values): + # # print("%s: %d/%d"%(name,j+1,len(data))) + # t_bar.set_description(name.ljust(max_justification)) + # t_bar.update(1) + + # row_data = merged_data.loc[(merged_data["best_data"] == 1) & (merged_data["pl_name"] == name)] + # idx = merged_data.loc[(merged_data["pl_name"] == name)].index + + # # row_data.index = idx + # row_data_replace = row_data[final_replace_columns] + # # Want to keep radius vals from composite table instead of replacing with null, so we don't null radius columns + # merged_data.loc[(merged_data["pl_name"] == name,columns_to_null)] = np.nan + # merged_data.update(row_data_replace,overwrite=True) + # merged_data.loc[idx,'pl_def_override'] = 1 + # merged_data.loc[idx, 'disc_refname'] = row_data['disc_refname'].values[0] + + # if not np.isnan(row_data['pl_radj'].values[0]): + # merged_data.loc[idx, 'pl_radreflink'] = row_data['disc_refname'].values[0] + + # Drop rows that aren't marked as the best data + merged_data = merged_data.loc[merged_data.best_data == 1] + # return ps_data, pscp_data, merged_data + + #sort by planet name + merged_data = merged_data.sort_values(by=['pl_name']).reset_index(drop=True) + + print("Filtering to useable planets and calculating additional properties.") + + # filter rows: + # we need: + # distance AND + # (sma OR (period AND stellar mass)) AND + # (radius OR mass (either true or m\sin(i))) + # return ps_data, pscp_data, merged_data + keep = (~np.isnan(merged_data['sy_dist'].values)) & (~np.isnan(merged_data['pl_orbsmax'].values) | \ + (~np.isnan(merged_data['pl_orbper'].values) & ~np.isnan(merged_data['st_mass'].values))) & \ + (~np.isnan(merged_data['pl_bmassj'].values) | ~np.isnan(merged_data['pl_radj'].values)) + 
merged_data = merged_data[keep] + merged_data = merged_data.reset_index(drop=True) + + #remove extraneous columns + merged_data = merged_data.drop(columns=['pl_rade', + 'pl_radelim', + # 'pl_radserr2', + 'pl_radeerr1', + # 'pl_rads', + # 'pl_radslim', + 'pl_radeerr2', + # 'pl_radserr1', + 'pl_masse', + 'pl_masseerr1', + 'pl_masseerr2', + 'pl_masselim', + 'pl_msinie', + 'pl_msinieerr1', + 'pl_msinieerr2', + 'pl_msinielim' + ]) + has_lum = ~np.isnan(merged_data['st_lum'].values) + merged_data.loc[has_lum, 'st_lum_correction'] = (10 ** merged_data.loc[has_lum, 'st_lum']) ** .5 # Since lum is log base 10 of solar luminosity + + + #fill in missing smas from period & star mass + nosma = np.isnan(merged_data['pl_orbsmax'].values) + p2sma = lambda mu,T: ((mu*T**2/(4*np.pi**2))**(1/3.)).to('AU') + GMs = const.G*(merged_data['st_mass'][nosma].values*u.solMass) # units of solar mass + T = merged_data['pl_orbper'][nosma].values*u.day + tmpsma = p2sma(GMs,T) + merged_data.loc[nosma,'pl_orbsmax'] = tmpsma + merged_data['pl_calc_sma'] = pd.Series(np.zeros(len(merged_data['pl_name'])), index=merged_data.index) + merged_data.loc[nosma, 'pl_calc_sma'] = 1 + + #propagate filled in sma errors + GMerrs = ((merged_data['st_masserr1'][nosma] - merged_data['st_masserr2'][nosma])/2.).values*u.solMass*const.G + Terrs = ((merged_data['pl_orbpererr1'][nosma] - merged_data['pl_orbpererr2'][nosma])/2.).values*u.day + + smaerrs = np.sqrt((2.0*T**2.0*GMs)**(2.0/3.0)/(9*np.pi**(4.0/3.0)*T**2.0)*Terrs**2.0 +\ + (2.0*T**2.0*GMs)**(2.0/3.0)/(36*np.pi**(4.0/3.0)*GMs**2.0)*GMerrs**2.0).to('AU') + merged_data.loc[nosma,'pl_orbsmaxerr1'] = smaerrs + merged_data.loc[nosma,'pl_orbsmaxerr2'] = -smaerrs + + + #update all WAs (and errors) based on sma + WA = np.arctan((merged_data['pl_orbsmax'].values*u.AU)/(merged_data['sy_dist'].values*u.pc)).to('mas') + merged_data['pl_angsep'] = WA.value + sigma_a = ((merged_data['pl_orbsmaxerr1']- merged_data['pl_orbsmaxerr2'])/2.).values*u.AU + sigma_d = 
((merged_data['sy_disterr1']- merged_data['sy_disterr2'])/2.).values*u.pc + sigma_wa = (np.sqrt(( (merged_data['pl_orbsmax'].values*u.AU)**2.0*sigma_d**2 + (merged_data['sy_dist'].values*u.pc)**2.0*sigma_a**2)/\ + ((merged_data['pl_orbsmax'].values*u.AU)**2.0 + (merged_data['sy_dist'].values*u.pc)**2.0)**2.0).decompose()*u.rad).to(u.mas) + merged_data['pl_angseperr1'] = sigma_wa.value + merged_data['pl_angseperr2'] = -sigma_wa.value + + #fill in radius based on mass + # noR = ((merged_data['pl_rade_reflink'] == 'Calculated Value') |\ + # (merged_data['pl_rade_reflink'] == 'Calculated Value') |\ + # merged_data['pl_radj'].isnull()).values + # merged_data['pl_calc_rad'] = pd.Series(np.zeros(len(merged_data['pl_name'])), index=merged_data.index) + # merged_data.loc[noR, 'pl_calc_rad'] = 1 + noR = (merged_data['pl_radj'].isnull()).values + + # Initialize the ForecasterMod + forecaster_mod = ForecasterMod() + # merged_data.loc[3856, 'pl_bmassjerr2'] = -.18 + # merged_data.loc[3935, 'pl_orbsmaxerr2'] = -0.6 + # merged_data.loc[3905, 'pl_bmassjerr1'] = 0.1 + m = ((merged_data['pl_bmassj'][noR].values*u.M_jupiter).to(u.M_earth)).value + merr = (((merged_data['pl_bmassjerr1'][noR].values - merged_data['pl_bmassjerr2'][noR].values)/2.0)*u.M_jupiter).to(u.M_earth).value + R = forecaster_mod.calc_radius_from_mass(m*u.M_earth) + # R = [forecaster_m.calc_radius_from_mass(mp*u.M_earth) for mp in m] + + # Turning what was a list comprehension into a loop to handle edge case + Rerr = np.zeros(len(m)) + for j, m_val in enumerate(m): + if np.isnan(merr[j]): + Rerr[j] = np.nan + elif merr[j] == 0: + # Happens with truncation error sometimes, check if the error in earth radii is input to IPAC correctly + merr_earth = (((merged_data.iloc[j]['pl_bmasseerr1'] - merged_data.iloc[j]['pl_bmasseerr2'])/2.0)*u.M_earth).to(u.M_earth).value + Rerr[j] = forecaster_mod.calc_radius_from_mass(u.M_earth*np.random.normal(loc=m_val, scale=merr_earth, size=int(1e4))).std().value + else: + Rerr[j] = 
forecaster_mod.calc_radius_from_mass(u.M_earth*np.random.normal(loc=m_val, scale=merr[j], size=int(1e4))).std().value + Rerr = Rerr*u.R_earth + + # Rerr = np.array([forecaster_mod.calc_radius_from_mass(u.M_earth*np.random.normal(loc=m[j], scale=merr[j], size=int(1e4))).std().value if not(np.isnan(merr[j])) else np.nan for j in range(len(m))])*u.R_earth + + #create mod forecaster radius column and error cols + # merged_data = merged_data.assign(pl_radj_forecastermod=merged_data['pl_radj'].values) + merged_data['pl_radj_forecastermod'] = merged_data['pl_radj'].values + merged_data.loc[noR,'pl_radj_forecastermod'] = (R.to(u.R_jupiter)).value + + # merged_data = merged_data.assign(pl_radj_forecastermoderr1=merged_data['pl_radjerr1'].values) + merged_data['pl_radj_forecastermoderr1'] = merged_data['pl_radjerr1'].values + merged_data.loc[noR,'pl_radj_forecastermoderr1'] = (Rerr.to(u.R_jupiter)).value + + # merged_data = merged_data.assign(pl_radj_forecastermoderr2=merged_data['pl_radjerr2'].values) + merged_data['pl_radj_forecastermoderr2'] = merged_data['pl_radjerr2'].values + merged_data.loc[noR,'pl_radj_forecastermoderr2'] = -(Rerr.to(u.R_jupiter)).value + + + + # now the Fortney model + from EXOSIMS.PlanetPhysicalModel.FortneyMarleyCahoyMix1 import \ + FortneyMarleyCahoyMix1 + fortney = FortneyMarleyCahoyMix1() + + ml10 = m <= 17 + Rf = np.zeros(m.shape) + Rf[ml10] = fortney.R_ri(0.67,m[ml10]) + + mg10 = m > 17 + tmpsmas = merged_data['pl_orbsmax'][noR].values + tmpsmas = tmpsmas[mg10] + tmpsmas[tmpsmas < fortney.giant_pts2[:,1].min()] = fortney.giant_pts2[:,1].min() + tmpsmas[tmpsmas > fortney.giant_pts2[:,1].max()] = fortney.giant_pts2[:,1].max() + + tmpmass = m[mg10] + tmpmass[tmpmass > fortney.giant_pts2[:,2].max()] = fortney.giant_pts2[:,2].max() + + Rf[mg10] = griddata(fortney.giant_pts2, fortney.giant_vals2,( np.array([10.]*np.where(mg10)[0].size), tmpsmas, tmpmass)) + + # merged_data = merged_data.assign(pl_radj_fortney=merged_data['pl_radj'].values) + 
merged_data['pl_radj_fortney'] = merged_data['pl_radj'].values + #data['pl_radj_fortney'][noR] = ((Rf*u.R_earth).to(u.R_jupiter)).value + merged_data.loc[noR,'pl_radj_fortney'] = ((Rf*u.R_earth).to(u.R_jupiter)).value + + # Calculate erros for fortney radius + Rf_err = [] + tmpsmas = merged_data['pl_orbsmax'][noR].values + tmpsmaserr = (merged_data['pl_orbsmaxerr1'][noR].values - merged_data['pl_orbsmaxerr2'][noR].values) / 2.0 + adist = np.zeros((len(m), int(1e4))) + for j, _ in enumerate(tmpsmas): # Create smax distribution + if np.isnan(tmpsmaserr[j]) or (tmpsmaserr[j] == 0): + adist[j, :] = (np.ones(int(1e4))* tmpsmas[j]) + else: + adist[j, :] = (np.random.normal(loc=tmpsmas[j], scale=tmpsmaserr[j], size=int(1e4))) + + for j, _ in enumerate(m): # Create m distribution and calculate errors + if np.isnan(merr[j]) or (merr[j] == 0): + # merr[j] = 0 + mdist = np.ones(int(1e4)) * m[j] + else: + mdist = (np.random.normal(loc=m[j], scale=merr[j], size=int(1e4))) + + for i in range(len(mdist)): + while mdist[i] < 0: + mdist[i] = (np.random.normal(loc=m[j], scale=merr[j], size=int(1))) + + ml10_dist = mdist <= 17 + Rf_dist = np.zeros(mdist.shape) + Rf_dist[ml10_dist] = fortney.R_ri(0.67, mdist[ml10_dist]) + + cur_adist = adist[j, :] + mg10_dist = mdist > 17 + tmpsmas = cur_adist[mg10_dist] + tmpsmas[tmpsmas < fortney.giant_pts2[:, 1].min()] = fortney.giant_pts2[:, 1].min() + tmpsmas[tmpsmas > fortney.giant_pts2[:, 1].max()] = fortney.giant_pts2[:, 1].max() + + tmpmass = mdist[mg10_dist] + tmpmass[tmpmass > fortney.giant_pts2[:, 2].max()] = fortney.giant_pts2[:, 2].max() + + Rf_dist[mg10_dist] = griddata(fortney.giant_pts2, fortney.giant_vals2, + (np.array([10.] 
* np.where(mg10_dist)[0].size), tmpsmas, tmpmass)) + Rf_err.append(Rf_dist.std()) + + # merged_data = merged_data.assign(pl_radj_fortneyerr1=merged_data['pl_radjerr1'].values) + merged_data['pl_radj_fortneyerr1'] = merged_data['pl_radjerr1'].values + merged_data.loc[noR, 'pl_radj_fortneyerr1'] = ((Rf_err * u.R_earth).to(u.R_jupiter)).value + # merged_data = merged_data.assign(pl_radj_forecastermoderr2=merged_data['pl_radjerr2'].values) + merged_data['pl_radj_fortneyerr2'] = merged_data['pl_radjerr2'].values + merged_data.loc[noR, 'pl_radj_fortneyerr2'] = -((Rf_err * u.R_earth).to(u.R_jupiter)).value + + + #populate max WA based on available eccentricity data (otherwise maxWA = WA) + hase = ~np.isnan(merged_data['pl_orbeccen'].values) + maxWA = WA[:] + maxWA[hase] = np.arctan((merged_data['pl_orbsmax'][hase].values*(1 + merged_data['pl_orbeccen'][hase].values)*u.AU)/(merged_data['sy_dist'][hase].values*u.pc)).to('mas') + # merged_data = merged_data.assign(pl_maxangsep=maxWA.value) + merged_data['pl_maxangsep'] = maxWA.value + + #populate min WA based on eccentricity & inclination data (otherwise minWA = WA) + hasI = ~np.isnan(merged_data['pl_orbincl'].values) + s = merged_data['pl_orbsmax'].values*u.AU + s[hase] *= (1 - merged_data['pl_orbeccen'][hase].values) + s[hasI] *= np.cos(merged_data['pl_orbincl'][hasI].values*u.deg) + s[~hasI] = 0 + minWA = np.arctan(s/(merged_data['sy_dist'].values*u.pc)).to('mas') + # merged_data = merged_data.assign(pl_minangsep=minWA.value) + merged_data['pl_minangsep'] =minWA.value + + #Fill in missing luminosity from meanstars + ms = MeanStars() + nolum_teff = np.isnan(merged_data['st_lum'].values) & ~np.isnan(merged_data['st_teff'].values) + teffs = merged_data.loc[nolum_teff, 'st_teff'] + lums_1 = ms.TeffOther('logL', teffs) # Calculates Luminosity when teff exists + merged_data.loc[nolum_teff, 'st_lum'] = lums_1 + + nolum_noteff_spect = np.isnan(merged_data['st_lum'].values) & ~merged_data['st_spectype'].isnull().values + spects = 
merged_data.loc[nolum_noteff_spect, 'st_spectype'] + lums2 = [] + for str_row in spects.values: + spec_letter = str_row[0] + # spec_rest.append(str_row[1:]) + spec_num_match = re.search("^[0-9,]+", str_row[1:]) + if spec_num_match is not None: + spec_num = str_row[spec_num_match.start() + 1:spec_num_match.end() + 1] + # Calculates luminosity when teff does not exist but spectral type exists + lums2.append(ms.SpTOther('logL', spec_letter, spec_num)) + else: + lums2.append(np.nan) + merged_data.loc[nolum_noteff_spect, 'st_lum'] = lums2 + + return merged_data + +def get_store_ipac() -> tuple[pd.DataFrame, pd.DataFrame]: + + # Saves PSCP & PS data cache, for current update comparison, and for future update + pscp_data_dummy = customGetExoplanetArchivePSCP(forceNew=True) + ps_data_dummy = customGetExoplanetArchivePS(forceNew=True) + + downloads_dir = get_downloads_dir() + print(downloads_dir) + + def extract_time(filename): + time = filename.name[24:] + return time + + + # TODO: CHANGE THIS PATH, to the system + ipac_cache_dir = Path(f'/Users/andrewchiu/.EXOSIMS/downloads') + + if ipac_cache_dir.exists(): + filtered_files = [f for f in ipac_cache_dir.iterdir() if f.is_file() and f.name.startswith("updateExoplanetArchivePS_")] + if (len(filtered_files) > 1): + sorted_files = sorted(filtered_files, key=extract_time) + target_file = sorted_files[-2] + new_file = sorted_files[-1] + print(target_file) + print(new_file) + elif filtered_files: + print("1 or 0 files, must initial update file.") + else: + print("No matching files.") + + filtered_files2 = [f for f in ipac_cache_dir.iterdir() if f.is_file() and f.name.startswith("updateExoplanetArchivePSCP_")] + if (len(filtered_files2) > 1): + sorted_files2 = sorted(filtered_files2, key=extract_time) + target_file2 = sorted_files2[-2] + new_file2 = sorted_files2[-1] + print(target_file2) + print(new_file2) + elif filtered_files2: + print("1 or 0 files, must initial update file.") + else: + print("No matching files.") + + with 
open(target_file, 'rb') as file: + ps_data = pickle.load(file) + + with open(target_file2, 'rb') as file2: + pscp_data = pickle.load(file2) + + with open(new_file, 'rb') as file3: + new_ps_data = pickle.load(file3) + + with open(new_file2, 'rb') as file4: + new_pscp_data = pickle.load(file4) + + old_ipac_data = get_store_ipac_helper(pscp_data=pscp_data, ps_data = ps_data) + + new_ipac_data = get_store_ipac_helper(pscp_data=new_pscp_data, ps_data= new_ps_data) + + return old_ipac_data, new_ipac_data + def get_current_database(connection: sqlalchemy.Connection) -> pd.DataFrame: results = connection.execute(text("SELECT * FROM PLANETS")) df = pd.DataFrame(results.fetchall(), columns = results.keys()) @@ -31,37 +536,64 @@ def get_ipac_database(data_path: Path, cache: bool) -> pd.DataFrame: else: ipac_data = getIPACdata() return ipac_data - -# returns updated, and added -# TODO change to find more accurate differences, using generateTables in plandb_methods, -def find_row_differences(current_database: pd.DataFrame, new_database: pd.DataFrame) -> pd.DataFrame: - changed_rows = [] - columns = ['pl_name', 'pl_letter', 'pl_refname', 'pl_orbper', 'pl_orbpererr1', 'pl_orbpererr2', 'pl_orbperlim', 'pl_orbperstr', 'pl_orblpererr1', 'pl_orblper', 'pl_orblpererr2', 'pl_orblperlim', 'pl_orblperstr', 'pl_orbsmax', 'pl_orbsmaxerr1', 'pl_orbsmaxerr2', 'pl_orbsmaxlim', 'pl_orbsmaxstr', 'pl_orbincl', 'pl_orbinclerr1', 'pl_orbinclerr2', 'pl_orbincllim', 'pl_orbinclstr', 'pl_orbtper', 'pl_orbtpererr1', 'pl_orbtpererr2', 'pl_orbtperlim', 'pl_orbtperstr', 'pl_orbeccen', 'pl_orbeccenerr1', 'pl_orbeccenerr2', 'pl_orbeccenlim', 'pl_orbeccenstr', 'pl_eqt', 'pl_eqterr1', 'pl_eqterr2', 'pl_eqtlim', 'pl_eqtstr', 'pl_occdep', 'pl_occdeperr1', 'pl_occdeperr2', 'pl_occdeplim', 'pl_occdepstr', 'pl_insol', 'pl_insolerr1', 'pl_insolerr2', 'pl_insollim', 'pl_insolstr', 'pl_dens', 'pl_denserr1', 'pl_denserr2', 'pl_denslim', 'pl_densstr', 'pl_trandep', 'pl_trandeperr1', 'pl_trandeperr2', 'pl_trandeplim', 
'pl_trandepstr', 'pl_tranmid', 'pl_tranmiderr1', 'pl_tranmiderr2', 'pl_tranmidlim', 'pl_tranmidstr', 'pl_trandur', 'pl_trandurerr1', 'pl_trandurerr2', 'pl_trandurlim', 'pl_trandurstr', 'pl_controv_flag', 'pl_tsystemref', 'pl_projobliq', 'pl_projobliqerr1', 'pl_projobliqerr2', 'pl_projobliqlim', 'pl_projobliqstr', 'pl_rvamp', 'pl_rvamperr1', 'pl_rvamperr2', 'pl_rvamplim', 'pl_rvampstr', 'pl_radj', 'pl_radjerr1', 'pl_radjerr2', 'pl_radjlim', 'pl_radjstr', 'pl_radestr', 'pl_ratror', 'pl_ratrorerr1', 'pl_ratrorerr2', 'pl_ratrorlim', 'pl_ratrorstr', 'pl_ratdor', 'pl_trueobliq', 'pl_trueobliqerr1', 'pl_trueobliqerr2', 'pl_trueobliqlim', 'pl_trueobliqstr', 'pl_pubdate', 'pl_ratdorerr1', 'pl_ratdorerr2', 'pl_ratdorlim', 'pl_ratdorstr', 'pl_imppar', 'pl_impparerr1', 'pl_impparerr2', 'pl_impparlim', 'pl_impparstr', 'pl_cmassj', 'pl_cmassjerr1', 'pl_cmassjerr2', 'pl_cmassjlim', 'pl_cmassjstr', 'pl_cmasse', 'pl_cmasseerr1', 'pl_cmasseerr2', 'pl_cmasselim', 'pl_cmassestr', 'pl_massj', 'pl_massjerr1', 'pl_massjerr2', 'pl_massjlim', 'pl_massjstr', 'pl_massestr', 'pl_bmassj', 'pl_bmassjerr1', 'pl_bmassjerr2', 'pl_bmassjlim', 'pl_bmassjstr', 'pl_bmasse', 'pl_bmasseerr1', 'pl_bmasseerr2', 'pl_bmasselim', 'pl_bmassestr', 'pl_bmassprov', 'pl_msinij', 'pl_msinijerr1', 'pl_msinijerr2', 'pl_msinijlim', 'pl_msinijstr', 'pl_msiniestr', 'pl_nespec', 'pl_ntranspec', 'pl_nnotes', 'pl_def_override', 'pl_calc_sma', 'pl_angsep', 'pl_angseperr1', 'pl_angseperr2', 'pl_radj_forecastermod', 'pl_radj_forecastermoderr1', 'pl_radj_forecastermoderr2', 'pl_radj_fortney', 'pl_radj_fortneyerr1', 'pl_radj_fortneyerr2', 'pl_maxangsep', 'pl_minangsep', 'disc_year', 'disc_refname', 'discoverymethod', 'disc_locale', 'ima_flag', 'disc_instrument', 'disc_telescope', 'disc_facility', 'rv_flag'] - - for index, row in current_database.iterrows(): - name = row['pl_name'] - print(name) + + #Could i have just done comparison = old_df == updated_df +def get_ipac_differences(old_df: pd.DataFrame, updated_df: 
pd.DataFrame, tolerance: float = 1e-1): + col_names1 = old_df.columns.tolist() + col_names2 = updated_df.columns.tolist() + change_log = [] - filter = new_database.query(f'pl_name == "{name}"') - filtered_planet = filter.loc(1) + if col_names1 != col_names2: + return pd.DataFrame(), change_log - for col_name in columns: + diff_df = pd.DataFrame() - if filtered_planet[col_name].count() > 0: - ipac_col = filtered_planet[col_name].values[0] - sios_col = row[col_name] - - if ipac_col != None: - if (ipac_col != sios_col): - changed_rows.append(row["pl_name"]) - else: - changed_rows.append(row['pl_name']) + for index, row in old_df.iterrows(): + + pl_name_ind = row['pl_name'] + corresponding_updated_row = updated_df.loc[updated_df['pl_name'] == pl_name_ind] - set_changed_row = list(set(changed_rows)) + if corresponding_updated_row.empty: + diff_df = pd.concat([diff_df, pd.DataFrame([row])], ignore_index=True) + else: + corresponding_updated_row = corresponding_updated_row.iloc[0] + differences = {} + + for col in old_df.columns: + old_value = row[col] + new_value = corresponding_updated_row[col] + + # Nan's + if pd.isna(old_value) and pd.isna(new_value): + continue # Nan's are equal + + # Tolerance + if isinstance(old_value, (int, float, np.number)) and isinstance(new_value, (int, float, np.number)): + if abs(old_value - new_value) <= tolerance: + continue # Tolerance considered equal + + if old_value != new_value: + differences[col] = {"old": old_value, "new": new_value} + + if differences: + diff_df = pd.concat([diff_df, pd.DataFrame([corresponding_updated_row])], ignore_index=True) + + updated_df = updated_df.drop(corresponding_updated_row.name) - return new_database[new_database['pl_name'].isin(set_changed_row)] + if not updated_df.empty: + diff_df = pd.concat([diff_df, updated_df], ignore_index=True) + + return diff_df, change_log + + +def upsert_general(old_df: pd.DataFrame, new_df: pd.DataFrame, col : str) -> pd.DataFrame: + updated_old_df = 
old_df[~old_df[col].isin(new_df[col])] + + return pd.concat([updated_old_df, new_df], ignore_index=True) + + def get_photo_data(photdict_path: Path, infile: str, cache: bool): # photdict_path = Path(f'cache/update_photdict_2022-05.p') if cache: @@ -243,8 +775,8 @@ def get_compiled_contrast(compiled_contr_curvs_path: Path, stars: pd.DataFrame, # TODO figure out whats happening, why does the original of this from database_main use pdfs = comp_data # newpdfs = pdfs.apply(addId, axis = 1) - return contrast_curves, newpdfs - + return contrast_curves, newpdfs + def get_compiled_completeness(compiled_completeness_path: Path, comple_data: pd.DataFrame) -> pd.DataFrame: # scenarios = pd.read_csv("plandb.sioslab.com/cache/scenario_angles.csv") # compiled_completeness_path = Path(f"plandb.sioslab.com/cache/compiled_completeness_{datestr}.p") @@ -284,7 +816,7 @@ def compileCompleteness(comp_data: pd.DataFrame) -> pd.DataFrame: completeness.to_pickle(compiled_completeness_path) return completeness -# create a new shorter database of the updated, that is able to merge later +# Create a new shorter database of the updated, that is able to merge later # TODO change this to only make dataframe, and then add to database def update_sql(engine, plandata=None, stdata=None, orbitfits=None, orbdata=None, pdfs=None, aliases=None,contrastCurves=None,scenarios=None, completeness=None): return @@ -318,15 +850,6 @@ def get_all_from_db(connection: sqlalchemy.Connection) -> Tuple[pd.DataFrame, pd return completeness_df, contrast_curves_df, orbitfits_df, orbits_df, pdfs_df, planets_df, scenarios_df, stars_df -def upsert_dataframe(current_df : pd.DataFrame, new_df : pd.DataFrame , key_column) -> pd.DataFrame: - current_df.set_index(key_column, inplace=True) - new_df.set_index(key_column, inplace=True) - - df_combined = current_df.combine_first(new_df) - - df_combined.reset_index(inplace=True) - - return df_combined def temp_writeSQL(engine, plandata=None, stdata=None, orbitfits=None, 
orbdata=None, pdfs=None, aliases=None,contrastCurves=None,scenarios=None, completeness=None): """write outputs to sql database via connection""" connection = engine.connect() @@ -358,7 +881,7 @@ def temp_writeSQL(engine, plandata=None, stdata=None, orbitfits=None, orbdata=No #set indexes result = connection.execute(text("ALTER TABLE Planets ADD INDEX (pl_id)")) result = connection.execute(text("ALTER TABLE Planets ADD INDEX (st_id)")) - result = connection.execute(text("ALTER TABLE Planets ADD FOREIGN KEY (st_id) REFERENCES Stars(st_id) ON DELETE NO ACTION ON UPDATE NO ACTION")); + # result = connection.execute(text("ALTER TABLE Planets ADD FOREIGN KEY (st_id) REFERENCES Stars(st_id) ON DELETE NO ACTION ON UPDATE NO ACTION")); #add comments # addSQLcomments(connection,'Planets') @@ -470,6 +993,151 @@ def temp_writeSQL(engine, plandata=None, stdata=None, orbitfits=None, orbdata=No +def final_writeSQL(engine, plandata=None, stdata=None, orbitfits=None, orbdata=None, pdfs=None, aliases=None,contrastCurves=None,scenarios=None, completeness=None): + """write outputs to sql database via connection""" + connection = engine.connect() + connection.execute(text("DROP TABLE IF EXISTS Completeness, ContrastCurves, Scenarios, PDFs, Orbits, OrbitFits, Planets, Stars")) + + if stdata is not None: + # print("Writing Stars") + # namemxchar = np.array([len(n) for n in stdata['st_name'].values]).max() + # stdata = stdata.rename_axis('st_id') + # stdata.to_sql('Stars', connection, chunksize=100, if_exists='replace', + # dtype={'st_id': sqlalchemy.types.INT, + # 'st_name': sqlalchemy.types.String(namemxchar)}) + # # set indexes + # result = connection.execute(text('ALTER TABLE Stars ADD INDEX (st_id)')) + stdata.to_sql('Stars', connection) + # add comments + # addSQLcomments(connection, 'Stars') + + if plandata is not None: + print("Writing Planets") + # namemxchar = np.array([len(n) for n in plandata['pl_name'].values]).max() + # plandata = plandata.rename_axis('pl_id') + # 
plandata.to_sql('Planets',connection,chunksize=100,if_exists='replace', + # dtype={'pl_id':sqlalchemy.types.INT, + # 'pl_name':sqlalchemy.types.String(namemxchar), + # 'st_name':sqlalchemy.types.String(namemxchar-2), + # 'pl_letter':sqlalchemy.types.CHAR(1), + # 'st_id': sqlalchemy.types.INT}) + # #set indexes + # result = connection.execute(text("ALTER TABLE Planets ADD INDEX (pl_id)")) + # result = connection.execute(text("ALTER TABLE Planets ADD INDEX (st_id)")) + # result = connection.execute(text("ALTER TABLE Planets ADD FOREIGN KEY (st_id) REFERENCES Stars(st_id) ON DELETE NO ACTION ON UPDATE NO ACTION")); + plandata.to_sql('Planets', connection) + #add comments + # addSQLcomments(connection,'Planets') + + if orbitfits is not None: + print("Writing OrbitFits") + # TODO: figure out if this line is okay to keep + # orbitfits = orbitfits.rename_axis('orbitfit_id') + namemxchar = np.array([len(n) for n in orbitfits['pl_name'].values]).max() + orbitfits.to_sql('OrbitFits',connection,chunksize=100,if_exists='replace', + dtype={'pl_id': sqlalchemy.types.INT, + 'orbitfit_id': sqlalchemy.types.INT, + 'pl_name': sqlalchemy.types.String(namemxchar)}, + index=True) + # result = connection.execute(text("ALTER TABLE OrbitFits ADD INDEX (orbitfit_id)")) + # result = connection.execute(text("ALTER TABLE OrbitFits ADD INDEX (pl_id)")) + # TODO Get this commented back in to correctly have foreign keys + # result = connection.execute(text("ALTER TABLE OrbitFits ADD FOREIGN KEY (pl_id) REFERENCES Planets(pl_id) ON DELETE NO ACTION ON UPDATE NO ACTION")); + # orbitfits.to_sql('OrbitFits', connection) + result = connection.execute(text("ALTER TABLE OrbitFits ROW_FORMAT=COMPRESSED")) + + # addSQLcomments(connection,'OrbitFits') + if orbdata is not None: + print("Writing Orbits") + namemxchar = np.array([len(n) for n in orbdata['pl_name'].values]).max() + orbdata = orbdata.rename_axis('orbit_id') + orbdata.to_sql('Orbits',connection,chunksize=100,if_exists='replace', + 
dtype={'pl_name':sqlalchemy.types.String(namemxchar), + 'pl_id': sqlalchemy.types.INT, + 'orbit_id': sqlalchemy.types.BIGINT, + 'orbitfit_id': sqlalchemy.types.INT}, + index=True) + result = connection.execute(text("ALTER TABLE Orbits ADD INDEX (orbit_id)")) + result = connection.execute(text("ALTER TABLE Orbits ADD INDEX (pl_id)")) + result = connection.execute(text("ALTER TABLE Orbits ADD INDEX (orbitfit_id)")) + # TODO Get this commented back in to correctly have foreign keys + # result = connection.execute(text("ALTER TABLE Orbits ADD FOREIGN KEY (pl_id) REFERENCES Planets(pl_id) ON DELETE NO ACTION ON UPDATE NO ACTION")); + # result = connection.execute(text("ALTER TABLE Orbits ADD FOREIGN KEY (orbitfit_id) REFERENCES OrbitFits(orbitfit_id) ON DELETE NO ACTION ON UPDATE NO ACTION")); + orbdata.to_sql('Orbits', connection) + # addSQLcomments(connection,'Orbits') + + if pdfs is not None: + # print("Writing PDFs") + # pdfs = pdfs.reset_index(drop=True) + # namemxchar = np.array([len(n) for n in pdfs['Name'].values]).max() + # pdfs = pdfs.rename_axis('pdf_id') + # pdfs.to_sql('PDFs',connection,chunksize=100,if_exists='replace', + # dtype={'pl_name':sqlalchemy.types.String(namemxchar), + # 'pl_id': sqlalchemy.types.INT}) + # # result = connection.execute("ALTER TABLE PDFs ADD INDEX (orbitfit_id)") + # result = connection.execute(text("ALTER TABLE PDFs ADD INDEX (pl_id)")) + # result = connection.execute(text("ALTER TABLE PDFs ADD INDEX (pdf_id)")) + # # result = connection.execute("ALTER TABLE PDFs ADD FOREIGN KEY (orbitfit_id) REFERENCES OrbitFits(orbitfit_id) ON DELETE NO ACTION ON UPDATE NO ACTION") + # result = connection.execute(text("ALTER TABLE PDFs ADD FOREIGN KEY (pl_id) REFERENCES Planets(pl_id) ON DELETE NO ACTION ON UPDATE NO ACTION")) + pdfs.to_sql('PDFs', connection) + # addSQLcomments(connection,'PDFs') + + if aliases is not None: + print("Writing Alias") + aliases.to_sql('Aliases', connection) + # aliases = aliases.rename_axis('alias_id') + # 
aliasmxchar = np.array([len(n) for n in aliases['Alias'].values]).max() + # aliases.to_sql('Aliases',connection,chunksize=100,if_exists='replace',dtype={'Alias':sqlalchemy.types.String(aliasmxchar)}) + # result = connection.execute(text("ALTER TABLE Aliases ADD INDEX (alias_id)")) + # result = connection.execute(text("ALTER TABLE Aliases ADD INDEX (Alias)")) + # result = connection.execute(text("ALTER TABLE Aliases ADD INDEX (st_id)")) + # result = connection.execute(text("ALTER TABLE Aliases ADD FOREIGN KEY (st_id) REFERENCES Stars(st_id) ON DELETE NO ACTION ON UPDATE NO ACTION")) + + if scenarios is not None: + print("Writing Scenarios") + scenarios.to_sql("Scenarios", connection) + + # namemxchar = np.array([len(n) for n in scenarios['scenario_name'].values]).max() + # scenarios.to_sql("Scenarios", connection, chunksize=100, if_exists='replace', dtype={ + # 'scenario_name': sqlalchemy.types.String(namemxchar),}, index = False) + + # result = connection.execute(text("ALTER TABLE Scenarios ADD INDEX (scenario_name)")) + + if contrastCurves is not None: + print("Writing ContrastCurves") + + contrastCurves.to_sql("ContrastCurves", connection) + # contrastCurves = contrastCurves.rename_axis("curve_id") + # #TODO change namemx char to commented out line, temporary value of 1000000 used here as a placeholder + # # namemxchar = np.array([len(n) for n in contrastCurves['scenario_name'].values]).max() + # namemxchar = 1000 + # contrastCurves.to_sql("ContrastCurves", connection, chunksize=100, if_exists='replace', dtype={ + # 'st_id': sqlalchemy.types.INT, + # 'curve_id' : sqlalchemy.types.INT, + # 'scenario_name': sqlalchemy.types.String(namemxchar)}, index = True) + # # result = connection.execute("ALTER TABLE ContastCurves ADD INDEX (scenario_name)") + + # result = connection.execute("ALTER TABLE ContastCurves ADD INDEX (st_id)") + + # TODO: get this commented back in to have correct foreign keys + # result = connection.execute(text("ALTER TABLE ContrastCurves ADD 
FOREIGN KEY (st_id) REFERENCES Stars(st_id) ON DELETE NO ACTION ON UPDATE NO ACTION")) + # result = connection.execute(text("ALTER TABLE ContrastCurves ADD FOREIGN KEY (scenario_name) REFERENCES Scenarios(scenario_name) ON DELETE NO ACTION ON UPDATE NO ACTION")) + + if completeness is not None: + # print("Writing completeness") + # completeness = completeness.rename_axis("completeness_id") + # namemxchar = np.array([len(n) for n in completeness['scenario_name'].values]).max() + # completeness.to_sql("Completeness", connection, chunksize=100, if_exists='replace', dtype={ + # # 'scenario_id': sqlalchemy.types.INT, + # 'pl_id' : sqlalchemy.types.INT, + # 'completeness_id' : sqlalchemy.types.INT, + # 'scenario_name': sqlalchemy.types.String(namemxchar)}, index = True) + completeness.to_sql("Completeness", connection) + # TODO: get this commented back in to have correct foreign keys + # result = connection.execute(text("ALTER TABLE Completeness ADD FOREIGN KEY (pl_id) REFERENCES Planets(pl_id) ON DELETE NO ACTION ON UPDATE NO ACTION")) + # result = connection.execute(text("ALTER TABLE ContrastCurves ADD FOREIGN KEY (scenario_name) REFERENCES Scenarios(scenario_name) ON DELETE NO ACTION ON UPDATE NO ACTION")) + + diff --git a/backend/update_util_test_suite.py b/backend/update_util_test_suite.py new file mode 100644 index 0000000..da4c118 --- /dev/null +++ b/backend/update_util_test_suite.py @@ -0,0 +1,277 @@ +import unittest +from update_util import * + +df_original = pd.DataFrame({ + 'pl_name': ['11 Com b','11 UMi b', '14 And b'], + 'pl_letter': ['b', 'b', 'b'], + 'pl_orbper': [326.03, 516.22, 185.84] + }) + +df_let_modified = pd.DataFrame({ + 'pl_name': ['11 Com b','11 UMi b', '14 And b'], + 'pl_letter': ['b', 'a', 'b'], + 'pl_orbper': [326.03, 516.22, 185.84] + }) + +df_num_modified = pd.DataFrame({ + 'pl_name': ['11 Com b','11 UMi b', '14 And b'], + 'pl_letter': ['b','b', 'b'], + 'pl_orbper': [326.03, 516.22, 180.84] + }) + +df_new_rows = pd.DataFrame({ + 'pl_name': 
['11 Com b','11 UMi b', '14 And b', '14 Her b'], + 'pl_letter': ['b','b', 'b', 'b'], + 'pl_orbper': [326.03, 516.22, 185.84, 1773.4] + }) + +df_modified_and_new_rows = pd.DataFrame({ + 'pl_name': ['11 Com b','11 UMi b', '14 And b', '14 Her b'], + 'pl_letter': ['b','b', 'b', 'b'], + 'pl_orbper': [326.03, 512.22, 185.84, 1773.4] + }) + +df_let_changed_res = pd.DataFrame({ + 'pl_name': ['11 UMi b'], + 'pl_letter': ['a'], + 'pl_orbper': [516.22] +}) + +df_num_changed_res = pd.DataFrame({ + 'pl_name': ['14 And b'], + 'pl_letter': ['b'], + 'pl_orbper': [180.84] +}) + +df_new_res = pd.DataFrame({ + 'pl_name': ['14 Her b'], + 'pl_letter': ['b'], + 'pl_orbper': [1773.4] +}) + + +df_modified_new_res = pd.DataFrame({ + 'pl_name': ['11 UMi b', '14 Her b'], + 'pl_letter': ['b', 'b'], + 'pl_orbper': [512.22, 1773.4] +}) + + + +# TODO: Mismatching col needed + + + +class TestFunctions(unittest.TestCase): + def test_get_ipac_differences(self): + + let_modified_differences, _ = get_ipac_differences(df_original, df_let_modified) + num_modified_differences, _ = get_ipac_differences(df_original, df_num_modified) + new_rows_differences, _ = get_ipac_differences(df_original, df_new_rows) + modified_new_differences, _ = get_ipac_differences(df_original, df_modified_and_new_rows) + + pd.testing.assert_frame_equal(let_modified_differences, df_let_changed_res) + pd.testing.assert_frame_equal(num_modified_differences, df_num_changed_res) + pd.testing.assert_frame_equal(new_rows_differences, df_new_res) + pd.testing.assert_frame_equal(modified_new_differences, df_modified_new_res) + + def test_upsert(self): + + df_merged_original = pd.DataFrame({ + 'pl_name': ['11 Com b','11 UMi b', '14 And b'], + 'pl_letter': ['b', 'b', 'b'], + 'pl_orbper': [326.03, 516.22, 185.84] + }) + + df_all_new = pd.DataFrame({ + 'pl_name': ['14 Her b'], + 'pl_letter': ['b'], + 'pl_orbper': [1773.4] + }) + + df_some_new = pd.DataFrame({ + 'pl_name': ['11 Com b','11 UMi b', '14 And b', '14 Her b'], + 'pl_letter': ['b', 
'b', 'b', 'b'], + 'pl_orbper': [326.03, 516.22, 185.84, 1773.4] + }) + + df_updated = pd.DataFrame({ + 'pl_name': ['11 Com b','11 UMi b', '14 And b'], + 'pl_letter': ['b', 'b', 'b'], + 'pl_orbper': [326.03, 512.0, 185.84] + }) + + df_updated_new = pd.DataFrame({ + 'pl_name': ['11 Com b','11 UMi b', '14 And b', '14 Her b'], + 'pl_letter': ['b', 'b', 'b', 'b'], + 'pl_orbper': [326.03, 512, 185.84, 1773.4] + }) + + df_merged_all_res = pd.DataFrame({ + 'pl_name': ['11 Com b','11 UMi b', '14 And b', '14 Her b'], + 'pl_letter': ['b', 'b', 'b', 'b'], + 'pl_orbper': [326.03, 516.22, 185.84, 1773.4] + }) + + df_merged_some_res = pd.DataFrame({ + 'pl_name': ['11 Com b','11 UMi b', '14 And b', '14 Her b'], + 'pl_letter': ['b', 'b', 'b', 'b'], + 'pl_orbper': [326.03, 516.22, 185.84, 1773.4] + }) + + df_merged_updated_res = pd.DataFrame({ + 'pl_name': ['11 Com b','11 UMi b', '14 And b'], + 'pl_letter': ['b', 'b', 'b'], + 'pl_orbper': [326.03, 512, 185.84] + }) + + df_merged_updated_new_res = pd.DataFrame({ + 'pl_name': ['11 Com b','11 UMi b', '14 And b', '14 Her b'], + 'pl_letter': ['b', 'b', 'b', 'b'], + 'pl_orbper': [326.03, 512, 185.84, 1773.4] + }) + + print(df_some_new.columns) + + upsert_same = upsert_general(df_merged_original, df_merged_original, 'pl_name') + upsert_all_new = upsert_general(df_merged_original, df_all_new, 'pl_name') + upsert_some_new = upsert_general(df_merged_original, df_some_new, 'pl_name') + upsert_updated = upsert_general(df_merged_original, df_updated, 'pl_name') + upsert_updated_new = upsert_general(df_merged_original, df_updated_new, 'pl_name') + + + + pd.testing.assert_frame_equal(df_merged_original, upsert_same) + pd.testing.assert_frame_equal(upsert_all_new, df_merged_all_res) + pd.testing.assert_frame_equal(upsert_some_new, df_merged_some_res) + pd.testing.assert_frame_equal(upsert_updated, df_merged_updated_res) + pd.testing.assert_frame_equal(upsert_updated_new, df_merged_updated_new_res) + + def test_completeness_upsert(self): + 
df_merged_original = pd.DataFrame({ + 'completeness_id': [0, 1, 2, 3], + 'pl_id': [3, 3, 3, 3], + 'completeness': [0, 0, 0, 0.085041], + 'scenario_name': ['Conservative_NF_Imager_25hr', 'Conservative_NF_Imager_100hr', 'Conservative_NF_Imager_10000hr', 'Optimistic_NF_Imager_25hr'], + 'compMinWA': [None, None, None, None], + 'compMaxWA': [None, None, None, None], + 'compMindMag': [None, None, None, None], + 'compMaxdMag': [None, None, None, None] + }) + + df_merged_modified = pd.DataFrame({ + 'completeness_id': [0, 1, 2, 3], + 'pl_id': [3, 3, 3, 3], + 'completeness': [0, 0, 0.084, 0.085041], + 'scenario_name': ['Conservative_NF_Imager_25hr', 'Conservative_NF_Imager_100hr', 'Conservative_NF_Imager_10000hr', 'Optimistic_NF_Imager_25hr'], + 'compMinWA': [None, None, None, None], + 'compMaxWA': [None, None, None, None], + 'compMindMag': [None, None, None, None], + 'compMaxdMag': [None, None, None, None] + }) + + df_merged_modified_new = pd.DataFrame({ + 'completeness_id': [0, 1, 2, 3, 4], + 'pl_id': [3, 3, 3, 3, 3], + 'completeness': [0, 0, 0.084, 0.085041, 0.226834], + 'scenario_name': ['Conservative_NF_Imager_25hr', 'Conservative_NF_Imager_100hr', 'Conservative_NF_Imager_10000hr', 'Optimistic_NF_Imager_25hr', 'Optimistic_NF_Imager_100hr'], + 'compMinWA': [None, None, None, None, None], + 'compMaxWA': [None, None, None, None, None], + 'compMindMag': [None, None, None, None, None], + 'compMaxdMag': [None, None, None, None, None] + }) + + df_merged_new = pd.DataFrame({ + 'completeness_id': [0, 1, 2, 3, 4], + 'pl_id': [3, 3, 3, 3, 3], + 'completeness': [0, 0, 0, 0.085041, 0.226834], + 'scenario_name': ['Conservative_NF_Imager_25hr', 'Conservative_NF_Imager_100hr', 'Conservative_NF_Imager_10000hr', 'Optimistic_NF_Imager_25hr', 'Optimistic_NF_Imager_100hr'], + 'compMinWA': [None, None, None, None, None], + 'compMaxWA': [None, None, None, None, None], + 'compMindMag': [None, None, None, None, None], + 'compMaxdMag': [None, None, None, None, None] + }) + + df_merged_single_new 
= pd.DataFrame({ + 'completeness_id': [0], + 'pl_id': [3], + 'completeness': ['0.270857'], + 'scenario_name': ['Optimistic_NF_Imager_10000hr'], + 'compMinWA': [None], + 'compMaxWA': [None], + 'compMindMag': [None], + 'compMaxdMag': [None] + }) + + df_merged_modified_res = pd.DataFrame({ + 'completeness_id': [0, 1, 2, 3], + 'pl_id': [3, 3, 3, 3], + 'completeness': [0, 0, 0.084, 0.085041], + 'scenario_name': ['Conservative_NF_Imager_25hr', 'Conservative_NF_Imager_100hr', 'Conservative_NF_Imager_10000hr', 'Optimistic_NF_Imager_25hr'], + 'compMinWA': [None, None, None, None], + 'compMaxWA': [None, None, None, None], + 'compMindMag': [None, None, None, None], + 'compMaxdMag': [None, None, None, None] + }) + + df_merged_modified_new_res = pd.DataFrame({ + 'completeness_id': [0, 1, 2, 3, 4], + 'pl_id': [3, 3, 3, 3, 3], + 'completeness': [0, 0, 0.084, 0.085041, 0.226834], + 'scenario_name': ['Conservative_NF_Imager_25hr', 'Conservative_NF_Imager_100hr', 'Conservative_NF_Imager_10000hr', 'Optimistic_NF_Imager_25hr', 'Optimistic_NF_Imager_100hr'], + 'compMinWA': [None, None, None, None, None], + 'compMaxWA': [None, None, None, None, None], + 'compMindMag': [None, None, None, None, None], + 'compMaxdMag': [None, None, None, None, None] + }) + + df_merged_new_res = pd.DataFrame({ + 'completeness_id': [0, 1, 2, 3, 4], + 'pl_id': [3, 3, 3, 3, 3], + 'completeness': [0, 0, 0, 0.085041, 0.226834], + 'scenario_name': ['Conservative_NF_Imager_25hr', 'Conservative_NF_Imager_100hr', 'Conservative_NF_Imager_10000hr', 'Optimistic_NF_Imager_25hr', 'Optimistic_NF_Imager_100hr'], + 'compMinWA': [None, None, None, None, None], + 'compMaxWA': [None, None, None, None, None], + 'compMindMag': [None, None, None, None, None], + 'compMaxdMag': [None, None, None, None, None] + }) + + df_merged_single_res = pd.DataFrame({ + 'completeness_id': [0], + 'pl_id': [3], + 'completeness': ['0.270857'], + 'scenario_name': ['Optimistic_NF_Imager_10000hr'], + 'compMinWA': [None], + 'compMaxWA': [None], + 
'compMindMag': [None], + 'compMaxdMag': [None] + }) + + upsert_same = upsert_general(df_merged_original, df_merged_original, 'pl_id') + upsert_modified = upsert_general(df_merged_original, df_merged_modified, 'pl_id') + upsert_modified_new = upsert_general(df_merged_original, df_merged_modified_new, 'pl_id') + upsert_new = upsert_general(df_merged_original, df_merged_new, 'pl_id') + upsert_single = upsert_general(df_merged_original, df_merged_single_new, 'pl_id') + + print(upsert_modified_new) + print(df_merged_modified_new_res) + + + pd.testing.assert_frame_equal(df_merged_original, upsert_same) + pd.testing.assert_frame_equal(upsert_modified, df_merged_modified_res) + pd.testing.assert_frame_equal(upsert_modified_new, df_merged_modified_new_res) + pd.testing.assert_frame_equal(upsert_new, df_merged_new_res) + pd.testing.assert_frame_equal(upsert_single, df_merged_single_res) + + return + + def test_upsert(self): + + + + + return + + +if __name__ == '__main__': + unittest.main() \ No newline at end of file From 96983d1097bacd5f2e8fcaeb5c3d913df7dd7eae Mon Sep 17 00:00:00 2001 From: Andrew Chiu Date: Tue, 12 Nov 2024 00:32:38 -0500 Subject: [PATCH 2/6] Working Full Update --- backend/database_main.py | 3 +- backend/plandb_methods.py | 18 +-- backend/update_plandb_main.py | 31 +++- backend/update_util.py | 260 ++++++++++++++++++++++++++++++---- 4 files changed, 268 insertions(+), 44 deletions(-) diff --git a/backend/database_main.py b/backend/database_main.py index 2cf1441..d67d6e2 100644 --- a/backend/database_main.py +++ b/backend/database_main.py @@ -114,5 +114,4 @@ def addId(r): contrast_curves.to_excel('main_contrast.xlsx') completeness.to_excel('main_completeness.xlsx') - - final_writeSQL(engine, plandata=planets, stdata=stars, orbitfits=orbitfits, orbdata=orbits, pdfs=newpdfs, aliases=None,contrastCurves=contrast_curves,scenarios=scenarios, completeness=completeness) + writeSQL(engine, plandata=planets, stdata=stars, orbitfits=orbitfits, orbdata=orbits, 
pdfs=newpdfs, aliases=None,contrastCurves=contrast_curves,scenarios=None, completeness=completeness) diff --git a/backend/plandb_methods.py b/backend/plandb_methods.py index d347026..414e428 100644 --- a/backend/plandb_methods.py +++ b/backend/plandb_methods.py @@ -35,7 +35,7 @@ from kep_generator import planet try: - from StringIO import StringIO # type: ignore + from StringIO import StringIO except ImportError: from io import BytesIO as StringIO @@ -2083,7 +2083,7 @@ def writeSQL(engine, plandata=None, stdata=None, orbitfits=None, orbdata=None, p index=True) result = connection.execute(text("ALTER TABLE OrbitFits ADD INDEX (orbitfit_id)")) result = connection.execute(text("ALTER TABLE OrbitFits ADD INDEX (pl_id)")) - result = connection.execute(text("ALTER TABLE OrbitFits ADD FOREIGN KEY (pl_id) REFERENCES Planets(pl_id) ON DELETE NO ACTION ON UPDATE NO ACTION")); + # result = connection.execute(text("ALTER TABLE OrbitFits ADD FOREIGN KEY (pl_id) REFERENCES Planets(pl_id) ON DELETE NO ACTION ON UPDATE NO ACTION")); # addSQLcomments(connection,'OrbitFits') @@ -2100,8 +2100,8 @@ def writeSQL(engine, plandata=None, stdata=None, orbitfits=None, orbdata=None, p result = connection.execute(text("ALTER TABLE Orbits ADD INDEX (orbit_id)")) result = connection.execute(text("ALTER TABLE Orbits ADD INDEX (pl_id)")) result = connection.execute(text("ALTER TABLE Orbits ADD INDEX (orbitfit_id)")) - result = connection.execute(text("ALTER TABLE Orbits ADD FOREIGN KEY (pl_id) REFERENCES Planets(pl_id) ON DELETE NO ACTION ON UPDATE NO ACTION")); - result = connection.execute(text("ALTER TABLE Orbits ADD FOREIGN KEY (orbitfit_id) REFERENCES OrbitFits(orbitfit_id) ON DELETE NO ACTION ON UPDATE NO ACTION")); + # result = connection.execute(text("ALTER TABLE Orbits ADD FOREIGN KEY (pl_id) REFERENCES Planets(pl_id) ON DELETE NO ACTION ON UPDATE NO ACTION")); + # result = connection.execute(text("ALTER TABLE Orbits ADD FOREIGN KEY (orbitfit_id) REFERENCES OrbitFits(orbitfit_id) ON 
DELETE NO ACTION ON UPDATE NO ACTION")); # addSQLcomments(connection,'Orbits') @@ -2117,7 +2117,7 @@ def writeSQL(engine, plandata=None, stdata=None, orbitfits=None, orbdata=None, p result = connection.execute(text("ALTER TABLE PDFs ADD INDEX (pl_id)")) result = connection.execute(text("ALTER TABLE PDFs ADD INDEX (pdf_id)")) # result = connection.execute("ALTER TABLE PDFs ADD FOREIGN KEY (orbitfit_id) REFERENCES OrbitFits(orbitfit_id) ON DELETE NO ACTION ON UPDATE NO ACTION") - result = connection.execute(text("ALTER TABLE PDFs ADD FOREIGN KEY (pl_id) REFERENCES Planets(pl_id) ON DELETE NO ACTION ON UPDATE NO ACTION")) + # result = connection.execute(text("ALTER TABLE PDFs ADD FOREIGN KEY (pl_id) REFERENCES Planets(pl_id) ON DELETE NO ACTION ON UPDATE NO ACTION")) # addSQLcomments(connection,'PDFs') @@ -2152,8 +2152,8 @@ def writeSQL(engine, plandata=None, stdata=None, orbitfits=None, orbdata=None, p # result = connection.execute("ALTER TABLE ContastCurves ADD INDEX (st_id)") - result = connection.execute(text("ALTER TABLE ContrastCurves ADD FOREIGN KEY (st_id) REFERENCES Stars(st_id) ON DELETE NO ACTION ON UPDATE NO ACTION")) - result = connection.execute(text("ALTER TABLE ContrastCurves ADD FOREIGN KEY (scenario_name) REFERENCES Scenarios(scenario_name) ON DELETE NO ACTION ON UPDATE NO ACTION")) + # result = connection.execute(text("ALTER TABLE ContrastCurves ADD FOREIGN KEY (st_id) REFERENCES Stars(st_id) ON DELETE NO ACTION ON UPDATE NO ACTION")) + # result = connection.execute(text("ALTER TABLE ContrastCurves ADD FOREIGN KEY (scenario_name) REFERENCES Scenarios(scenario_name) ON DELETE NO ACTION ON UPDATE NO ACTION")) if completeness is not None: print("Writing completeness") @@ -2165,8 +2165,8 @@ def writeSQL(engine, plandata=None, stdata=None, orbitfits=None, orbdata=None, p 'completeness_id' : sqlalchemy.types.INT, 'scenario_name': sqlalchemy.types.String(namemxchar)}, index = True) - result = connection.execute(text("ALTER TABLE Completeness ADD FOREIGN 
KEY (pl_id) REFERENCES Planets(pl_id) ON DELETE NO ACTION ON UPDATE NO ACTION")) - result = connection.execute(text("ALTER TABLE ContrastCurves ADD FOREIGN KEY (scenario_name) REFERENCES Scenarios(scenario_name) ON DELETE NO ACTION ON UPDATE NO ACTION")) + # result = connection.execute(text("ALTER TABLE Completeness ADD FOREIGN KEY (pl_id) REFERENCES Planets(pl_id) ON DELETE NO ACTION ON UPDATE NO ACTION")) + # result = connection.execute(text("ALTER TABLE ContrastCurves ADD FOREIGN KEY (scenario_name) REFERENCES Scenarios(scenario_name) ON DELETE NO ACTION ON UPDATE NO ACTION")) diff --git a/backend/update_plandb_main.py b/backend/update_plandb_main.py index 8ab4079..076df75 100644 --- a/backend/update_plandb_main.py +++ b/backend/update_plandb_main.py @@ -129,11 +129,11 @@ # Look into and possibly remove the - to _ contr_curvs2_path = Path(f'plandb.sioslab.com/cache/cont_curvs2_{datestr.replace("-", "_")}') compiled_contr_curvs_path = Path(f'plandb.sioslab.com/cache/cont_curvs_{datestr.replace("-", "_")}') - compiled_contrast_curves, newpdfs = get_compiled_contrast(compiled_contr_curvs_path, stdata, comps_data, change_ipac_df, contr_curvs2_path) + # compiled_contrast_curves, newpdfs = get_compiled_contrast(compiled_contr_curvs_path, stdata, comps_data, change_ipac_df, contr_curvs2_path) # compiled_contrast_curves = compileContrastCurves(stdata, compiled_contr_curvs_path) - compiled_contrast_curves.to_excel("plandb.sioslab.com/backend/sheets/compiled_contrast_curves.xlsx") + # compiled_contrast_curves.to_excel("plandb.sioslab.com/backend/sheets/compiled_contrast_curves.xlsx") # With current code, since get_compiled_contrast isnt fully working with new pdfs, new pdfs should be empty - newpdfs.to_excel("plandb.sioslab.com/backend/sheets/newpdfs.xlsx") + # newpdfs.to_excel("plandb.sioslab.com/backend/sheets/newpdfs.xlsx") # compile completeness @@ -155,7 +155,7 @@ # MakeSQL for the temporary database, creates diff engine, -> upsert diff engine with current engine 
scenarios = pd.read_csv("plandb.sioslab.com/cache/scenario_angles.csv") # writeSQL(engine, plandata=planets, stdata=stars, orbitfits=orbitfits, orbdata=orbits, pdfs=newpdfs, aliases=None,contrastCurves=contrast_curves,scenarios=scenarios, completeness=completeness) - temp_writeSQL(diff_sios_engine, plandata=plan_data, stdata=stdata, orbitfits=orbfits, orbdata=orbdata, pdfs=None, aliases=None, contrastCurves=compiled_contrast_curves, scenarios=scenarios, completeness=compiled_completeness) + temp_writeSQL(diff_sios_engine, plandata=plan_data, stdata=stdata, orbitfits=orbitfits, orbdata=orbdata, pdfs=None, aliases=None, contrastCurves=None, scenarios=scenarios, completeness=compiled_completeness) # Get from diff_database diff_engine_connection = diff_sios_engine.connect() @@ -211,12 +211,28 @@ print("Merging stars") merged_stars = upsert_general(old_stars_df, diff_stars_df, "st_name") + print('old orbitfits') + print(old_orbitfits_df) + print("diff orbitfits") + print(diff_orbitfits_df) + old_orbitfits_df.to_excel('plandb.sioslab.com/backend/sheets/original_orbitfits.xlsx') + diff_orbitfits_df.to_excel('plandb.sioslab.com/backend/sheets/diff_orbitfits.xlsx') + # print("diff between diff and old, this should be nothing") + # print(diff_orbits_df) + input7 = input("test") + # For these later upserts, only way to properly upsert is to detect the change from the earlier value, like the difference in planets from ipac, and then categorize the change as a result from upated information or new information. If new information (new planet), just add but if updated information, going to have to track down the previous. 
Maybe it's possible for me to locally store database versions, so it can be quickly updated based on path of changes that happened # Upsert orbitfits + # TODO: maybe the error is here print("Merging orbit fits") merged_orbitfits = upsert_general(old_orbitfits_df, diff_orbitfits_df, "pl_id") - # TODO: Orbitfits don't have anyway of uniquely upserting other than orbit fit id, so if there's new orbit fits, they must be detected from the other builds, and then added + input5 = input("test2") + + + # TODO: compile/calc contrast curves still needs fixing + # TODO: make a seperate database for todays data, the old data, and test by comparing todays data to old data + update + # Upsert orbits print("Merging orbits") merged_orbits = upsert_general(old_orbits_df, diff_orbits_df, "pl_id") @@ -236,10 +252,11 @@ # write back to original database with new values, # TODO: optionally, store old database in a different database for archive print("Merging and final write") - final_writeSQL(new_engine, merged_planets, merged_stars, merged_orbitfits, merged_orbits, None, aliases=None, contrastCurves=None, scenarios=scenarios, completeness=merged_completeness) + write_update_SQL(new_engine, merged_planets, merged_stars, merged_orbitfits, merged_orbits, None, aliases=None, contrastCurves= None, scenarios=None, completeness=merged_completeness) print("Done") # TODO: Print all total changes # TODO: Correct the merges/compiles with no changes, ending up outputting an empty table, (account for table upserts with no changes) (Handle no new updates case). For example, final completeness would be empty if there are no changes to completeness in the update. 
- # TODO: Get "result = connection.execute(text("ALTER TABLE OrbitFits ROW_FORMAT=COMPRESSED"))" removed, line 1047 of update_util.py, operational error when that line is not there, it has to do with database setup \ No newline at end of file + + diff --git a/backend/update_util.py b/backend/update_util.py index e55b10b..e92c4cb 100644 --- a/backend/update_util.py +++ b/backend/update_util.py @@ -589,6 +589,10 @@ def get_ipac_differences(old_df: pd.DataFrame, updated_df: pd.DataFrame, toleran def upsert_general(old_df: pd.DataFrame, new_df: pd.DataFrame, col : str) -> pd.DataFrame: + + if (new_df.empty): + return old_df + updated_old_df = old_df[~old_df[col].isin(new_df[col])] return pd.concat([updated_old_df, new_df], ignore_index=True) @@ -822,33 +826,56 @@ def update_sql(engine, plandata=None, stdata=None, orbitfits=None, orbdata=None, return def get_all_from_db(connection: sqlalchemy.Connection) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame]: - planets_results = connection.execute(text("SELECT * FROM PLANETS")) - planets_df = pd.DataFrame(planets_results.fetchall(), columns = planets_results.keys()) + try: + planets_results = connection.execute(text("SELECT * FROM PLANETS")) + planets_df = pd.DataFrame(planets_results.fetchall(), columns = planets_results.keys()) + except Exception as e: + planets_df = pd.DataFrame() + + try: + contrast_curves_results = connection.execute(text("SELECT * FROM ContrastCurves")) + contrast_curves_df = pd.DataFrame(contrast_curves_results.fetchall(), columns = contrast_curves_results.keys()) + except Exception as e: + contrast_curves_df = pd.DataFrame() + try: + orbitfits_results = connection.execute(text("SELECT * FROM OrbitFits")) + orbitfits_df = pd.DataFrame(orbitfits_results.fetchall(), columns = orbitfits_results.keys()) + except Exception as e: + orbitfits_df = pd.DataFrame() - contrast_curves_results = connection.execute(text("SELECT * FROM 
ContrastCurves")) - contrast_curves_df = pd.DataFrame(contrast_curves_results.fetchall(), columns = contrast_curves_results.keys()) + try: + orbits_results = connection.execute(text("SELECT * FROM Orbits")) + orbits_df = pd.DataFrame(orbits_results.fetchall(), columns = orbits_results.keys()) + except Exception as e: + orbits_df = pd.DataFrame() + + + try: + # TODO, fix to pdfs, do completeness for now, as pdfs is currently empty from diff_database + pdfs_results = connection.execute(text("SELECT * FROM Completeness")) + pdfs_df = pd.DataFrame(pdfs_results.fetchall(), columns = pdfs_results.keys()) + except Exception as e: + pdfs_df = pd.DataFrame() - orbitfits_results = connection.execute(text("SELECT * FROM OrbitFits")) - orbitfits_df = pd.DataFrame(orbitfits_results.fetchall(), columns = orbitfits_results.keys()) + try: + scenarios_results = connection.execute(text("SELECT * FROM Scenarios")) + scenarios_df = pd.DataFrame(scenarios_results.fetchall(), columns = scenarios_results.keys()) + except Exception as e: + scenarios_df = pd.DataFrame() + + try: + stars_results = connection.execute(text("SELECT * FROM Stars")) + stars_df = pd.DataFrame(stars_results.fetchall(), columns = stars_results.keys()) + except Exception as e: + stars_df = pd.DataFrame() - - orbits_results = connection.execute(text("SELECT * FROM Orbits")) - orbits_df = pd.DataFrame(orbits_results.fetchall(), columns = orbits_results.keys()) - - # TODO, fix to pdfs, do completeness for now, as pdfs is currently empty from diff_database - pdfs_results = connection.execute(text("SELECT * FROM Completeness")) - pdfs_df = pd.DataFrame(pdfs_results.fetchall(), columns = pdfs_results.keys()) - - scenarios_results = connection.execute(text("SELECT * FROM Scenarios")) - scenarios_df = pd.DataFrame(scenarios_results.fetchall(), columns = scenarios_results.keys()) - - stars_results = connection.execute(text("SELECT * FROM Stars")) - stars_df = pd.DataFrame(stars_results.fetchall(), columns = 
stars_results.keys()) - - completeness_results = connection.execute(text("SELECT * FROM Completeness")) - completeness_df = pd.DataFrame(completeness_results.fetchall(), columns = completeness_results.keys()) - - return completeness_df, contrast_curves_df, orbitfits_df, orbits_df, pdfs_df, planets_df, scenarios_df, stars_df + try: + completeness_results = connection.execute(text("SELECT * FROM Completeness")) + completeness_df = pd.DataFrame(completeness_results.fetchall(), columns = completeness_results.keys()) + except Exception as e: + completeness_df = pd.DataFrame() + + return completeness_df, contrast_curves_df, orbitfits_df, orbits_df, pdfs_df, planets_df, scenarios_df, stars_df def temp_writeSQL(engine, plandata=None, stdata=None, orbitfits=None, orbdata=None, pdfs=None, aliases=None,contrastCurves=None,scenarios=None, completeness=None): """write outputs to sql database via connection""" @@ -1051,7 +1078,7 @@ def final_writeSQL(engine, plandata=None, stdata=None, orbitfits=None, orbdata=N if orbdata is not None: print("Writing Orbits") namemxchar = np.array([len(n) for n in orbdata['pl_name'].values]).max() - orbdata = orbdata.rename_axis('orbit_id') + # orbdata = orbdata.rename_axis('orbit_id') orbdata.to_sql('Orbits',connection,chunksize=100,if_exists='replace', dtype={'pl_name':sqlalchemy.types.String(namemxchar), 'pl_id': sqlalchemy.types.INT, @@ -1064,7 +1091,7 @@ def final_writeSQL(engine, plandata=None, stdata=None, orbitfits=None, orbdata=N # TODO Get this commented back in to correctly have foreign keys # result = connection.execute(text("ALTER TABLE Orbits ADD FOREIGN KEY (pl_id) REFERENCES Planets(pl_id) ON DELETE NO ACTION ON UPDATE NO ACTION")); # result = connection.execute(text("ALTER TABLE Orbits ADD FOREIGN KEY (orbitfit_id) REFERENCES OrbitFits(orbitfit_id) ON DELETE NO ACTION ON UPDATE NO ACTION")); - orbdata.to_sql('Orbits', connection) + # orbdata.to_sql('Orbits', connection) # addSQLcomments(connection,'Orbits') if pdfs is not None: 
# TODO failsafes
def simple_writeSQL(engine, plandata=None, stdata=None, orbitfits=None, orbdata=None, pdfs=None, aliases=None, contrastCurves=None, scenarios=None, completeness=None):
    """Drop and rewrite every provided table using pandas' default schema.

    Bare-bones counterpart to :func:`write_update_SQL`: no explicit dtypes,
    indexes or key constraints — just ``DataFrame.to_sql`` for whichever
    frames are not ``None``, in dependency order (parents before children).
    """
    connection = engine.connect()
    connection.execute(text("DROP TABLE IF EXISTS Completeness, ContrastCurves, Scenarios, PDFs, Orbits, OrbitFits, Planets, Stars"))

    # (SQL table name, log label, frame) — order matters: referenced tables
    # (Stars, Planets, ...) are written before the tables that point at them.
    for table, label, frame in (
        ("Stars", "Stars", stdata),
        ("Planets", "Planets", plandata),
        ("OrbitFits", "OrbitFits", orbitfits),
        ("Orbits", "Orbits", orbdata),
        ("PDFs", "PDFs", pdfs),
        ("Aliases", "Aliases", aliases),
        ("Scenarios", "Scenarios", scenarios),
        ("ContrastCurves", "Contrast Curves", contrastCurves),
        ("Completeness", "Completeness", completeness),
    ):
        if frame is not None:
            print(f"Writing {label}")
            frame.to_sql(table, connection)


def write_update_SQL(engine, plandata=None, stdata=None, orbitfits=None, orbdata=None, pdfs=None, aliases=None, contrastCurves=None, scenarios=None, completeness=None):
    """Write the merged (post-upsert) tables to the target SQL database.

    Every frame that is not ``None`` is written with explicit column dtypes
    and the secondary indexes the site queries on. Foreign-key constraints
    are intentionally NOT recreated here (they previously caused operational
    errors against the merged schema); only Aliases.st_id keeps a live FK.
    """
    connection = engine.connect()
    connection.execute(text("DROP TABLE IF EXISTS Completeness, ContrastCurves, Scenarios, PDFs, Orbits, OrbitFits, Planets, Stars"))

    if stdata is not None:
        print("Writing Stars")
        # Size the VARCHAR to the longest name actually present.
        namemxchar = np.array([len(n) for n in stdata['st_name'].values]).max()
        stdata.to_sql('Stars', connection, chunksize=100, if_exists='replace',
                      dtype={'st_id': sqlalchemy.types.INT,
                             'st_name': sqlalchemy.types.String(namemxchar)})
        connection.execute(text('ALTER TABLE Stars ADD INDEX (st_id)'))

    if plandata is not None:
        print("Writing Planets")
        namemxchar = np.array([len(n) for n in plandata['pl_name'].values]).max()
        plandata.to_sql('Planets', connection, chunksize=100, if_exists='replace',
                        dtype={'pl_id': sqlalchemy.types.INT,
                               'pl_name': sqlalchemy.types.String(namemxchar),
                               # presumably star name = planet name minus the
                               # " b" suffix — TODO confirm the -2 sizing
                               'st_name': sqlalchemy.types.String(namemxchar - 2),
                               'pl_letter': sqlalchemy.types.CHAR(1),
                               'st_id': sqlalchemy.types.INT})
        connection.execute(text("ALTER TABLE Planets ADD INDEX (pl_id)"))
        connection.execute(text("ALTER TABLE Planets ADD INDEX (st_id)"))

    if orbitfits is not None:
        print("Writing OrbitFits")
        namemxchar = np.array([len(n) for n in orbitfits['pl_name'].values]).max()
        orbitfits.to_sql('OrbitFits', connection, chunksize=100, if_exists='replace',
                         dtype={'pl_id': sqlalchemy.types.INT,
                                'orbitfit_id': sqlalchemy.types.INT,
                                'pl_name': sqlalchemy.types.String(namemxchar)},
                         index=True)
        connection.execute(text("ALTER TABLE OrbitFits ADD INDEX (orbitfit_id)"))
        connection.execute(text("ALTER TABLE OrbitFits ADD INDEX (pl_id)"))

    if orbdata is not None:
        print("Writing Orbits")
        namemxchar = np.array([len(n) for n in orbdata['pl_name'].values]).max()
        orbdata.to_sql('Orbits', connection, chunksize=100, if_exists='replace',
                       dtype={'pl_name': sqlalchemy.types.String(namemxchar),
                              'pl_id': sqlalchemy.types.INT,
                              'orbit_id': sqlalchemy.types.BIGINT,
                              'orbitfit_id': sqlalchemy.types.INT},
                       index=True)
        connection.execute(text("ALTER TABLE Orbits ADD INDEX (orbit_id)"))
        connection.execute(text("ALTER TABLE Orbits ADD INDEX (pl_id)"))
        connection.execute(text("ALTER TABLE Orbits ADD INDEX (orbitfit_id)"))

    if pdfs is not None:
        print("Writing PDFs")
        # Rebuild a contiguous index before writing so pdf_id is dense.
        pdfs = pdfs.reset_index(drop=True)
        namemxchar = np.array([len(n) for n in pdfs['Name'].values]).max()
        pdfs.to_sql('PDFs', connection, chunksize=100, if_exists='replace',
                    dtype={'pl_name': sqlalchemy.types.String(namemxchar),
                           'pl_id': sqlalchemy.types.INT})
        connection.execute(text("ALTER TABLE PDFs ADD INDEX (pl_id)"))
        connection.execute(text("ALTER TABLE PDFs ADD INDEX (pdf_id)"))

    if aliases is not None:
        print("Writing Alias")
        aliasmxchar = np.array([len(n) for n in aliases['Alias'].values]).max()
        aliases.to_sql('Aliases', connection, chunksize=100, if_exists='replace',
                       dtype={'Alias': sqlalchemy.types.String(aliasmxchar)})
        connection.execute(text("ALTER TABLE Aliases ADD INDEX (alias_id)"))
        connection.execute(text("ALTER TABLE Aliases ADD INDEX (Alias)"))
        connection.execute(text("ALTER TABLE Aliases ADD INDEX (st_id)"))
        # Aliases is the one table that still carries a live FK to Stars.
        connection.execute(text("ALTER TABLE Aliases ADD FOREIGN KEY (st_id) REFERENCES Stars(st_id) ON DELETE NO ACTION ON UPDATE NO ACTION"))

    if scenarios is not None:
        print("Writing Scenarios")
        namemxchar = np.array([len(n) for n in scenarios['scenario_name'].values]).max()
        scenarios.to_sql("Scenarios", connection, chunksize=100, if_exists='replace',
                         dtype={'scenario_name': sqlalchemy.types.String(namemxchar)},
                         index=False)
        connection.execute(text("ALTER TABLE Scenarios ADD INDEX (scenario_name)"))

    if contrastCurves is not None:
        print("Writing ContrastCurves")
        namemxchar = np.array([len(n) for n in contrastCurves['scenario_name'].values]).max()
        contrastCurves.to_sql("ContrastCurves", connection, chunksize=100, if_exists='replace',
                              dtype={'st_id': sqlalchemy.types.INT,
                                     'curve_id': sqlalchemy.types.INT,
                                     'scenario_name': sqlalchemy.types.String(namemxchar)},
                              index=True)

    if completeness is not None:
        print("Writing completeness")
        # TODO: scenario_name width is a placeholder (25) — completeness may
        # arrive without an accompanying Scenarios table, so the usual
        # max-length sizing is unavailable here.
        completeness.to_sql("Completeness", connection, chunksize=100, if_exists='replace',
                            dtype={'pl_id': sqlalchemy.types.INT,
                                   'completeness_id': sqlalchemy.types.INT,
                                   'scenario_name': sqlalchemy.types.String(25)},
                            index=True)
import pandas as pd
import time
from sqlalchemy import create_engine
from tqdm import tqdm
from plandb_methods import *
from update_util import *
import logging

from database_main import compileContrastCurves
from database_main import compileCompleteness

from update_plandb_method import updateDatabase

# End-to-end timing benchmark: build today's database from scratch, build the
# archived 2022 database, run the incremental update against it, and compare
# the updated database to the from-scratch build, table by table.

# NOTE(review): credentials are hard-coded throughout this script; move them
# to environment variables or a config file before sharing/deploying.
DB_USER = "andrewchiu"
DB_PASSWORD = "Password123!"

logging.basicConfig(filename="timer_log.txt", level=logging.INFO,
                    format="%(asctime)s - %(message)s")

cache = False
datestr = Time.now().datetime.strftime("%Y-%m")


######### Complete Database build ############################################

start_time_complete = time.time()
logging.info("Complete database build start")

data = getIPACdata()

# photometric data
photdict = loadPhotometryData(infile="plandb.sioslab.com/allphotdata_2015.npz")

# band info
bandzip = list(genBands())

# TODO: look at genOrbitData method signature, with t0 default, and consider adding t0
orbdata, orbitfits = genOrbitData(data, bandzip, photdict)

ephemeris_orbitfits, ephemeris_orbdata = addEphemeris(data, orbitfits, orbdata, bandzip, photdict)

quadrature_data = calcQuadratureVals(ephemeris_orbitfits, bandzip, photdict)

exosims_json = 'plandb.sioslab.com/ci_perf_exosims.json'
contr_data = calcContrastCurves(quadrature_data, exosims_json=exosims_json)

comps, compdict, comps_data = calcPlanetCompleteness(contr_data, bandzip, photdict, exosims_json=exosims_json)

plandata, stdata, orbitfits = generateTables(data, comps_data)

# TODO: probably cache the contrast-curve calculations and then compile, or
# write a new compile function; test the compiled curves here.
contr_curvs_path = Path(f'plandb.sioslab.com/cache/cont_curvs_{datestr.replace("-", "_")}')
contrast_curves = compileContrastCurves(stdata, contr_curvs_path)

scenarios = pd.read_csv("plandb.sioslab.com/cache/scenario_angles.csv")

engineToday = create_engine('mysql+pymysql://' + DB_USER + ':' + DB_PASSWORD + '@localhost/TestToday', echo=True)

# TODO: completeness later
writeSQL(engineToday, plandata=plandata, stdata=stdata, orbitfits=orbitfits,
         orbdata=orbdata, pdfs=None, aliases=None, contrastCurves=None,
         scenarios=None, completeness=None)

end_time_complete = time.time()
elapsed_time = end_time_complete - start_time_complete
logging.info(f"complete database build end. Elapsed time: {elapsed_time:.2f} seconds")

print(f"elapsed time: {elapsed_time}")


######### Old Database build (2022) ##########################################

start_time_2022 = time.time()
logging.info("2022 database build start")

# The 2022 inputs come from pickled cache files rather than a fresh pull.
datestr = Time.now().datetime.strftime("2022-05")
plandata_path = Path(f'plandb.sioslab.com/cache/plandata_{datestr}.p')
planetsOld = pd.read_pickle(plandata_path)
stdata_path = Path(f'plandb.sioslab.com/cache/stdata_{datestr}.p')
starsOld = pd.read_pickle(stdata_path)
orbfits_path = Path(f'plandb.sioslab.com/cache/table_orbitfits_{datestr}.p')
orbitfitsOld = pd.read_pickle(orbfits_path)
orbdata_path = Path(f'plandb.sioslab.com/cache/ephemeris_orbdata_{datestr}.p')
orbitsOld = pd.read_pickle(orbdata_path)
comps_path = Path(f'plandb.sioslab.com/cache/comps_{datestr}.p')
pdfs = pd.read_pickle(comps_path)
print(pdfs)

compiled_contr_curvs_path = Path(f'plandb.sioslab.com/cache/compiled_cont_curvs_{datestr}.p')
contr_curvs_path = Path(f'plandb.sioslab.com/cache/cont_curvs_{datestr.replace("-", "_")}')
if compiled_contr_curvs_path.exists():
    contrast_curvesOld = pd.read_pickle(compiled_contr_curvs_path)
else:
    contrast_curvesOld = compileContrastCurves(starsOld, contr_curvs_path)
    contrast_curvesOld.to_pickle(compiled_contr_curvs_path)


def addId(r):
    # Attach the planet id whose pl_name matches this pdf row's Name.
    r['pl_id'] = list(planetsOld.index[(planetsOld['pl_name'] == r['Name'])])[0]
    return r


# TODO Pdfs
newpdfs = pdfs.apply(addId, axis=1)
scenarios = pd.read_csv("plandb.sioslab.com/cache/scenario_angles.csv")

before_engine = create_engine('mysql+pymysql://' + DB_USER + ':' + DB_PASSWORD + '@localhost/database2022before', echo=True)

writeSQL(before_engine, plandata=planetsOld, stdata=starsOld, orbitfits=orbitfitsOld,
         orbdata=orbitsOld, pdfs=None, aliases=None, contrastCurves=None,
         scenarios=None, completeness=None)

# NOTE(review): this argument-less call looks like a leftover — the timed
# update below is the real one. Confirm updateDatabase() has usable defaults
# or delete this line.
updateDatabase()

print("Done")

end_time_2022 = time.time()
elapsed_time_2022 = end_time_2022 - start_time_2022
logging.info(f"2022 database build complete, elapsed time: {elapsed_time_2022}")


######### Update Database ####################################################

start_time_update = time.time()
logging.info("update database start")

# Was "password123!" (lower-case p) — that would fail MySQL auth against the
# same account every other connection in this script uses.
updateDatabase(DB_USER, DB_PASSWORD, "database2022before", 'databaseDiffUpdate', "databaseAfterUpdate")

end_time_update = time.time()
elapsed_time_update = end_time_update - start_time_update
logging.info(f"end update database elapsed time: {elapsed_time_update}")

# Compare the update's cost against the from-scratch build.
elapsed_time_difference = elapsed_time_update - elapsed_time
elapsed_time_percent_change = elapsed_time_difference / elapsed_time
logging.info(f"time difference between update and build {elapsed_time_difference} difference of {elapsed_time_percent_change}")


######### Compare Database Test ##############################################


def get_all_tables_as_dataframes(connection):
    """Return ``{table_name: DataFrame}`` for every table in the database.

    NOTE(review): ``SHOW TABLES`` is MySQL-specific, and a SQLAlchemy 2.x
    ``Connection`` does not expose ``.cursor()`` — verify this is handed a
    raw DB-API connection.
    """
    cursor = connection.cursor()
    try:
        cursor.execute("SHOW TABLES;")
        tables = cursor.fetchall()

        table_names = [table[0] for table in tables]

        dataframes = {}
        for table_name in table_names:
            # Table names come from SHOW TABLES (trusted), not user input.
            query = f"SELECT * FROM {table_name};"
            df = pd.read_sql(query, connection)
            dataframes[table_name] = df
    finally:
        # Close the cursor even if a query fails.
        cursor.close()
    return dataframes


def compare_all_tables(dataframes1, dataframes2):
    """Compare two ``{table: DataFrame}`` dicts, ignoring row/column order.

    Returns a per-table dict of {"status": "match"} or a mismatch record;
    if the two dicts don't contain the same tables, returns a single
    "missing_or_extra_tables" record instead.
    """
    comparison_results = {}

    tables1 = set(dataframes1.keys())
    tables2 = set(dataframes2.keys())

    if tables1 != tables2:
        comparison_results["missing_or_extra_tables"] = {
            "in_dataframes1_only": tables1 - tables2,
            "in_dataframes2_only": tables2 - tables1,
        }
        return comparison_results

    for table_name in tables1:
        df1 = dataframes1[table_name]
        df2 = dataframes2[table_name]

        if set(df1.columns) != set(df2.columns):
            comparison_results[table_name] = {
                "status": "mismatch",
                "reason": "Column names or structure do not match",
                "df1_columns": list(df1.columns),
                "df2_columns": list(df2.columns),
            }
            continue

        # Align df2's column ORDER to df1's before comparing: DataFrame.equals
        # is order-sensitive, so identical data in a different column order
        # would otherwise be reported as a mismatch.
        df2 = df2[list(df1.columns)]

        df1_sorted = df1.sort_values(by=list(df1.columns)).reset_index(drop=True)
        df2_sorted = df2.sort_values(by=list(df1.columns)).reset_index(drop=True)

        if df1_sorted.equals(df2_sorted):
            comparison_results[table_name] = {"status": "match"}
        else:
            comparison_results[table_name] = {
                "status": "mismatch",
                "reason": "Row contents differ",
            }

    return comparison_results


with engineToday.connect() as connection2022:

    today_dataframes = get_all_tables_as_dataframes(connection2022)

    engine_after_update = create_engine('mysql+pymysql://' + DB_USER + ':' + DB_PASSWORD + '@localhost/' + "databaseAfterUpdate", echo=True)

    with engine_after_update.connect() as connectionAfter:

        after_update_dataframes = get_all_tables_as_dataframes(connectionAfter)

        comparison_result = compare_all_tables(today_dataframes, after_update_dataframes)

        for table, result in comparison_result.items():
            print(f"Table: {table}, Result: {result}")
+print(today_df) \ No newline at end of file diff --git a/backend/update_plandb.py b/backend/update_plandb.py index e9624a4..0061ab2 100644 --- a/backend/update_plandb.py +++ b/backend/update_plandb.py @@ -7,220 +7,209 @@ from database_main import * from update_util import * import os -import sys -#Always keep cache false, because it essentially does nothing if it's on and updates based on month -cache = False +#Flags +cache = True datestr = Time.now().datetime.strftime("%Y-%m") - -# Setup SQL and MySQL engines +# sign in originally before running, to solve cryptography error, sign in using mysql -u username (andrewchiu) -p, then cryptography is solved +# workflow: get current database -> get ipac database -> compare current database to updated values in ipac database, to create a difference dataframe -> create a database from the difference dataframe (updated database) -> merge/upsert the difference database with the current database -> replace current database, with the merged, keep old values and adding updated values +# create connection with current database password = input("SQL password: ") -sios_engine = create_engine('mysql+pymysql://'+"andrewchiu"+':'+password+'@localhost/testSios',echo=True) -diff_sios_engine = create_engine('mysql+pymysql://'+"andrewchiu"+':'+password+'@localhost/testSiosDiffEngine',echo=True) +engine = create_engine('mysql+pymysql://'+"andrewchiu"+':'+password+'@localhost/testSios',echo=True) +diff_engine = create_engine('mysql+pymysql://'+"andrewchiu"+':'+password+'@localhost/testSiosDiffEngine',echo=True) new_engine = create_engine('mysql+pymysql://'+"andrewchiu"+':'+password+'@localhost/newEngine',echo=True) -with sios_engine.connect() as connection: - - #get ipac data - print("Getting ipac data") - - old_ipac_data, new_ipac_data = get_store_ipac() - - new_ipac_data.at[2, "pl_letter"] = 'a' - new_ipac_data.at[3, "pl_letter"] = 'a' - new_ipac_data.at[2, "pl_orbper"] = 180 - new_ipac_data.at[3, "pl_orbper"] = 1800 - - print(f"most 
recent: {old_ipac_data}") - print(f"current: {new_ipac_data}") - - #TODO: be able to insert custom ipac data, for test, possibly using flag - #TODO: Test new row differences, because currently its zero, make test data that is slightly different (similar to ipac data) - print("calculating row differences") - change_ipac_df, log = get_ipac_differences(old_ipac_data, new_ipac_data) - change_ipac_df.to_excel("plandb.sioslab.com/backend/sheets/change_ipac.xlsx") - print(f"Changed: {change_ipac_df}") - - for entry in log: - print(f"Reason: {entry['reason']}") - print(f"Description: {entry['description']}") - print("Details:", entry['details']) - print("-" * 40) - - if change_ipac_df.empty: - print("No changes detected, zero rows have been updated/added") - sys.exit() - - - # TODO: Is it just planets, this actually might not be necessary, good anyways - # Keep track of planets, for later upsert - planets_to_update = [] - - for index, row in change_ipac_df.iterrows(): - planets_to_update.append(row['pl_name']) - - print(planets_to_update) - - input2 = input("continue?") - - # get photodict - photdict_path = Path(f'cache/update_photdict_2022-05.p') - infile="plandb.sioslab.com/backend/allphotdata_2015.npz" - photdict = get_photo_data(photdict_path, infile, cache) - - print(photdict) - # get bandzip - bandzip_path = Path(f'cache/update_bandzip_{datestr}.p') - bandzip = get_bandzip(bandzip_path, cache) - - print(bandzip) - - # get orbdata, orbfits - print("Generating orbdata and orbfits") - orbdata_path = Path(f'cache/update_orbdata_{datestr}.p') - orbfits_path = Path(f'cache/update_orbfits_{datestr}.p') - orbdata, orbfits = get_orbdata(orbdata_path, orbfits_path, change_ipac_df, bandzip, photdict, cache) - - # orbdata.to_excel("plandb.sioslab.com/backend/sheets/orbata.xlsx") - # orbdata.to_excel("plandb.sioslab.com/backend/sheets/orbfits.xlsx") - - - print(orbdata) - # get ephemeris - ephemeris_orbdata_path = Path(f'cache/update_ephemeris_orbdata_{datestr}.p') - 
ephemeris_orbfits_path = Path(f'cache/update_ephemeris_orbfits_{datestr}.p') - ephemeris_orbitfits, ephemeris_orbdata = get_ephemerisdata(ephemeris_orbdata_path, ephemeris_orbfits_path, change_ipac_df, orbfits, orbdata, bandzip, photdict, cache) - ephemeris_orbitfits.to_excel("plandb.sioslab.com/backend/sheets/ephemeris_orbfits.xlsx") - - #quadrature - print("quadrature") - quadrature_data_path = Path(f'cache/update_quadrature_data_{datestr}.p') - quadrature_data = get_quadrature(quadrature_data_path, ephemeris_orbitfits, bandzip, photdict, cache) - quadrature_data.to_excel("plandb.sioslab.com/backend/sheets/quadrature_data.xlsx") - - - contr_data_path = Path(f'cache/update_contr_data_{datestr}.p') - exosims_json = 'plandb.sioslab.com/ci_perf_exosims.json' - - contr_data = get_contrastness(contr_data_path, exosims_json, quadrature_data, cache) - contr_data.to_excel("plandb.sioslab.com/backend/sheets/contr_data.xlsx") - - comps_path = Path(f'cache/update_comps_{datestr}.p') - compdict_path = Path(f'cache/update_compdict_{datestr}.p') - comps_data_path = Path(f'cache/update_comps_data_{datestr}.p') - - - comps, compdict, comps_data = get_completeness(comps_path, compdict_path, comps_data_path, contr_data, bandzip, photdict, exosims_json, cache) - comps.to_excel("plandb.sioslab.com/backend/sheets/comps.xlsx") - comps_data.to_excel("plandb.sioslab.com/backend/sheets/comps_data.xlsx") - #None for compdict, as its dictionary - - plandata_path = Path(f'cache/update_plandata_{datestr}.p') - stdata_path = Path(f'cache/update_stdata_{datestr}.p') - table_orbitfits_path = Path(f'cache/update_table_orbitfits_{datestr}.p') - - # orbitfits got updated, maybe change to new var - plan_data, stdata, orbitfits = get_generated_tables(plandata_path, stdata_path, table_orbitfits_path, change_ipac_df, quadrature_data, comps_data, cache) - plan_data.to_excel("plandb.sioslab.com/backend/sheets/plandata.xlsx") - stdata.to_excel("plandb.sioslab.com/backend/sheets/stdata.xlsx") - 
orbitfits.to_excel('plandb.sioslab.com/backend/sheets/later_orbitfits.xlsx') - - - # Do compileContrastness and compile contrastness - # Look into and possibly remove the - to _ - contr_curvs2_path = Path(f'plandb.sioslab.com/cache/cont_curvs2_{datestr.replace("-", "_")}') - compiled_contr_curvs_path = Path(f'plandb.sioslab.com/cache/cont_curvs_{datestr.replace("-", "_")}') - compiled_contrast_curves, newpdfs = get_compiled_contrast(compiled_contr_curvs_path, stdata, comps_data, change_ipac_df, contr_curvs2_path) - # compiled_contrast_curves = compileContrastCurves(stdata, compiled_contr_curvs_path) - compiled_contrast_curves.to_excel("plandb.sioslab.com/backend/sheets/compiled_contrast_curves.xlsx") - # With current code, since get_compiled_contrast isnt fully working with new pdfs, new pdfs should be empty - newpdfs.to_excel("plandb.sioslab.com/backend/sheets/newpdfs.xlsx") - - - # compile completeness - compiled_completeness_path = Path(f"plandb.sioslab.com/cache/compiled_completeness_{datestr}.p") - compiled_completeness = get_compiled_completeness(compiled_completeness_path, comps_data) - compiled_completeness.to_excel("plandb.sioslab.com/backend/sheets/compiled_completeness.xlsx") - - diff_completeness_df = pd.DataFrame({ - 'completeness_id': [0], - 'pl_id': [3], - 'completeness': [0.0111], - 'scenario_name' : ['Optimistic_NF_Imager_20000hr'], - 'compMinWA': [None], - 'compMaxWA': [None], - 'compMindMag': [None], - 'compMaxdMag': [None], - }) - - # MakeSQL for the temporary database, creates diff engine, -> upsert diff engine with current engine - scenarios = pd.read_csv("plandb.sioslab.com/cache/scenario_angles.csv") - # writeSQL(engine, plandata=planets, stdata=stars, orbitfits=orbitfits, orbdata=orbits, pdfs=newpdfs, aliases=None,contrastCurves=contrast_curves,scenarios=scenarios, completeness=completeness) - temp_writeSQL(diff_sios_engine, plandata=plan_data, stdata=stdata, orbitfits=orbfits, orbdata=orbdata, pdfs=None, aliases=None, 
contrastCurves=compiled_contrast_curves, scenarios=scenarios, completeness=compiled_completeness) +with engine.connect() as connection: + + + + # with diff_engine.connect() as diff_engine_connection: + + # get current database dataframe + print("getting current database") + current_database_df = get_current_database(connection) + + current_database_df.to_excel("plandb.sioslab.com/backend/sheets/current_database.xlsx", index=False) + + # get ipac database dataframe + print("getting ipac database") + + ipac_archive_dir = "plandb.sioslab.com/backend/ipac_archive" + + files = os.listdir(ipac_archive_dir) + + most_recent_file = files[0] + most_recent_ind = 0 + + for file in files: + + ind_str = file.rsplit('-', 1)[-1] + current_ind = int(ind_str) + + if(current_ind > most_recent_ind): + most_recent_ind = current_ind + most_recent_file = file + + #load archive, load first because it needs be the most recent excluding month, + print("loading archived ipac") + + + data_path = Path(f'cache/data_cache_{datestr}.p') + ipac_data_df = get_ipac_database(data_path, cache) + # Archive everytime, replace if same month, archive for future use + print("archiving present time ipac") + with open(Path(f"plandb.sioslab.com/backend/ipac_archive/ipac_archive_{datestr}_{most_recent_ind + 1}.p"), 'wb') as f: + pickle.dump(ipac_data_df, f) + + + most_recent_ipac_data = pickle.load(Path(f"plandb.sioslab.com/backend/ipac_archive/{most_recent_file}")) + + change_ipac_df = find_row_differences(most_recent_ipac_data, ipac_data_df) + + + #find updates from current to ipac + changed_df = find_row_differences(current_database_df, ipac_data_df) + changed_df.to_excel("plandb.sioslab.com/backend/sheets/changed.xlsx") + changed_df.to_pickle(Path(f"plandb.sioslab.com/changed_df")) + + changed_df = pd.read_pickle(Path(f"plandb.sioslab.com/changed_df")) + + + # get photodict + photdict_path = Path(f'cache/update_photdict_2022-05.p') + infile="plandb.sioslab.com/backend/allphotdata_2015.npz" + photdict = 
get_photo_data(photdict_path, infile, cache) + + + # get bandzip + bandzip_path = Path(f'cache/update_bandzip_{datestr}.p') + bandzip = get_bandzip(bandzip_path, cache) + + # get orbdata, orbfits + orbdata_path = Path(f'cache/update_orbdata_{datestr}.p') + orbfits_path = Path(f'cache/update_orbfits_{datestr}.p') + orbdata, orbfits = get_orbdata(orbdata_path, orbfits_path, changed_df, bandzip, photdict, cache) + + # Line below, contains too main records to write xlsx, check manually + # orbdata.to_excel("orbdata.xlsx") + orbfits.to_excel("orbfits.xlsx") + + # get ephemeris + ephemeris_orbdata_path = Path(f'cache/update_ephemeris_orbdata_{datestr}.p') + ephemeris_orbfits_path = Path(f'cache/update_ephemeris_orbfits_{datestr}.p') + ephemeris_orbitfits, ephemeris_orbdata = get_ephemerisdata(ephemeris_orbdata_path, ephemeris_orbfits_path, changed_df, orbfits, orbdata, bandzip, photdict, cache) + ephemeris_orbitfits.to_excel("ephemeris_orbfits.xlsx") + + # Line below, contains too main records to write xlsx, check manually + # ephemeris_orbdata.to_excel("ephemeris_orbdata.xlsx") + + quadrature_data_path = Path(f'cache/update_quadrature_data_{datestr}.p') + quadrature_data = get_quadrature(quadrature_data_path, ephemeris_orbitfits, bandzip, photdict, cache) + quadrature_data.to_excel("plandb.sioslab.com/backend/sheets/quadrature_data.xlsx") + + contr_data_path = Path(f'cache/update_contr_data_{datestr}.p') + exosims_json = 'plandb.sioslab.com/ci_perf_exosims.json' + + contr_data = get_contrastness(contr_data_path, exosims_json, quadrature_data, cache) + contr_data.to_excel("plandb.sioslab.com/backend/sheets/contr_data.xlsx") + + comps_path = Path(f'cache/update_comps_{datestr}.p') + compdict_path = Path(f'cache/update_compdict_{datestr}.p') + comps_data_path = Path(f'cache/update_comps_data_{datestr}.p') + + comps, compdict, comps_data = get_completeness(comps_path, compdict_path, comps_data_path, contr_data, bandzip, photdict, exosims_json, cache) + 
comps.to_excel("plandb.sioslab.com/backend/sheets/comps.xlsx") + comps_data.to_excel("plandb.sioslab.com/backend/sheets/comps_data.xlsx") + #None for compdict, as its dictionary + + plandata_path = Path(f'cache/update_plandata_{datestr}.p') + stdata_path = Path(f'cache/update_stdata_{datestr}.p') + table_orbitfits_path = Path(f'cache/update_table_orbitfits_{datestr}.p') + + # orbitfits got updated, maybe change to new var + plan_data, stdata, orbitfits = get_generated_tables(plandata_path, stdata_path, table_orbitfits_path, changed_df, quadrature_data, comps_data, cache) + plan_data.to_excel("plandb.sioslab.com/backend/sheets/plandata.xlsx") + stdata.to_excel("plandb.sioslab.com/backend/sheets/stdata.xlsx") + orbitfits.to_excel('plandb.sioslab.com/backend/sheets/later_orbitfits.xlsx') + + #do compileContrastness and compile contrastness + # Look into and possibly remove the - to _ + contr_curvs2_path = Path(f'plandb.sioslab.com/cache/cont_curvs2_{datestr.replace("-", "_")}') + compiled_contr_curvs_path = Path(f'plandb.sioslab.com/cache/cont_curvs_{datestr.replace("-", "_")}') + compiled_contrast_curves, newpdfs = get_compiled_contrast(compiled_contr_curvs_path, stdata, comps_data, changed_df, contr_curvs2_path) + # compiled_contrast_curves = compileContrastCurves(stdata, compiled_contr_curvs_path) + compiled_contrast_curves.to_excel("plandb.sioslab.com/backend/sheets/compiled_contrast_curves.xlsx") + # With current code, since get_compiled_contrast isnt fully working with new pdfs, new pdfs should be empty + newpdfs.to_excel("plandb.sioslab.com/backend/sheets/newpdfs.xlsx") + + # compile completeness + compiled_completeness_path = Path(f"plandb.sioslab.com/cache/compiled_completeness_{datestr}.p") - # Get from diff_database - diff_engine_connection = diff_sios_engine.connect() - diff_completeness_df, diff_contrast_curves_df, diff_orbitfits_df, diff_orbits_df, diff_pdfs_df, diff_planets_df, diff_scenarios_df, diff_stars_df = get_all_from_db(diff_engine_connection) - 
- # Get from old_database - old_completeness_df, old_contrast_curves_df, old_orbitfits_df, old_orbits_df, old_pdfs_df, old_planets_df, old_scenarios_df, old_stars_df = get_all_from_db(connection) - - print("Merging Planets") - merged_planets = upsert_df(old_planets_df, diff_planets_df, "pl_name") + compiled_completeness = get_compiled_completeness(compiled_completeness_path, comps_data) + compiled_completeness.to_excel("plandb.sioslab.com/backend/sheets/compiled_completeness.xlsx") + + + + # remember do makesql with this and then get those tables and upsert those shorter new ones with the current + # writeSQL(engine, plandata=planets, stdata=stars, orbitfits=orbitfits, orbdata=orbits, pdfs=newpdfs, aliases=None,contrastCurves=contrast_curves,scenarios=scenarios, completeness=completeness) + scenarios = pd.read_csv("plandb.sioslab.com/cache/scenario_angles.csv") - # Upsert completeness - print("Merging Completeness") - - print(old_completeness_df, diff_completeness_df) - ssss = input("continue?") - # Merge based on pl_id, foreign key relation, base on foreign key relation, might do the same for rest - # TODO, this is highly likely to be wrong, forced to used indices here, because only unique, unfortunately incorrect, must base it upsert on the varying indices later - # TODO: Iterate through each pl_id, since these are base off pl_id, correspond with pl_name, and then reset those completeness rows for that pl (remove and then add the new ones that were) - merged_completeness = upsert_general(old_completeness_df, diff_completeness_df, 'pl_id') - - print(merged_completeness) - input3 = input("continue?") - # Upsert stars - # TODO Star differences are based off that the planets table has foreign key st_id, therefore, to properly update stars, must go through planets, see what has been updated, and then go down through those planets, and their stars, and then if that planet has changed, update that star - print("Merging stars") - merged_stars = upsert_df(old_stars_df, 
diff_stars_df, "st_name") - - input4 = input("continue4") - - # For these later upserts, only way to properly upsert is to detect the change from the earlier value, like the difference in planets from ipac, and then categorize the change as a result from upated information or new information. If new information (new planet), just add but if updated information, going to have to track down the previous. Maybe it's possible for me to locally store database versions, so it can be quickly updated based on path of changes that happened - # Upsert orbitfits - print("Merging Orbit Fits") - #TODO: should this be pl_name or pl_id - merged_orbitfits = upsert_general(old_orbitfits_df, diff_orbitfits_df, "pl_name") - - # Upsert orbits - print("Merging Orbits") - merged_orbits = upsert_general(old_orbits_df, diff_orbits_df, "pl_name") - - - # TODO: Fix this based on logic above, use st_id, track st_id from st_names, likely need to do same logic for stars - # If stars are already reindexed, so should the st_id, if not track - print("Merging Curves") - merged_contrast_curves = upsert_df(old_contrast_curves_df, diff_contrast_curves_df, "st_id") - - # Might have to compare back to old, dataframe, track down what planet the index is and then use that index to renumber the indexes in the old dataframe for the diff dataframe to properly upsert with matching indices - # TODO: Maybe add pl_name and st_name to pdfs and contrast curves, to make it easier - - - # upsert pdfs - # TODO: Track with pl_id - print("Merging pdfs") - merged_pdfs = upsert_df(old_pdfs_df, diff_pdfs_df, "pl_id") - - # No need to upsert scenarios, as it's updated all at once - - # Write back to original database with new values, - # TODO: optionally, store old database in a different database for archive - print("Writing New Database") - final_writeSQL(new_engine, merged_planets, merged_stars, merged_orbitfits, merged_orbits, None, aliases=None, contrastCurves=None, scenarios = scenarios, completeness = 
merged_completeness) - - print("Done") + temp_writeSQL(diff_engine, plandata=plan_data, stdata=stdata, orbitfits=orbfits, orbdata=orbdata, pdfs=None, aliases=None, contrastCurves=compiled_contrast_curves, scenarios=scenarios, completeness=compiled_completeness) + + #get from diff_database + diff_engine_connection = diff_engine.connect() + diff_completeness_df, diff_contrast_curves_df, diff_orbitfits_df, diff_orbits_df, diff_pdfs_df, diff_planets_df, diff_scenarios_df, diff_stars_df = get_all_from_db(diff_engine_connection) + + #get from old_database + old_completeness_df, old_contrast_curves_df, old_orbitfits_df, old_orbits_df, old_pdfs_df, old_planets_df, old_scenarios_df, old_stars_df = get_all_from_db(connection) + + + + #merge with old, compare each + + # upsert planets + # Have to do name, because indices don't match, logic applies down unless otherwise in comment + print("merging planets") + merged_planets = upsert_dataframe(old_planets_df, diff_planets_df, "pl_name") + + + # upsert completeness + print("supposed to merge completeness") + # TODO, this is highly likely to be wrong, forced to used indices here, because only unique, unfortunately incorrect, must base it upsert on the varying indices later + # merged_completeness = upsert_dataframe(old_completeness_df, diff_completeness_df, "completeness_id") + + # upsert stars + print("merging stars") + merged_stars = upsert_dataframe(old_stars_df, diff_stars_df, "st_name") + + # upsert orbitfits + print("merging oribt fits") + # merged_orbitfits = upsert_dataframe(old_orbitfits_df, diff_orbitfits_df, "pl_name") + + # upsert orbits + # print("merging orbits") + # merged_orbits = upsert_dataframe(old_orbits_df, diff_orbits_df, "pl_name") + + # upsert contrast curves + # TODO, fix the column name, for unique one later? 
+ print("merging curves") + merged_contrast_curves = upsert_dataframe(old_contrast_curves_df, diff_contrast_curves_df, "r_lamD") + + + # # upsert pdfs + # print("merging pdfs") + # merged_pdfs = upsert_dataframe(old_pdfs_df, diff_pdfs_df, "Name") + + # No need to upsert scenarios, as it's updated locally + + #write back to original database with new values, + # TODO: optionally, store old database in a different database for archive + # orbit fits wrong + print("Merging") + final_writeSQL(new_engine, merged_planets, merged_stars, None, None, None, aliases=None, contrastCurves=None, scenarios= scenarios, completeness=None) + + print("Done") \ No newline at end of file diff --git a/backend/update_plandb_method.py b/backend/update_plandb_method.py new file mode 100644 index 0000000..1522863 --- /dev/null +++ b/backend/update_plandb_method.py @@ -0,0 +1,232 @@ +import pandas as pd +from sqlalchemy import create_engine +import pymysql +import glob +from sqlalchemy import text +from plandb_methods import * +from database_main import * +from update_util import * +import os +import sys + + +# TODO: make method + +#Always keep cache false, because it essentially does nothing if it's on and updates based on month +cache = False +datestr = Time.now().datetime.strftime("%Y-%m") + +# TODO need to pass in db? (read from seperately and pass in, or read in here, prob read in here, with no parameters, just update()) +def updateDatabase(): + + # Setup SQL and MySQL engines + password = "Password123!" 
+ + #TODO Change these when necessary + sios_engine = create_engine('mysql+pymysql://'+"andrewchiu"+':'+password+'@localhost/testBeforeUpdate',echo=True) + diff_sios_engine = create_engine('mysql+pymysql://'+"andrewchiu"+':'+password+'@localhost/testSiosDiffEngine',echo=True) + new_engine = create_engine('mysql+pymysql://'+"andrewchiu"+':'+password+'@localhost/testAfterUpdate',echo=True) + + with sios_engine.connect() as connection: + + # Get ipac data + print("Getting IPAC Data") + + old_ipac_data, new_ipac_data = get_store_ipac() + + new_ipac_data.to_excel("plandb.sioslab.com/backend/sheets/new_ipac.xlsx") + old_ipac_data.to_excel("plandb.sioslab.com/backend/sheets/old_ipac_data.xlsx") + + print(f"New IPAC: {new_ipac_data}") + print(f"Old IPAC: {old_ipac_data}") + + + #TODO: be able to insert custom ipac data, for test, possibly using flag + #TODO: Test new row differences, because currently its zero, make test data that is slightly different (similar to ipac data) + print("calculating row differences") + change_ipac_df, log = get_ipac_differences(old_ipac_data, new_ipac_data) + change_ipac_df.to_excel("plandb.sioslab.com/backend/sheets/change_ipac.xlsx") + print(f"Changed: {change_ipac_df}") + + for entry in log: + print(f"Reason: {entry['reason']}") + print(f"Description: {entry['description']}") + print("Details:", entry['details']) + print("-" * 40) + + if change_ipac_df.empty: + print("No changes detected, zero rows have been updated/added") + sys.exit() + + input1 = input("continue?") + + # get photodict + photdict_path = Path(f'cache/update_photdict_2022-05.p') + infile="plandb.sioslab.com/backend/allphotdata_2015.npz" + photdict = get_photo_data(photdict_path, infile, cache) + + print(photdict) + + # get bandzip + bandzip_path = Path(f'cache/update_bandzip_{datestr}.p') + bandzip = get_bandzip(bandzip_path, cache) + + print(bandzip) + + # get orbdata, orbfits + print("Generating orbdata and orbfits") + orbdata_path = 
Path(f'cache/update_orbdata_{datestr}.p') + orbfits_path = Path(f'cache/update_orbfits_{datestr}.p') + orbdata, orbfits = get_orbdata(orbdata_path, orbfits_path, change_ipac_df, bandzip, photdict, cache) + + # orbdata.to_excel("plandb.sioslab.com/backend/sheets/orbata.xlsx") + # orbdata.to_excel("plandb.sioslab.com/backend/sheets/orbfits.xlsx") + + print(orbdata) + + # get ephemeris + ephemeris_orbdata_path = Path(f'cache/update_ephemeris_orbdata_{datestr}.p') + ephemeris_orbfits_path = Path(f'cache/update_ephemeris_orbfits_{datestr}.p') + ephemeris_orbitfits, ephemeris_orbdata = get_ephemerisdata(ephemeris_orbdata_path, ephemeris_orbfits_path, change_ipac_df, orbfits, orbdata, bandzip, photdict, cache) + ephemeris_orbitfits.to_excel("plandb.sioslab.com/backend/sheets/ephemeris_orbfits.xlsx") + + # get quadrature + print("Quadrature") + quadrature_data_path = Path(f'cache/update_quadrature_data_{datestr}.p') + quadrature_data = get_quadrature(quadrature_data_path, ephemeris_orbitfits, bandzip, photdict, cache) + quadrature_data.to_excel("plandb.sioslab.com/backend/sheets/quadrature_data.xlsx") + + contr_data_path = Path(f'cache/update_contr_data_{datestr}.p') + exosims_json = 'plandb.sioslab.com/ci_perf_exosims.json' + + contr_data = get_contrastness(contr_data_path, exosims_json, quadrature_data, cache) + contr_data.to_excel("plandb.sioslab.com/backend/sheets/contr_data.xlsx") + + comps_path = Path(f'cache/update_comps_{datestr}.p') + compdict_path = Path(f'cache/update_compdict_{datestr}.p') + comps_data_path = Path(f'cache/update_comps_data_{datestr}.p') + + + comps, compdict, comps_data = get_completeness(comps_path, compdict_path, comps_data_path, contr_data, bandzip, photdict, exosims_json, cache) + comps.to_excel("plandb.sioslab.com/backend/sheets/comps.xlsx") + comps_data.to_excel("plandb.sioslab.com/backend/sheets/comps_data.xlsx") + #None for compdict, as its dictionary + + plandata_path = Path(f'cache/update_plandata_{datestr}.p') + stdata_path = 
Path(f'cache/update_stdata_{datestr}.p') + table_orbitfits_path = Path(f'cache/update_table_orbitfits_{datestr}.p') + + # Orbitfits got updated, maybe change to new var + plan_data, stdata, orbitfits = get_generated_tables(plandata_path, stdata_path, table_orbitfits_path, change_ipac_df, quadrature_data, comps_data, cache) + plan_data.to_excel("plandb.sioslab.com/backend/sheets/plandata.xlsx") + stdata.to_excel("plandb.sioslab.com/backend/sheets/stdata.xlsx") + orbitfits.to_excel('plandb.sioslab.com/backend/sheets/later_orbitfits.xlsx') + + + # Do compileContrastness and compile contrastness + # Look into and possibly remove the - to _ + contr_curvs2_path = Path(f'plandb.sioslab.com/cache/cont_curvs2_{datestr.replace("-", "_")}') + compiled_contr_curvs_path = Path(f'plandb.sioslab.com/cache/cont_curvs_{datestr.replace("-", "_")}') + # compiled_contrast_curves, newpdfs = get_compiled_contrast(compiled_contr_curvs_path, stdata, comps_data, change_ipac_df, contr_curvs2_path) + # compiled_contrast_curves = compileContrastCurves(stdata, compiled_contr_curvs_path) + # compiled_contrast_curves.to_excel("plandb.sioslab.com/backend/sheets/compiled_contrast_curves.xlsx") + # With current code, since get_compiled_contrast isnt fully working with new pdfs, new pdfs should be empty + # newpdfs.to_excel("plandb.sioslab.com/backend/sheets/newpdfs.xlsx") + + + # compile completeness + compiled_completeness_path = Path(f"plandb.sioslab.com/cache/compiled_completeness_{datestr}.p") + compiled_completeness = get_compiled_completeness(compiled_completeness_path, comps_data) + compiled_completeness.to_excel("plandb.sioslab.com/backend/sheets/compiled_completeness.xlsx") + + # diff_completeness_df = pd.DataFrame({ + # 'completeness_id': [0], + # 'pl_id': [3], + # 'completeness': [0.0111], + # 'scenario_name' : ['Optimistic_NF_Imager_20000hr'], + # 'compMinWA': [None], + # 'compMaxWA': [None], + # 'compMindMag': [None], + # 'compMaxdMag': [None], + # }) + + # MakeSQL for the temporary 
database, creates diff engine, -> upsert diff engine with current engine + scenarios = pd.read_csv("plandb.sioslab.com/cache/scenario_angles.csv") + # writeSQL(engine, plandata=planets, stdata=stars, orbitfits=orbitfits, orbdata=orbits, pdfs=newpdfs, aliases=None,contrastCurves=contrast_curves,scenarios=scenarios, completeness=completeness) + + # TODO: is there unwanted side effects of using temp_writeSQL instead of actual writeSQL + temp_writeSQL(diff_sios_engine, plandata=plan_data, stdata=stdata, orbitfits=orbitfits, orbdata=orbdata, pdfs=None, aliases=None, contrastCurves=None, scenarios=scenarios, completeness=compiled_completeness) + + # Get from diff_database + diff_engine_connection = diff_sios_engine.connect() + diff_completeness_df, diff_contrast_curves_df, diff_orbitfits_df, diff_orbits_df, diff_pdfs_df, diff_planets_df, diff_scenarios_df, diff_stars_df = get_all_from_db(diff_engine_connection) + + # Get from old_database + old_completeness_df, old_contrast_curves_df, old_orbitfits_df, old_orbits_df, old_pdfs_df, old_planets_df, old_scenarios_df, old_stars_df = get_all_from_db(connection) + + # Upsert planets + # Planets don't have pl_id, they only have pl_name which is indexed after + print("Merging planets") + merged_planets = upsert_general(old_planets_df, diff_planets_df, "pl_name") + + + # Upsert completeness + print("Merging completeness") + + print(old_completeness_df, diff_completeness_df) + # Merge based on pl_id, foreign key relation, base on foreign key relation, might do the same for rest + # TODO, this is highly likely to be wrong, forced to used indices here, because only unique, unfortunately incorrect, must base it upsert on the varying indices later + # TODO: Iterate through each pl_id, since these are base off pl_id, correspond with pl_name, and then reset those completeness rows for that pl (remove and then add the new ones that were) + merged_completeness = upsert_general(old_completeness_df, diff_completeness_df, "pl_id") + + # 
Upsert stars + # TODO Star differences are based off that the planets table has foreign key st_id, therefore, to properly update stars, must go through planets, see what has been updated, and then go down through those planets, and their stars, and then if that planet has changed, update that star + # Same logic as with planets + print("Merging stars") + merged_stars = upsert_general(old_stars_df, diff_stars_df, "st_name") + + print('old orbitfits') + print(old_orbitfits_df) + print("diff orbitfits") + print(diff_orbitfits_df) + old_orbitfits_df.to_excel('plandb.sioslab.com/backend/sheets/original_orbitfits.xlsx') + diff_orbitfits_df.to_excel('plandb.sioslab.com/backend/sheets/diff_orbitfits.xlsx') + # print("diff between diff and old, this should be nothing") + # print(diff_orbits_df) + + # For these later upserts, only way to properly upsert is to detect the change from the earlier value, like the difference in planets from ipac, and then categorize the change as a result from upated information or new information. If new information (new planet), just add but if updated information, going to have to track down the previous. 
Maybe it's possible for me to locally store database versions, so it can be quickly updated based on path of changes that happened + # Upsert orbitfits + # TODO: maybe the error is here + print("Merging orbit fits") + merged_orbitfits = upsert_general(old_orbitfits_df, diff_orbitfits_df, "pl_id") + + # TODO: compile/calc contrast curves still needs fixing + # TODO: make a seperate database for todays data, the old data, and test by comparing todays data to old data + update + + # Upsert orbits + print("Merging orbits") + merged_orbits = upsert_general(old_orbits_df, diff_orbits_df, "pl_id") + + # Upsert contrast curvess + print("Merging curves") + merged_contrast_curves = upsert_general(old_contrast_curves_df, diff_contrast_curves_df, "st_id") + + + # Upsert pdfs + # TODO: same as orbit fits + # print("Merging pdfs") + # merged_pdfs = upsert_df(old_pdfs_df, diff_pdfs_df, "Name") + + # No need to upsert scenarios, as it's updated locally + + # write back to original database with new values, + # TODO: optionally, store old database in a different database for archive + print("Merging and final write") + write_update_SQL(new_engine, merged_planets, merged_stars, merged_orbitfits, merged_orbits, None, aliases=None, contrastCurves= None, scenarios=None, completeness=merged_completeness) + + print("Done") + + # TODO: Print all total changes + # TODO: Correct the merges/compiles with no changes, ending up outputting an empty table, (account for table upserts with no changes) (Handle no new updates case). For example, final completeness would be empty if there are no changes to completeness in the update. 
+ + diff --git a/cache/cont_curvs_2024_05 b/cache/cont_curvs_2024_05 new file mode 100644 index 0000000000000000000000000000000000000000..f82ab9a3ec80e6a8f2f0fdfbf0956f4068b907cc GIT binary patch literal 702 zcmZuv!D`$v5MA%C+cdNdp|nXT^t_?qb8jt8D6}RnL*+^rOg$B|?(gbwd z>6i2a+mSX2DMSZpB+VPW_cRyr??2;F7;d`jQJ*~*SxkU)or2J-1JiCp2X!vz)`6a8G!o$-MmyA3* z9sBu`=IqPv|Av(EOxh#|%QkKbElc$$|I=AO*oIw8KzOx{C$%} z-;?wTr=eyxT(Oe+z1w?Ch5=o99mjrsz`g#Iu`LJ&*%UXm*!8!8L*+^rOg$B|?(gbwd z>6i2a+mSX2DMSZpB+VPW_cRyr??2;F7;d`jQJ*~*SxkU)or2J-1JiCp2X!vz)`6a8G!o$-MmyA3* z9sBu`=IqPv|Av(EOxh#|%QkKbElc$$|I=AO*oIw8KzOx{C$%} z-;?wTr=eyxT(Oe+z1w?Ch5=o99mjrsz`g#Iu`LJ&*%UXm*!8!8L*+^rOg$B|?(gbwd z>6i2a+mSX2DMSZpB+VPW_cRyr??2;F7;d`jQJ*~*SxkU)or2J-1JiCp2X!vz)`6a8G!o$-MmyA3* z9sBu`=IqPv|Av(EOxh#|%QkKbElc$$|I=AO*oIw8KzOx{C$%} z-;?wTr=eyxT(Oe+z1w?Ch5=o99mjrsz`g#Iu`LJ&*%UXm*!8!8L*+^rOg$B|?(gbwd z>6i2a+mSX2DMSZpB+VPW_cRyr??2;F7;d`jQJ*~*SxkU)or2J-1JiCp2X!vz)`6a8G!o$-MmyA3* z9sBu`=IqPv|Av(EOxh#|%QkKbElc$$|I=AO*oIw8KzOx{C$%} z-;?wTr=eyxT(Oe+z1w?Ch5=o99mjrsz`g#Iu`LJ&*%UXm*!8!8L*+^rOg$B|?(gbwd z>6i2a+mSX2DMSZpB+VPW_cRyr??2;F7;d`jQJ*~*SxkU)or2J-1JiCp2X!vz)`6a8G!o$-MmyA3* z9sBu`=IqPv|Av(EOxh#|%QkKbElc$$|I=AO*oIw8KzOx{C$%} z-;?wTr=eyxT(Oe+z1w?Ch5=o99mjrsz`g#Iu`LJ&*%UXm*!8!8 Date: Wed, 11 Dec 2024 01:25:26 -0500 Subject: [PATCH 4/6] Working Database Test --- backend/database_test.py | 22 +-- backend/plandb_methods.py | 2 +- backend/test.py | 10 +- backend/update_plandb.py | 1 + backend/update_plandb_method.py | 289 +++++++++++++++++++++++++++----- backend/update_util.py | 122 +++++++++----- ci_perf_exosims.json | 108 ++++++------ exosims_input.json | 110 ++++++------ 8 files changed, 447 insertions(+), 217 deletions(-) diff --git a/backend/database_test.py b/backend/database_test.py index fcc979c..5a736eb 100644 --- a/backend/database_test.py +++ b/backend/database_test.py @@ -9,7 +9,7 @@ from database_main import compileContrastCurves from database_main import compileCompleteness -from update_plandb_method import 
updateDatabase +from update_plandb_method import * @@ -51,7 +51,7 @@ plandata, stdata, orbitfits = generateTables(data, comps_data) -# TODO: Prob replace this path stuff, prob HAVE to cache contr curvecalculations and then compile, or maybe write new compile function +# TODO: Prob replace this path stuff, prob HAVE to plandb.com/cache contr curvecalculations and then compile, or maybe write new compile function # TODO: calculate contrast curves and test it here contr_curvs_path = Path(f'plandb.sioslab.com/cache/cont_curvs_{datestr.replace("-", "_")}') contrast_curves = compileContrastCurves(stdata, contr_curvs_path) @@ -61,7 +61,7 @@ # newpdfs = pdfs.apply(addId, axis = 1) scenarios = pd.read_csv("plandb.sioslab.com/cache/scenario_angles.csv") -# compiled_completeness_path = Path(f"plandb.sioslab.com/cache/compiled_completeness_{datestr}.p") +# compiled_completeness_path = Path(f"plandb.com/cache/compiled_completeness_{datestr}.p") # if compiled_completeness_path.exists(): # completeness = pd.read_pickle(compiled_completeness_path) # else: @@ -87,7 +87,7 @@ # build database from old values and update -# build old (use cache files from 2022) +# build old (use plandb.com/cache files from 2022) ######### Old Database build (2022) ############################################################################################################################ @@ -95,7 +95,7 @@ start_time_2022 = time.time() logging.info("2022 database build start") -datestr = Time.now().datetime.strftime("2022-05") +datestr = "2022-05" plandata_path = Path(f'plandb.sioslab.com/cache/plandata_{datestr}.p') planetsOld = pd.read_pickle(plandata_path) stdata_path = Path(f'plandb.sioslab.com/cache/stdata_{datestr}.p') @@ -126,9 +126,6 @@ def addId(r): writeSQL(before_engine, plandata=planetsOld, stdata=starsOld, orbitfits=orbitfitsOld, orbdata=orbitsOld, pdfs=None, aliases=None,contrastCurves=None,scenarios=None, completeness=None) -updateDatabase() - - print("Done") end_time_2022 = time.time() @@ 
-137,7 +134,7 @@ def addId(r): #TODO; compile later -# compiled_completeness_path = Path(f"plandb.sioslab.com/cache/compiled_completeness_{datestr}.p") +# compiled_completeness_path = Path(f"plandb.com/cache/compiled_completeness_{datestr}.p") # if compiled_completeness_path.exists(): # completeness = pd.read_pickle(compiled_completeness_path) # else: @@ -148,11 +145,10 @@ def addId(r): ######### Update Database ############################################################################################################################ - start_time_update = time.time() logging.info("update database start") -updateDatabase("andrewchiu", "password123!", "database2022before", 'databaseDiffUpdate', "databaseAfterUpdate") +updateDatabaseTest("andrewchiu", "Password123!", "database2022before", 'databaseDiffUpdate', "databaseAfterUpdate") # update @@ -207,7 +203,7 @@ def compare_all_tables(dataframes1, dataframes2): if set(df1.columns) != set(df2.columns): comparison_results[table_name] = { "status": "mismatch", - "reason": "Column names or structure do not match", + "reason": "Column names don't match", "df1_columns": list(df1.columns), "df2_columns": list(df2.columns), } @@ -221,7 +217,7 @@ def compare_all_tables(dataframes1, dataframes2): else: comparison_results[table_name] = { "status": "mismatch", - "reason": "Row contents differ", + "reason": "Row contents different", } return comparison_results diff --git a/backend/plandb_methods.py b/backend/plandb_methods.py index 414e428..5c00cc1 100644 --- a/backend/plandb_methods.py +++ b/backend/plandb_methods.py @@ -1287,7 +1287,7 @@ def calcContrastCurves(data, exosims_json): # These are used to keep track of where each planet's star's contrast curves # are going to be saved datestr = Time.now().datetime.strftime("%Y_%m") - contrast_curve_cache_base = Path(f'cache/cont_curvs_{datestr}/') + contrast_curve_cache_base = Path(f'plandb.sioslab.com/cache/cont_curvs_{datestr}/') contrast_curve_cache_base.mkdir(parents=True, 
exist_ok=True) star_base_path_list = [] diff --git a/backend/test.py b/backend/test.py index f21d6c9..678d683 100644 --- a/backend/test.py +++ b/backend/test.py @@ -1,8 +1,4 @@ -import pandas as pd -from sqlalchemy import create_engine +import time -before_engine = create_engine('mysql+pymysql://'+"andrewchiu"+':'+"Password123!"+'@localhost/TestBeforeUpdate',echo=True) - - -today_df = pd.read_sql(before_engine) -print(today_df) \ No newline at end of file +datestr = Time.now().datetime.strftime("%Y-%m") +print(datestr) \ No newline at end of file diff --git a/backend/update_plandb.py b/backend/update_plandb.py index 0061ab2..00ce70a 100644 --- a/backend/update_plandb.py +++ b/backend/update_plandb.py @@ -11,6 +11,7 @@ #Flags cache = True +# always rely on historic ipac datestr = Time.now().datetime.strftime("%Y-%m") # sign in originally before running, to solve cryptography error, sign in using mysql -u username (andrewchiu) -p, then cryptography is solved diff --git a/backend/update_plandb_method.py b/backend/update_plandb_method.py index 1522863..71828cb 100644 --- a/backend/update_plandb_method.py +++ b/backend/update_plandb_method.py @@ -17,15 +17,15 @@ datestr = Time.now().datetime.strftime("%Y-%m") # TODO need to pass in db? (read from seperately and pass in, or read in here, prob read in here, with no parameters, just update()) -def updateDatabase(): +# TODO test if you can just pass in sios_engine name and new_engine name as the same to overwrite original, if not add that functionality +def updateDatabase(user : str, password : str, sios_engine_name : str, diff_engine_name : str, new_engine_name: str): # Setup SQL and MySQL engines - password = "Password123!" 
- + #TODO Change these when necessary - sios_engine = create_engine('mysql+pymysql://'+"andrewchiu"+':'+password+'@localhost/testBeforeUpdate',echo=True) - diff_sios_engine = create_engine('mysql+pymysql://'+"andrewchiu"+':'+password+'@localhost/testSiosDiffEngine',echo=True) - new_engine = create_engine('mysql+pymysql://'+"andrewchiu"+':'+password+'@localhost/testAfterUpdate',echo=True) + sios_engine = create_engine('mysql+pymysql://'+user+':'+password+'@localhost/' + sios_engine_name,echo=True) + diff_sios_engine = create_engine('mysql+pymysql://'+user+':'+password+'@localhost/' + diff_engine_name,echo=True) + new_engine = create_engine('mysql+pymysql://'+user+':'+password+'@localhost/' + new_engine_name,echo=True) with sios_engine.connect() as connection: @@ -34,7 +34,7 @@ def updateDatabase(): old_ipac_data, new_ipac_data = get_store_ipac() - new_ipac_data.to_excel("plandb.sioslab.com/backend/sheets/new_ipac.xlsx") + new_ipac_data.to_excel("plandb.sioslab.com/backend/sheets/new_ipac_data.xlsx") old_ipac_data.to_excel("plandb.sioslab.com/backend/sheets/old_ipac_data.xlsx") print(f"New IPAC: {new_ipac_data}") @@ -44,39 +44,248 @@ def updateDatabase(): #TODO: be able to insert custom ipac data, for test, possibly using flag #TODO: Test new row differences, because currently its zero, make test data that is slightly different (similar to ipac data) print("calculating row differences") - change_ipac_df, log = get_ipac_differences(old_ipac_data, new_ipac_data) + change_ipac_df = get_ipac_differences(old_ipac_data, new_ipac_data) change_ipac_df.to_excel("plandb.sioslab.com/backend/sheets/change_ipac.xlsx") print(f"Changed: {change_ipac_df}") - for entry in log: - print(f"Reason: {entry['reason']}") - print(f"Description: {entry['description']}") - print("Details:", entry['details']) - print("-" * 40) + # for entry in log: + # print(f"Reason: {entry['reason']}") + # print(f"Description: {entry['description']}") + # print("Details:", entry['details']) + # print("-" * 
40) if change_ipac_df.empty: print("No changes detected, zero rows have been updated/added") sys.exit() + + # get photodict + photdict_path = Path(f'plandb.sioslab.com/cache/update_photdict_2022-05.p') + infile="plandb.sioslab.com/backend/allphotdata_2015.npz" + photdict = get_photo_data(photdict_path, infile, cache) + + print(photdict) + + # get bandzip + bandzip_path = Path(f'plandb.sioslab.com/cache/update_bandzip_{datestr}.p') + bandzip = get_bandzip(bandzip_path, cache) + + print(bandzip) + + # get orbdata, orbfits + print("Generating orbdata and orbfits") + orbdata_path = Path(f'plandb.sioslab.com/cache/update_orbdata_{datestr}.p') + orbfits_path = Path(f'plandb.sioslab.com/cache/update_orbfits_{datestr}.p') + orbdata, orbfits = get_orbdata(orbdata_path, orbfits_path, change_ipac_df, bandzip, photdict, cache) + + # orbdata.to_excel("plandb.sioslab.com/backend/sheets/orbata.xlsx") + # orbdata.to_excel("plandb.sioslab.com/backend/sheets/orbfits.xlsx") + + print(orbdata) + + # get ephemeris + ephemeris_orbdata_path = Path(f'plandb.sioslab.com/cache/update_ephemeris_orbdata_{datestr}.p') + ephemeris_orbfits_path = Path(f'plandb.sioslab.com/cache/update_ephemeris_orbfits_{datestr}.p') + ephemeris_orbitfits, ephemeris_orbdata = get_ephemerisdata(ephemeris_orbdata_path, ephemeris_orbfits_path, change_ipac_df, orbfits, orbdata, bandzip, photdict, cache) + ephemeris_orbitfits.to_excel("plandb.sioslab.com/backend/sheets/ephemeris_orbfits.xlsx") - input1 = input("continue?") + # get quadrature + print("Quadrature") + quadrature_data_path = Path(f'plandb.sioslab.com/cache/update_quadrature_data_{datestr}.p') + quadrature_data = get_quadrature(quadrature_data_path, ephemeris_orbitfits, bandzip, photdict, cache) + quadrature_data.to_excel("plandb.sioslab.com/backend/sheets/quadrature_data.xlsx") + + contr_data_path = Path(f'plandb.sioslab.com/cache/update_contr_data_{datestr}.p') + exosims_json = 'plandb.sioslab.com/ci_perf_exosims.json' + + contr_data = 
get_contrastness(contr_data_path, exosims_json, quadrature_data, cache) + contr_data.to_excel("plandb.sioslab.com/backend/sheets/contr_data.xlsx") + + comps_path = Path(f'plandb.sioslab.com/cache/update_comps_{datestr}.p') + compdict_path = Path(f'plandb.sioslab.com/cache/update_compdict_{datestr}.p') + comps_data_path = Path(f'plandb.sioslab.com/cache/update_comps_data_{datestr}.p') + + + comps, compdict, comps_data = get_completeness(comps_path, compdict_path, comps_data_path, contr_data, bandzip, photdict, exosims_json, cache) + comps.to_excel("plandb.sioslab.com/backend/sheets/comps.xlsx") + comps_data.to_excel("plandb.sioslab.com/backend/sheets/comps_data.xlsx") + #None for compdict, as its dictionary + + plandata_path = Path(f'plandb.sioslab.com/cache/update_plandata_{datestr}.p') + stdata_path = Path(f'plandb.sioslab.com/cache/update_stdata_{datestr}.p') + table_orbitfits_path = Path(f'plandb.sioslab.com/cache/update_table_orbitfits_{datestr}.p') + + # Orbitfits got updated, maybe change to new var + plan_data, stdata, orbitfits = get_generated_tables(plandata_path, stdata_path, table_orbitfits_path, change_ipac_df, quadrature_data, comps_data, cache) + plan_data.to_excel("plandb.sioslab.com/backend/sheets/plandata.xlsx") + stdata.to_excel("plandb.sioslab.com/backend/sheets/stdata.xlsx") + orbitfits.to_excel('plandb.sioslab.com/backend/sheets/later_orbitfits.xlsx') + + + # Do compileContrastness and compile contrastness + # Look into and possibly remove the - to _ + contr_curvs2_path = Path(f'plandb.sioslab.com/cache/cont_curvs2_{datestr.replace("-", "_")}') + compiled_contr_curvs_path = Path(f'plandb.sioslab.com/cache/cont_curvs_{datestr.replace("-", "_")}') + # compiled_contrast_curves, newpdfs = get_compiled_contrast(compiled_contr_curvs_path, stdata, comps_data, change_ipac_df, contr_curvs2_path) + # compiled_contrast_curves = compileContrastCurves(stdata, compiled_contr_curvs_path) + # 
compiled_contrast_curves.to_excel("plandb.sioslab.com/backend/sheets/compiled_contrast_curves.xlsx") + # With current code, since get_compiled_contrast isnt fully working with new pdfs, new pdfs should be empty + # newpdfs.to_excel("plandb.sioslab.com/backend/sheets/newpdfs.xlsx") + + + # compile completeness + compiled_completeness_path = Path(f"plandb.sioslab.com/cache/compiled_completeness_{datestr}.p") + compiled_completeness = get_compiled_completeness(compiled_completeness_path, comps_data) + compiled_completeness.to_excel("plandb.sioslab.com/backend/sheets/compiled_completeness.xlsx") + + + # MakeSQL for the temporary database, creates diff engine, -> upsert diff engine with current engine + scenarios = pd.read_csv("plandb.sioslab.com/cache/scenario_angles.csv") + # writeSQL(engine, plandata=planets, stdata=stars, orbitfits=orbitfits, orbdata=orbits, pdfs=newpdfs, aliases=None,contrastCurves=contrast_curves,scenarios=scenarios, completeness=completeness) + + # TODO: is there unwanted side effects of using temp_writeSQL instead of actual writeSQL + temp_writeSQL(diff_sios_engine, plandata=plan_data, stdata=stdata, orbitfits=orbitfits, orbdata=orbdata, pdfs=None, aliases=None, contrastCurves=None, scenarios=scenarios, completeness=compiled_completeness) + + # Get from diff_database + diff_engine_connection = diff_sios_engine.connect() + diff_completeness_df, diff_contrast_curves_df, diff_orbitfits_df, diff_orbits_df, diff_pdfs_df, diff_planets_df, diff_scenarios_df, diff_stars_df = get_all_from_db(diff_engine_connection) + + # Get from old_database + old_completeness_df, old_contrast_curves_df, old_orbitfits_df, old_orbits_df, old_pdfs_df, old_planets_df, old_scenarios_df, old_stars_df = get_all_from_db(connection) + + # Upsert planets + # Planets don't have pl_id, they only have pl_name which is indexed after + print("Merging planets") + merged_planets = upsert_general(old_planets_df, diff_planets_df, "pl_name") + + # Upsert completeness + # print("Merging 
completeness") + + # print(old_completeness_df, diff_completeness_df) + # # Merge based on pl_id, foreign key relation, base on foreign key relation, might do the same for rest + # # TODO, this is highly likely to be wrong, forced to used indices here, because only unique, unfortunately incorrect, must base it upsert on the varying indices later + # # TODO: Iterate through each pl_id, since these are base off pl_id, correspond with pl_name, and then reset those completeness rows for that pl (remove and then add the new ones that were) + # merged_completeness = upsert_general(old_completeness_df, diff_completeness_df, "pl_id") + + # Upsert stars + # TODO Star differences are based off that the planets table has foreign key st_id, therefore, to properly update stars, must go through planets, see what has been updated, and then go down through those planets, and their stars, and then if that planet has changed, update that star + # Same logic as with planets + print("Merging stars") + merged_stars = upsert_general(old_stars_df, diff_stars_df, "st_name") + + print('old orbitfits') + print(old_orbitfits_df) + print("diff orbitfits") + print(diff_orbitfits_df) + old_orbitfits_df.to_excel('plandb.sioslab.com/backend/sheets/original_orbitfits.xlsx') + diff_orbitfits_df.to_excel('plandb.sioslab.com/backend/sheets/diff_orbitfits.xlsx') + # print("diff between diff and old, this should be nothing") + # print(diff_orbits_df) + + # For these later upserts, only way to properly upsert is to detect the change from the earlier value, like the difference in planets from ipac, and then categorize the change as a result from upated information or new information. If new information (new planet), just add but if updated information, going to have to track down the previous. 
Maybe it's possible for me to locally store database versions, so it can be quickly updated based on path of changes that happened + # Upsert orbitfits + # TODO: maybe the error is here + print("Merging orbit fits") + merged_orbitfits = upsert_general(old_orbitfits_df, diff_orbitfits_df, "pl_id") + + # TODO: compile/calc contrast curves still needs fixing + # TODO: make a seperate database for todays data, the old data, and test by comparing todays data to old data + update + + # Upsert orbits + print("Merging orbits") + merged_orbits = upsert_general(old_orbits_df, diff_orbits_df, "pl_id") + + # Upsert contrast curvess + print("Merging curves") + merged_contrast_curves = upsert_general(old_contrast_curves_df, diff_contrast_curves_df, "st_id") + + + # Upsert pdfs + # TODO: same as orbit fits + # print("Merging pdfs") + # merged_pdfs = upsert_df(old_pdfs_df, diff_pdfs_df, "Name") + + # No need to upsert scenarios, as it's updated locally + + # write back to original database with new values, + # TODO: optionally, store old database in a different database for archive + print("Merging and final write") + write_update_SQL(new_engine, merged_planets, merged_stars, merged_orbitfits, merged_orbits, pdfs=None, aliases=None, contrastCurves= None, scenarios=None, completeness=None) + + print("Done") + + # TODO: Print all total changes + # TODO: Correct the merges/compiles with no changes, ending up outputting an empty table, (account for table upserts with no changes) (Handle no new updates case). For example, final completeness would be empty if there are no changes to completeness in the update. 
+ + + """_summary_ + """ +def updateDatabaseTest(user : str, password : str, sios_engine_name : str, diff_engine_name : str, new_engine_name: str): + + # Setup SQL and MySQL engines + + #TODO Change these when necessary + sios_engine = create_engine('mysql+pymysql://'+user+':'+password+'@localhost/' + sios_engine_name,echo=True) + diff_sios_engine = create_engine('mysql+pymysql://'+user+':'+password+'@localhost/' + diff_engine_name,echo=True) + new_engine = create_engine('mysql+pymysql://'+user+':'+password+'@localhost/' + new_engine_name,echo=True) + + with sios_engine.connect() as connection: + + # Get ipac data + print("Getting IPAC Data") + + + ################ This is changed from regular updateDatabase(), so that we use archived matching ipac data ################## + # old_ipac_data, new_ipac_data = get_store_ipac() + + old_ipac_data_path = Path(f'plandb.sioslab.com/cache/data_cache_2022-05.p') + old_ipac_data = pd.read_pickle(old_ipac_data_path) + + new_ipac_data = getIPACdata() + + new_ipac_data.to_excel("plandb.sioslab.com/backend/sheets/new_ipac_data.xlsx") + old_ipac_data.to_excel("plandb.sioslab.com/backend/sheets/old_ipac_data.xlsx") + + print(f"New IPAC: {new_ipac_data}") + print(f"Old IPAC: {old_ipac_data}") + + + #TODO: be able to insert custom ipac data, for test, possibly using flag + #TODO: Test new row differences, because currently its zero, make test data that is slightly different (similar to ipac data) + print("calculating row differences") + change_ipac_df = get_ipac_differences(old_ipac_data, new_ipac_data, ["pl_name"]) + change_ipac_df.to_excel("plandb.sioslab.com/backend/sheets/change_ipac.xlsx") + print(f"Changed: {change_ipac_df}") + + # for entry in log: + # print(f"Reason: {entry['reason']}") + # print(f"Description: {entry['description']}") + # print("Details:", entry['details']) + # print("-" * 40) + + # input1 = input("test") + + if change_ipac_df.empty: + print("No changes detected, zero rows have been updated/added") + 
sys.exit() + # get photodict - photdict_path = Path(f'cache/update_photdict_2022-05.p') + photdict_path = Path(f'plandb.sioslab.com/cache/update_photdict_2022-05.p') infile="plandb.sioslab.com/backend/allphotdata_2015.npz" photdict = get_photo_data(photdict_path, infile, cache) print(photdict) # get bandzip - bandzip_path = Path(f'cache/update_bandzip_{datestr}.p') + bandzip_path = Path(f'plandb.sioslab.com/cache/update_bandzip_{datestr}.p') bandzip = get_bandzip(bandzip_path, cache) print(bandzip) # get orbdata, orbfits print("Generating orbdata and orbfits") - orbdata_path = Path(f'cache/update_orbdata_{datestr}.p') - orbfits_path = Path(f'cache/update_orbfits_{datestr}.p') + orbdata_path = Path(f'plandb.sioslab.com/cache/update_orbdata_{datestr}.p') + orbfits_path = Path(f'plandb.sioslab.com/cache/update_orbfits_{datestr}.p') orbdata, orbfits = get_orbdata(orbdata_path, orbfits_path, change_ipac_df, bandzip, photdict, cache) # orbdata.to_excel("plandb.sioslab.com/backend/sheets/orbata.xlsx") @@ -85,26 +294,26 @@ def updateDatabase(): print(orbdata) # get ephemeris - ephemeris_orbdata_path = Path(f'cache/update_ephemeris_orbdata_{datestr}.p') - ephemeris_orbfits_path = Path(f'cache/update_ephemeris_orbfits_{datestr}.p') + ephemeris_orbdata_path = Path(f'plandb.sioslab.com/cache/update_ephemeris_orbdata_{datestr}.p') + ephemeris_orbfits_path = Path(f'plandb.sioslab.com/cache/update_ephemeris_orbfits_{datestr}.p') ephemeris_orbitfits, ephemeris_orbdata = get_ephemerisdata(ephemeris_orbdata_path, ephemeris_orbfits_path, change_ipac_df, orbfits, orbdata, bandzip, photdict, cache) ephemeris_orbitfits.to_excel("plandb.sioslab.com/backend/sheets/ephemeris_orbfits.xlsx") # get quadrature print("Quadrature") - quadrature_data_path = Path(f'cache/update_quadrature_data_{datestr}.p') + quadrature_data_path = Path(f'plandb.sioslab.com/cache/update_quadrature_data_{datestr}.p') quadrature_data = get_quadrature(quadrature_data_path, ephemeris_orbitfits, bandzip, photdict, 
cache) quadrature_data.to_excel("plandb.sioslab.com/backend/sheets/quadrature_data.xlsx") - contr_data_path = Path(f'cache/update_contr_data_{datestr}.p') + contr_data_path = Path(f'plandb.sioslab.com/cache/update_contr_data_{datestr}.p') exosims_json = 'plandb.sioslab.com/ci_perf_exosims.json' contr_data = get_contrastness(contr_data_path, exosims_json, quadrature_data, cache) contr_data.to_excel("plandb.sioslab.com/backend/sheets/contr_data.xlsx") - comps_path = Path(f'cache/update_comps_{datestr}.p') - compdict_path = Path(f'cache/update_compdict_{datestr}.p') - comps_data_path = Path(f'cache/update_comps_data_{datestr}.p') + comps_path = Path(f'plandb.sioslab.com/cache/update_comps_{datestr}.p') + compdict_path = Path(f'plandb.sioslab.com/cache/update_compdict_{datestr}.p') + comps_data_path = Path(f'plandb.sioslab.com/cache/update_comps_data_{datestr}.p') comps, compdict, comps_data = get_completeness(comps_path, compdict_path, comps_data_path, contr_data, bandzip, photdict, exosims_json, cache) @@ -112,9 +321,9 @@ def updateDatabase(): comps_data.to_excel("plandb.sioslab.com/backend/sheets/comps_data.xlsx") #None for compdict, as its dictionary - plandata_path = Path(f'cache/update_plandata_{datestr}.p') - stdata_path = Path(f'cache/update_stdata_{datestr}.p') - table_orbitfits_path = Path(f'cache/update_table_orbitfits_{datestr}.p') + plandata_path = Path(f'plandb.sioslab.com/cache/update_plandata_{datestr}.p') + stdata_path = Path(f'plandb.sioslab.com/cache/update_stdata_{datestr}.p') + table_orbitfits_path = Path(f'plandb.sioslab.com/cache/update_table_orbitfits_{datestr}.p') # Orbitfits got updated, maybe change to new var plan_data, stdata, orbitfits = get_generated_tables(plandata_path, stdata_path, table_orbitfits_path, change_ipac_df, quadrature_data, comps_data, cache) @@ -139,16 +348,6 @@ def updateDatabase(): compiled_completeness = get_compiled_completeness(compiled_completeness_path, comps_data) 
compiled_completeness.to_excel("plandb.sioslab.com/backend/sheets/compiled_completeness.xlsx") - # diff_completeness_df = pd.DataFrame({ - # 'completeness_id': [0], - # 'pl_id': [3], - # 'completeness': [0.0111], - # 'scenario_name' : ['Optimistic_NF_Imager_20000hr'], - # 'compMinWA': [None], - # 'compMaxWA': [None], - # 'compMindMag': [None], - # 'compMaxdMag': [None], - # }) # MakeSQL for the temporary database, creates diff engine, -> upsert diff engine with current engine scenarios = pd.read_csv("plandb.sioslab.com/cache/scenario_angles.csv") @@ -171,13 +370,13 @@ def updateDatabase(): # Upsert completeness - print("Merging completeness") + # print("Merging completeness") - print(old_completeness_df, diff_completeness_df) - # Merge based on pl_id, foreign key relation, base on foreign key relation, might do the same for rest - # TODO, this is highly likely to be wrong, forced to used indices here, because only unique, unfortunately incorrect, must base it upsert on the varying indices later - # TODO: Iterate through each pl_id, since these are base off pl_id, correspond with pl_name, and then reset those completeness rows for that pl (remove and then add the new ones that were) - merged_completeness = upsert_general(old_completeness_df, diff_completeness_df, "pl_id") + # print(old_completeness_df, diff_completeness_df) + # # Merge based on pl_id, foreign key relation, base on foreign key relation, might do the same for rest + # # TODO, this is highly likely to be wrong, forced to used indices here, because only unique, unfortunately incorrect, must base it upsert on the varying indices later + # # TODO: Iterate through each pl_id, since these are base off pl_id, correspond with pl_name, and then reset those completeness rows for that pl (remove and then add the new ones that were) + # merged_completeness = upsert_general(old_completeness_df, diff_completeness_df, "pl_id") # Upsert stars # TODO Star differences are based off that the planets table has foreign 
key st_id, therefore, to properly update stars, must go through planets, see what has been updated, and then go down through those planets, and their stars, and then if that planet has changed, update that star @@ -222,7 +421,7 @@ def updateDatabase(): # write back to original database with new values, # TODO: optionally, store old database in a different database for archive print("Merging and final write") - write_update_SQL(new_engine, merged_planets, merged_stars, merged_orbitfits, merged_orbits, None, aliases=None, contrastCurves= None, scenarios=None, completeness=merged_completeness) + write_update_SQL(new_engine, merged_planets, merged_stars, merged_orbitfits, merged_orbits, pdfs=None, aliases=None, contrastCurves= None, scenarios=None, completeness=None) print("Done") diff --git a/backend/update_util.py b/backend/update_util.py index e92c4cb..2da61c5 100644 --- a/backend/update_util.py +++ b/backend/update_util.py @@ -539,52 +539,90 @@ def get_ipac_database(data_path: Path, cache: bool) -> pd.DataFrame: #Could i have just done comparison = old_df == updated_df -def get_ipac_differences(old_df: pd.DataFrame, updated_df: pd.DataFrame, tolerance: float = 1e-1): - col_names1 = old_df.columns.tolist() - col_names2 = updated_df.columns.tolist() - change_log = [] - - if col_names1 != col_names2: - return pd.DataFrame(), change_log +# def get_ipac_differences(old_df: pd.DataFrame, updated_df: pd.DataFrame, tolerance: float = 1e-1): +# if list(old_df.columns) != list(updated_df.columns): +# return pd.DataFrame(), [] + +# diff_rows = [] +# change_log = [] + +# for index, old_row in old_df.iterrows(): +# pl_name_ind = old_row['pl_name'] +# corresponding_updated_row = updated_df.loc[updated_df['pl_name'] == pl_name_ind] + +# if corresponding_updated_row.empty: +# diff_rows.append(old_row.to_dict()) +# change_log.append({"pl_name": pl_name_ind, "status": "removed"}) +# else: +# corresponding_updated_row = corresponding_updated_row.iloc[0] +# differences = {} + +# for 
col in old_df.columns: +# old_value = old_row[col] +# new_value = corresponding_updated_row[col] + +# # NaNs are equal +# if pd.isna(old_value) and pd.isna(new_value): +# continue + +# # Tolerance +# if isinstance(old_value, (int, float)) and isinstance(new_value, (int, float)): +# if abs(old_value - new_value) <= tolerance: +# continue + +# if old_value != new_value: +# differences[col] = {"old": old_value, "new": new_value} + +# if differences: +# diff_rows.append(corresponding_updated_row.to_dict()) +# change_log.append({"pl_name": pl_name_ind, "differences": differences}) + +# # Drop the row from updated_df +# updated_df = updated_df[updated_df['pl_name'] != pl_name_ind] + +# # Add remaining/leftover rows from updated_df +# for _, new_row in updated_df.iterrows(): +# diff_rows.append(new_row.to_dict()) +# change_log.append({"pl_name": new_row['pl_name'], "status": "added"}) + +# diff_df = pd.DataFrame(diff_rows) +# return diff_df, change_log + + +def get_ipac_differences(old_df, new_df, key_columns = ["pl_name"]): + """ + Find differences between old and new DataFrames based on key columns, + handling `pd.NA` properly during comparisons. - diff_df = pd.DataFrame() + Parameters: + old_df (pd.DataFrame): The original DataFrame. + new_df (pd.DataFrame): The updated DataFrame. + key_columns (list): List of columns that form the unique key. 
- for index, row in old_df.iterrows(): - - pl_name_ind = row['pl_name'] - corresponding_updated_row = updated_df.loc[updated_df['pl_name'] == pl_name_ind] - - if corresponding_updated_row.empty: - diff_df = pd.concat([diff_df, pd.DataFrame([row])], ignore_index=True) - else: - corresponding_updated_row = corresponding_updated_row.iloc[0] - differences = {} - - for col in old_df.columns: - old_value = row[col] - new_value = corresponding_updated_row[col] - - # Nan's - if pd.isna(old_value) and pd.isna(new_value): - continue # Nan's are equal - - # Tolerance - if isinstance(old_value, (int, float, np.number)) and isinstance(new_value, (int, float, np.number)): - if abs(old_value - new_value) <= tolerance: - continue # Tolerance considered equal - - if old_value != new_value: - differences[col] = {"old": old_value, "new": new_value} - - if differences: - diff_df = pd.concat([diff_df, pd.DataFrame([corresponding_updated_row])], ignore_index=True) - - updated_df = updated_df.drop(corresponding_updated_row.name) + Returns: + pd.DataFrame: A DataFrame with updated and new rows. 
+ """ + if not set(key_columns).issubset(old_df.columns) or not set(key_columns).issubset(new_df.columns): + raise ValueError("Key columns must exist in both DataFrames.") - if not updated_df.empty: - diff_df = pd.concat([diff_df, updated_df], ignore_index=True) + old_df = old_df.set_index(key_columns) + new_df = new_df.set_index(key_columns) + + old_df, new_df = old_df.align(new_df, join="outer", axis=1, fill_value=pd.NA) + shared_index = old_df.index.intersection(new_df.index) + + filled_old_df = old_df.loc[shared_index].fillna("__MISSING__") + filled_new_df = new_df.loc[shared_index].fillna("__MISSING__") + differences = (filled_old_df != filled_new_df).any(axis=1) + + updated_rows = new_df.loc[shared_index][differences] + new_rows = new_df[~new_df.index.isin(old_df.index)] - return diff_df, change_log + result = pd.concat([updated_rows, new_rows]) + + return result.reset_index() + + diff --git a/ci_perf_exosims.json b/ci_perf_exosims.json index 0e2b882..bc6235a 100644 --- a/ci_perf_exosims.json +++ b/ci_perf_exosims.json @@ -35,7 +35,7 @@ "scienceInstruments": [ { "name": "Conservative_NF_Imager", "ppFact_NF_Imager": 1, - "QE": "../CGI_Perf/EBcsvData/csv_fix/Photometry/QE_e2v_Spec.csv", + "QE": "CGI_Perf/EBcsvData/csv_fix/Photometry/QE_e2v_Spec.csv", "FoV": 9.5, "pixelNumber": 1024, "pixelSize": 1.3e-5, @@ -52,11 +52,11 @@ "k_samp": 0.20, "texp": 3, "ENF": 1.0, - "THPUT": "../CGI_Perf/EBcsvData/csv_fix/Photometry/THPT_EB_NFB1_200730.csv", - "DET": "../CGI_Perf/EBcsvData/csv_fix/Photometry/DET_CBE_210127.csv" + "THPUT": "CGI_Perf/EBcsvData/csv_fix/Photometry/THPT_EB_NFB1_200730.csv", + "DET": "CGI_Perf/EBcsvData/csv_fix/Photometry/DET_CBE_210127.csv" }, { "name": "Optimistic_NF_Imager", - "QE": "../CGI_Perf/EBcsvData/csv_fix/Photometry/QE_e2v_Spec.csv", + "QE": "CGI_Perf/EBcsvData/csv_fix/Photometry/QE_e2v_Spec.csv", "FoV": 9.5, "pixelNumber": 1024, "pixelSize": 1.3e-5, @@ -73,11 +73,11 @@ "k_samp": 0.20, "texp": 3, "ENF": 1.0, - "THPUT": 
"../CGI_Perf/EBcsvData/csv_fix/Photometry/THPT_EB_NFB1_200730.csv", - "DET": "../CGI_Perf/EBcsvData/csv_fix/Photometry/DET_CBE_210127.csv" + "THPUT": "CGI_Perf/EBcsvData/csv_fix/Photometry/THPT_EB_NFB1_200730.csv", + "DET": "CGI_Perf/EBcsvData/csv_fix/Photometry/DET_CBE_210127.csv" }, { "name": "Conservative_Amici_Spec", - "QE": "../CGI_Perf/EBcsvData/csv_fix/Photometry/QE_e2v_Spec.csv", + "QE": "CGI_Perf/EBcsvData/csv_fix/Photometry/QE_e2v_Spec.csv", "FoV": 1, "pixelNumber": 1024, "pixelSize": 1.3e-5, @@ -98,11 +98,11 @@ "focal":0.26, "Fnum": 52, "Rs": 50, - "THPUT": "../CGI_Perf/EBcsvData/csv_fix/Photometry/THPT_EB_SPB3_200730.csv", - "DET": "../CGI_Perf/EBcsvData/csv_fix/Photometry/DET_CBE_210127.csv" + "THPUT": "CGI_Perf/EBcsvData/csv_fix/Photometry/THPT_EB_SPB3_200730.csv", + "DET": "CGI_Perf/EBcsvData/csv_fix/Photometry/DET_CBE_210127.csv" }, { "name": "Optimistic_Amici_Spec", - "QE": "../CGI_Perf/EBcsvData/csv_fix/Photometry/QE_e2v_Spec.csv", + "QE": "CGI_Perf/EBcsvData/csv_fix/Photometry/QE_e2v_Spec.csv", "FoV": 1, "pixelNumber": 1024, "pixelSize": 1.3e-5, @@ -123,11 +123,11 @@ "focal":0.26, "Fnum": 52, "Rs": 50, - "THPUT": "../CGI_Perf/EBcsvData/csv_fix/Photometry/THPT_EB_SPB3_200730.csv", - "DET": "../CGI_Perf/EBcsvData/csv_fix/Photometry/DET_CBE_210127.csv" + "THPUT": "CGI_Perf/EBcsvData/csv_fix/Photometry/THPT_EB_SPB3_200730.csv", + "DET": "CGI_Perf/EBcsvData/csv_fix/Photometry/DET_CBE_210127.csv" }, { "name": "Conservative_WF_Imager", - "QE": "../CGI_Perf/EBcsvData/csv_fix/Photometry/QE_e2v_Spec.csv", + "QE": "CGI_Perf/EBcsvData/csv_fix/Photometry/QE_e2v_Spec.csv", "FoV": 1, "pixelNumber": 1024, "pixelSize": 1.3e-5, @@ -145,11 +145,11 @@ "texp": 14, "ENF": 1.0, "Rs": 50, - "THPUT": "../CGI_Perf/EBcsvData/csv_fix/Photometry/THPT_EB_WFB4_200730.csv", - "DET": "../CGI_Perf/EBcsvData/csv_fix/Photometry/DET_CBE_210127.csv" + "THPUT": "CGI_Perf/EBcsvData/csv_fix/Photometry/THPT_EB_WFB4_200730.csv", + "DET": 
"CGI_Perf/EBcsvData/csv_fix/Photometry/DET_CBE_210127.csv" }, { "name": "Optimistic_WF_Imager", - "QE": "../CGI_Perf/EBcsvData/csv_fix/Photometry/QE_e2v_Spec.csv", + "QE": "CGI_Perf/EBcsvData/csv_fix/Photometry/QE_e2v_Spec.csv", "FoV": 1, "pixelNumber": 1024, "pixelSize": 1.3e-5, @@ -167,8 +167,8 @@ "texp": 14, "ENF": 1.0, "Rs": 50, - "THPUT": "../CGI_Perf/EBcsvData/csv_fix/Photometry/THPT_EB_WFB4_200730.csv", - "DET": "../CGI_Perf/EBcsvData/csv_fix/Photometry/DET_CBE_210127.csv" + "THPUT": "CGI_Perf/EBcsvData/csv_fix/Photometry/THPT_EB_WFB4_200730.csv", + "DET": "CGI_Perf/EBcsvData/csv_fix/Photometry/DET_CBE_210127.csv" } ], @@ -185,13 +185,13 @@ "koAngles_Earth":[45.0,180.0], "koAngles_Moon":[45.0,180.0], "koAngles_Small":[1.0,180.0], - "occ_trans": "../CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_HLC_20190210b.csv", - "core_thruput": "../CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_HLC_20190210b.csv", - "core_mean_intensity": "../CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_HLC_20190210b.csv", - "core_area": "../CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_HLC_20190210b.csv", - "core_contrast": "../CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_HLC_20190210b.csv", + "occ_trans": "CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_HLC_20190210b.csv", + "core_thruput": "CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_HLC_20190210b.csv", + "core_mean_intensity": "CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_HLC_20190210b.csv", + "core_area": "CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_HLC_20190210b.csv", + "core_contrast": "CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_HLC_20190210b.csv", "core_platescale": 0.30, - "core_stability": "../CGI_Perf/EBcsvData/csv_fix/Cstability/CS_HLC_NFIM_201026.csv", + "core_stability": "CGI_Perf/EBcsvData/csv_fix/Cstability/CS_HLC_NFIM_201026.csv", "core_stability_setting": "REQ" }, { "name": "DRM_HLC-565", @@ -206,13 +206,13 @@ "koAngles_Earth":[45.0,180.0], "koAngles_Moon":[45.0,180.0], "koAngles_Small":[1.0,180.0], - "occ_trans": 
"../CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_HLC_20190210b.csv", - "core_thruput": "../CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_HLC_20190210b.csv", - "core_mean_intensity": "../CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_HLC_20190210b.csv", - "core_area": "../CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_HLC_20190210b.csv", - "core_contrast": "../CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_HLC_20190210b.csv", + "occ_trans": "CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_HLC_20190210b.csv", + "core_thruput": "CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_HLC_20190210b.csv", + "core_mean_intensity": "CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_HLC_20190210b.csv", + "core_area": "CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_HLC_20190210b.csv", + "core_contrast": "CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_HLC_20190210b.csv", "core_platescale": 0.30, - "core_stability": "../CGI_Perf/EBcsvData/csv_fix/Cstability/CS_HLC_NFIM_200520_SCI.csv", + "core_stability": "CGI_Perf/EBcsvData/csv_fix/Cstability/CS_HLC_NFIM_200520_SCI.csv", "core_stability_setting": "MCBE", "core_stability_interp": true }, @@ -227,13 +227,13 @@ "koAngles_Earth":[45.0,180.0], "koAngles_Moon":[45.0,180.0], "koAngles_Small":[1.0,180.0], - "occ_trans": "../CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_SPEC_20200617.csv", - "core_thruput": "../CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_SPEC_20200617.csv", - "core_mean_intensity": "../CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_SPEC_20200617.csv", - "core_area": "../CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_SPEC_20200617.csv", - "core_contrast": "../CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_SPEC_20200617.csv", + "occ_trans": "CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_SPEC_20200617.csv", + "core_thruput": "CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_SPEC_20200617.csv", + "core_mean_intensity": "CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_SPEC_20200617.csv", + "core_area": 
"CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_SPEC_20200617.csv", + "core_contrast": "CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_SPEC_20200617.csv", "core_platescale": 0.30, - "core_stability": "../CGI_Perf/EBcsvData/csv_fix/Cstability/CS_SPC_SPEC_201026.csv", + "core_stability": "CGI_Perf/EBcsvData/csv_fix/Cstability/CS_SPC_SPEC_201026.csv", "core_stability_setting": "REQ" }, { "name": "DRM_SPEC_B3", @@ -247,13 +247,13 @@ "koAngles_Earth":[45.0,180.0], "koAngles_Moon":[45.0,180.0], "koAngles_Small":[1.0,180.0], - "occ_trans": "../CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_SPEC_20200617.csv", - "core_thruput": "../CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_SPEC_20200617.csv", - "core_mean_intensity": "../CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_SPEC_20200617.csv", - "core_area": "../CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_SPEC_20200617.csv", - "core_contrast": "../CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_SPEC_20200617.csv", + "occ_trans": "CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_SPEC_20200617.csv", + "core_thruput": "CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_SPEC_20200617.csv", + "core_mean_intensity": "CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_SPEC_20200617.csv", + "core_area": "CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_SPEC_20200617.csv", + "core_contrast": "CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_SPEC_20200617.csv", "core_platescale": 0.30, - "core_stability": "../CGI_Perf/EBcsvData/csv_fix/Cstability/CS_SPC_SPEC_200520_SCI.csv", + "core_stability": "CGI_Perf/EBcsvData/csv_fix/Cstability/CS_SPC_SPEC_200520_SCI.csv", "core_stability_setting": "MCBE" }, { "name": "EB_IMG_B4", @@ -268,13 +268,13 @@ "koAngles_Earth":[45.0,180.0], "koAngles_Moon":[45.0,180.0], "koAngles_Small":[1.0,180.0], - "occ_trans": "../CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_WFOV_20200610.csv", - "core_thruput": "../CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_WFOV_20200610.csv", - "core_mean_intensity": 
"../CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_WFOV_20200610.csv", - "core_area": "../CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_WFOV_20200610.csv", - "core_contrast": "../CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_WFOV_20200610.csv", + "occ_trans": "CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_WFOV_20200610.csv", + "core_thruput": "CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_WFOV_20200610.csv", + "core_mean_intensity": "CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_WFOV_20200610.csv", + "core_area": "CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_WFOV_20200610.csv", + "core_contrast": "CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_WFOV_20200610.csv", "core_platescale": 0.30, - "core_stability": "../CGI_Perf/EBcsvData/csv_fix/Cstability/CS_SPC_WFIM_201026.csv", + "core_stability": "CGI_Perf/EBcsvData/csv_fix/Cstability/CS_SPC_WFIM_201026.csv", "core_stability_setting": "REQ" }, { "name": "DRM_IMG_B4", @@ -289,13 +289,13 @@ "koAngles_Earth":[45.0,180.0], "koAngles_Moon":[45.0,180.0], "koAngles_Small":[1.0,180.0], - "occ_trans": "../CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_WFOV_20200610.csv", - "core_thruput": "../CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_WFOV_20200610.csv", - "core_mean_intensity": "../CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_WFOV_20200610.csv", - "core_area": "../CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_WFOV_20200610.csv", - "core_contrast": "../CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_WFOV_20200610.csv", + "occ_trans": "CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_WFOV_20200610.csv", + "core_thruput": "CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_WFOV_20200610.csv", + "core_mean_intensity": "CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_WFOV_20200610.csv", + "core_area": "CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_WFOV_20200610.csv", + "core_contrast": "CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_WFOV_20200610.csv", "core_platescale": 0.30, - "core_stability": 
"../CGI_Perf/EBcsvData/csv_fix/Cstability/CS_SPC_WFIM_200520_SCI.csv", + "core_stability": "CGI_Perf/EBcsvData/csv_fix/Cstability/CS_SPC_WFIM_200520_SCI.csv", "core_stability_setting": "MCBE" } ], diff --git a/exosims_input.json b/exosims_input.json index dc727e4..77de071 100644 --- a/exosims_input.json +++ b/exosims_input.json @@ -1,5 +1,5 @@ { - "catalogpath":"hip_ci_etc.csv", + "catalogpath":"plandb.sioslab.com/hip_ci_etc.csv", "minComp": 0, "dMagLim": 22.5, "pupilDiam": 2.363, @@ -35,7 +35,7 @@ "scienceInstruments": [ { "name": "EB_NF_Imager", "ppFact_NF_Imager": 1, - "QE": "../CGI_Perf/EBcsvData/csv_fix/Photometry/QE_e2v_Spec.csv", + "QE": "CGI_Perf/EBcsvData/Photometry/QE_e2v_Spec.csv", "FoV": 9.5, "pixelNumber": 1024, "pixelSize": 1.3e-5, @@ -52,11 +52,11 @@ "k_samp": 0.20, "texp": 3, "ENF": 1.0, - "THPUT": "../CGI_Perf/EBcsvData/csv_fix/Photometry/THPT_EB_NFB1_200730.csv", - "DET": "../CGI_Perf/EBcsvData/csv_fix/Photometry/DET_CBE_210127.csv" + "THPUT": "CGI_Perf/EBcsvData/csv_fix/Photometry/THPT_EB_NFB1_200730.csv", + "DET": "CGI_Perf/EBcsvData/csv_fix/Photometry/DET_CBE_210127.csv" }, { "name": "DRM_NF_Imager", - "QE": "../CGI_Perf/EBcsvData/csv_fix/Photometry/QE_e2v_Spec.csv", + "QE": "CGI_Perf/EBcsvData/csv_fix/Photometry/QE_e2v_Spec.csv", "FoV": 9.5, "pixelNumber": 1024, "pixelSize": 1.3e-5, @@ -73,11 +73,11 @@ "k_samp": 0.20, "texp": 3, "ENF": 1.0, - "THPUT": "../CGI_Perf/EBcsvData/csv_fix/Photometry/THPT_EB_NFB1_200730.csv", - "DET": "../CGI_Perf/EBcsvData/csv_fix/Photometry/DET_CBE_210127.csv" + "THPUT": "CGI_Perf/EBcsvData/csv_fix/Photometry/THPT_EB_NFB1_200730.csv", + "DET": "CGI_Perf/EBcsvData/csv_fix/Photometry/DET_CBE_210127.csv" }, { "name": "EB_Amici_Spec", - "QE": "../CGI_Perf/EBcsvData/csv_fix/Photometry/QE_e2v_Spec.csv", + "QE": "CGI_Perf/EBcsvData/csv_fix/Photometry/QE_e2v_Spec.csv", "FoV": 1, "pixelNumber": 1024, "pixelSize": 1.3e-5, @@ -98,11 +98,11 @@ "focal":0.26, "Fnum": 52, "Rs": 50, - "THPUT": 
"../CGI_Perf/EBcsvData/csv_fix/Photometry/THPT_EB_SPB3_200730.csv", - "DET": "../CGI_Perf/EBcsvData/csv_fix/Photometry/DET_CBE_210127.csv" + "THPUT": "CGI_Perf/EBcsvData/csv_fix/Photometry/THPT_EB_SPB3_200730.csv", + "DET": "CGI_Perf/EBcsvData/csv_fix/Photometry/DET_CBE_210127.csv" }, { "name": "DRM_Amici_Spec", - "QE": "../CGI_Perf/EBcsvData/csv_fix/Photometry/QE_e2v_Spec.csv", + "QE": "CGI_Perf/EBcsvData/csv_fix/Photometry/QE_e2v_Spec.csv", "FoV": 1, "pixelNumber": 1024, "pixelSize": 1.3e-5, @@ -123,11 +123,11 @@ "focal":0.26, "Fnum": 52, "Rs": 50, - "THPUT": "../CGI_Perf/EBcsvData/csv_fix/Photometry/THPT_EB_SPB3_200730.csv", - "DET": "../CGI_Perf/EBcsvData/csv_fix/Photometry/DET_CBE_210127.csv" + "THPUT": "CGI_Perf/EBcsvData/csv_fix/Photometry/THPT_EB_SPB3_200730.csv", + "DET": "CGI_Perf/EBcsvData/csv_fix/Photometry/DET_CBE_210127.csv" }, { "name": "EB_WF_Imager", - "QE": "../CGI_Perf/EBcsvData/csv_fix/Photometry/QE_e2v_Spec.csv", + "QE": "CGI_Perf/EBcsvData/csv_fix/Photometry/QE_e2v_Spec.csv", "FoV": 1, "pixelNumber": 1024, "pixelSize": 1.3e-5, @@ -145,11 +145,11 @@ "texp": 14, "ENF": 1.0, "Rs": 50, - "THPUT": "../CGI_Perf/EBcsvData/csv_fix/Photometry/THPT_EB_WFB4_200730.csv", - "DET": "../CGI_Perf/EBcsvData/csv_fix/Photometry/DET_CBE_210127.csv" + "THPUT": "CGI_Perf/EBcsvData/csv_fix/Photometry/THPT_EB_WFB4_200730.csv", + "DET": "CGI_Perf/EBcsvData/csv_fix/Photometry/DET_CBE_210127.csv" }, { "name": "DRM_WF_Imager", - "QE": "../CGI_Perf/EBcsvData/csv_fix/Photometry/QE_e2v_Spec.csv", + "QE": "CGI_Perf/EBcsvData/csv_fix/Photometry/QE_e2v_Spec.csv", "FoV": 1, "pixelNumber": 1024, "pixelSize": 1.3e-5, @@ -167,8 +167,8 @@ "texp": 14, "ENF": 1.0, "Rs": 50, - "THPUT": "../CGI_Perf/EBcsvData/csv_fix/Photometry/THPT_EB_WFB4_200730.csv", - "DET": "../CGI_Perf/EBcsvData/csv_fix/Photometry/DET_CBE_210127.csv" + "THPUT": "CGI_Perf/EBcsvData/csv_fix/Photometry/THPT_EB_WFB4_200730.csv", + "DET": "CGI_Perf/EBcsvData/csv_fix/Photometry/DET_CBE_210127.csv" } ], @@ -185,13 
+185,13 @@ "koAngles_Earth":[45.0,180.0], "koAngles_Moon":[45.0,180.0], "koAngles_Small":[1.0,180.0], - "occ_trans": "../CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_HLC_20190210b.csv", - "core_thruput": "../CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_HLC_20190210b.csv", - "core_mean_intensity": "../CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_HLC_20190210b.csv", - "core_area": "../CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_HLC_20190210b.csv", - "core_contrast": "../CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_HLC_20190210b.csv", + "occ_trans": "CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_HLC_20190210b.csv", + "core_thruput": "CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_HLC_20190210b.csv", + "core_mean_intensity": "CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_HLC_20190210b.csv", + "core_area": "CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_HLC_20190210b.csv", + "core_contrast": "CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_HLC_20190210b.csv", "core_platescale": 0.30, - "core_stability": "../CGI_Perf/EBcsvData/csv_fix/Cstability/CS_HLC_NFIM_201026.csv", + "core_stability": "CGI_Perf/EBcsvData/csv_fix/Cstability/CS_HLC_NFIM_201026.csv", "core_stability_setting": "REQ" }, { "name": "DRM_HLC-565", @@ -206,13 +206,13 @@ "koAngles_Earth":[45.0,180.0], "koAngles_Moon":[45.0,180.0], "koAngles_Small":[1.0,180.0], - "occ_trans": "../CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_HLC_20190210b.csv", - "core_thruput": "../CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_HLC_20190210b.csv", - "core_mean_intensity": "../CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_HLC_20190210b.csv", - "core_area": "../CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_HLC_20190210b.csv", - "core_contrast": "../CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_HLC_20190210b.csv", + "occ_trans": "CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_HLC_20190210b.csv", + "core_thruput": "CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_HLC_20190210b.csv", + "core_mean_intensity": 
"CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_HLC_20190210b.csv", + "core_area": "CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_HLC_20190210b.csv", + "core_contrast": "CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_HLC_20190210b.csv", "core_platescale": 0.30, - "core_stability": "../CGI_Perf/EBcsvData/csv_fix/Cstability/CS_HLC_NFIM_200520_SCI.csv", + "core_stability": "CGI_Perf/EBcsvData/csv_fix/Cstability/CS_HLC_NFIM_200520_SCI.csv", "core_stability_setting": "MCBE", "core_stability_interp": true }, @@ -227,13 +227,13 @@ "koAngles_Earth":[45.0,180.0], "koAngles_Moon":[45.0,180.0], "koAngles_Small":[1.0,180.0], - "occ_trans": "../CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_SPEC_20200617.csv", - "core_thruput": "../CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_SPEC_20200617.csv", - "core_mean_intensity": "../CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_SPEC_20200617.csv", - "core_area": "../CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_SPEC_20200617.csv", - "core_contrast": "../CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_SPEC_20200617.csv", + "occ_trans": "CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_SPEC_20200617.csv", + "core_thruput": "CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_SPEC_20200617.csv", + "core_mean_intensity": "CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_SPEC_20200617.csv", + "core_area": "CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_SPEC_20200617.csv", + "core_contrast": "CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_SPEC_20200617.csv", "core_platescale": 0.30, - "core_stability": "../CGI_Perf/EBcsvData/csv_fix/Cstability/CS_SPC_SPEC_201026.csv", + "core_stability": "CGI_Perf/EBcsvData/csv_fix/Cstability/CS_SPC_SPEC_201026.csv", "core_stability_setting": "REQ" }, { "name": "DRM_SPEC_B3", @@ -247,13 +247,13 @@ "koAngles_Earth":[45.0,180.0], "koAngles_Moon":[45.0,180.0], "koAngles_Small":[1.0,180.0], - "occ_trans": "../CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_SPEC_20200617.csv", - "core_thruput": 
"../CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_SPEC_20200617.csv", - "core_mean_intensity": "../CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_SPEC_20200617.csv", - "core_area": "../CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_SPEC_20200617.csv", - "core_contrast": "../CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_SPEC_20200617.csv", + "occ_trans": "CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_SPEC_20200617.csv", + "core_thruput": "CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_SPEC_20200617.csv", + "core_mean_intensity": "CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_SPEC_20200617.csv", + "core_area": "CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_SPEC_20200617.csv", + "core_contrast": "CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_SPEC_20200617.csv", "core_platescale": 0.30, - "core_stability": "../CGI_Perf/EBcsvData/csv_fix/Cstability/CS_SPC_SPEC_200520_SCI.csv", + "core_stability": "CGI_Perf/EBcsvData/csv_fix/Cstability/CS_SPC_SPEC_200520_SCI.csv", "core_stability_setting": "MCBE" }, { "name": "EB_IMG_B4", @@ -268,13 +268,13 @@ "koAngles_Earth":[45.0,180.0], "koAngles_Moon":[45.0,180.0], "koAngles_Small":[1.0,180.0], - "occ_trans": "../CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_WFOV_20200610.csv", - "core_thruput": "../CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_WFOV_20200610.csv", - "core_mean_intensity": "../CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_WFOV_20200610.csv", - "core_area": "../CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_WFOV_20200610.csv", - "core_contrast": "../CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_WFOV_20200610.csv", + "occ_trans": "CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_WFOV_20200610.csv", + "core_thruput": "CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_WFOV_20200610.csv", + "core_mean_intensity": "CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_WFOV_20200610.csv", + "core_area": "CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_WFOV_20200610.csv", + "core_contrast": "CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_WFOV_20200610.csv", 
"core_platescale": 0.30, - "core_stability": "../CGI_Perf/EBcsvData/csv_fix/Cstability/CS_SPC_WFIM_201026.csv", + "core_stability": "CGI_Perf/EBcsvData/csv_fix/Cstability/CS_SPC_WFIM_201026.csv", "core_stability_setting": "REQ" }, { "name": "DRM_IMG_B4", @@ -289,13 +289,13 @@ "koAngles_Earth":[45.0,180.0], "koAngles_Moon":[45.0,180.0], "koAngles_Small":[1.0,180.0], - "occ_trans": "../CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_WFOV_20200610.csv", - "core_thruput": "../CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_WFOV_20200610.csv", - "core_mean_intensity": "../CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_WFOV_20200610.csv", - "core_area": "../CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_WFOV_20200610.csv", - "core_contrast": "../CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_WFOV_20200610.csv", + "occ_trans": "CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_WFOV_20200610.csv", + "core_thruput": "CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_WFOV_20200610.csv", + "core_mean_intensity": "CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_WFOV_20200610.csv", + "core_area": "CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_WFOV_20200610.csv", + "core_contrast": "CGI_Perf/EBcsvData/csv_fix/Photometry/CGPERF_WFOV_20200610.csv", "core_platescale": 0.30, - "core_stability": "../CGI_Perf/EBcsvData/csv_fix/Cstability/CS_SPC_WFIM_200520_SCI.csv", + "core_stability": "CGI_Perf/EBcsvData/csv_fix/Cstability/CS_SPC_WFIM_200520_SCI.csv", "core_stability_setting": "MCBE" } ], From 94665a40eb25a41f3bd27c890f474a3fa610fcc1 Mon Sep 17 00:00:00 2001 From: Andrew Chiu Date: Sun, 15 Dec 2024 17:59:39 -0500 Subject: [PATCH 5/6] Refactor --- backend/{ => update}/database_test.py | 0 backend/update/test/database_test_compare.py | 46 ++ .../test}/update_util_test_suite.py | 0 backend/{ => update}/update_plandb_main.py | 0 backend/{ => update}/update_plandb_method.py | 0 backend/{ => update}/update_util.py | 0 backend/update_db.py | 412 ------------------ backend/update_plandb.py | 216 --------- 8 files 
changed, 46 insertions(+), 628 deletions(-) rename backend/{ => update}/database_test.py (100%) create mode 100644 backend/update/test/database_test_compare.py rename backend/{ => update/test}/update_util_test_suite.py (100%) rename backend/{ => update}/update_plandb_main.py (100%) rename backend/{ => update}/update_plandb_method.py (100%) rename backend/{ => update}/update_util.py (100%) delete mode 100644 backend/update_db.py delete mode 100644 backend/update_plandb.py diff --git a/backend/database_test.py b/backend/update/database_test.py similarity index 100% rename from backend/database_test.py rename to backend/update/database_test.py diff --git a/backend/update/test/database_test_compare.py b/backend/update/test/database_test_compare.py new file mode 100644 index 0000000..0cb4cb3 --- /dev/null +++ b/backend/update/test/database_test_compare.py @@ -0,0 +1,46 @@ +import pandas as pd + +# Load CSVs +df1 = pd.read_csv('plandb.sioslab.com/backend/test_results/databaseAfterUpdatePlanets.csv') +df2 = pd.read_csv('plandb.sioslab.com/backend/test_results/TestTodayPlanets.csv') + +merged = df1.merge(df2, on='pl_id', how='inner', suffixes=('_df1', '_df2')) + +# Initialize a results DataFrame for differences +differences = [] + +df1 = df1.drop(columns=['index']) +df1 = df1.set_index('pl_id') +df1.to_csv('plandb.sioslab.com/backend/test_results/databaseAfterUpdatePlanetsNew.csv') + + + + +# Iterate over each row in the merged DataFrame +for index, row in merged.iterrows(): + diff_details = {} + has_diff = False + + for col in df1.columns.difference(['pl_id']): + col_df1 = f"{col}_df1" + col_df2 = f"{col}_df2" + + if row[col_df1] != row[col_df2]: + has_diff = True + diff_details[col] = { + "df1": row[col_df1], + "df2": row[col_df2] + } + + if has_diff: + differences.append({ + "pl_id": row["pl_id"], + "differences": diff_details + }) + +# Convert the results to a DataFrame +differences_df = pd.DataFrame(differences) + +# Display the differences +print("Differences with 
explanations:") +print(differences_df) \ No newline at end of file diff --git a/backend/update_util_test_suite.py b/backend/update/test/update_util_test_suite.py similarity index 100% rename from backend/update_util_test_suite.py rename to backend/update/test/update_util_test_suite.py diff --git a/backend/update_plandb_main.py b/backend/update/update_plandb_main.py similarity index 100% rename from backend/update_plandb_main.py rename to backend/update/update_plandb_main.py diff --git a/backend/update_plandb_method.py b/backend/update/update_plandb_method.py similarity index 100% rename from backend/update_plandb_method.py rename to backend/update/update_plandb_method.py diff --git a/backend/update_util.py b/backend/update/update_util.py similarity index 100% rename from backend/update_util.py rename to backend/update/update_util.py diff --git a/backend/update_db.py b/backend/update_db.py deleted file mode 100644 index 6daa42c..0000000 --- a/backend/update_db.py +++ /dev/null @@ -1,412 +0,0 @@ -import pandas as pd -from sqlalchemy import create_engine -import pymysql -from sqlalchemy import text -from plandb_methods import * -from database_main import * - -# Not working, don't use this as this merges the unmade tables with the database - -columns = ['pl_name', 'pl_letter', 'pl_refname', 'pl_orbper', 'pl_orbpererr1', 'pl_orbpererr2', 'pl_orbperlim', 'pl_orbperstr', 'pl_orblpererr1', 'pl_orblper', 'pl_orblpererr2', 'pl_orblperlim', 'pl_orblperstr', 'pl_orbsmax', 'pl_orbsmaxerr1', 'pl_orbsmaxerr2', 'pl_orbsmaxlim', 'pl_orbsmaxstr', 'pl_orbincl', 'pl_orbinclerr1', 'pl_orbinclerr2', 'pl_orbincllim', 'pl_orbinclstr', 'pl_orbtper', 'pl_orbtpererr1', 'pl_orbtpererr2', 'pl_orbtperlim', 'pl_orbtperstr', 'pl_orbeccen', 'pl_orbeccenerr1', 'pl_orbeccenerr2', 'pl_orbeccenlim', 'pl_orbeccenstr', 'pl_eqt', 'pl_eqterr1', 'pl_eqterr2', 'pl_eqtlim', 'pl_eqtstr', 'pl_occdep', 'pl_occdeperr1', 'pl_occdeperr2', 'pl_occdeplim', 'pl_occdepstr', 'pl_insol', 'pl_insolerr1', 'pl_insolerr2', 
'pl_insollim', 'pl_insolstr', 'pl_dens', 'pl_denserr1', 'pl_denserr2', 'pl_denslim', 'pl_densstr', 'pl_trandep', 'pl_trandeperr1', 'pl_trandeperr2', 'pl_trandeplim', 'pl_trandepstr', 'pl_tranmid', 'pl_tranmiderr1', 'pl_tranmiderr2', 'pl_tranmidlim', 'pl_tranmidstr', 'pl_trandur', 'pl_trandurerr1', 'pl_trandurerr2', 'pl_trandurlim', 'pl_trandurstr', 'pl_controv_flag', 'pl_tsystemref', 'pl_projobliq', 'pl_projobliqerr1', 'pl_projobliqerr2', 'pl_projobliqlim', 'pl_projobliqstr', 'pl_rvamp', 'pl_rvamperr1', 'pl_rvamperr2', 'pl_rvamplim', 'pl_rvampstr', 'pl_radj', 'pl_radjerr1', 'pl_radjerr2', 'pl_radjlim', 'pl_radjstr', 'pl_radestr', 'pl_ratror', 'pl_ratrorerr1', 'pl_ratrorerr2', 'pl_ratrorlim', 'pl_ratrorstr', 'pl_ratdor', 'pl_trueobliq', 'pl_trueobliqerr1', 'pl_trueobliqerr2', 'pl_trueobliqlim', 'pl_trueobliqstr', 'pl_pubdate', 'pl_ratdorerr1', 'pl_ratdorerr2', 'pl_ratdorlim', 'pl_ratdorstr', 'pl_imppar', 'pl_impparerr1', 'pl_impparerr2', 'pl_impparlim', 'pl_impparstr', 'pl_cmassj', 'pl_cmassjerr1', 'pl_cmassjerr2', 'pl_cmassjlim', 'pl_cmassjstr', 'pl_cmasse', 'pl_cmasseerr1', 'pl_cmasseerr2', 'pl_cmasselim', 'pl_cmassestr', 'pl_massj', 'pl_massjerr1', 'pl_massjerr2', 'pl_massjlim', 'pl_massjstr', 'pl_massestr', 'pl_bmassj', 'pl_bmassjerr1', 'pl_bmassjerr2', 'pl_bmassjlim', 'pl_bmassjstr', 'pl_bmasse', 'pl_bmasseerr1', 'pl_bmasseerr2', 'pl_bmasselim', 'pl_bmassestr', 'pl_bmassprov', 'pl_msinij', 'pl_msinijerr1', 'pl_msinijerr2', 'pl_msinijlim', 'pl_msinijstr', 'pl_msiniestr', 'pl_nespec', 'pl_ntranspec', 'pl_nnotes', 'pl_def_override', 'pl_calc_sma', 'pl_angsep', 'pl_angseperr1', 'pl_angseperr2', 'pl_radj_forecastermod', 'pl_radj_forecastermoderr1', 'pl_radj_forecastermoderr2', 'pl_radj_fortney', 'pl_radj_fortneyerr1', 'pl_radj_fortneyerr2', 'pl_maxangsep', 'pl_minangsep', 'disc_year', 'disc_refname', 'discoverymethod', 'disc_locale', 'ima_flag', 'disc_instrument', 'disc_telescope', 'disc_facility', 'rv_flag'] -cache = True -datestr = 
Time.now().datetime.strftime("%Y-%m") - - - -# TODO fix different rows, so that diff rows is base on actual columns to compare, compare the switched values -#look into comparing values more in depth -def get_different_rows(df1, df2): - df_combined = pd.merge(df1, df2, indicator=True, how='outer') - different_rows = df_combined[df_combined['_merge'] != 'both'] - different_rows = different_rows.drop(columns=['_merge']) - - return different_rows - -password = input("Password:") -engine = create_engine('mysql+pymysql://'+"andrewchiu"+':'+password+'@localhost/testSios',echo=True) - -with engine.connect() as connection: - - sql = text("SELECT * FROM Planets") - results = connection.execute(sql) - df = pd.DataFrame(results.fetchall(), columns = results.keys()) - - - - data_path = Path(f'cache/data_cache_{datestr}.p') - if cache: - Path('cache/').mkdir(parents=True, exist_ok=True) - if data_path.exists(): - with open(data_path, 'rb') as f: - ipac_data = pickle.load(f) - else: - ipac_data = getIPACdata() - with open(data_path, 'wb') as f: - pickle.dump(data, f) - else: - ipac_data = getIPACdata() - - - changed_rows = [] - - for index, row in df.iterrows(): - name = row['pl_name'] - print(name) - - filter = ipac_data.query(f'pl_name == "{name}"') - filtered_planet = filter.loc(1) - - for col_name in columns: - - if filtered_planet[col_name].count() > 0: - ipac_col = filtered_planet[col_name].values[0] - sios_col = row[col_name] - - print(f"{name} ipac maxangsep {ipac_col}") - print(f"sios maxangsep {sios_col}") - - if ipac_col != None: - if (ipac_col != sios_col): - print(f"different") - changed_rows.append(row["pl_name"]) - else: - changed_rows.append(row['pl_name']) - - set_changed_row = list(set(changed_rows)) - print(f"Changed rows: {set_changed_row}") - - print(set_changed_row) - - filtered_ipac = ipac_data[ipac_data['pl_name'].isin(set_changed_row)] - - filtered_ipac.to_csv('output.csv', index=True) - - - - - #successfully have all planets (set of all different 
planets) - # now update recalc - # start with - # photo data good - # band info good - # orb data do now - - #orb data - - - with open('debug.txt', 'w') as file: - - file.write("loading photometry data\n") - - photdict_path = Path(f'cache/update_photdict_2022-05.p') - if cache: - Path('cache/').mkdir(parents=True, exist_ok=True) - if photdict_path.exists(): - with open(photdict_path, 'rb') as f: - photdict = pickle.load(f) - else: - photdict = loadPhotometryData(infile="plandb.sioslab.com/allphotdata_2015.npz") - with open(photdict_path, 'wb') as f: - pickle.dump(photdict, f) - else: - photdict = loadPhotometryData(infile="plandb.sioslab.com/allphotdata_2015.npz") - - file.write("Bandzip\n") - bandzip_path = Path(f'cache/update_bandzip_{datestr}.p') - if cache: - Path('cache/').mkdir(parents=True, exist_ok=True) - if bandzip_path.exists(): - with open(bandzip_path, 'rb') as f: - bandzip = pickle.load(f) - else: - bandzip = list(genBands()) - with open(bandzip_path, 'wb') as f: - pickle.dump(bandzip, f) - else: - bandzip = list(genBands()) - - - file.write("orbdata/orbfits\n") - orbdata_path = Path(f'cache/update_orbdata_{datestr}.p') - orbfits_path = Path(f'cache/update_orbfits_{datestr}.p') - if cache: - Path('cache/').mkdir(parents=True, exist_ok=True) - if orbdata_path.exists(): - with open(orbdata_path, 'rb') as f: - orbdata = pickle.load(f) - if orbfits_path.exists(): - with open(orbfits_path, 'rb') as f: - orbitfits = pickle.load(f) - else: - orbdata, orbitfits = genOrbitData(filtered_ipac, bandzip, photdict) - with open(orbdata_path, 'wb') as f: - pickle.dump(orbdata, f) - with open(orbfits_path, 'wb') as f: - pickle.dump(orbitfits, f) - else: - orbdata, orbitfits = genOrbitData(filtered_ipac, bandzip, photdict) - - file.write(f"orbdata: {orbdata}\n") - file.write(f"orbfits: {orbitfits}\n") - - - - file.write("ephemeris orbitfits/orbdata") - ephemeris_orbdata_path = Path(f'cache/update_ephemeris_orbdata_{datestr}.p') - ephemeris_orbfits_path = 
Path(f'cache/update_ephemeris_orbfits_{datestr}.p') - if cache: - Path('cache/').mkdir(parents=True, exist_ok=True) - if ephemeris_orbdata_path.exists(): - with open(ephemeris_orbdata_path, 'rb') as f: - ephemeris_orbdata = pickle.load(f) - if ephemeris_orbfits_path.exists(): - with open(ephemeris_orbfits_path, 'rb') as f: - ephemeris_orbitfits = pickle.load(f) - else: - ephemeris_orbitfits, ephemeris_orbdata = addEphemeris(filtered_ipac, orbitfits, orbdata, bandzip, photdict) - with open(ephemeris_orbdata_path, 'wb') as f: - pickle.dump(ephemeris_orbdata, f) - with open(ephemeris_orbfits_path, 'wb') as f: - pickle.dump(ephemeris_orbitfits, f) - else: - ephemeris_orbitfits, ephemeris_orbdata = addEphemeris(filtered_ipac, orbitfits, orbdata, bandzip, photdict) - file.write(f"ephemeris orbitfits: {ephemeris_orbitfits}\n") - file.write(f"ephemeris orbfits: {ephemeris_orbdata}\n") - - - - file.write("quadrature data") - quadrature_data_path = Path(f'cache/update_quadrature_data_{datestr}.p') - if cache: - Path('cache/').mkdir(parents=True, exist_ok=True) - if quadrature_data_path.exists(): - with open(quadrature_data_path, 'rb') as f: - quadrature_data = pickle.load(f) - else: - quadrature_data = calcQuadratureVals(ephemeris_orbitfits, bandzip, photdict) - with open(quadrature_data_path, 'wb') as f: - pickle.dump(quadrature_data, f) - else: - quadrature_data = calcQuadratureVals(ephemeris_orbitfits, bandzip, photdict) - file.write(f"quadrature data: {quadrature_data}\n") - - - file.write("contr data") - contr_data_path = Path(f'cache/update_contr_data_{datestr}.p') - exosims_json = 'plandb.sioslab.com/ci_perf_exosims.json' - if cache: - if contr_data_path.exists(): - contr_data = pd.read_pickle(contr_data_path) - else: - contr_data = calcContrastCurves(quadrature_data, exosims_json=exosims_json) - with open(contr_data_path, 'wb') as f: - pickle.dump(contr_data, f) - else: - contr_data = calcContrastCurves(quadrature_data, exosims_json=exosims_json) - file.write(f"contr 
data: {contr_data}\n") - - - - print('Doing completeness calculations') - comps_path = Path(f'cache/update_comps_{datestr}.p') - compdict_path = Path(f'cache/update_compdict_{datestr}.p') - comps_data_path = Path(f'cache/update_comps_data_{datestr}.p') - if cache: - Path('cache/').mkdir(parents=True, exist_ok=True) - if comps_path.exists(): - comps = pd.read_pickle(comps_path) - with open(compdict_path, 'rb') as f: - compdict = pickle.load(f) - comps_data = pd.read_pickle(comps_data_path) - else: - comps, compdict, comps_data = calcPlanetCompleteness(contr_data, bandzip, photdict, exosims_json=exosims_json) - comps.to_pickle(comps_path) - with open(compdict_path, 'wb') as f: - pickle.dump(compdict, f) - comps_data.to_pickle(comps_data_path) - else: - comps, compdict, comps_data = calcPlanetCompleteness(contr_data, bandzip, photdict, exosims_json=exosims_json) - file.write(f"comps: {comps}\n") - file.write(f"compdict: {compdict}\n") - file.write(f"comps_data: {comps_data}\n") - - - file.write("generateTables") - - plandata_path = Path(f'cache/update_plandata_{datestr}.p') - stdata_path = Path(f'cache/update_stdata_{datestr}.p') - table_orbitfits_path = Path(f'cache/update_table_orbitfits_{datestr}.p') - if cache: - # plandata.to_pickle('plandata_'+datestr+'.pkl') - # stdata.to_pickle('stdata_' + datestr + '.pkl') - # orbitfits.to_pickle('orbitfits_'+datestr+'.pkl') - Path('cache/').mkdir(parents=True, exist_ok=True) - if plandata_path.exists(): - plandata = pd.read_pickle(plandata_path) - stdata = pd.read_pickle(stdata_path) - table_data = pd.read_pickle(table_orbitfits_path) - else: - plandata, stdata, orbitfits = generateTables(filtered_ipac, quadrature_data) - plandata.to_pickle(plandata_path) - stdata.to_pickle(stdata_path) - orbitfits.to_pickle(table_orbitfits_path) - else: - plandata, stdata, orbitfits = generateTables(filtered_ipac, comps_data) - - file.write(f"plandata: {plandata}\n") - file.write(f"stdata: {stdata}\n") - file.write(f"orbitfits: 
{orbitfits}\n") - - - - def resolve(r): - if pd.notna(r['value_right']): - return r['value_right'] - return r['value_left'] - # Necessary to merge/upsert dataframes, because since indices are recalculated everytime, and multiple keys/indices, must be filtered by scenerio_name, upserting table2 to table1, taking new updated values from table2 and add new records, might have to reindex - def mergeTables( table1, table2): - merged_df = pd.merge(table1, table2, on='pl_name',how='outer',suffixes=('_left', '_right')) - - - - - merged_df['value'] = merged_df.apply(resolve, axis=1) - - merged_df.drop(columns=['value_left', 'value_right'], inplace=True) - - return merged_df - - # filtered_planet = filter.loc(1) - - # for col_name in columns: - - # if filtered_planet[col_name].count() > 0: - # ipac_col = filtered_planet[col_name].values[0] - # sios_col = row[col_name] - - # print(f"{name} ipac maxangsep {ipac_col}") - # print(f"sios maxangsep {sios_col}") - - # if ipac_col != None: - # if (ipac_col != sios_col): - # print(f"different") - # changed_rows.append(row["pl_name"]) - # else: - # changed_rows.append(row['pl_name']) - - def compileCompleteness(): - datestr = Time.now().datetime.strftime("%Y-%m") - comp_data = pd.read_pickle(f"cache/comps_data_{datestr}.p") - col_names = comp_data.columns.values.tolist() - scenario_names = [] - for x in col_names: - if x[:8] == 'complete': - scenario_names.append(x[13:]) - #drop contr_curve col - completeness = pd.DataFrame([], columns = ['pl_id', 'completeness', 'scenario_name', 'compMinWA', 'compMaxWA', 'compMindMag', 'compMaxdMag']) - for i, row in tqdm(comp_data.iterrows()): - newRows = [] - for scenario_name in scenario_names: - newRows = [] - if pd.notna(row[('completeness_' + scenario_name)]): - newRows.append([row['pl_id'], row['completeness_' + scenario_name], scenario_name, - row['compMinWA_' + scenario_name], - row['compMaxWA_' + scenario_name], - row['compMindMag_' + scenario_name], - row['compMaxdMag_' + scenario_name]]) 
- - singleRow = pd.DataFrame(newRows, columns = ['pl_id', 'completeness', 'scenario_name', 'compMinWA', 'compMaxWA', 'compMindMag', 'compMaxdMag']) - completeness = completeness._append(singleRow, ignore_index=True) - # print(completeness) - return completeness - - - - compiled_contr_curvs_path = Path(f'plandb.sioslab.com/cache/update_compiled_cont_curvs_{datestr}.p') - contr_curvs_path = Path(f'plandb.sioslab.com/cache/update_cont_curvs_{datestr.replace("-", "_")}') - - - if compiled_contr_curvs_path.exists(): - contrast_curves = pd.read_pickle(compiled_contr_curvs_path) - else: - contrast_curves = compileContrastCurves(stdata, contr_curvs_path) - contrast_curves.to_pickle(compiled_contr_curvs_path) - def addId(r): - r['pl_id']= list(planets.index[(planets['pl_name'] == r['Name'])])[0] - return r - - - scenarios = pd.read_csv("plandb.sioslab.com/cache/scenario_angles.csv") - compiled_completeness_path = Path(f"cache/update_compiled_completeness_{datestr}.p") - # if compiled_completeness_path.exists(): - # completeness = pd.read_pickle(compiled_completeness_path) - # else: - completeness = compileCompleteness() - completeness.to_pickle(compiled_completeness_path) - - sql = text("SELECT * FROM ContrastCurves") - results = connection.execute(sql) - contrast_df = pd.DataFrame(results.fetchall(), columns = results.keys()) - - new_contrast = compileContrastCurves(stdata, contr_curvs_path) - - merged_contrast = mergeTables(contrast_df, contrast_curves) - print(merged_contrast) - - - - # compare scenerio name with what we have here calculated from the newly calculated, and create a new dataframe that appends the old ones, the merged duplciates, and new ones, then just df.to_sql .if_exists == replace, with new - # not able to j do a query, doesnt allow for 1/3 indices, no other query to just upsert, especially since id's are reclaculated everytime - - - - - - - - - - - - # if(filtered_planet.loc[0, "{col_name}"] "{col_name}") - # changed_rows.append - - - - - - - - #What 
if new planets get added? - - - - # sios_columns = results.keys() - - - - - - - - - # print(df) - - # data = getIPACdata() - - # ipac_columns = data.keys() - - # print(data) - - # diff1 = list(sios_columns - ipac_columns) - # print("sios columns" + sios_columns + "\n\n\n\n\n") - # print(diff1) - # print("ipac columns" +ipac_columns + "\n\n\n\n\n") - # diff2 = list(ipac_columns - sios_columns) - # print(diff2) - - # combined_diff = diff1+ diff2 - # print(combined_diff) - - #WOrks, 3800 rows here as oppose to 4800 - # NOTES branch/changed absolute path to relative path, did this (got changed rows that need to be changed, now just need to run claculations for only these rows, talk about the bad code/syntax errors (.F0, _append, and the engine connection, unequal pass in param, and sql text(), too big to be minor error, but finished database on my system :) ) \ No newline at end of file diff --git a/backend/update_plandb.py b/backend/update_plandb.py deleted file mode 100644 index 00ce70a..0000000 --- a/backend/update_plandb.py +++ /dev/null @@ -1,216 +0,0 @@ -import pandas as pd -from sqlalchemy import create_engine -import pymysql -import glob -from sqlalchemy import text -from plandb_methods import * -from database_main import * -from update_util import * -import os - - -#Flags -cache = True -# always rely on historic ipac -datestr = Time.now().datetime.strftime("%Y-%m") - -# sign in originally before running, to solve cryptography error, sign in using mysql -u username (andrewchiu) -p, then cryptography is solved -# workflow: get current database -> get ipac database -> compare current database to updated values in ipac database, to create a difference dataframe -> create a database from the difference dataframe (updated database) -> merge/upsert the difference database with the current database -> replace current database, with the merged, keep old values and adding updated values -# create connection with current database -password = input("SQL password: ") 
-engine = create_engine('mysql+pymysql://'+"andrewchiu"+':'+password+'@localhost/testSios',echo=True) -diff_engine = create_engine('mysql+pymysql://'+"andrewchiu"+':'+password+'@localhost/testSiosDiffEngine',echo=True) -new_engine = create_engine('mysql+pymysql://'+"andrewchiu"+':'+password+'@localhost/newEngine',echo=True) - - -with engine.connect() as connection: - - - - # with diff_engine.connect() as diff_engine_connection: - - # get current database dataframe - print("getting current database") - current_database_df = get_current_database(connection) - - current_database_df.to_excel("plandb.sioslab.com/backend/sheets/current_database.xlsx", index=False) - - # get ipac database dataframe - print("getting ipac database") - - ipac_archive_dir = "plandb.sioslab.com/backend/ipac_archive" - - files = os.listdir(ipac_archive_dir) - - most_recent_file = files[0] - most_recent_ind = 0 - - for file in files: - - ind_str = file.rsplit('-', 1)[-1] - current_ind = int(ind_str) - - if(current_ind > most_recent_ind): - most_recent_ind = current_ind - most_recent_file = file - - #load archive, load first because it needs be the most recent excluding month, - print("loading archived ipac") - - - data_path = Path(f'cache/data_cache_{datestr}.p') - ipac_data_df = get_ipac_database(data_path, cache) - # Archive everytime, replace if same month, archive for future use - print("archiving present time ipac") - with open(Path(f"plandb.sioslab.com/backend/ipac_archive/ipac_archive_{datestr}_{most_recent_ind + 1}.p"), 'wb') as f: - pickle.dump(ipac_data_df, f) - - - most_recent_ipac_data = pickle.load(Path(f"plandb.sioslab.com/backend/ipac_archive/{most_recent_file}")) - - change_ipac_df = find_row_differences(most_recent_ipac_data, ipac_data_df) - - - #find updates from current to ipac - changed_df = find_row_differences(current_database_df, ipac_data_df) - changed_df.to_excel("plandb.sioslab.com/backend/sheets/changed.xlsx") - 
changed_df.to_pickle(Path(f"plandb.sioslab.com/changed_df")) - - changed_df = pd.read_pickle(Path(f"plandb.sioslab.com/changed_df")) - - - # get photodict - photdict_path = Path(f'cache/update_photdict_2022-05.p') - infile="plandb.sioslab.com/backend/allphotdata_2015.npz" - photdict = get_photo_data(photdict_path, infile, cache) - - - # get bandzip - bandzip_path = Path(f'cache/update_bandzip_{datestr}.p') - bandzip = get_bandzip(bandzip_path, cache) - - # get orbdata, orbfits - orbdata_path = Path(f'cache/update_orbdata_{datestr}.p') - orbfits_path = Path(f'cache/update_orbfits_{datestr}.p') - orbdata, orbfits = get_orbdata(orbdata_path, orbfits_path, changed_df, bandzip, photdict, cache) - - # Line below, contains too main records to write xlsx, check manually - # orbdata.to_excel("orbdata.xlsx") - orbfits.to_excel("orbfits.xlsx") - - # get ephemeris - ephemeris_orbdata_path = Path(f'cache/update_ephemeris_orbdata_{datestr}.p') - ephemeris_orbfits_path = Path(f'cache/update_ephemeris_orbfits_{datestr}.p') - ephemeris_orbitfits, ephemeris_orbdata = get_ephemerisdata(ephemeris_orbdata_path, ephemeris_orbfits_path, changed_df, orbfits, orbdata, bandzip, photdict, cache) - ephemeris_orbitfits.to_excel("ephemeris_orbfits.xlsx") - - # Line below, contains too main records to write xlsx, check manually - # ephemeris_orbdata.to_excel("ephemeris_orbdata.xlsx") - - quadrature_data_path = Path(f'cache/update_quadrature_data_{datestr}.p') - quadrature_data = get_quadrature(quadrature_data_path, ephemeris_orbitfits, bandzip, photdict, cache) - quadrature_data.to_excel("plandb.sioslab.com/backend/sheets/quadrature_data.xlsx") - - contr_data_path = Path(f'cache/update_contr_data_{datestr}.p') - exosims_json = 'plandb.sioslab.com/ci_perf_exosims.json' - - contr_data = get_contrastness(contr_data_path, exosims_json, quadrature_data, cache) - contr_data.to_excel("plandb.sioslab.com/backend/sheets/contr_data.xlsx") - - comps_path = Path(f'cache/update_comps_{datestr}.p') - 
compdict_path = Path(f'cache/update_compdict_{datestr}.p') - comps_data_path = Path(f'cache/update_comps_data_{datestr}.p') - - comps, compdict, comps_data = get_completeness(comps_path, compdict_path, comps_data_path, contr_data, bandzip, photdict, exosims_json, cache) - comps.to_excel("plandb.sioslab.com/backend/sheets/comps.xlsx") - comps_data.to_excel("plandb.sioslab.com/backend/sheets/comps_data.xlsx") - #None for compdict, as its dictionary - - plandata_path = Path(f'cache/update_plandata_{datestr}.p') - stdata_path = Path(f'cache/update_stdata_{datestr}.p') - table_orbitfits_path = Path(f'cache/update_table_orbitfits_{datestr}.p') - - # orbitfits got updated, maybe change to new var - plan_data, stdata, orbitfits = get_generated_tables(plandata_path, stdata_path, table_orbitfits_path, changed_df, quadrature_data, comps_data, cache) - plan_data.to_excel("plandb.sioslab.com/backend/sheets/plandata.xlsx") - stdata.to_excel("plandb.sioslab.com/backend/sheets/stdata.xlsx") - orbitfits.to_excel('plandb.sioslab.com/backend/sheets/later_orbitfits.xlsx') - - #do compileContrastness and compile contrastness - # Look into and possibly remove the - to _ - contr_curvs2_path = Path(f'plandb.sioslab.com/cache/cont_curvs2_{datestr.replace("-", "_")}') - compiled_contr_curvs_path = Path(f'plandb.sioslab.com/cache/cont_curvs_{datestr.replace("-", "_")}') - compiled_contrast_curves, newpdfs = get_compiled_contrast(compiled_contr_curvs_path, stdata, comps_data, changed_df, contr_curvs2_path) - # compiled_contrast_curves = compileContrastCurves(stdata, compiled_contr_curvs_path) - compiled_contrast_curves.to_excel("plandb.sioslab.com/backend/sheets/compiled_contrast_curves.xlsx") - # With current code, since get_compiled_contrast isnt fully working with new pdfs, new pdfs should be empty - newpdfs.to_excel("plandb.sioslab.com/backend/sheets/newpdfs.xlsx") - - # compile completeness - compiled_completeness_path = Path(f"plandb.sioslab.com/cache/compiled_completeness_{datestr}.p") 
- - compiled_completeness = get_compiled_completeness(compiled_completeness_path, comps_data) - compiled_completeness.to_excel("plandb.sioslab.com/backend/sheets/compiled_completeness.xlsx") - - - - # remember do makesql with this and then get those tables and upsert those shorter new ones with the current - # writeSQL(engine, plandata=planets, stdata=stars, orbitfits=orbitfits, orbdata=orbits, pdfs=newpdfs, aliases=None,contrastCurves=contrast_curves,scenarios=scenarios, completeness=completeness) - scenarios = pd.read_csv("plandb.sioslab.com/cache/scenario_angles.csv") - - - temp_writeSQL(diff_engine, plandata=plan_data, stdata=stdata, orbitfits=orbfits, orbdata=orbdata, pdfs=None, aliases=None, contrastCurves=compiled_contrast_curves, scenarios=scenarios, completeness=compiled_completeness) - - #get from diff_database - diff_engine_connection = diff_engine.connect() - diff_completeness_df, diff_contrast_curves_df, diff_orbitfits_df, diff_orbits_df, diff_pdfs_df, diff_planets_df, diff_scenarios_df, diff_stars_df = get_all_from_db(diff_engine_connection) - - #get from old_database - old_completeness_df, old_contrast_curves_df, old_orbitfits_df, old_orbits_df, old_pdfs_df, old_planets_df, old_scenarios_df, old_stars_df = get_all_from_db(connection) - - - - #merge with old, compare each - - # upsert planets - # Have to do name, because indices don't match, logic applies down unless otherwise in comment - print("merging planets") - merged_planets = upsert_dataframe(old_planets_df, diff_planets_df, "pl_name") - - - # upsert completeness - print("supposed to merge completeness") - # TODO, this is highly likely to be wrong, forced to used indices here, because only unique, unfortunately incorrect, must base it upsert on the varying indices later - # merged_completeness = upsert_dataframe(old_completeness_df, diff_completeness_df, "completeness_id") - - # upsert stars - print("merging stars") - merged_stars = upsert_dataframe(old_stars_df, diff_stars_df, "st_name") - - # 
upsert orbitfits - print("merging oribt fits") - # merged_orbitfits = upsert_dataframe(old_orbitfits_df, diff_orbitfits_df, "pl_name") - - # upsert orbits - # print("merging orbits") - # merged_orbits = upsert_dataframe(old_orbits_df, diff_orbits_df, "pl_name") - - # upsert contrast curves - # TODO, fix the column name, for unique one later? - print("merging curves") - merged_contrast_curves = upsert_dataframe(old_contrast_curves_df, diff_contrast_curves_df, "r_lamD") - - - # # upsert pdfs - # print("merging pdfs") - # merged_pdfs = upsert_dataframe(old_pdfs_df, diff_pdfs_df, "Name") - - # No need to upsert scenarios, as it's updated locally - - #write back to original database with new values, - # TODO: optionally, store old database in a different database for archive - # orbit fits wrong - print("Merging") - final_writeSQL(new_engine, merged_planets, merged_stars, None, None, None, aliases=None, contrastCurves=None, scenarios= scenarios, completeness=None) - - print("Done") \ No newline at end of file From 9ef5f43f2af53cfa13c0d263cba3e9422d1423fd Mon Sep 17 00:00:00 2001 From: Andrew Chiu Date: Thu, 26 Dec 2024 10:28:45 -0500 Subject: [PATCH 6/6] Remove Refactor, Resolved ipac differences - Removed refactor, due to import paths being incorrect - Remove ignoring index from upsert_general (update_util) - Fixed null errors, from linking to un-accessible data, by passing down current ipac data down compile - Above point, is redundant because, this defeats the point of using change_ipac data, but is still quicker. 
Can be resolved by tracking down schema dependencies, and only passing all when needed --- backend/create_old_database.py | 91 +++++++++++++++++++ backend/create_today_database.py | 84 +++++++++++++++++ backend/{update => }/database_test.py | 10 ++ .../test => }/database_test_compare.py | 14 +-- backend/update_old_database.py | 35 +++++++ backend/{update => }/update_plandb_main.py | 0 backend/{update => }/update_plandb_method.py | 6 +- backend/{update => }/update_util.py | 0 .../test => }/update_util_test_suite.py | 0 9 files changed, 232 insertions(+), 8 deletions(-) create mode 100644 backend/create_old_database.py create mode 100644 backend/create_today_database.py rename backend/{update => }/database_test.py (97%) rename backend/{update/test => }/database_test_compare.py (62%) create mode 100644 backend/update_old_database.py rename backend/{update => }/update_plandb_main.py (100%) rename backend/{update => }/update_plandb_method.py (99%) rename backend/{update => }/update_util.py (100%) rename backend/{update/test => }/update_util_test_suite.py (100%) diff --git a/backend/create_old_database.py b/backend/create_old_database.py new file mode 100644 index 0000000..3b04dcd --- /dev/null +++ b/backend/create_old_database.py @@ -0,0 +1,91 @@ +import pandas as pd +from sqlalchemy import create_engine +import pymysql +import glob +from sqlalchemy import text +from plandb_methods import * +from database_main import * +from update_util import * +import os +import sys +import logging + +from database_main import compileContrastCurves +from database_main import compileCompleteness + +from update_plandb_method import * + + +""" +Create old database, 2022 information, to be updated. +For benchmarking purposes. 
+Currently uses MySQL database: (database2022before) +""" + + +logging.basicConfig(filename="timer_log.txt", level=logging.INFO, format = "%(asctime)s - %(message)s") + +cache = False +datestr = Time.now().datetime.strftime("%Y-%m") + + +start_time_complete = time.time() +logging.info("Complete database build start") + +data = getIPACdata() + + + +#photometric data +photdict = loadPhotometryData(infile="plandb.sioslab.com/allphotdata_2015.npz") + +#band info +bandzip = list(genBands()) + +# TODO: look at genOrbitData method signature, with t0 default, and consider adding t0 +orbdata, orbitfits = genOrbitData(data, bandzip, photdict) + +ephemeris_orbitfits, ephemeris_orbdata = addEphemeris(data, orbitfits, orbdata, bandzip, photdict) + +quadrature_data = calcQuadratureVals(ephemeris_orbitfits, bandzip, photdict) + +exosims_json = 'plandb.sioslab.com/ci_perf_exosims.json' +contr_data = calcContrastCurves(quadrature_data, exosims_json=exosims_json) + +comps, compdict, comps_data = calcPlanetCompleteness(contr_data, bandzip, photdict, exosims_json=exosims_json) + +plandata, stdata, orbitfits = generateTables(data, comps_data) + +# TODO: Prob replace this path stuff, prob HAVE to plandb.com/cache contr curvecalculations and then compile, or maybe write new compile function +# TODO: calculate contrast curves and test it here +contr_curvs_path = Path(f'plandb.sioslab.com/cache/cont_curvs_{datestr.replace("-", "_")}') +contrast_curves = compileContrastCurves(stdata, contr_curvs_path) + + + +# newpdfs = pdfs.apply(addId, axis = 1) +scenarios = pd.read_csv("plandb.sioslab.com/cache/scenario_angles.csv") + +# compiled_completeness_path = Path(f"plandb.com/cache/compiled_completeness_{datestr}.p") +# if compiled_completeness_path.exists(): +# completeness = pd.read_pickle(compiled_completeness_path) +# else: +# completeness = compileCompleteness() +# completeness.to_pickle(compiled_completeness_path) + + +# passwd = input("db password: ") +# username = 'plandb_admin' 
+engineToday = create_engine('mysql+pymysql://'+"andrewchiu"+':'+"Password123!"+'@localhost/TestToday',echo=True) + +#TODO: completeness later + +writeSQL(engineToday, plandata=plandata, stdata=stdata, orbitfits=orbitfits, orbdata=orbdata, pdfs=None, aliases=None,contrastCurves=None,scenarios=None, completeness=None) + +# TODO end time +end_time_complete = time.time() +elapsed_time = end_time_complete - start_time_complete +logging.info(f"complete database build end. Elapsed time: {elapsed_time:.2f} seconds") + +print(f"elapsed time: {elapsed_time}" ) + diff --git a/backend/create_today_database.py b/backend/create_today_database.py new file mode 100644 index 0000000..2ec4b76 --- /dev/null +++ b/backend/create_today_database.py @@ -0,0 +1,84 @@ +import pandas as pd +from sqlalchemy import create_engine +import pymysql +import glob +from sqlalchemy import text +from plandb_methods import * +from database_main import * +from update_util import * +import os +import sys +import logging + +from database_main import compileContrastCurves +from database_main import compileCompleteness + +from update_plandb_method import * + + + + +""" +Original compile of today's database using all current information. 
+Currently uses MySQL database: (TestToday) +""" + + +logging.basicConfig(filename="timer_log.txt", level=logging.INFO, format = "%(asctime)s - %(message)s") + +cache = False +datestr = Time.now().datetime.strftime("%Y-%m") + + + + +start_time_2022 = time.time() +logging.info("2022 database build start") + +#This is prob faster because it's pickled + +datestr = "2022-05" +plandata_path = Path(f'plandb.sioslab.com/cache/plandata_{datestr}.p') +planetsOld = pd.read_pickle(plandata_path) +stdata_path = Path(f'plandb.sioslab.com/cache/stdata_{datestr}.p') +starsOld = pd.read_pickle(stdata_path) +orbfits_path = Path(f'plandb.sioslab.com/cache/table_orbitfits_{datestr}.p') +orbitfitsOld = pd.read_pickle(orbfits_path) +orbdata_path = Path(f'plandb.sioslab.com/cache/ephemeris_orbdata_{datestr}.p') +orbitsOld = pd.read_pickle(orbdata_path) +comps_path = Path(f'plandb.sioslab.com/cache/comps_{datestr}.p') +pdfs = pd.read_pickle(comps_path) +print(pdfs) +compiled_contr_curvs_path = Path(f'plandb.sioslab.com/cache/compiled_cont_curvs_{datestr}.p') +contr_curvs_path = Path(f'plandb.sioslab.com/cache/cont_curvs_{datestr.replace("-", "_")}') +if compiled_contr_curvs_path.exists(): + contrast_curvesOld = pd.read_pickle(compiled_contr_curvs_path) +else: + contrast_curvesOld = compileContrastCurves(starsOld, contr_curvs_path) + contrast_curvesOld.to_pickle(compiled_contr_curvs_path) +def addId(r): + r['pl_id']= list(planetsOld.index[(planetsOld['pl_name'] == r['Name'])])[0] + return r + +#TODO Pdfs +newpdfs = pdfs.apply(addId, axis = 1) +scenarios = pd.read_csv("plandb.sioslab.com/cache/scenario_angles.csv") + +before_engine = create_engine('mysql+pymysql://'+"andrewchiu"+':'+"Password123!"+'@localhost/database2022before',echo=True) + +writeSQL(before_engine, plandata=planetsOld, stdata=starsOld, orbitfits=orbitfitsOld, orbdata=orbitsOld, pdfs=None, aliases=None,contrastCurves=None,scenarios=None, completeness=None) + +print("Done") + +end_time_2022 = time.time() +elapsed_time_2022 
= end_time_2022 - start_time_2022 +logging.info(f"2022 database build complete, elapsed time: {elapsed_time_2022}") + +#TODO; compile later + +# compiled_completeness_path = Path(f"plandb.com/cache/compiled_completeness_{datestr}.p") +# if compiled_completeness_path.exists(): +# completeness = pd.read_pickle(compiled_completeness_path) +# else: +# completeness = compileCompleteness() +# completeness.to_pickle(compiled_completeness_path) diff --git a/backend/update/database_test.py b/backend/database_test.py similarity index 97% rename from backend/update/database_test.py rename to backend/database_test.py index 5a736eb..449af59 100644 --- a/backend/update/database_test.py +++ b/backend/database_test.py @@ -16,6 +16,16 @@ # build database from new values # database_main/build db +""" + +Entire database benchmarking and building script. +Broken down into the following steps. +a) Complete database compile, using today's information. +b) Build old database using 2022 information. +c) update old database to 2024. 
+ +""" + logging.basicConfig(filename="timer_log.txt", level=logging.INFO, format = "%(asctime)s - %(message)s") cache = False diff --git a/backend/update/test/database_test_compare.py b/backend/database_test_compare.py similarity index 62% rename from backend/update/test/database_test_compare.py rename to backend/database_test_compare.py index 0cb4cb3..94db93f 100644 --- a/backend/update/test/database_test_compare.py +++ b/backend/database_test_compare.py @@ -1,17 +1,17 @@ import pandas as pd # Load CSVs -df1 = pd.read_csv('plandb.sioslab.com/backend/test_results/databaseAfterUpdatePlanets.csv') -df2 = pd.read_csv('plandb.sioslab.com/backend/test_results/TestTodayPlanets.csv') +df1 = pd.read_csv('plandb.sioslab.com/backend/update/test/logs/test_results/databaseAfterUpdateStars.csv') +df2 = pd.read_csv('plandb.sioslab.com/backend/update/test/logs/test_results/testTodayStars.csv') -merged = df1.merge(df2, on='pl_id', how='inner', suffixes=('_df1', '_df2')) +merged = df1.merge(df2, on='st_id', how='inner', suffixes=('_df1', '_df2')) # Initialize a results DataFrame for differences differences = [] df1 = df1.drop(columns=['index']) -df1 = df1.set_index('pl_id') -df1.to_csv('plandb.sioslab.com/backend/test_results/databaseAfterUpdatePlanetsNew.csv') +df1 = df1.set_index('st_id') +df1.to_csv('plandb.sioslab.com/backend/update/test/logs/test_results/databaseAfterUpdatePlanetsNew.csv') @@ -21,7 +21,7 @@ diff_details = {} has_diff = False - for col in df1.columns.difference(['pl_id']): + for col in df1.columns.difference(['st_id']): col_df1 = f"{col}_df1" col_df2 = f"{col}_df2" @@ -34,7 +34,7 @@ if has_diff: differences.append({ - "pl_id": row["pl_id"], + "pl_id": row["st_id"], "differences": diff_details }) diff --git a/backend/update_old_database.py b/backend/update_old_database.py new file mode 100644 index 0000000..8c86f2e --- /dev/null +++ b/backend/update_old_database.py @@ -0,0 +1,35 @@ +import pandas as pd +from sqlalchemy import create_engine +import pymysql 
+import glob +from sqlalchemy import text +import os +import sys +import time + +import logging + +from update_plandb_method import * + + +""" +Update pre-existing database. +In this use case, update 2022 database. +Currently uses MySQL database: (databaseAfterUpdate) +Additionally uses MySQL database: (databaseDiffUpdate) to update the database +""" + +start_time_update = time.time() +logging.info("update database start") + +updateDatabaseTest("andrewchiu", "Password123!", "database2022before", 'databaseDiffUpdate', "databaseAfterUpdate") + + +# update + +#TODO end time +end_time_update = time.time() +elapsed_time_update = end_time_update - start_time_update +logging.info(f"end update database elapsed time: {elapsed_time_update}") + + diff --git a/backend/update/update_plandb_main.py b/backend/update_plandb_main.py similarity index 100% rename from backend/update/update_plandb_main.py rename to backend/update_plandb_main.py diff --git a/backend/update/update_plandb_method.py b/backend/update_plandb_method.py similarity index 99% rename from backend/update/update_plandb_method.py rename to backend/update_plandb_method.py index 71828cb..7515592 100644 --- a/backend/update/update_plandb_method.py +++ b/backend/update_plandb_method.py @@ -326,7 +326,8 @@ def updateDatabaseTest(user : str, password : str, sios_engine_name : str, diff_ table_orbitfits_path = Path(f'plandb.sioslab.com/cache/update_table_orbitfits_{datestr}.p') # Orbitfits got updated, maybe change to new var - plan_data, stdata, orbitfits = get_generated_tables(plandata_path, stdata_path, table_orbitfits_path, change_ipac_df, quadrature_data, comps_data, cache) + # TODO maybe needs all ipac data, to update plandata here + plan_data, stdata, orbitfits = get_generated_tables(plandata_path, stdata_path, table_orbitfits_path, new_ipac_data, quadrature_data, comps_data, cache) plan_data.to_excel("plandb.sioslab.com/backend/sheets/plandata.xlsx") stdata.to_excel("plandb.sioslab.com/backend/sheets/stdata.xlsx") 
orbitfits.to_excel('plandb.sioslab.com/backend/sheets/later_orbitfits.xlsx') @@ -423,6 +424,9 @@ def updateDatabaseTest(user : str, password : str, sios_engine_name : str, diff_ print("Merging and final write") write_update_SQL(new_engine, merged_planets, merged_stars, merged_orbitfits, merged_orbits, pdfs=None, aliases=None, contrastCurves= None, scenarios=None, completeness=None) + + #Re-index + print("Done") # TODO: Print all total changes diff --git a/backend/update/update_util.py b/backend/update_util.py similarity index 100% rename from backend/update/update_util.py rename to backend/update_util.py diff --git a/backend/update/test/update_util_test_suite.py b/backend/update_util_test_suite.py similarity index 100% rename from backend/update/test/update_util_test_suite.py rename to backend/update_util_test_suite.py