From 6c71eb46d14736ed4beb5a0e4ddbc4107446cea5 Mon Sep 17 00:00:00 2001 From: alevember Date: Tue, 10 Mar 2026 10:22:32 +0100 Subject: [PATCH 1/4] 10.03 --- .vscode/settings.json | 3 + generovani.py | 109 ------------------------------ vizualizace-xarray/generovani.py | 110 +++++++++++++++++++++++-------- 3 files changed, 86 insertions(+), 136 deletions(-) create mode 100644 .vscode/settings.json delete mode 100644 generovani.py diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..c9ebf2d --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "python-envs.defaultEnvManager": "ms-python.python:system" +} \ No newline at end of file diff --git a/generovani.py b/generovani.py deleted file mode 100644 index 8fe78c4..0000000 --- a/generovani.py +++ /dev/null @@ -1,109 +0,0 @@ -import marimo - -__generated_with = "0.19.11" -app = marimo.App(width="medium") - - -@app.cell -def _(): - import marimo as mo - - return (mo,) - - -@app.cell -def _(mo): - mo.md(""" - ### Struktura vstupu náhodných polí (xarray) - - Data jsou reprezentována pomocí datové struktury `xarray.Dataset`. - - **Definované dimenze a souřadnice (coords):** - - `i_point`: Index konkrétního bodu v nepravidelné síti. - - `i_sample`: Index vzorku (realizace) náhodného pole. - - `i_dim`: Označení prostorové osy (např. 'x', 'y'). - - **Datové proměnné (data_vars):** - - `X`: Matice prostorových souřadnic bodů. Má tvar `[i_dim, i_point]`. - - `QA`: První vygenerované náhodné pole. Má tvar `[i_point, i_sample]`. - - `QB`: Druhé vygenerované náhodné pole. Má tvar `[i_point, i_sample]`. - """) - return - - -@app.cell -def _(): - import xarray as xr - import numpy as np - import matplotlib.pyplot as plt - - n_points = 300 - n_samples = 100 - n_dim = 2 - - X_data = np.random.rand(n_dim, n_points) - QA_data = np.random.rand(n_points, n_samples) - QB_data = np.random.normal(loc=0.5, scale=0.15, size=(n_points, n_samples)) - - ds = xr.Dataset( - data_vars={ - "X": (("i_dim", "i_point"), X_data), - "QA": (("i_point", "i_sample"), QA_data), - "QB": (("i_point", "i_sample"), QB_data), - }, - coords={ - "i_point": np.arange(n_points), - "i_sample": np.arange(n_samples), - "i_dim": ["x", "y"] - } - ) - - # Přidání atributů s pojmenováním porovnávaných veličin - ds["QA"].attrs["long_name"] = "Porovnávaná veličina A (např. propustnost)" - ds["QA"].attrs["units"] = "-" - ds["QB"].attrs["long_name"] = "Porovnávaná veličina B (např. hustota puklin)" - ds["QB"].attrs["units"] = "-" - ds.attrs["description"] = "Testovací data náhodných polí ze samplů na nepravidelné síti" - - stats = xr.Dataset({ - "mean_QA": ds["QA"].mean(dim="i_sample"), - "var_QA": ds["QA"].var(dim="i_sample"), - "mean_QB": ds["QB"].mean(dim="i_sample"), - "var_QB": ds["QB"].var(dim="i_sample") - }) - - x_coords = ds["X"].sel(i_dim="x") - y_coords = ds["X"].sel(i_dim="y") - - fig, axes = plt.subplots(2, 3, figsize=(15, 9)) - fig.suptitle("Porovnání polí QA a QB na neregulární síti", fontsize=16) - - sc1 = axes[0, 0].scatter(x_coords, y_coords, c=stats["mean_QA"], cmap='viridis', s=20) - axes[0, 0].set_title("QA: Mapa průměru") - plt.colorbar(sc1, ax=axes[0, 0]) - - sc2 = axes[0, 1].scatter(x_coords, y_coords, c=stats["var_QA"], cmap='magma', s=20) - axes[0, 1].set_title("QA: Mapa rozptylu") - plt.colorbar(sc2, ax=axes[0, 1]) - - axes[0, 2].hist(ds["QA"].values.flatten(), bins=30, color='skyblue', edgecolor='black') - axes[0, 2].set_title("QA: Histogram všech hodnot") - - sc3 = axes[1, 0].scatter(x_coords, y_coords, c=stats["mean_QB"], cmap='viridis', s=20) - axes[1, 0].set_title("QB: Mapa průměru") - plt.colorbar(sc3, ax=axes[1, 0]) - - sc4 = axes[1, 1].scatter(x_coords, y_coords, c=stats["var_QB"], cmap='magma', s=20) - axes[1, 1].set_title("QB: Mapa rozptylu") - plt.colorbar(sc4, ax=axes[1, 1]) - - axes[1, 2].hist(ds["QB"].values.flatten(), bins=30, color='salmon', edgecolor='black') - axes[1, 2].set_title("QB: Histogram všech hodnot") - - plt.tight_layout() - plt.show() - return - - -if __name__ == "__main__": - app.run() diff --git a/vizualizace-xarray/generovani.py b/vizualizace-xarray/generovani.py index 8fe78c4..c52f840 100644 --- a/vizualizace-xarray/generovani.py +++ b/vizualizace-xarray/generovani.py @@ -1,17 +1,16 @@ import marimo -__generated_with = "0.19.11" +__generated_with = "0.20.2" app = marimo.App(width="medium") -@app.cell +@app.cell(hide_code=True) def _(): import marimo as mo - return (mo,) -@app.cell +@app.cell(hide_code=True) def _(mo): mo.md(""" ### Struktura vstupu náhodných polí (xarray) @@ -31,19 +30,21 @@ def _(mo): return -@app.cell +@app.cell(hide_code=True) def _(): import xarray as xr import numpy as np import matplotlib.pyplot as plt + from scipy.stats import binned_statistic_2d - n_points = 300 + n_points = 1000 n_samples = 100 n_dim = 2 X_data = np.random.rand(n_dim, n_points) - QA_data = np.random.rand(n_points, n_samples) + QA_data = np.random.uniform(0.01, 1.0, size=(n_points, n_samples)) QB_data = np.random.normal(loc=0.5, scale=0.15, size=(n_points, n_samples)) + QB_data = np.clip(QB_data, 0.01, None) ds = xr.Dataset( data_vars={ @@ -58,12 +59,10 @@ def _(): } ) - # Přidání atributů s pojmenováním porovnávaných veličin - ds["QA"].attrs["long_name"] = "Porovnávaná veličina A (např. propustnost)" + ds["QA"].attrs["long_name"] = "Porovnávaná veličina A" ds["QA"].attrs["units"] = "-" - ds["QB"].attrs["long_name"] = "Porovnávaná veličina B (např. hustota puklin)" + ds["QB"].attrs["long_name"] = "Porovnávaná veličina B" ds["QB"].attrs["units"] = "-" - ds.attrs["description"] = "Testovací data náhodných polí ze samplů na nepravidelné síti" stats = xr.Dataset({ "mean_QA": ds["QA"].mean(dim="i_sample"), @@ -72,38 +71,95 @@ def _(): "var_QB": ds["QB"].var(dim="i_sample") }) - x_coords = ds["X"].sel(i_dim="x") - y_coords = ds["X"].sel(i_dim="y") + x = ds["X"].sel(i_dim="x").values + y = ds["X"].sel(i_dim="y").values + + # Pro grafy průměru a rozptylu použít imgshow nebo podobnou funkci + num_bins = 20 + bins = (num_bins, num_bins) + + binned_mean_QA = binned_statistic_2d(x, y, stats["mean_QA"].values, statistic='mean', bins=bins) + binned_var_QA = binned_statistic_2d(x, y, stats["var_QA"].values, statistic='mean', bins=bins) + binned_mean_QB = binned_statistic_2d(x, y, stats["mean_QB"].values, statistic='mean', bins=bins) + binned_var_QB = binned_statistic_2d(x, y, stats["var_QB"].values, statistic='mean', bins=bins) - fig, axes = plt.subplots(2, 3, figsize=(15, 9)) - fig.suptitle("Porovnání polí QA a QB na neregulární síti", fontsize=16) + x_edges = binned_mean_QA.x_edge + y_edges = binned_mean_QA.y_edge - sc1 = axes[0, 0].scatter(x_coords, y_coords, c=stats["mean_QA"], cmap='viridis', s=20) + fig, axes = plt.subplots(2, 3, figsize=(15, 10)) + + im1 = axes[0, 0].pcolormesh(x_edges, y_edges, binned_mean_QA.statistic.T, cmap='viridis', shading='flat') axes[0, 0].set_title("QA: Mapa průměru") - plt.colorbar(sc1, ax=axes[0, 0]) + axes[0, 0].set_aspect('equal') + plt.colorbar(im1, ax=axes[0, 0]) - sc2 = axes[0, 1].scatter(x_coords, y_coords, c=stats["var_QA"], cmap='magma', s=20) + im2 = axes[0, 1].pcolormesh(x_edges, y_edges, binned_var_QA.statistic.T, cmap='magma', shading='flat') axes[0, 1].set_title("QA: Mapa rozptylu") - plt.colorbar(sc2, ax=axes[0, 1]) + axes[0, 1].set_aspect('equal') + plt.colorbar(im2, ax=axes[0, 1]) axes[0, 2].hist(ds["QA"].values.flatten(), bins=30, color='skyblue', edgecolor='black') - axes[0, 2].set_title("QA: Histogram všech hodnot") + axes[0, 2].set_title("QA: Histogram") - sc3 = axes[1, 0].scatter(x_coords, y_coords, c=stats["mean_QB"], cmap='viridis', s=20) + im3 = axes[1, 0].pcolormesh(x_edges, y_edges, binned_mean_QB.statistic.T, cmap='viridis', shading='flat') axes[1, 0].set_title("QB: Mapa průměru") - plt.colorbar(sc3, ax=axes[1, 0]) + axes[1, 0].set_aspect('equal') + plt.colorbar(im3, ax=axes[1, 0]) - sc4 = axes[1, 1].scatter(x_coords, y_coords, c=stats["var_QB"], cmap='magma', s=20) + im4 = axes[1, 1].pcolormesh(x_edges, y_edges, binned_var_QB.statistic.T, cmap='magma', shading='flat') axes[1, 1].set_title("QB: Mapa rozptylu") - plt.colorbar(sc4, ax=axes[1, 1]) + axes[1, 1].set_aspect('equal') + plt.colorbar(im4, ax=axes[1, 1]) axes[1, 2].hist(ds["QB"].values.flatten(), bins=30, color='salmon', edgecolor='black') - axes[1, 2].set_title("QB: Histogram všech hodnot") + axes[1, 2].set_title("QB: Histogram") plt.tight_layout() plt.show() - return + + # Vypočtěte různé druhy průměrů (aritmetický, geometrický, harmonický) na podčtvercích (multi-scale) pro různé velikosti oken. Zatím pro jednu velikost okna. + q_values = stats["mean_QA"].values + grid_size = 10 + A_arith = np.zeros((grid_size, grid_size)) + A_geom = np.zeros((grid_size, grid_size)) + A_harm = np.zeros((grid_size, grid_size)) + + dx = 1.0 / grid_size + dy = 1.0 / grid_size + + for i in range(grid_size): + for j in range(grid_size): + mask = (x >= i*dx) & (x < (i+1)*dx) & (y >= j*dy) & (y < (j+1)*dy) + q_in_window = q_values[mask] + + if len(q_in_window) > 0: + A_arith[j, i] = np.mean(q_in_window) + A_geom[j, i] = np.exp(np.mean(np.log(q_in_window))) + A_harm[j, i] = 1.0 / np.mean(1.0 / q_in_window) + else: + A_arith[j, i] = np.nan + A_geom[j, i] = np.nan + A_harm[j, i] = np.nan + + fig2, axes2 = plt.subplots(1, 3, figsize=(15, 5)) + + im5 = axes2[0].imshow(A_arith, origin='lower', extent=[0, 1, 0, 1], cmap='viridis') + axes2[0].set_title("Aritmetický průměr") + plt.colorbar(im5, ax=axes2[0]) + + im6 = axes2[1].imshow(A_geom, origin='lower', extent=[0, 1, 0, 1], cmap='viridis') + axes2[1].set_title("Geometrický průměr") + plt.colorbar(im6, ax=axes2[1]) + + im7 = axes2[2].imshow(A_harm, origin='lower', extent=[0, 1, 0, 1], cmap='viridis') + axes2[2].set_title("Harmonický průměr") + plt.colorbar(im7, ax=axes2[2]) + + plt.tight_layout() + plt.show() + + return ds, fig, fig2 if __name__ == "__main__": - app.run() + app.run() \ No newline at end of file From fc49770a94c823f35d241003df52cdfb9d8d0dc6 Mon Sep 17 00:00:00 2001 From: alevember Date: Tue, 24 Mar 2026 10:44:40 +0100 Subject: [PATCH 2/4] 9.3 zadani --- vizualizace-xarray/analyza.py | 135 ++++++++++++++++ vizualizace-xarray/generovani.py | 187 +++++++++-------------- vizualizace-xarray/project/generovani.py | 76 --------- 3 files changed, 207 insertions(+), 191 deletions(-) create mode 100644 vizualizace-xarray/analyza.py delete mode 100644 vizualizace-xarray/project/generovani.py diff --git a/vizualizace-xarray/analyza.py b/vizualizace-xarray/analyza.py new file mode 100644 index 0000000..1624a28 --- /dev/null +++ b/vizualizace-xarray/analyza.py @@ -0,0 +1,135 @@ +import numpy as np +import matplotlib.pyplot as plt +from scipy.stats import binned_statistic_2d, ttest_ind +from scipy.interpolate import griddata +import pywt +import warnings +warnings.filterwarnings('ignore') + +def create_binned_grid(x, y, values, num_bins=20): + bins = (num_bins, num_bins) + binned = binned_statistic_2d(x, y, values, statistic='mean', bins=bins) + return binned.statistic, binned.x_edge, binned.y_edge + +def bin_single_field(X_coords, data_var, num_bins=20): + x = X_coords.sel(i_dim="x").values + y = X_coords.sel(i_dim="y").values + grid, x_edges, y_edges = create_binned_grid(x, y, data_var.values, num_bins) + return grid, x_edges, y_edges + +def bin_all_samples(X_coords, data_var, num_bins=20): + x = X_coords.sel(i_dim="x").values + y = X_coords.sel(i_dim="y").values + n_samples = data_var.shape[1] + + grid_3d = np.zeros((num_bins, num_bins, n_samples)) + for i in range(n_samples): + grid_3d[:, :, i], x_edges, y_edges = create_binned_grid(x, y, data_var.values[:, i], num_bins) + + return grid_3d, x_edges, y_edges + +def plot_means_two_columns(x_edges, y_edges, grid_mean_A, grid_mean_B): + fig, axes = plt.subplots(1, 2, figsize=(12, 5)) + + im1 = axes[0].pcolormesh(x_edges, y_edges, grid_mean_A.T, cmap='viridis', shading='flat') + axes[0].set_title("Průměr pole A") + plt.colorbar(im1, ax=axes[0]) + + im2 = axes[1].pcolormesh(x_edges, y_edges, grid_mean_B.T, cmap='viridis', shading='flat') + axes[1].set_title("Průměr pole B") + plt.colorbar(im2, ax=axes[1]) + + plt.tight_layout() + return fig + +def perform_ttest(grid_A, grid_B): + t_stat, p_value = ttest_ind(grid_A, grid_B, axis=-1, equal_var=False, alternative='two-sided') + return t_stat, p_value + +def plot_ttest_results(x_edges, y_edges, t_stat, p_value): + fig, axes = plt.subplots(1, 2, figsize=(12, 5)) + + im1 = axes[0].pcolormesh(x_edges, y_edges, t_stat.T, cmap='coolwarm', shading='flat') + axes[0].set_title("T-statistika") + plt.colorbar(im1, ax=axes[0]) + + im2 = axes[1].pcolormesh(x_edges, y_edges, p_value.T, cmap='RdYlGn', shading='flat', vmin=0, vmax=0.1) + axes[1].set_title("p-hodnota") + plt.colorbar(im2, ax=axes[1]) + + plt.tight_layout() + return fig + +def multiscale_analysis(x, y, q_a_vals, q_b_vals): + grid_sizes = [2, 4, 8] + p_values_all = {'arith': [], 'geom': [], 'harm': []} + + fig1, axes1 = plt.subplots(len(grid_sizes), 3, figsize=(15, 5 * len(grid_sizes))) + + for idx, gs in enumerate(grid_sizes): + dx = 1.0 / gs + dy = 1.0 / gs + + p_map_arith = np.full((gs, gs), np.nan) + p_map_geom = np.full((gs, gs), np.nan) + p_map_harm = np.full((gs, gs), np.nan) + + for i in range(gs): + for j in range(gs): + mask = (x >= i*dx) & (x < (i+1)*dx) & (y >= j*dy) & (y < (j+1)*dy) + + if np.sum(mask) > 0: + qa_w = q_a_vals[mask, :] + qb_w = q_b_vals[mask, :] + + qa_arith = np.mean(qa_w, axis=0) + qb_arith = np.mean(qb_w, axis=0) + + qa_w_safe = np.clip(qa_w, 1e-10, None) + qb_w_safe = np.clip(qb_w, 1e-10, None) + + qa_geom = np.exp(np.mean(np.log(qa_w_safe), axis=0)) + qb_geom = np.exp(np.mean(np.log(qb_w_safe), axis=0)) + + qa_harm = 1.0 / np.mean(1.0 / qa_w_safe, axis=0) + qb_harm = 1.0 / np.mean(1.0 / qb_w_safe, axis=0) + + _, p_a = ttest_ind(qa_arith, qb_arith, equal_var=False) + _, p_g = ttest_ind(qa_geom, qb_geom, equal_var=False) + _, p_h = ttest_ind(qa_harm, qb_harm, equal_var=False) + + p_map_arith[j, i] = p_a + p_map_geom[j, i] = p_g + p_map_harm[j, i] = p_h + + p_values_all['arith'].append(np.nanmean(p_map_arith)) + p_values_all['geom'].append(np.nanmean(p_map_geom)) + p_values_all['harm'].append(np.nanmean(p_map_harm)) + + im_a = axes1[idx, 0].imshow(p_map_arith, origin='lower', extent=[0, 1, 0, 1], cmap='RdYlGn', vmin=0, vmax=1) + axes1[idx, 0].set_ylabel(f"Mřížka {gs}x{gs}") + if idx == 0: axes1[idx, 0].set_title("Aritmetický") + plt.colorbar(im_a, ax=axes1[idx, 0]) + + im_g = axes1[idx, 1].imshow(p_map_geom, origin='lower', extent=[0, 1, 0, 1], cmap='RdYlGn', vmin=0, vmax=1) + if idx == 0: axes1[idx, 1].set_title("Geometrický") + plt.colorbar(im_g, ax=axes1[idx, 1]) + + im_h = axes1[idx, 2].imshow(p_map_harm, origin='lower', extent=[0, 1, 0, 1], cmap='RdYlGn', vmin=0, vmax=1) + if idx == 0: axes1[idx, 2].set_title("Harmonický") + plt.colorbar(im_h, ax=axes1[idx, 2]) + + plt.tight_layout() + + fig2, ax2 = plt.subplots(figsize=(8, 5)) + ax2.plot(grid_sizes, p_values_all['arith'], marker='o', label='Aritmetický') + ax2.plot(grid_sizes, p_values_all['geom'], marker='s', label='Geometrický') + ax2.plot(grid_sizes, p_values_all['harm'], marker='^', label='Harmonický') + ax2.set_xticks(grid_sizes) + ax2.set_xlabel("Velikost okna (počet dělení)") + ax2.set_ylabel("Průměrná p-hodnota") + ax2.set_title("Závislost p-hodnoty na velikosti okna") + plt.legend() + + return fig1, fig2 + diff --git a/vizualizace-xarray/generovani.py b/vizualizace-xarray/generovani.py index c52f840..eb846cf 100644 --- a/vizualizace-xarray/generovani.py +++ b/vizualizace-xarray/generovani.py @@ -7,6 +7,7 @@ @app.cell(hide_code=True) def _(): import marimo as mo + return (mo,) @@ -21,11 +22,6 @@ def _(mo): - `i_point`: Index konkrétního bodu v nepravidelné síti. - `i_sample`: Index vzorku (realizace) náhodného pole. - `i_dim`: Označení prostorové osy (např. 'x', 'y'). - - **Datové proměnné (data_vars):** - - `X`: Matice prostorových souřadnic bodů. Má tvar `[i_dim, i_point]`. - - `QA`: První vygenerované náhodné pole. Má tvar `[i_point, i_sample]`. - - `QB`: Druhé vygenerované náhodné pole. Má tvar `[i_point, i_sample]`. """) return @@ -34,132 +30,93 @@ def _(mo): def _(): import xarray as xr import numpy as np - import matplotlib.pyplot as plt - from scipy.stats import binned_statistic_2d + import analyza n_points = 1000 - n_samples = 100 + n_samples_A = 10 + n_samples_B = 20 n_dim = 2 X_data = np.random.rand(n_dim, n_points) - QA_data = np.random.uniform(0.01, 1.0, size=(n_points, n_samples)) - QB_data = np.random.normal(loc=0.5, scale=0.15, size=(n_points, n_samples)) - QB_data = np.clip(QB_data, 0.01, None) + + # Testovací pole = 10**( np.random.normal(N, loc = -10, scale=3)) + QA_data = 10**(np.random.normal(loc=-10, scale=3, size=(n_points, n_samples_A))) + + # případně zvětšit scale pro větší rozdíl průměrů polí + QB_data = 10**(np.random.normal(loc=-10, scale=5, size=(n_points, n_samples_B))) ds = xr.Dataset( data_vars={ "X": (("i_dim", "i_point"), X_data), - "QA": (("i_point", "i_sample"), QA_data), - "QB": (("i_point", "i_sample"), QB_data), + "QA": (("i_point", "i_sample_A"), QA_data), + "QB": (("i_point", "i_sample_B"), QB_data), }, coords={ "i_point": np.arange(n_points), - "i_sample": np.arange(n_samples), + "i_sample_A": np.arange(n_samples_A), + "i_sample_B": np.arange(n_samples_B), "i_dim": ["x", "y"] } ) + return analyza, ds + + +@app.cell(hide_code=True) +def _(ds): + # Průměry pro každé ze dvou polí. Průměry do samostatné buňky. + mean_QA = ds["QA"].mean(dim="i_sample_A") + mean_QB = ds["QB"].mean(dim="i_sample_B") + return mean_QA, mean_QB + + +@app.cell(hide_code=True) +def _(analyza, ds, mean_QA, mean_QB): + grid_mean_A, x_edges, y_edges = analyza.bin_single_field(ds["X"], mean_QA) + grid_mean_B, _, _ = analyza.bin_single_field(ds["X"], mean_QB) + + fig_means = analyza.plot_means_two_columns(x_edges, y_edges, grid_mean_A, grid_mean_B) + fig_means + return x_edges, y_edges + + +@app.cell(hide_code=True) +def _(analyza, ds): + grid_A, _, _ = analyza.bin_all_samples(ds["X"], ds["QA"]) + grid_B, _, _ = analyza.bin_all_samples(ds["X"], ds["QB"]) + + t_stat, p_value = analyza.perform_ttest(grid_A, grid_B) + return p_value, t_stat - ds["QA"].attrs["long_name"] = "Porovnávaná veličina A" - ds["QA"].attrs["units"] = "-" - ds["QB"].attrs["long_name"] = "Porovnávaná veličina B" - ds["QB"].attrs["units"] = "-" - - stats = xr.Dataset({ - "mean_QA": ds["QA"].mean(dim="i_sample"), - "var_QA": ds["QA"].var(dim="i_sample"), - "mean_QB": ds["QB"].mean(dim="i_sample"), - "var_QB": ds["QB"].var(dim="i_sample") - }) - - x = ds["X"].sel(i_dim="x").values - y = ds["X"].sel(i_dim="y").values - - # Pro grafy průměru a rozptylu použít imgshow nebo podobnou funkci - num_bins = 20 - bins = (num_bins, num_bins) - - binned_mean_QA = binned_statistic_2d(x, y, stats["mean_QA"].values, statistic='mean', bins=bins) - binned_var_QA = binned_statistic_2d(x, y, stats["var_QA"].values, statistic='mean', bins=bins) - binned_mean_QB = binned_statistic_2d(x, y, stats["mean_QB"].values, statistic='mean', bins=bins) - binned_var_QB = binned_statistic_2d(x, y, stats["var_QB"].values, statistic='mean', bins=bins) - - x_edges = binned_mean_QA.x_edge - y_edges = binned_mean_QA.y_edge - - fig, axes = plt.subplots(2, 3, figsize=(15, 10)) - - im1 = axes[0, 0].pcolormesh(x_edges, y_edges, binned_mean_QA.statistic.T, cmap='viridis', shading='flat') - axes[0, 0].set_title("QA: Mapa průměru") - axes[0, 0].set_aspect('equal') - plt.colorbar(im1, ax=axes[0, 0]) - - im2 = axes[0, 1].pcolormesh(x_edges, y_edges, binned_var_QA.statistic.T, cmap='magma', shading='flat') - axes[0, 1].set_title("QA: Mapa rozptylu") - axes[0, 1].set_aspect('equal') - plt.colorbar(im2, ax=axes[0, 1]) - - axes[0, 2].hist(ds["QA"].values.flatten(), bins=30, color='skyblue', edgecolor='black') - axes[0, 2].set_title("QA: Histogram") - - im3 = axes[1, 0].pcolormesh(x_edges, y_edges, binned_mean_QB.statistic.T, cmap='viridis', shading='flat') - axes[1, 0].set_title("QB: Mapa průměru") - axes[1, 0].set_aspect('equal') - plt.colorbar(im3, ax=axes[1, 0]) - - im4 = axes[1, 1].pcolormesh(x_edges, y_edges, binned_var_QB.statistic.T, cmap='magma', shading='flat') - axes[1, 1].set_title("QB: Mapa rozptylu") - axes[1, 1].set_aspect('equal') - plt.colorbar(im4, ax=axes[1, 1]) - - axes[1, 2].hist(ds["QB"].values.flatten(), bins=30, color='salmon', edgecolor='black') - axes[1, 2].set_title("QB: Histogram") - - plt.tight_layout() - plt.show() - - # Vypočtěte různé druhy průměrů (aritmetický, geometrický, harmonický) na podčtvercích (multi-scale) pro různé velikosti oken. Zatím pro jednu velikost okna. - q_values = stats["mean_QA"].values - grid_size = 10 - A_arith = np.zeros((grid_size, grid_size)) - A_geom = np.zeros((grid_size, grid_size)) - A_harm = np.zeros((grid_size, grid_size)) - - dx = 1.0 / grid_size - dy = 1.0 / grid_size - - for i in range(grid_size): - for j in range(grid_size): - mask = (x >= i*dx) & (x < (i+1)*dx) & (y >= j*dy) & (y < (j+1)*dy) - q_in_window = q_values[mask] - - if len(q_in_window) > 0: - A_arith[j, i] = np.mean(q_in_window) - A_geom[j, i] = np.exp(np.mean(np.log(q_in_window))) - A_harm[j, i] = 1.0 / np.mean(1.0 / q_in_window) - else: - A_arith[j, i] = np.nan - A_geom[j, i] = np.nan - A_harm[j, i] = np.nan - - fig2, axes2 = plt.subplots(1, 3, figsize=(15, 5)) - - im5 = axes2[0].imshow(A_arith, origin='lower', extent=[0, 1, 0, 1], cmap='viridis') - axes2[0].set_title("Aritmetický průměr") - plt.colorbar(im5, ax=axes2[0]) - - im6 = axes2[1].imshow(A_geom, origin='lower', extent=[0, 1, 0, 1], cmap='viridis') - axes2[1].set_title("Geometrický průměr") - plt.colorbar(im6, ax=axes2[1]) - - im7 = axes2[2].imshow(A_harm, origin='lower', extent=[0, 1, 0, 1], cmap='viridis') - axes2[2].set_title("Harmonický průměr") - plt.colorbar(im7, ax=axes2[2]) - - plt.tight_layout() - plt.show() - - return ds, fig, fig2 + +@app.cell(hide_code=True) +def _(analyza, p_value, t_stat, x_edges, y_edges): + fig_test = analyza.plot_ttest_results(x_edges, y_edges, t_stat, p_value) + fig_test + return + + +@app.cell(hide_code=True) +def _(analyza, ds): + x_vals = ds["X"].sel(i_dim="x").values + y_vals = ds["X"].sel(i_dim="y").values + q_a_vals = ds["QA"].values + q_b_vals = ds["QB"].values + + fig_multiscale_maps, fig_multiscale_line = analyza.multiscale_analysis(x_vals, y_vals, q_a_vals, q_b_vals) + return fig_multiscale_line, fig_multiscale_maps + + +@app.cell(hide_code=True) +def _(fig_multiscale_maps): + fig_multiscale_maps + return + + +@app.cell(hide_code=True) +def _(fig_multiscale_line): + fig_multiscale_line + return if __name__ == "__main__": - app.run() \ No newline at end of file + app.run() diff --git a/vizualizace-xarray/project/generovani.py b/vizualizace-xarray/project/generovani.py deleted file mode 100644 index 8d9f7dc..0000000 --- a/vizualizace-xarray/project/generovani.py +++ /dev/null @@ -1,76 +0,0 @@ -import xarray as xr -import numpy as np -import matplotlib.pyplot as plt - -# --- Konfigurační parametry --- -n_points = 300 # Počet bodů v prostoru -n_samples = 100 # Počet realizací (vzorků) -n_dim = 2 # Rozměrnost prostoru (x, y) - -# --- Generování syntetických dat --- -# Náhodné souřadnice bodů v jednotkovém čtverci [0, 1] x [0, 1] -X_data = np.random.rand(n_dim, n_points) - -# Pole QA: generováno z rovnoměrného rozdělení [0, 1] -QA_data = np.random.rand(n_points, n_samples) - -# Pole QB: generováno z normálního rozdělení (μ=0.5, σ=0.15) -QB_data = np.random.normal(loc=0.5, scale=0.15, size=(n_points, n_samples)) - -# --- Vytvoření struktury xarray Dataset --- -ds = xr.Dataset( - data_vars={ - "X": (("i_dim", "i_point"), X_data), - "QA": (("i_point", "i_sample"), QA_data), - "QB": (("i_point", "i_sample"), QB_data), - }, - coords={ - "i_point": np.arange(n_points), - "i_sample": np.arange(n_samples), - "i_dim": ["x", "y"] - } -) - -# --- Statistická analýza --- -# Výpočet průměru a rozptylu podél dimenze realizací (i_sample) -stats = xr.Dataset({ - "mean_QA": ds["QA"].mean(dim="i_sample"), - "var_QA": ds["QA"].var(dim="i_sample"), - "mean_QB": ds["QB"].mean(dim="i_sample"), - "var_QB": ds["QB"].var(dim="i_sample") -}) - -# Extraxe souřadnic pro vizualizaci -x_coords = ds["X"].sel(i_dim="x") -y_coords = ds["X"].sel(i_dim="y") - -# --- Vizualizace --- -fig, axes = plt.subplots(2, 3, figsize=(15, 9)) -fig.suptitle("Porovnání polí QA a QB na neregulární síti", fontsize=16) - -# Horní řádek: Analýza pole QA -sc1 = axes[0, 0].scatter(x_coords, y_coords, c=stats["mean_QA"], cmap='viridis', s=20) -axes[0, 0].set_title("QA: Mapa průměru") -plt.colorbar(sc1, ax=axes[0, 0]) - -sc2 = axes[0, 1].scatter(x_coords, y_coords, c=stats["var_QA"], cmap='magma', s=20) -axes[0, 1].set_title("QA: Mapa rozptylu") -plt.colorbar(sc2, ax=axes[0, 1]) - -axes[0, 2].hist(ds["QA"].values.flatten(), bins=30, color='skyblue', edgecolor='black') -axes[0, 2].set_title("QA: Histogram všech hodnot") - -# Spodní řádek: Analýza pole QB -sc3 = axes[1, 0].scatter(x_coords, y_coords, c=stats["mean_QB"], cmap='viridis', s=20) -axes[1, 0].set_title("QB: Mapa průměru") -plt.colorbar(sc3, ax=axes[1, 0]) - -sc4 = axes[1, 1].scatter(x_coords, y_coords, c=stats["var_QB"], cmap='magma', s=20) -axes[1, 1].set_title("QB: Mapa rozptylu") -plt.colorbar(sc4, ax=axes[1, 1]) - -axes[1, 2].hist(ds["QB"].values.flatten(), bins=30, color='salmon', edgecolor='black') -axes[1, 2].set_title("QB: Histogram všech hodnot") - -plt.tight_layout() -plt.show() \ No newline at end of file From 73ac432588653814ab79668b87836dff0a37350b Mon Sep 17 00:00:00 2001 From: alevember Date: Tue, 24 Mar 2026 10:47:18 +0100 Subject: [PATCH 3/4] 9.3 zadani --- vizualizace-xarray/analyza.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vizualizace-xarray/analyza.py b/vizualizace-xarray/analyza.py index 1624a28..95559c2 100644 --- a/vizualizace-xarray/analyza.py +++ b/vizualizace-xarray/analyza.py @@ -105,7 +105,7 @@ def multiscale_analysis(x, y, q_a_vals, q_b_vals): p_values_all['arith'].append(np.nanmean(p_map_arith)) p_values_all['geom'].append(np.nanmean(p_map_geom)) p_values_all['harm'].append(np.nanmean(p_map_harm)) - + im_a = axes1[idx, 0].imshow(p_map_arith, origin='lower', extent=[0, 1, 0, 1], cmap='RdYlGn', vmin=0, vmax=1) axes1[idx, 0].set_ylabel(f"Mřížka {gs}x{gs}") if idx == 0: axes1[idx, 0].set_title("Aritmetický") From 73d1258ead5913a713dbf2cffa2330f55cc3c5a9 Mon Sep 17 00:00:00 2001 From: alevember Date: Tue, 31 Mar 2026 10:01:46 +0200 Subject: [PATCH 4/4] zadani 24 --- vizualizace-xarray/analyza.py | 242 ++++++++++++++++++++++++++----- vizualizace-xarray/generovani.py | 137 +++++++++++------ 2 files changed, 298 insertions(+), 81 deletions(-) diff --git a/vizualizace-xarray/analyza.py b/vizualizace-xarray/analyza.py index 95559c2..77b59b4 100644 --- a/vizualizace-xarray/analyza.py +++ b/vizualizace-xarray/analyza.py @@ -1,78 +1,245 @@ import numpy as np import matplotlib.pyplot as plt -from scipy.stats import binned_statistic_2d, ttest_ind -from scipy.interpolate import griddata -import pywt +from scipy.stats import binned_statistic_2d, ttest_ind, wasserstein_distance +from typing import Tuple, Any +import xarray as xr +import pandas as pd +import hvplot.xarray +import hvplot.pandas +import holoviews as hv + +# Zapnutí interaktivního režimu pro hvplot grafy (lupa, posun, uložení) +hv.extension('bokeh') + import warnings warnings.filterwarnings('ignore') -def create_binned_grid(x, y, values, num_bins=20): +# Refaktorovat kód -> výpočty a složitější grafy do samostatného modulu/ modulů. + +class MeanCalculator: + # Modularize - create a class representing a "mean" + + @staticmethod + def arithmetic(q: np.ndarray) -> float: + # aritmetický průměr : A_1(q) = sum_i (q_i) / N + return float(np.mean(q)) + + @staticmethod + def geometric(q: np.ndarray) -> float: + # geometrický průměr : A_log(q) = exp( (sum_i log(q_i)) / N ) = (prod_i q_i)**(1/N) + # Omezení hodnot zdola (1e-10), aby se předešlo chybě logaritmu z nuly + safe_q = np.clip(q, 1e-10, None) + return float(np.exp(np.mean(np.log(safe_q)))) + + @staticmethod + def harmonic(q: np.ndarray) -> float: + # Harmonický průměr: A_inv(q) = (( sum_i q_i**(-1)) /N)**(-1) + # Omezení hodnot zdola, aby se předešlo dělení nulou + safe_q = np.clip(q, 1e-10, None) + return float(1.0 / np.mean(1.0 / safe_q)) + + +def create_binned_grid(x: np.ndarray, y: np.ndarray, values: np.ndarray, num_bins: int = 20) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: + """ + x - field A, has shape (N,N) + Přidat k hlavním funkcím doc string s popisem vstupních parametrů. + """ bins = (num_bins, num_bins) + + # Seskupení náhodných bodů do 2D mřížky (binning) s výpočtem průměru binned = binned_statistic_2d(x, y, values, statistic='mean', bins=bins) - return binned.statistic, binned.x_edge, binned.y_edge + + # Compute mean of whole array and use it as default value. + stat = binned.statistic + default_val = np.nanmean(values) + + # Vyplnění prázdných čtverců (kde nebyly žádné body a vzniklo NaN) celkovým průměrem + stat = np.where(np.isnan(stat), default_val, stat) + + return stat, binned.x_edge, binned.y_edge -def bin_single_field(X_coords, data_var, num_bins=20): +def bin_single_field(X_coords: Any, data_var: Any, num_bins: int = 20) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: + """ + x - field A, has shape (N,N) + """ x = X_coords.sel(i_dim="x").values y = X_coords.sel(i_dim="y").values grid, x_edges, y_edges = create_binned_grid(x, y, data_var.values, num_bins) return grid, x_edges, y_edges -def bin_all_samples(X_coords, data_var, num_bins=20): +def bin_all_samples(X_coords: Any, data_var: Any, num_bins: int = 20) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: + """ + x - field A, has shape (N,N) + """ x = X_coords.sel(i_dim="x").values y = X_coords.sel(i_dim="y").values n_samples = data_var.shape[1] + # Vytvoření 3D pole pro uložení binningu všech vzorků (samples) grid_3d = np.zeros((num_bins, num_bins, n_samples)) for i in range(n_samples): grid_3d[:, :, i], x_edges, y_edges = create_binned_grid(x, y, data_var.values[:, i], num_bins) return grid_3d, x_edges, y_edges -def plot_means_two_columns(x_edges, y_edges, grid_mean_A, grid_mean_B): - fig, axes = plt.subplots(1, 2, figsize=(12, 5)) +def plot_means_two_columns(x_edges: np.ndarray, y_edges: np.ndarray, grid_mean_A: np.ndarray, grid_mean_B: np.ndarray): + # Dva sloupce, vlevo field A, vpravo field B + # Pro grafy průměru a rozptylu použít imgshow nebo podobnou funkci + da_A = xr.DataArray(grid_mean_A.T, coords=[('y', y_edges[:-1]), ('x', x_edges[:-1])], name='Průměr_A') + da_B = xr.DataArray(grid_mean_B.T, coords=[('y', y_edges[:-1]), ('x', x_edges[:-1])], name='Průměr_B') - im1 = axes[0].pcolormesh(x_edges, y_edges, grid_mean_A.T, cmap='viridis', shading='flat') - axes[0].set_title("Průměr pole A") - plt.colorbar(im1, ax=axes[0]) + plot1 = da_A.hvplot.image(x='x', y='y', cmap='viridis', title="Průměr pole A", width=400, height=350) + plot2 = da_B.hvplot.image(x='x', y='y', cmap='viridis', title="Průměr pole B", width=400, height=350) - im2 = axes[1].pcolormesh(x_edges, y_edges, grid_mean_B.T, cmap='viridis', shading='flat') - axes[1].set_title("Průměr pole B") - plt.colorbar(im2, ax=axes[1]) + return plot1 + plot2 + +def plot_variances_two_columns(x_edges: np.ndarray, y_edges: np.ndarray, grid_var_A: np.ndarray, grid_var_B: np.ndarray): + # mapa průměru a rozptylu pole (řezu) + da_A = xr.DataArray(grid_var_A.T, coords=[('y', y_edges[:-1]), ('x', x_edges[:-1])], name='Rozptyl_A') + da_B = xr.DataArray(grid_var_B.T, coords=[('y', y_edges[:-1]), ('x', x_edges[:-1])], name='Rozptyl_B') - plt.tight_layout() - return fig + plot1 = da_A.hvplot.image(x='x', y='y', cmap='magma', title="Rozptyl pole A", width=400, height=350) + plot2 = da_B.hvplot.image(x='x', y='y', cmap='magma', title="Rozptyl pole B", width=400, height=350) + + return plot1 + plot2 + +def plot_histograms(data_A: np.ndarray, data_B: np.ndarray): + # histogram + df_A = pd.DataFrame({'QA': data_A.flatten()}) + df_B = pd.DataFrame({'QB': data_B.flatten()}) + + plot1 = df_A.hvplot.hist('QA', bins=30, color='skyblue', title="Histogram pole A", width=400, height=300) + plot2 = df_B.hvplot.hist('QB', bins=30, color='salmon', title="Histogram pole B", width=400, height=300) + + return plot1 + plot2 -def perform_ttest(grid_A, grid_B): - t_stat, p_value = ttest_ind(grid_A, grid_B, axis=-1, equal_var=False, alternative='two-sided') +def plot_point_counts(x: np.ndarray, y: np.ndarray, num_bins: int = 20): + # Přidat zobrazení počtu vstupních bodů v masce. + bins = (num_bins, num_bins) + + # Výpočet počtu bodů spadajících do každé buňky (statistic='count') + binned = binned_statistic_2d(x, y, None, statistic='count', bins=bins) + + da_c = xr.DataArray(binned.statistic.T, coords=[('y', binned.y_edge[:-1]), ('x', binned.x_edge[:-1])], name='Počet_bodů') + plot1 = da_c.hvplot.image(x='x', y='y', cmap='plasma', title="Počet vstupních bodů v masce", width=500, height=400) + return plot1 + +def perform_ttest(grid_A: np.ndarray, grid_B: np.ndarray) -> Tuple[np.ndarray, np.ndarray]: + # Buňka 3 : statistický test (Two-sample t-tests , ) pro odpovídající dvojice pixelů průměru pro více polí ve skupině A a skupině B, může být různý počet. Viz. scipy.stats.testt_ind; a.shape = (nx, ny, 10) b.shape = (nx,ny, 20), axis=-1, equal_var=False, alternative='two-sided' + # ? test_ind s ignorováním NaN nebo pro maskovaná pole. np.masked_array + + # nan_policy='omit' umožňuje ignorovat hodnoty NaN při výpočtu statistiky + t_stat, p_value = ttest_ind(grid_A, grid_B, axis=-1, equal_var=False, alternative='two-sided', nan_policy='omit') return t_stat, p_value -def plot_ttest_results(x_edges, y_edges, t_stat, p_value): - fig, axes = plt.subplots(1, 2, figsize=(12, 5)) +def plot_ttest_results(x_edges: np.ndarray, y_edges: np.ndarray, t_stat: np.ndarray, p_value: np.ndarray): + # Grafy pro T (attribute statistics) a p-value. + da_t = xr.DataArray(t_stat.T, coords=[('y', y_edges[:-1]), ('x', x_edges[:-1])], name='T_statistika') + da_p = xr.DataArray(p_value.T, coords=[('y', y_edges[:-1]), ('x', x_edges[:-1])], name='p_hodnota') - im1 = axes[0].pcolormesh(x_edges, y_edges, t_stat.T, cmap='coolwarm', shading='flat') - axes[0].set_title("T-statistika") - plt.colorbar(im1, ax=axes[0]) + plot1 = da_t.hvplot.image(x='x', y='y', cmap='coolwarm', title="T-statistika", width=400, height=350) - im2 = axes[1].pcolormesh(x_edges, y_edges, p_value.T, cmap='RdYlGn', shading='flat', vmin=0, vmax=0.1) - axes[1].set_title("p-hodnota") - plt.colorbar(im2, ax=axes[1]) + # Range pro p-value (0, 1). Nastavení limitů barev přes clim + plot2 = da_p.hvplot.image(x='x', y='y', cmap='RdYlGn', title="p-hodnota", width=400, height=350, clim=(0, 1)) - plt.tight_layout() - return fig + return plot1 + plot2 + +def plot_interactive_mean(x: np.ndarray, y: np.ndarray, q_values: np.ndarray, mean_type: str, grid_size: int = 10): + # … přepínání typu průměru + # Compute mean of whole array and use it as default value. + if mean_type == "Aritmetický": default_val = MeanCalculator.arithmetic(q_values) + elif mean_type == "Geometrický": default_val = MeanCalculator.geometric(q_values) + else: default_val = MeanCalculator.harmonic(q_values) + + A_mean = np.full((grid_size, grid_size), default_val) + dx = 1.0 / grid_size + dy = 1.0 / grid_size + + for i in range(grid_size): + for j in range(grid_size): + # Booleovská maska pro výběr bodů, které spadají přesně do aktuálního čtverce (okna) + mask = (x >= i*dx) & (x < (i+1)*dx) & (y >= j*dy) & (y < (j+1)*dy) + q_in_window = q_values[mask] + + # Výpočet vybraného typu průměru pouze pokud jsou ve čtverci body + if len(q_in_window) > 0: + if mean_type == "Aritmetický": + A_mean[j, i] = MeanCalculator.arithmetic(q_in_window) + elif mean_type == "Geometrický": + A_mean[j, i] = MeanCalculator.geometric(q_in_window) + elif mean_type == "Harmonický": + A_mean[j, i] = MeanCalculator.harmonic(q_in_window) + + da = xr.DataArray(A_mean, coords=[('y', np.arange(grid_size)), ('x', np.arange(grid_size))], name=mean_type) + return da.hvplot.image(x='x', y='y', cmap='viridis', title=f"Typ průměru: {mean_type}", width=500, height=400) -def multiscale_analysis(x, y, q_a_vals, q_b_vals): +def plot_wasserstein_distance(x: np.ndarray, y: np.ndarray, q_a_vals: np.ndarray, q_b_vals: np.ndarray, grid_size: int = 10): + # 1D samples distance (even for different sizes): scipy.stats.wasserstein_distance; use p=1, where fast sweep algorithm is possible. Numerical comparison of tho set of samples within a single cell. + # Compute mean of whole array and use it as default value. + + # Výpočet Wassersteinovy vzdálenosti (Earth Mover's Distance) + default_val = wasserstein_distance(q_a_vals.flatten(), q_b_vals.flatten()) + w_map = np.full((grid_size, grid_size), default_val) + dx = 1.0 / grid_size + dy = 1.0 / grid_size + + for i in range(grid_size): + for j in range(grid_size): + mask = (x >= i*dx) & (x < (i+1)*dx) & (y >= j*dy) & (y < (j+1)*dy) + + if np.sum(mask) > 0: + qa_w = q_a_vals[mask, :].flatten() + qb_w = q_b_vals[mask, :].flatten() + w_map[j, i] = wasserstein_distance(qa_w, qb_w) + + da = xr.DataArray(w_map, coords=[('y', np.arange(grid_size)), ('x', np.arange(grid_size))], name='Wasserstein') + return da.hvplot.image(x='x', y='y', cmap='plasma', title="Wasserstein distance", width=500, height=400) + +def plot_geom_and_log(x: np.ndarray, y: np.ndarray, q_values: np.ndarray, grid_size: int = 10): + # Porovnání průměrů -> geometrický průměr + logaritmus průměru do grafu + # Compute mean of whole array and use it as default value. + default_geom = MeanCalculator.geometric(q_values) + default_log = np.log(MeanCalculator.arithmetic(q_values) + 1e-10) + + A_geom = np.full((grid_size, grid_size), default_geom) + A_log = np.full((grid_size, grid_size), default_log) + + dx = 1.0 / grid_size + dy = 1.0 / grid_size + + for i in range(grid_size): + for j in range(grid_size): + mask = (x >= i*dx) & (x < (i+1)*dx) & (y >= j*dy) & (y < (j+1)*dy) + q_in_window = q_values[mask] + + if len(q_in_window) > 0: + A_geom[j, i] = MeanCalculator.geometric(q_in_window) + A_log[j, i] = np.log(MeanCalculator.arithmetic(q_in_window) + 1e-10) + + da_geom = xr.DataArray(A_geom, coords=[('y', np.arange(grid_size)), ('x', np.arange(grid_size))], name='Geometricky') + da_log = xr.DataArray(A_log, coords=[('y', np.arange(grid_size)), ('x', np.arange(grid_size))], name='Logaritmus') + + plot1 = da_geom.hvplot.image(x='x', y='y', cmap='viridis', title="Geometrický průměr", width=400, height=350) + plot2 = da_log.hvplot.image(x='x', y='y', cmap='viridis', title="Logaritmus průměru", width=400, height=350) + + return plot1 + plot2 + +def multiscale_analysis(x: np.ndarray, y: np.ndarray, q_a_vals: np.ndarray, q_b_vals: np.ndarray) -> Tuple[plt.Figure, plt.Figure]: grid_sizes = [2, 4, 8] p_values_all = {'arith': [], 'geom': [], 'harm': []} fig1, axes1 = plt.subplots(len(grid_sizes), 3, figsize=(15, 5 * len(grid_sizes))) + + # Compute mean of whole array and use it as default value. + default_p_val = 1.0 for idx, gs in enumerate(grid_sizes): dx = 1.0 / gs dy = 1.0 / gs - p_map_arith = np.full((gs, gs), np.nan) - p_map_geom = np.full((gs, gs), np.nan) - p_map_harm = np.full((gs, gs), np.nan) + p_map_arith = np.full((gs, gs), default_p_val) + p_map_geom = np.full((gs, gs), default_p_val) + p_map_harm = np.full((gs, gs), default_p_val) for i in range(gs): for j in range(gs): @@ -105,7 +272,8 @@ def multiscale_analysis(x, y, q_a_vals, q_b_vals): p_values_all['arith'].append(np.nanmean(p_map_arith)) p_values_all['geom'].append(np.nanmean(p_map_geom)) p_values_all['harm'].append(np.nanmean(p_map_harm)) - + + # Vykreslení multi-scale map pro každou velikost mřížky im_a = axes1[idx, 0].imshow(p_map_arith, origin='lower', extent=[0, 1, 0, 1], cmap='RdYlGn', vmin=0, vmax=1) axes1[idx, 0].set_ylabel(f"Mřížka {gs}x{gs}") if idx == 0: axes1[idx, 0].set_title("Aritmetický") @@ -126,10 +294,6 @@ def multiscale_analysis(x, y, q_a_vals, q_b_vals): ax2.plot(grid_sizes, p_values_all['geom'], marker='s', label='Geometrický') ax2.plot(grid_sizes, p_values_all['harm'], marker='^', label='Harmonický') ax2.set_xticks(grid_sizes) - ax2.set_xlabel("Velikost okna (počet dělení)") - ax2.set_ylabel("Průměrná p-hodnota") - ax2.set_title("Závislost p-hodnoty na velikosti okna") plt.legend() - return fig1, fig2 - + return fig1, fig2 \ No newline at end of file diff --git a/vizualizace-xarray/generovani.py b/vizualizace-xarray/generovani.py index eb846cf..e74cd3d 100644 --- a/vizualizace-xarray/generovani.py +++ b/vizualizace-xarray/generovani.py @@ -3,14 +3,11 @@ __generated_with = "0.20.2" app = marimo.App(width="medium") - @app.cell(hide_code=True) def _(): import marimo as mo - return (mo,) - @app.cell(hide_code=True) def _(mo): mo.md(""" @@ -25,26 +22,27 @@ def _(mo): """) return - -@app.cell(hide_code=True) +@app.cell def _(): import xarray as xr import numpy as np import analyza - n_points = 1000 + n_points = 10000 n_samples_A = 10 n_samples_B = 20 n_dim = 2 + # Nastudovat xarray, mock data: X náhodné body na čtverci X_data = np.random.rand(n_dim, n_points) - + # Testovací pole = 10**( np.random.normal(N, loc = -10, scale=3)) QA_data = 10**(np.random.normal(loc=-10, scale=3, size=(n_points, n_samples_A))) - + # případně zvětšit scale pro větší rozdíl průměrů polí QB_data = 10**(np.random.normal(loc=-10, scale=5, size=(n_points, n_samples_B))) + # Definujte vstupní formát náhodných polí (knihovna xarray) ds = xr.Dataset( data_vars={ "X": (("i_dim", "i_point"), X_data), @@ -58,65 +56,120 @@ def _(): "i_dim": ["x", "y"] } ) - return analyza, ds - - -@app.cell(hide_code=True) + + # Přidat attribute s pojmenováním porovnávaných veličin. + ds["QA"].attrs["long_name"] = "Porovnávaná veličina A" + ds["QB"].attrs["long_name"] = "Porovnávaná veličina B" + + return analyza, ds, np, xr + +@app.cell def _(ds): # Průměry pro každé ze dvou polí. Průměry do samostatné buňky. mean_QA = ds["QA"].mean(dim="i_sample_A") mean_QB = ds["QB"].mean(dim="i_sample_B") - return mean_QA, mean_QB - + + var_QA = ds["QA"].var(dim="i_sample_A") + var_QB = ds["QB"].var(dim="i_sample_B") + return mean_QA, mean_QB, var_QA, var_QB -@app.cell(hide_code=True) +@app.cell def _(analyza, ds, mean_QA, mean_QB): + # Vytvořte grafy pro porovnání základních statistik dvou polí grid_mean_A, x_edges, y_edges = analyza.bin_single_field(ds["X"], mean_QA) grid_mean_B, _, _ = analyza.bin_single_field(ds["X"], mean_QB) - + fig_means = analyza.plot_means_two_columns(x_edges, y_edges, grid_mean_A, grid_mean_B) fig_means - return x_edges, y_edges + return fig_means, grid_mean_A, grid_mean_B, x_edges, y_edges + +@app.cell +def _(analyza, ds, var_QA, var_QB, x_edges, y_edges): + grid_var_A, _, _ = analyza.bin_single_field(ds["X"], var_QA) + grid_var_B, _, _ = analyza.bin_single_field(ds["X"], var_QB) + + fig_vars = analyza.plot_variances_two_columns(x_edges, y_edges, grid_var_A, grid_var_B) + fig_vars + return fig_vars, + +@app.cell +def _(analyza, ds): + fig_hist = analyza.plot_histograms(ds["QA"].values, ds["QB"].values) + fig_hist + return fig_hist, +@app.cell +def _(analyza, ds): + x_vals = ds["X"].sel(i_dim="x").values + y_vals = ds["X"].sel(i_dim="y").values + + fig_counts = analyza.plot_point_counts(x_vals, y_vals) + fig_counts + return fig_counts, x_vals, y_vals -@app.cell(hide_code=True) +@app.cell def _(analyza, ds): grid_A, _, _ = analyza.bin_all_samples(ds["X"], ds["QA"]) grid_B, _, _ = analyza.bin_all_samples(ds["X"], ds["QB"]) - + t_stat, p_value = analyza.perform_ttest(grid_A, grid_B) - return p_value, t_stat - + return grid_A, grid_B, p_value, t_stat -@app.cell(hide_code=True) +@app.cell def _(analyza, p_value, t_stat, x_edges, y_edges): fig_test = analyza.plot_ttest_results(x_edges, y_edges, t_stat, p_value) fig_test - return - + return fig_test, -@app.cell(hide_code=True) -def _(analyza, ds): - x_vals = ds["X"].sel(i_dim="x").values - y_vals = ds["X"].sel(i_dim="y").values +@app.cell +def _(mo): + # .. zkusit vyrobit interaktivní přepínání velikosti průměrovacích buňek + # … přepínání typu průměru + grid_slider = mo.ui.slider(start=2, stop=30, step=2, value=10, label="Velikost buněk (interaktivní)") + mean_dropdown = mo.ui.dropdown(options=["Aritmetický", "Geometrický", "Harmonický"], value="Aritmetický", label="Typ průměru") + return grid_slider, mean_dropdown + +@app.cell +def _(grid_slider, mean_dropdown, mo): + ui_controls = mo.vstack([grid_slider, mean_dropdown]) + ui_controls + return ui_controls, + +@app.cell +def _(analyza, mean_QA, grid_slider, mean_dropdown, x_vals, y_vals): + fig_interactive_mean = analyza.plot_interactive_mean(x_vals, y_vals, mean_QA.values, mean_type=mean_dropdown.value, grid_size=grid_slider.value) + fig_interactive_mean + return fig_interactive_mean, + +@app.cell +def _(analyza, mean_QA, grid_slider, x_vals, y_vals): + fig_geom_log = analyza.plot_geom_and_log(x_vals, y_vals, mean_QA.values, grid_size=grid_slider.value) + fig_geom_log + return fig_geom_log, + +@app.cell +def _(analyza, ds, grid_slider, x_vals, y_vals): q_a_vals = ds["QA"].values q_b_vals = ds["QB"].values - - fig_multiscale_maps, fig_multiscale_line = analyza.multiscale_analysis(x_vals, y_vals, q_a_vals, q_b_vals) - return fig_multiscale_line, fig_multiscale_maps - - -@app.cell(hide_code=True) -def _(fig_multiscale_maps): - fig_multiscale_maps + + fig_wasserstein = analyza.plot_wasserstein_distance(x_vals, y_vals, q_a_vals, q_b_vals, grid_size=grid_slider.value) + fig_wasserstein + return fig_wasserstein, q_a_vals, q_b_vals + +@app.cell +def _(analyza, q_a_vals, q_b_vals, x_vals, y_vals): + fig_multi_maps, fig_multi_line = analyza.multiscale_analysis(x_vals, y_vals, q_a_vals, q_b_vals) + return fig_multi_line, fig_multi_maps + +@app.cell +def _(fig_multi_maps): + fig_multi_maps return - -@app.cell(hide_code=True) -def _(fig_multiscale_line): - fig_multiscale_line +@app.cell +def _(fig_multi_line): + fig_multi_line return - if __name__ == "__main__": - app.run() + app.run() \ No newline at end of file