From a8284af443bce6e16def3fbafce93850c77cf12d Mon Sep 17 00:00:00 2001 From: Abdelaziz Bouzidi Date: Thu, 15 Jun 2023 15:53:26 +0200 Subject: [PATCH 1/2] Bug fixs --- evaluation_toolkit/README.md | 33 - .../evaluation_toolkit/__init__.py | 0 .../evaluation_toolkit/depth_evaluation.py | 564 ------------------ .../evaluation_toolkit/inference_toolkit.py | 328 ---------- evaluation_toolkit/setup.py | 33 - generate_sky_masks.py | 3 +- main_pipeline_no_lidar.py | 1 + pcl_util/pointcloud_subsampler.h | 2 +- 8 files changed, 3 insertions(+), 961 deletions(-) delete mode 100644 evaluation_toolkit/README.md delete mode 100644 evaluation_toolkit/evaluation_toolkit/__init__.py delete mode 100644 evaluation_toolkit/evaluation_toolkit/depth_evaluation.py delete mode 100644 evaluation_toolkit/evaluation_toolkit/inference_toolkit.py delete mode 100644 evaluation_toolkit/setup.py diff --git a/evaluation_toolkit/README.md b/evaluation_toolkit/README.md deleted file mode 100644 index 74301c8..0000000 --- a/evaluation_toolkit/README.md +++ /dev/null @@ -1,33 +0,0 @@ -# Evaluation Toolkit - -Set of tools to run a particular algorithm on a dataset constructed with the validation set constructor, and evaluate it, along with advanced statistics regarding depth value and pixel position in image with respect to flight path vector. - -## Inference Example - -Get the last frame and a previous frame such that the displacement magnitude is as close to 30cm as possible, with the condition of having a rotation of less that 1 radian. Each frame is preprocessed so that it is of shape `[C, H, W]` and with a range `[0, 1]` instead of `[0, 255]`. 
- -```python -from evaluation_toolkit import inferenceFramework - -engine = inferenceFramework(dataset_root, evaluation_list, lambda x: x.transpose(2, 0, 1).astype(np.float32)[None]/255) - -for sample in tqdm(engine): - latest_frame, latest_intrinsics, _ = sample.get_frame() - previous_frame, previous_intrinsics, previous_pose = sample.get_previous_frame(displacement=0.3) - estimated_depth_map = my_model(latest_frame, previous_frame, previous_pose) - engine.finish_frame(estimated_depth_map) -mean_inference_time, output_depth_maps = engine.finalize(output_path='output.npz') -``` - -You can find an example usage of this Inference Framework for SfmLearner [here](https://github.com/ClementPinard/SfmLearner-Pytorch/tree/validation_set_constructor) - -## Evaluation - -The evaluation step is a simple script that takes into input the computed depth maps (here in the file `output.npz`). You can combine multiple computed depth maps to compare algorithms. - -``` -depth_evaluation --dataset_root /path/to/dataset/root --est_depth output1.npz output2.npz --algorithm_names name1 name2 --evaluation_list_path /path/to/evaluation_list.txt --flight_path_vector_list /path/to/fligt_path_vector_list.txt <--scale_invariant> <--mask_path /path/to/mask.npy> --output_figures /path/to/figures/folder -``` - -It will output typical metrics and plot advanced statistics regarding the dataset and the depth estimations. -Note that if you want to save the figures, you will need `xelatex` installed in your system. 
Otherwise, don't specify a parameter to `--output_figures` and it will use `plt.show` \ No newline at end of file diff --git a/evaluation_toolkit/evaluation_toolkit/__init__.py b/evaluation_toolkit/evaluation_toolkit/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/evaluation_toolkit/evaluation_toolkit/depth_evaluation.py b/evaluation_toolkit/evaluation_toolkit/depth_evaluation.py deleted file mode 100644 index 5040abb..0000000 --- a/evaluation_toolkit/evaluation_toolkit/depth_evaluation.py +++ /dev/null @@ -1,564 +0,0 @@ -from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter -from path import Path -import numpy as np -import pandas as pd -import matplotlib -import matplotlib.pyplot as plt -from mpl_toolkits.axes_grid1 import make_axes_locatable -from matplotlib.colors import LinearSegmentedColormap -from matplotlib import cm -from tqdm import tqdm -from imageio import imwrite - -parser = ArgumentParser( - description="Evaluate depth maps with respect to ground truth depth and FPV position", - formatter_class=ArgumentDefaultsHelpFormatter, -) - -parser.add_argument("--dataset_root", metavar="DIR", type=Path) -parser.add_argument( - "--est_depth", - metavar="PATH", - type=Path, - nargs='+', - help="where the estimated depth maps are stored, must be a 3D npz file", -) -parser.add_argument( - "--algorithm_names", - "--names", - metavar="STR", - type=str, - nargs='+', - help="name of the algorithms corresponding to estimated depth arrays") - -parser.add_argument( - "--evaluation_list_path", - "--eval", - metavar="PATH", - type=Path, - help="File with list of images to test for depth evaluation", -) -parser.add_argument( - "--flight_path_vector_list", - "--fpv", - metavar="PATH", - type=Path, - help="File with list of speed vectors, used to compute error wrt direction", -) -parser.add_argument( - "--scale_invariant", - action="store_true", - help="If selected, will rescale depth map with ratio of medians", -) -parser.add_argument( - 
"--min_depth", - metavar="D", - default=1e-2, - type=float, - help="threshold below which GT is discarded", -) -parser.add_argument( - "--max_depth", - metavar="D", - default=250, - type=float, - help="threshold above which GT is discarded", -) -parser.add_argument( - "--depth_mask", - metavar="PATH", - default=None, - type=Path, - help="path to boolean numpy array. Should be the same size as ground truth. " - "False value will discard the corresponding pixel location for every ground truth", -) - -parser.add_argument( - "--output_figures", - metavar="DIR", - default=None, - type=Path, - help="where to save the figures, in pgf format. If not set, will show them with plt.show()" -) - -parser.add_argument( - "--output_samples", - type=int, - default=0, - metavar='N', - help="Outputs N Gt and estimation vizualisation sampels" -) - -coords = None - - -def get_values( - gt_depth, estim_depth, fpv, scale_invariant=False, mask=None, min_depth=1e-2, max_depth=250 -): - """Given a depth maps and depth estimation, return a table of all valid depth points with - additional metadata - - Args: - gt_depth (np.array): ground truth depth computed by RDC - estim_depth (np.array): Depth estimated with inference toolkit - fpv (np.array): array of 2 floats, representing the fpv coordinates, in pixels - scale_invariant (bool, optional): If set to True, will multiply estimated depth with - ratio between medians. This is representative of how - depth was evaluated in Eigen et al. - mask (np.array, optional): Boolean array of same shape as depth maps. Discard from evaluation - image points where mask[u,v] == False - min_depth (float, optional): Minimal depth below which ground truth is discarded and estimation is clipped.* - Defaults to 1e-2. - max_depth (float, optional): Maximal depth above which ground truth is discarded estimation is clipped. - Defaults to 250. 
- - Returns: - [type]: [description] - """ - global coords - if coords is None: - coords = np.stack( - np.meshgrid(np.arange(gt_depth.shape[1]), np.arange(gt_depth.shape[0])), axis=-1 - ) - - # TODO : For now, fpv distance is given in pixel distance. - # A more accurate way would be to use angular distance. - fpv_dist = np.linalg.norm(coords - fpv, axis=-1) - estim_depth = np.clip(estim_depth, min_depth, max_depth) - valid = (gt_depth > min_depth) & (gt_depth < max_depth) - if mask is not None: - valid = valid & mask - if valid.sum() == 0: - return - valid_gt, valid_estim = gt_depth[valid], estim_depth[valid] - if scale_invariant: - valid_estim = valid_estim * np.median(valid_gt) / np.median(valid_estim) - fpv_dist = fpv_dist[valid] - valid_coords = coords[valid] - values = np.stack([valid_gt, valid_estim, *valid_coords.T, fpv_dist], axis=-1) - - return pd.DataFrame(values, columns=["GT", "estim", "x", "y", "fpv_dist"]) - - -def plot_distribution(values, bins, ax, label=None, log_bins=False): - """Distribution plotting function, will plot with lines instead of bars - - Args: - values: histogram to plot - bins: Corresponding bins delimitting histogram values - ax: Matplotlib ax to plot on - label (str): Plot label name. Defaults to None. - log_bins (bool): If set to True, will set the scale to log. - Useful for Mean Log Error. Defaults to False. 
- """ - bin_dists = bins[1:] - bins[:-1] - total = sum(bin_dists) - normalized_values = (values / sum(values)) * bin_dists / total - bin_centers = 0.5 * (bins[1:] + bins[:-1]) - if log_bins: - bin_centers = np.exp(bin_centers) - ax.plot(bin_centers, normalized_values, label=label) - if log_bins: - ax.set_xscale("log") - - -def group_quantiles(df, to_group, columns, quantiles=[0.25, 0.5, 0.75]): - if isinstance(columns, str): - columns = [columns] - grouped_df = df.groupby(by=np.round(df[to_group])) - return grouped_df[columns].quantile(quantiles).unstack() - - -def error_map(error_per_px): - """Compute Image with pixelwise mean depth error - - Args: - error_per_px (pd.Series): Table of errors with pixels as index - - Returns: - np.array: Array with the shape of the image, with mean error at each pixel - """ - x, y = np.stack(error_per_px.index.values, axis=-1).astype(int) - error_map = np.full((int(x.max() + 1), int(y.max() + 1)), np.NaN) - error_map[x, y] = error_per_px.values - return error_map - - -def error_metrics(df, algo_name, suffix=''): - """Compute error metrics from a dataframe. - - Args: - df (pd.DataFrame): Table containing the metrics we are interested in. It can be - constructed with a groupby to have mean computed over a particular value insead of a global mean. - algo_name (str): Algorithm for which we compute the metrics - suffix (str, optional): Precision for the particular metric we are computing, - depending on how the dataframe was constructed and grouped by. Defaults to ''. - """ - error_names = ["AbsDiff", "AbsRel", "AbsLog", "StdDiff", "StdRel", "StdLog", "a1", "a2", "a3"] - errors = [ - df["absdiff"].mean(), - df["reldiff"].mean(), - df["abslogdiff"].mean(), - np.sqrt(df["absdiff2"].mean()), - np.sqrt(df["reldiff2"].mean()), - np.sqrt(df["logdiff2"].mean()), - df["a1"].mean(), - df["a2"].mean(), - df["a3"].mean(), - ] - - # Print the results - # TODO : save the result in latex tab format ? 
- print("Results for usual metrics for algorithm {}, {}".format(algo_name, suffix)) - print( - "{:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}".format( - *error_names - ) - ) - print( - "{:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}".format( - *errors - ) - ) - - -def viz_depth(depth, max_depth): - """Convert depth to a colored vizualisation. Infinity is black - - Args: - depth (np.array): 2D array of depth values - max_depth (float): max_depth will correspond to the end of the colormap spectrum. - Every value above this will be the same color, expect infinity which will be black. - - Returns: - np.array: np.uint8 array of colorized depth, ready to be saved - """ - opencv_rainbow_data = ( - (0.000, (1.00, 0.00, 0.00)), - (0.400, (1.00, 1.00, 0.00)), - (0.600, (0.00, 1.00, 0.00)), - (0.800, (0.00, 0.00, 1.00)), - (1.000, (0.60, 0.00, 1.00)) - ) - rainbow_cmap = LinearSegmentedColormap.from_list('opencv_rainbow', opencv_rainbow_data, 1000) - bone = cm.get_cmap('bone', 10000) - depth_norm = depth / max_depth - depth_viz = rainbow_cmap(depth_norm, bytes=True)[..., :3] - depth_viz[depth == np.inf] = 0 - return depth_viz - - -def visualize_sample(img_path, gt_path, estimations, algo_names, max_depth, output_folder): - """Visualize a sample and save to output_folder. 
- A sample consists in the image, the ground truth depth, and the different estimations - - Args: - img_path (Path): Where to load the image - gt_path (Path): Where to load the Ground truth depth map (usually same name as img but with npy extension) - estimations (List[np.array]): List of the estimations from all the algos we are testing - algo_names (List[str]): List of algorithm names, corresponding to the estimations given above - max_depth (float): depth saturation value above which every thing will be the same color - output_folder (Path): Where to save all the different vizualisations - """ - img_path.copy(output_folder) - img_name = img_path.stem - - gt_depth = np.load(gt_path) - max_gt = np.max(gt_depth[gt_depth < np.inf]) - max_depth = min(max_gt, max_depth) - imwrite(output_folder / "{}_GT.png".format(img_name), viz_depth(gt_depth, max_depth)) - for n, e in zip(algo_names, estimations): - imwrite(output_folder / "{}_{}.png".format(img_name, n), viz_depth(e, max_depth)) - - -def main(): - args = parser.parse_args() - assert (len(args.est_depth) == len(args.algorithm_names)) - - if args.output_figures is not None: - matplotlib.use("pgf") - pgf_with_xelatex = { - 'text.usetex': True, - "pgf.texsystem": "xelatex", - "pgf.preamble": r"\usepackage{amssymb} " - r"\usepackage{amsmath} " - r"\usepackage{fontspec} " - r"\usepackage{unicode-math}" - } - # Change to pgf if needed - savefig_ext = "pdf" - matplotlib.rcParams.update(pgf_with_xelatex) - - with open(args.evaluation_list_path, "r") as f: - test_img_path = [line[:-1] for line in f.readlines()] - fpv_list = np.loadtxt(args.flight_path_vector_list) - dataframes = {} - - if args.output_samples > 0 and args.output_figures is not None: - np.random.seed(1) - to_sample = np.random.choice(len(test_img_path), args.output_samples) - for i in to_sample: - estimated_depth_maps = [] - img_path = test_img_path[i] - for p in args.est_depth: - depth = np.load(p, allow_pickle=True)[img_path] - 
estimated_depth_maps.append(depth) - visualize_sample(args.dataset_root / img_path, - (args.dataset_root / img_path).stripext() + ".npy", - estimated_depth_maps, - args.algorithm_names, - args.max_depth, - args.output_figures) - - for p, name in zip(args.est_depth, args.algorithm_names): - estimated_depth = np.load(p, allow_pickle=True) - values_df = [] - assert len(test_img_path) == len(estimated_depth) - if args.depth_mask is not None: - mask = np.load(args.depth_mask) - else: - mask = None - - # Load each GT-estimation pair and extract data in a pandas dataframe - # values_df is at first a list of dataframes which we then concatenate - print("getting results for {} algorithm (file : {})".format(name, p)) - for filepath, fpv in tqdm(zip(test_img_path, fpv_list), total=len(fpv_list)): - GT = np.load((args.dataset_root / filepath).stripext() + ".npy") - new_values = get_values( - GT, - estimated_depth[filepath], - fpv, - args.scale_invariant, - mask, - args.min_depth, - args.max_depth, - ) - if new_values is not None: - values_df.append(new_values) - values_df = pd.concat(values_df) - - # Additional values to the Dataframe - # Note that no mean is computed here, each row in the dataframe is ONE pixel - # The dataframe is thus potentially thousands rows long - values_df["log_GT"] = np.log(values_df["GT"]) - values_df["log_estim"] = np.log(values_df["estim"]) - values_df["diff"] = values_df["estim"] - values_df["GT"] - values_df["absdiff"] = values_df["diff"].abs() - values_df["absdiff2"] = np.power(values_df["diff"], 2) - values_df["reldiff"] = values_df["absdiff"] / values_df["GT"] - values_df["reldiff2"] = np.power(values_df["reldiff"], 2) - values_df["logdiff"] = values_df["log_estim"] - values_df["log_GT"] - values_df["logdiff2"] = np.power(values_df["logdiff"], 2) - values_df["abslogdiff"] = values_df["logdiff"].abs() - values_df["a1"] = (values_df["abslogdiff"] < np.log(1.25)).astype(float) - values_df["a2"] = (values_df["abslogdiff"] < 2 * 
np.log(1.25)).astype(float) - values_df["a3"] = (values_df["abslogdiff"] < 3 * np.log(1.25)).astype(float) - dataframes[name] = values_df - - for name, df in dataframes.items(): - print() - print("---------------------------") - print("Results for {}".format(name)) - print("---------------------------") - print() - # Compute mean erros, a la Eigen et al. - error_metrics(df, name, "averaged over all points") - # Get mean values per ground truth values, and then mean them - # This way, we have the same weight for each ground truth value - values_df_per_gt = df.groupby(by=np.round(values_df["GT"])).mean() - error_metrics(values_df_per_gt, name, "averaged over gt values") - values_df_per_log_gt = df.groupby(by=0.1 * np.round(10 * values_df["log_GT"])).mean() - error_metrics(values_df_per_log_gt, name, "averaged over log(gt) values") - - # TODO better handling of the parameters, maybe just add it to argparse - plot = True - n_bins = 4 - if plot: - # COMPUTING HISTOGRAMS - - # GT-wise difference with estimation distributions. 
- # Useful to see if we perform well - # in the depth range we are actually interested in - # Construct the bins for GT-wise error - # You can see this as 3D histogram - # Note that if the assumptions of gaussian difference - # for log values, the log_normal error should be roughly - # the same for all bins - - min_gt = values_df["GT"].min() - max_gt = values_df["GT"].max() - bins = np.linspace(min_gt, max_gt, n_bins + 1) - - histograms = {} - for name, df in dataframes.items(): - histograms[name] = {} - estim_per_GT = {} - for b1, b2 in zip(bins[:-1], bins[1:]): - per_gt = df[(df["GT"] > b1) & (df["GT"] < b2)] - estim_per_GT[(b1 + b2) / 2] = { - "normal": np.histogram(per_gt["diff"], bins=100), - "log_normal": np.histogram(per_gt["logdiff"], bins=100), - "bins": [b1, b2], - } - histograms[name]["estim_per_GT"] = estim_per_GT - - # Global histograms - # Same as above, but with one bin, and thus not GT-wise - - histograms[name]["global_diff"] = np.histogram(df["estim"] - df["GT"], bins=100) - histograms[name]["global_log_diff"] = np.histogram(df["log_estim"] - df["log_GT"], bins=100) - - # Depth error per pixel - # Useful to identify if a region in the screen is particualrly faulty. - # Can help spot dataset inconsistency (eg sky is always in the same place) - # Can also help find calibration artefacts ? - - metric_per_px = df.groupby(by=["x", "y"]).mean() - histograms[name]["mean_diff_per_px"] = error_map(metric_per_px["absdiff"]) - histograms[name]["mean_log_diff_per_px"] = error_map(metric_per_px["logdiff"]) - - # Depth error wrt pixelwise distance to FPV. For SFM, the closer we are to FPV, - # The harde it is to deduce depth. But in the same time, the more - # usefule depth becomes, because it indicates distances of obstacles where - # we are headed to. - - # Note : if fpv is too far, it means it is not on the image - # And thus this metric is not really interesting. 
- histograms[name]["quantiles_per_fpv"] = group_quantiles( - df[df["fpv_dist"] < 1000], "fpv_dist", ["absdiff", "abslogdiff"] - ) - histograms[name]["quantiles_per_gt"] = group_quantiles(df, "GT", ["absdiff", "abslogdiff"]) - histograms[name]["quantiles_per_estimation"] = group_quantiles(df, "estim", ["absdiff", "abslogdiff"]) - - # PLOTTING - colors = plt.rcParams['axes.prop_cycle'].by_key()['color'] - # First plot, general insight for dataset - fig1, axes = plt.subplots(2, 1, sharex=True) - GT_distrib = None - for name, df in dataframes.items(): - if GT_distrib is None: - GT_distrib = np.histogram(df["GT"], bins=100) - plot_distribution(*GT_distrib, axes[0], label="groundtruth depth") - estim_distrib = np.histogram(df["estim"], bins=100) - plot_distribution(*estim_distrib, axes[1], label=name) - axes[0].set_title("Ground Truth distribution") - axes[0].legend() - axes[1].set_title("depth estimation distribution from {}".format(name)) - axes[1].legend() - if args.output_figures is not None: - fig1.savefig(args.output_figures / "depth_distrib.{}".format(savefig_ext)) - - # Second plot, GT-wise difference one figure per algorithm - for name, h in histograms.items(): - fig2, axes = plt.subplots(1, 2, sharey=True) - for i, (k, v) in enumerate(h["estim_per_GT"].items()): - plot_distribution( - *v["normal"], axes[0], label="$GT \\in [{:.1f},{:.1f}]$".format(*v["bins"]) - ) - plot_distribution( - *v["log_normal"], - axes[1], - label="$GT \\in [{:.1f},{:.1f}]$".format(*v["bins"]), - log_bins=True - ) - # axes[0, 0].set_title("distribution of estimation around GT = {:.2f}".format(k)) - # axes[0, 1].set_title("distribution of log estimation around log GT = {:.2f}".format(np.log(k))) - axes[0].legend() - axes[0].set_title("GT - estimation difference") - axes[1].legend() - axes[1].set_title("logt GT - log estimation difference") - fig2.tight_layout() - if args.output_figures is not None: - fig2.savefig(args.output_figures / "GTwise_depth_diff_{}.{}".format(name, savefig_ext)) 
- - # Third plot, global diff histogram - fig, axes = plt.subplots(2, 1) - for name, h in histograms.items(): - plot_distribution(*h["global_diff"], axes[0], name) - plot_distribution(*h["global_log_diff"], axes[1], name, log_bins=True) - axes[1].set_title("Global log difference distribution from GT") - axes[0].set_title("Global difference distribution from GT") - axes[1].legend() - axes[0].legend() - plt.tight_layout() - if args.output_figures is not None: - fig.savefig(args.output_figures / "global_depth_diff.{}".format(savefig_ext)) - - def plot_quartile(axes, color, algo_name, df): - index = df.index - diff = df["absdiff"] - logdiff = df["abslogdiff"] - axes[0].fill_between( - index, diff[0.25], diff[0.75], color=c, alpha=0.1 - ) - axes[0].plot(diff[0.5].index, diff[0.5], color=c, label=algo_name) - axes[1].fill_between( - index, logdiff[0.25], logdiff[0.75], color=c, alpha=0.1 - ) - axes[1].plot(logdiff[0.5], label=algo_name) - axes[0].legend() - axes[1].legend() - - # Fourth plot, error wrt distance to fpv - fig, axes = plt.subplots(2, 1, sharex=True) - for c, (name, h) in zip(colors, histograms.items()): - plot_quartile(axes, c, name, h["quantiles_per_fpv"]) - axes[0].set_title("Error wrt to distance to fpv (in px)") - axes[1].set_title("Log error wrt to distance to fpv (in px)") - axes[1].set_yscale('log') - axes[1].set_xlabel("Distance to flight path vector (in px)") - plt.tight_layout() - if args.output_figures is not None: - fig.savefig(args.output_figures / "fpv_error_quantiles.{}".format(savefig_ext)) - - # Fifth plot, another way of plotting GT-wise error: - # For each GT depth, we show 3 points : median, and 50% confidence intervale (2 points) - # We have less info than the full histogram but we can show more GT values - fig, axes = plt.subplots(2, 1, sharex=True) - for c, (name, h) in zip(colors, histograms.items()): - plot_quartile(axes, c, name, h["quantiles_per_gt"]) - axes[0].set_title("Error wrt to groundtruth depth") - axes[1].set_title("Log 
error wrt to groundtruth depth") - axes[1].set_yscale('log') - axes[1].set_xlabel("Estimated depth (in meters)") - plt.tight_layout() - if args.output_figures is not None: - fig.savefig(args.output_figures / "gt_error_quantiles.{}".format(savefig_ext)) - - # Last plot, error with respect to estimated depth - - fig, axes = plt.subplots(2, 1, sharex=True) - for c, (name, h) in zip(colors, histograms.items()): - plot_quartile(axes, c, name, h["quantiles_per_estimation"]) - axes[0].set_title("Error wrt to estimated depth") - axes[1].set_title("Log error wrt to estimated depth") - axes[1].set_yscale('log') - axes[1].set_xlabel("Estimated depth (in meters)") - plt.tight_layout() - if args.output_figures is not None: - fig.savefig(args.output_figures / "est_error_quantiles.{}".format(savefig_ext)) - - # Last plot, pixelwise error - for name, h in histograms.items(): - fig, axes = plt.subplots(2, 1) - pl = axes[0].imshow(h["mean_diff_per_px"].T) - axes[0].set_title("Mean error for each pixel") - divider = make_axes_locatable(axes[0]) - cax = divider.append_axes("right", size="5%", pad=0.05) - cbar = fig.colorbar(pl, cax=cax) - cbar.ax.tick_params(axis="y", direction="in") - pl = axes[1].imshow(h["mean_log_diff_per_px"].T) - axes[1].set_title("Mean Log error for each pixel") - divider = make_axes_locatable(axes[1]) - cax = divider.append_axes("right", size="5%", pad=0.05) - cbar = fig.colorbar(pl, cax=cax) - cbar.ax.tick_params(axis="y", direction="in") - plt.tight_layout() - if args.output_figures is not None: - fig.savefig(args.output_figures / "pixel_error_map_{}.{}".format(name, savefig_ext)) - if args.output_figures is None: - plt.show() - - -if __name__ == "__main__": - main() diff --git a/evaluation_toolkit/evaluation_toolkit/inference_toolkit.py b/evaluation_toolkit/evaluation_toolkit/inference_toolkit.py deleted file mode 100644 index 831fc13..0000000 --- a/evaluation_toolkit/evaluation_toolkit/inference_toolkit.py +++ /dev/null @@ -1,328 +0,0 @@ -import numpy as 
np -from path import Path -from imageio import imread -import time -from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter -from scipy.spatial.transform import Rotation -from tqdm import tqdm - - -class Timer: - """ - Timer class is used to measure elapsed time, while being able to - pause it when needed. This is useful to measure algorithm inference - time without measuring time spent retrieving wanted images - """ - def __init__(self): - self._start_time = None - self._elapsed_time = 0 - - def running(self): - return self._start_time is not None - - def start(self): - """Start a new timer""" - if self._start_time is not None: - return - - self._start_time = time.perf_counter() - - def stop(self): - """Stop the timer, and report the elapsed time""" - if self._start_time is None: - return - - self._elapsed_time += time.perf_counter() - self._start_time - self._start_time = None - - def get_elapsed(self): - return self._elapsed_time - - def reset(self): - self.__init__() - - -class inferenceFramework(object): - """Inference Framework used for simulating navigation conditions - for depth algorithms on a dataset created by RDC. It also comes with a way to measure your inference time - and to record your estimated depths. - The framework is iterable, and each iteration gives an Inference Sample Object from which you can get images - to compute depth on. - - Attributes: - root (Path): Root directory where the Final output of RDC is stored. - - max_shift (float): Max number of frames the algorithm is allowed to search in the past. If the algorithm - eg. wants to get a frame that was at a particular distance from the last frame, with a barely moving camera, - the frame can only be as anterior as {max_shift} frames before, even if it means the movement won't be enough. - - estimated_depth_maps (dict): Dictionnary for estimated depth maps, as numpy arrays. Key is image path - of image on which we estimated depth. 
- - inference_time (List): List of time spent by your algorithm for inference. - Will be used at the end of the evaluation to compute the mean inference time - - frame_transform (function): function which will be used to transform images - before returning them to the algorithm. The function takes a numpy array as - an argument and can return anything your algorithm want, eg. a pytorch tensor. - """ - def __init__(self, root, test_files, max_shift=50, frame_transform=None): - self.root = Path(root) - self.test_files = test_files - self.max_shift = max_shift - self.frame_transform = frame_transform - self.inference_time = [] - self.estimated_depth_maps = {} - - def __getitem__(self, i): - """Get item routine. Before returning the sample, the timer is triggered to measure inference time. - - Args: - i (int): Position of the sample in the test_files list, which has been created with RDC - - Returns: - InferenceSample: Object to compute depth - """ - timer = Timer() - self.i = i - self.current_sample = inferenceSample(self.root, - self.test_files[i], - self.max_shift, - timer, self.frame_transform) - self.current_sample.timer.start() - return self.current_sample - - def finish_frame(self, estimated_depth): - """Finish Frame routine: This method needs to be called each time your algorithm has - finished the depth inference. It also stops the timer and stores the time elapsed for this - sample to compute a mean inference time at the end of the evaluation. - - Args: - estimated_depth (np.array): The output of your depth algorithm. It will then be stored in - a dict, and then saved after when it will be completely populated. 
- - Returns: - float: time elapsed for inference for this sample - """ - self.current_sample.timer.stop() - elapsed = self.current_sample.timer.get_elapsed() - self.inference_time.append(elapsed) - self.estimated_depth_maps[self.current_sample.file] = estimated_depth - return elapsed - - def finalize(self, output_path=None): - """Finalize: this methods needs to be called at the end of the whole evaluation, - when there is no sample left to estimate depth on. - - Args: - output_path (Path, optional): Where to save all the estimated depth. It will - be saved in a compressed numpy file. - - Returns: - (float, dict): Return the mean inference time and the compute depth maps in a dictionnary - """ - if output_path is not None: - np.savez(output_path, **self.estimated_depth_maps) - mean_inference_time = np.mean(self.inference_time) - return mean_inference_time, self.estimated_depth_maps - - def __len__(self): - return len(self.test_files) - - -class inferenceSample(object): - """Inferance Sample class. Is used to get a particular frame with displacement constraints - For example, you can take the last frame (of which you need to compute the depth map), - and then want the frame that was 0.3 meters from the last one to ensure a sufficient parallax - - Attributes: - root (Path): Same as inferenceFramework. Root directory where the Final output of RDC is stored. - - file (Path): image path of image of which we want to estimate depth. - - frame_transform (function) : Same as InferenceFramework. function used to transform loaded image - into the data format of your choice. - - timer (Timer): timer used to measure time spent computing depth. All the frame gathering and transformation - are not taken into account in order to only measure inference time. - - valid_frames (List of Path): Ordered list of frame paths representing the frame sequence that is going - to be used to get the optimal frame pair/set for the algotihm you want to evaluate. 
- The order is descending: last frame is first and oldest frames are last. - - poses (np.array): Array of all the poses of the valid_frames list in the R,T format (3x4 matrix). - They are computed relative to the last frame, and as such, first pose is identity - - rotation_angles (1D np.array): computed from poses, the angle magnitude between last frame and any given frame. - This is useful when you don't want rotation to be too large. - - displacement (1D np.array): compute from poses, displacement magnitude between last frame and any given frame. - Useful when you don't want frames to be too close to each other. - - intrinsics (np.array): Intrinsics for each frame, stored in a 3x3 matrix. - """ - def __init__(self, root, file, max_shift, timer, frame_transform=None): - self.root = root - self.file = file - self.frame_transform = frame_transform - self.timer = timer - full_filepath = self.root / file - scene = full_filepath.parent - # Get all frames in the scene folder. Normally, there should be more than "max_shift" frames. - scene_files = sorted(scene.files("*.jpg")) - poses = np.genfromtxt(scene / "poses.txt").reshape((-1, 3, 4)) - sample_id = scene_files.index(full_filepath) - assert(sample_id >= max_shift) - start_id = sample_id - max_shift - # Get all frames between start_id (oldest frame) and sample_id. 
- # Revert the list so that oldest frames are in the end, like in a buffer - self.valid_frames = scene_files[start_id:sample_id + 1][::-1] - # Flip_ud is equivalent to reverting the row and thus the same as [::-1] - valid_poses = np.flipud(poses[start_id:sample_id + 1]) - # All poses in the sequence should be valid - assert not np.isnan(valid_poses.sum()) - # Change the pose array so that instead of 3x4 matrices, we have 4x4 matrices, which we can invert - last_line = np.broadcast_to(np.array([0, 0, 0, 1]), (valid_poses.shape[0], 1, 4)) - valid_poses_full = np.concatenate([valid_poses, last_line], axis=1) - self.poses = (np.linalg.inv(valid_poses_full[0]) @ valid_poses_full)[:, :3] - R = self.poses[:, :3, :3] - self.rotation_angles = Rotation.from_matrix(R).magnitude() - self.displacements = np.linalg.norm(self.poses[:, :, -1], axis=-1) - - # Case 1 for intrinsics : Zoom level never changed and thus there's only one intrinsics - # matrix for the whole video, stored in intrinsics.txt This is the most usual case - # Case 2 : Each frame has its own intrinsics file _intrinsics.txt - # Case is only here for later compatibility, but it has not been tested thoroughly - if (scene / "intrinsics.txt").isfile(): - self.intrinsics = np.stack([np.genfromtxt(scene / "intrinsics.txt")] * max_shift) - else: - intrinsics_files = [f.stripext() + "_intrinsics.txt" for f in self.valid_frames] - self.intrinsics = np.stack([np.genfromtxt(i) for i in intrinsics_files]) - - def timer_decorator(func, *args, **kwargs): - """ - Decorator used to pause the timer and only restart it when returning the result. - This is used to not penalize the inference algorithm when frame retrieving is slow, - because in real conditions, it's possible you get the wanted frames immediately instead - of searching for them in the memory. 
- """ - def wrapper(self, *args, **kwargs): - if self.timer.running(): - self.timer.stop() - res = func(self, *args, **kwargs) - self.timer.start() - else: - res = func(self, *args, **kwargs) - return res - return wrapper - - @timer_decorator - def get_frame(self, shift=0): - """Basic function to get frame within a fixed shift. When used without parameters, it returns - the sample frame. - - Args: - shift (int, optional): Position relative to sample frame of the frame we want to get. - Defaults to 0. - - Returns a tuple of 3: - [Unknown type]: Output of the frame_transform function, used on the desired frame, loaded in a np array - np.array: 3x3 intrinsics matrix of returned frame - np.array: 3x4 pose matrix of returned frame - """ - file = self.valid_frames[shift] - img = imread(file) - if self.frame_transform is not None: - img = self.frame_transform(img) - return img, self.intrinsics[shift], self.poses[shift] - - @timer_decorator - def get_previous_frame(self, shift=1, displacement=None, max_rot=1): - """More advanced function, to get a frame within shift, displacement and rotation constraints. Timer is paused when this - function is running. - - Args: - shift (int, optional): As above. Position relative to sample frame of the frame we want to get. - Defaults to 1. - - displacement (Float, optional): Desired displacement (in meters) between sample frame and - the frame we want to get. This parameter overwrite the shift parameter. Defaults to None. - - max_rot (int, optional): Maximum Rotation, in radians. The function cannot return a frame - with a higher rotation than max_rot. It assumes rotation is growing with time - (only true for the first frames). The maximum shift of the returned frame corresponds to - the first frame with a rotation above this threshold. Defaults to 1. - - Returns a tuple of 3: - [Unknow type]: Output of the frame_transform function, - used on the frame that best represent the different constrains. 
- np.array: 3x3 intrinsics matrix of returned frame - np.array: 3x4 pose matrix of returned frame - """ - if displacement is not None: - shift = max(1, np.abs(self.displacements - displacement).argmin()) - rot_valid = self.rotation_angles < max_rot - assert sum(rot_valid[1: shift + 1] > 0), "Rotation is always higher than {}".format(max_rot) - # Highest shift that has rotation below max_rot thresold - final_shift = np.where(rot_valid[-1 - shift:])[0][-1] - return self.get_frame(final_shift) - - @timer_decorator - def get_previous_frames(self, shifts=[1], displacements=None, max_rot=1): - """Helper function to get multiple frames at the same time. with the previous function. - - Args: - shifts (List): list of wanted shifts - displacements (List): List of wanted displacements, overwrite shifts - max_rot (int, optional): Maximum Rotation, see previous function - - Returns a tuple of 3: - List: Outputs of the frame_transform function for each desired frame - List: 3x3 intrinsics matrices of returned frames - List: 3x4 pose matrices of returned frames - """ - if displacements is not None: - frames = zip(*[self.get_previous_frame(displacement=d, max_rot=max_rot) for d in displacements]) - else: - frames = zip(*[self.get_previous_frame(shift=s, max_rot=max_rot) for s in shifts]) - return frames - - -def inference_toolkit_example(): - parser = ArgumentParser(description='Example usage of Inference toolkit', - formatter_class=ArgumentDefaultsHelpFormatter) - - parser.add_argument('--dataset_root', metavar='DIR', type=Path) - parser.add_argument('--depth_output', metavar='FILE', type=Path, - help='where to store the estimated depth maps, must be a npy file') - parser.add_argument('--evaluation_list_path', metavar='PATH', type=Path, - help='File with list of images to test for depth evaluation') - parser.add_argument('--scale-invariant', action='store_true', - help='If selected, will rescale depth map with ratio of medians') - args = parser.parse_args() - - with 
open(args.evaluation_list_path) as f: - evaluation_list = [line[:-1] for line in f.readlines()] - - def my_model(frame, previous, pose): - # Mock up function that uses two frames and translation magnitude - # Replace it with your algorithm, eg. DepthNet model - return np.linalg.norm(pose[:, -1]) * np.linalg.norm(frame - previous, axis=-1) - - # This is our transform function. It converts the uint8 array into a float array, - # divides it by 255 to have values in [0,1] and adds the batch dimensions - def my_transform(img): - return img.transpose(2, 0, 1).astype(np.float32)[None] / 255 - - engine = inferenceFramework(args.dataset_root, evaluation_list, my_transform) - for sample in tqdm(engine): - latest_frame, latest_intrinsics, _ = sample.get_frame() - previous_frame, previous_intrinsics, previous_pose = sample.get_previous_frame(displacement=0.3) - engine.finish_frame(my_model(latest_frame, previous_frame, previous_pose)) - - mean_time, _ = engine.finalize(args.depth_output) - print("Mean time per sample : {:.2f}us".format(1e6 * mean_time)) - - -if __name__ == '__main__': - inference_toolkit_example() diff --git a/evaluation_toolkit/setup.py b/evaluation_toolkit/setup.py deleted file mode 100644 index 37a2de1..0000000 --- a/evaluation_toolkit/setup.py +++ /dev/null @@ -1,33 +0,0 @@ -from setuptools import setup - -with open("README.md", "r") as fh: - long_description = fh.read() - -setup(name='inference toolkit', - license='MIT', - author='Clément Pinard', - author_email='clempinard@gmail.com', - description='Inference and evaluation routines to test on a dataset constructed with validation set constructor', - long_description=long_description, - long_description_content_type="text/markdown", - packages=["evaluation_toolkit"], - entry_points={ - 'console_scripts': [ - 'depth_evaluation = evaluation_toolkit.depth_evaluation:main' - ] - }, - install_requires=[ - 'numpy', - 'pandas', - 'path', - 'imageio', - 'scikit-image', - 'scipy', - 'tqdm' - ], - classifiers=[ - 
"Programming Language :: Python :: 3", - "License :: OSI Approved :: MIT License", - "Intended Audience :: Science/Research" - ] - ) diff --git a/generate_sky_masks.py b/generate_sky_masks.py index 6726ad3..6370f30 100644 --- a/generate_sky_masks.py +++ b/generate_sky_masks.py @@ -61,7 +61,6 @@ def extract_sky_mask(network, image_paths, mask_folder): def process_folder(folder_to_process, colmap_img_root, mask_path, pic_ext, verbose=False, batchsize=8, **env): network = prepare_network() folders = [folder_to_process] + list(folder_to_process.walkdirs()) - for folder in folders: mask_folder = mask_path/colmap_img_root.relpathto(folder) @@ -84,7 +83,6 @@ def process_folder(folder_to_process, colmap_img_root, mask_path, pic_ext, verbo del network torch.cuda.empty_cache() - parser = ArgumentParser(description='sky mask generator using ENet trained on cityscapes', formatter_class=ArgumentDefaultsHelpFormatter) @@ -105,3 +103,4 @@ def process_folder(folder_to_process, colmap_img_root, mask_path, pic_ext, verbo file_exts = ['jpg', 'JPG'] process_folder(args.img_dir, args.colmap_img_root, args.mask_root, file_exts, True, args.batchsize) + diff --git a/main_pipeline_no_lidar.py b/main_pipeline_no_lidar.py index 2dc0e62..2ad4f27 100644 --- a/main_pipeline_no_lidar.py +++ b/main_pipeline_no_lidar.py @@ -74,6 +74,7 @@ def main(): if i not in args.skip_step: print_step(i, "First thorough photogrammetry") env["thorough_recon"].makedirs_p() + print(env["video_frame_list_thorough"]) colmap.extract_features(image_list=env["video_frame_list_thorough"], more=args.more_sift_features) colmap.index_images(vocab_tree_output=env["indexed_vocab_tree"], vocab_tree_input=args.vocab_tree) if env["match_method"] == "vocab_tree": diff --git a/pcl_util/pointcloud_subsampler.h b/pcl_util/pointcloud_subsampler.h index 68abcec..f98e0c1 100644 --- a/pcl_util/pointcloud_subsampler.h +++ b/pcl_util/pointcloud_subsampler.h @@ -19,7 +19,7 @@ typename pcl::PointCloud::Ptr filter(typename 
pcl::PointCloud::P pcl::VoxelGrid vox; vox.setLeafSize (resolution, resolution, resolution); - for (auto it = octree.leaf_begin(); it != octree.leaf_end(); ++it) { + for (auto it = octree.leaf_depth_begin(); it != octree.leaf_depth_end(); ++it) { pcl::octree::OctreeContainerPointIndices& container = it.getLeafContainer(); pcl::IndicesPtr indexVector(new std::vector); From 6323021f57a6355f0af2bdc2d364ae7d9e0c6d72 Mon Sep 17 00:00:00 2001 From: Abdelaziz Bouzidi Date: Thu, 15 Jun 2023 17:38:11 +0200 Subject: [PATCH 2/2] Added evaluation toolkit --- evaluation_toolkit/README.md | 33 + .../evaluation_toolkit/__init__.py | 0 .../evaluation_toolkit/depth_evaluation.py | 564 ++++++++++++++++++ .../evaluation_toolkit/inference_toolkit.py | 328 ++++++++++ evaluation_toolkit/setup.py | 33 + 5 files changed, 958 insertions(+) create mode 100644 evaluation_toolkit/README.md create mode 100644 evaluation_toolkit/evaluation_toolkit/__init__.py create mode 100644 evaluation_toolkit/evaluation_toolkit/depth_evaluation.py create mode 100644 evaluation_toolkit/evaluation_toolkit/inference_toolkit.py create mode 100644 evaluation_toolkit/setup.py diff --git a/evaluation_toolkit/README.md b/evaluation_toolkit/README.md new file mode 100644 index 0000000..74301c8 --- /dev/null +++ b/evaluation_toolkit/README.md @@ -0,0 +1,33 @@ +# Evaluation Toolkit + +Set of tools to run a particular algorithm on a dataset constructed with the validation set constructor, and evaluate it, along with advanced statistics regarding depth value and pixel position in image with respect to flight path vector. + +## Inference Example + +Get the last frame and a previous frame such that the displacement magnitude is as close to 30cm as possible, with the condition of having a rotation of less that 1 radian. Each frame is preprocessed so that it is of shape `[C, H, W]` and with a range `[0, 1]` instead of `[0, 255]`. 
+ +```python +from evaluation_toolkit.inference_toolkit import inferenceFramework + +engine = inferenceFramework(dataset_root, evaluation_list, frame_transform=lambda x: x.transpose(2, 0, 1).astype(np.float32)[None]/255) + +for sample in tqdm(engine): + latest_frame, latest_intrinsics, _ = sample.get_frame() + previous_frame, previous_intrinsics, previous_pose = sample.get_previous_frame(displacement=0.3) + estimated_depth_map = my_model(latest_frame, previous_frame, previous_pose) + engine.finish_frame(estimated_depth_map) +mean_inference_time, output_depth_maps = engine.finalize(output_path='output.npz') +``` + +You can find an example usage of this Inference Framework for SfmLearner [here](https://github.com/ClementPinard/SfmLearner-Pytorch/tree/validation_set_constructor) + +## Evaluation + +The evaluation step is a simple script that takes as input the computed depth maps (here in the file `output.npz`). You can combine multiple computed depth maps to compare algorithms. + +``` +depth_evaluation --dataset_root /path/to/dataset/root --est_depth output1.npz output2.npz --algorithm_names name1 name2 --evaluation_list_path /path/to/evaluation_list.txt --flight_path_vector_list /path/to/flight_path_vector_list.txt <--scale_invariant> <--depth_mask /path/to/mask.npy> --output_figures /path/to/figures/folder +``` + +It will output typical metrics and plot advanced statistics regarding the dataset and the depth estimations. +Note that if you want to save the figures, you will need `xelatex` installed on your system.
Otherwise, don't specify a parameter to `--output_figures` and it will use `plt.show` \ No newline at end of file diff --git a/evaluation_toolkit/evaluation_toolkit/__init__.py b/evaluation_toolkit/evaluation_toolkit/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/evaluation_toolkit/evaluation_toolkit/depth_evaluation.py b/evaluation_toolkit/evaluation_toolkit/depth_evaluation.py new file mode 100644 index 0000000..5040abb --- /dev/null +++ b/evaluation_toolkit/evaluation_toolkit/depth_evaluation.py @@ -0,0 +1,564 @@ +from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter +from path import Path +import numpy as np +import pandas as pd +import matplotlib +import matplotlib.pyplot as plt +from mpl_toolkits.axes_grid1 import make_axes_locatable +from matplotlib.colors import LinearSegmentedColormap +from matplotlib import cm +from tqdm import tqdm +from imageio import imwrite + +parser = ArgumentParser( + description="Evaluate depth maps with respect to ground truth depth and FPV position", + formatter_class=ArgumentDefaultsHelpFormatter, +) + +parser.add_argument("--dataset_root", metavar="DIR", type=Path) +parser.add_argument( + "--est_depth", + metavar="PATH", + type=Path, + nargs='+', + help="where the estimated depth maps are stored, must be a 3D npz file", +) +parser.add_argument( + "--algorithm_names", + "--names", + metavar="STR", + type=str, + nargs='+', + help="name of the algorithms corresponding to estimated depth arrays") + +parser.add_argument( + "--evaluation_list_path", + "--eval", + metavar="PATH", + type=Path, + help="File with list of images to test for depth evaluation", +) +parser.add_argument( + "--flight_path_vector_list", + "--fpv", + metavar="PATH", + type=Path, + help="File with list of speed vectors, used to compute error wrt direction", +) +parser.add_argument( + "--scale_invariant", + action="store_true", + help="If selected, will rescale depth map with ratio of medians", +) +parser.add_argument( + 
"--min_depth", + metavar="D", + default=1e-2, + type=float, + help="threshold below which GT is discarded", +) +parser.add_argument( + "--max_depth", + metavar="D", + default=250, + type=float, + help="threshold above which GT is discarded", +) +parser.add_argument( + "--depth_mask", + metavar="PATH", + default=None, + type=Path, + help="path to boolean numpy array. Should be the same size as ground truth. " + "False value will discard the corresponding pixel location for every ground truth", +) + +parser.add_argument( + "--output_figures", + metavar="DIR", + default=None, + type=Path, + help="where to save the figures, in pgf format. If not set, will show them with plt.show()" +) + +parser.add_argument( + "--output_samples", + type=int, + default=0, + metavar='N', + help="Outputs N Gt and estimation vizualisation sampels" +) + +coords = None + + +def get_values( + gt_depth, estim_depth, fpv, scale_invariant=False, mask=None, min_depth=1e-2, max_depth=250 +): + """Given a depth maps and depth estimation, return a table of all valid depth points with + additional metadata + + Args: + gt_depth (np.array): ground truth depth computed by RDC + estim_depth (np.array): Depth estimated with inference toolkit + fpv (np.array): array of 2 floats, representing the fpv coordinates, in pixels + scale_invariant (bool, optional): If set to True, will multiply estimated depth with + ratio between medians. This is representative of how + depth was evaluated in Eigen et al. + mask (np.array, optional): Boolean array of same shape as depth maps. Discard from evaluation + image points where mask[u,v] == False + min_depth (float, optional): Minimal depth below which ground truth is discarded and estimation is clipped.* + Defaults to 1e-2. + max_depth (float, optional): Maximal depth above which ground truth is discarded estimation is clipped. + Defaults to 250. 
+ + Returns: + [type]: [description] + """ + global coords + if coords is None: + coords = np.stack( + np.meshgrid(np.arange(gt_depth.shape[1]), np.arange(gt_depth.shape[0])), axis=-1 + ) + + # TODO : For now, fpv distance is given in pixel distance. + # A more accurate way would be to use angular distance. + fpv_dist = np.linalg.norm(coords - fpv, axis=-1) + estim_depth = np.clip(estim_depth, min_depth, max_depth) + valid = (gt_depth > min_depth) & (gt_depth < max_depth) + if mask is not None: + valid = valid & mask + if valid.sum() == 0: + return + valid_gt, valid_estim = gt_depth[valid], estim_depth[valid] + if scale_invariant: + valid_estim = valid_estim * np.median(valid_gt) / np.median(valid_estim) + fpv_dist = fpv_dist[valid] + valid_coords = coords[valid] + values = np.stack([valid_gt, valid_estim, *valid_coords.T, fpv_dist], axis=-1) + + return pd.DataFrame(values, columns=["GT", "estim", "x", "y", "fpv_dist"]) + + +def plot_distribution(values, bins, ax, label=None, log_bins=False): + """Distribution plotting function, will plot with lines instead of bars + + Args: + values: histogram to plot + bins: Corresponding bins delimitting histogram values + ax: Matplotlib ax to plot on + label (str): Plot label name. Defaults to None. + log_bins (bool): If set to True, will set the scale to log. + Useful for Mean Log Error. Defaults to False. 
+ """ + bin_dists = bins[1:] - bins[:-1] + total = sum(bin_dists) + normalized_values = (values / sum(values)) * bin_dists / total + bin_centers = 0.5 * (bins[1:] + bins[:-1]) + if log_bins: + bin_centers = np.exp(bin_centers) + ax.plot(bin_centers, normalized_values, label=label) + if log_bins: + ax.set_xscale("log") + + +def group_quantiles(df, to_group, columns, quantiles=[0.25, 0.5, 0.75]): + if isinstance(columns, str): + columns = [columns] + grouped_df = df.groupby(by=np.round(df[to_group])) + return grouped_df[columns].quantile(quantiles).unstack() + + +def error_map(error_per_px): + """Compute Image with pixelwise mean depth error + + Args: + error_per_px (pd.Series): Table of errors with pixels as index + + Returns: + np.array: Array with the shape of the image, with mean error at each pixel + """ + x, y = np.stack(error_per_px.index.values, axis=-1).astype(int) + error_map = np.full((int(x.max() + 1), int(y.max() + 1)), np.NaN) + error_map[x, y] = error_per_px.values + return error_map + + +def error_metrics(df, algo_name, suffix=''): + """Compute error metrics from a dataframe. + + Args: + df (pd.DataFrame): Table containing the metrics we are interested in. It can be + constructed with a groupby to have mean computed over a particular value insead of a global mean. + algo_name (str): Algorithm for which we compute the metrics + suffix (str, optional): Precision for the particular metric we are computing, + depending on how the dataframe was constructed and grouped by. Defaults to ''. + """ + error_names = ["AbsDiff", "AbsRel", "AbsLog", "StdDiff", "StdRel", "StdLog", "a1", "a2", "a3"] + errors = [ + df["absdiff"].mean(), + df["reldiff"].mean(), + df["abslogdiff"].mean(), + np.sqrt(df["absdiff2"].mean()), + np.sqrt(df["reldiff2"].mean()), + np.sqrt(df["logdiff2"].mean()), + df["a1"].mean(), + df["a2"].mean(), + df["a3"].mean(), + ] + + # Print the results + # TODO : save the result in latex tab format ? 
+ print("Results for usual metrics for algorithm {}, {}".format(algo_name, suffix)) + print( + "{:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}".format( + *error_names + ) + ) + print( + "{:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}".format( + *errors + ) + ) + + +def viz_depth(depth, max_depth): + """Convert depth to a colored vizualisation. Infinity is black + + Args: + depth (np.array): 2D array of depth values + max_depth (float): max_depth will correspond to the end of the colormap spectrum. + Every value above this will be the same color, expect infinity which will be black. + + Returns: + np.array: np.uint8 array of colorized depth, ready to be saved + """ + opencv_rainbow_data = ( + (0.000, (1.00, 0.00, 0.00)), + (0.400, (1.00, 1.00, 0.00)), + (0.600, (0.00, 1.00, 0.00)), + (0.800, (0.00, 0.00, 1.00)), + (1.000, (0.60, 0.00, 1.00)) + ) + rainbow_cmap = LinearSegmentedColormap.from_list('opencv_rainbow', opencv_rainbow_data, 1000) + bone = cm.get_cmap('bone', 10000) + depth_norm = depth / max_depth + depth_viz = rainbow_cmap(depth_norm, bytes=True)[..., :3] + depth_viz[depth == np.inf] = 0 + return depth_viz + + +def visualize_sample(img_path, gt_path, estimations, algo_names, max_depth, output_folder): + """Visualize a sample and save to output_folder. 
+ A sample consists in the image, the ground truth depth, and the different estimations + + Args: + img_path (Path): Where to load the image + gt_path (Path): Where to load the Ground truth depth map (usually same name as img but with npy extension) + estimations (List[np.array]): List of the estimations from all the algos we are testing + algo_names (List[str]): List of algorithm names, corresponding to the estimations given above + max_depth (float): depth saturation value above which every thing will be the same color + output_folder (Path): Where to save all the different vizualisations + """ + img_path.copy(output_folder) + img_name = img_path.stem + + gt_depth = np.load(gt_path) + max_gt = np.max(gt_depth[gt_depth < np.inf]) + max_depth = min(max_gt, max_depth) + imwrite(output_folder / "{}_GT.png".format(img_name), viz_depth(gt_depth, max_depth)) + for n, e in zip(algo_names, estimations): + imwrite(output_folder / "{}_{}.png".format(img_name, n), viz_depth(e, max_depth)) + + +def main(): + args = parser.parse_args() + assert (len(args.est_depth) == len(args.algorithm_names)) + + if args.output_figures is not None: + matplotlib.use("pgf") + pgf_with_xelatex = { + 'text.usetex': True, + "pgf.texsystem": "xelatex", + "pgf.preamble": r"\usepackage{amssymb} " + r"\usepackage{amsmath} " + r"\usepackage{fontspec} " + r"\usepackage{unicode-math}" + } + # Change to pgf if needed + savefig_ext = "pdf" + matplotlib.rcParams.update(pgf_with_xelatex) + + with open(args.evaluation_list_path, "r") as f: + test_img_path = [line[:-1] for line in f.readlines()] + fpv_list = np.loadtxt(args.flight_path_vector_list) + dataframes = {} + + if args.output_samples > 0 and args.output_figures is not None: + np.random.seed(1) + to_sample = np.random.choice(len(test_img_path), args.output_samples) + for i in to_sample: + estimated_depth_maps = [] + img_path = test_img_path[i] + for p in args.est_depth: + depth = np.load(p, allow_pickle=True)[img_path] + 
estimated_depth_maps.append(depth) + visualize_sample(args.dataset_root / img_path, + (args.dataset_root / img_path).stripext() + ".npy", + estimated_depth_maps, + args.algorithm_names, + args.max_depth, + args.output_figures) + + for p, name in zip(args.est_depth, args.algorithm_names): + estimated_depth = np.load(p, allow_pickle=True) + values_df = [] + assert len(test_img_path) == len(estimated_depth) + if args.depth_mask is not None: + mask = np.load(args.depth_mask) + else: + mask = None + + # Load each GT-estimation pair and extract data in a pandas dataframe + # values_df is at first a list of dataframes which we then concatenate + print("getting results for {} algorithm (file : {})".format(name, p)) + for filepath, fpv in tqdm(zip(test_img_path, fpv_list), total=len(fpv_list)): + GT = np.load((args.dataset_root / filepath).stripext() + ".npy") + new_values = get_values( + GT, + estimated_depth[filepath], + fpv, + args.scale_invariant, + mask, + args.min_depth, + args.max_depth, + ) + if new_values is not None: + values_df.append(new_values) + values_df = pd.concat(values_df) + + # Additional values to the Dataframe + # Note that no mean is computed here, each row in the dataframe is ONE pixel + # The dataframe is thus potentially thousands rows long + values_df["log_GT"] = np.log(values_df["GT"]) + values_df["log_estim"] = np.log(values_df["estim"]) + values_df["diff"] = values_df["estim"] - values_df["GT"] + values_df["absdiff"] = values_df["diff"].abs() + values_df["absdiff2"] = np.power(values_df["diff"], 2) + values_df["reldiff"] = values_df["absdiff"] / values_df["GT"] + values_df["reldiff2"] = np.power(values_df["reldiff"], 2) + values_df["logdiff"] = values_df["log_estim"] - values_df["log_GT"] + values_df["logdiff2"] = np.power(values_df["logdiff"], 2) + values_df["abslogdiff"] = values_df["logdiff"].abs() + values_df["a1"] = (values_df["abslogdiff"] < np.log(1.25)).astype(float) + values_df["a2"] = (values_df["abslogdiff"] < 2 * 
np.log(1.25)).astype(float) + values_df["a3"] = (values_df["abslogdiff"] < 3 * np.log(1.25)).astype(float) + dataframes[name] = values_df + + for name, df in dataframes.items(): + print() + print("---------------------------") + print("Results for {}".format(name)) + print("---------------------------") + print() + # Compute mean erros, a la Eigen et al. + error_metrics(df, name, "averaged over all points") + # Get mean values per ground truth values, and then mean them + # This way, we have the same weight for each ground truth value + values_df_per_gt = df.groupby(by=np.round(values_df["GT"])).mean() + error_metrics(values_df_per_gt, name, "averaged over gt values") + values_df_per_log_gt = df.groupby(by=0.1 * np.round(10 * values_df["log_GT"])).mean() + error_metrics(values_df_per_log_gt, name, "averaged over log(gt) values") + + # TODO better handling of the parameters, maybe just add it to argparse + plot = True + n_bins = 4 + if plot: + # COMPUTING HISTOGRAMS + + # GT-wise difference with estimation distributions. 
+ # Useful to see if we perform well + # in the depth range we are actually interested in + # Construct the bins for GT-wise error + # You can see this as 3D histogram + # Note that if the assumptions of gaussian difference + # for log values, the log_normal error should be roughly + # the same for all bins + + min_gt = values_df["GT"].min() + max_gt = values_df["GT"].max() + bins = np.linspace(min_gt, max_gt, n_bins + 1) + + histograms = {} + for name, df in dataframes.items(): + histograms[name] = {} + estim_per_GT = {} + for b1, b2 in zip(bins[:-1], bins[1:]): + per_gt = df[(df["GT"] > b1) & (df["GT"] < b2)] + estim_per_GT[(b1 + b2) / 2] = { + "normal": np.histogram(per_gt["diff"], bins=100), + "log_normal": np.histogram(per_gt["logdiff"], bins=100), + "bins": [b1, b2], + } + histograms[name]["estim_per_GT"] = estim_per_GT + + # Global histograms + # Same as above, but with one bin, and thus not GT-wise + + histograms[name]["global_diff"] = np.histogram(df["estim"] - df["GT"], bins=100) + histograms[name]["global_log_diff"] = np.histogram(df["log_estim"] - df["log_GT"], bins=100) + + # Depth error per pixel + # Useful to identify if a region in the screen is particualrly faulty. + # Can help spot dataset inconsistency (eg sky is always in the same place) + # Can also help find calibration artefacts ? + + metric_per_px = df.groupby(by=["x", "y"]).mean() + histograms[name]["mean_diff_per_px"] = error_map(metric_per_px["absdiff"]) + histograms[name]["mean_log_diff_per_px"] = error_map(metric_per_px["logdiff"]) + + # Depth error wrt pixelwise distance to FPV. For SFM, the closer we are to FPV, + # The harde it is to deduce depth. But in the same time, the more + # usefule depth becomes, because it indicates distances of obstacles where + # we are headed to. + + # Note : if fpv is too far, it means it is not on the image + # And thus this metric is not really interesting. 
+ histograms[name]["quantiles_per_fpv"] = group_quantiles( + df[df["fpv_dist"] < 1000], "fpv_dist", ["absdiff", "abslogdiff"] + ) + histograms[name]["quantiles_per_gt"] = group_quantiles(df, "GT", ["absdiff", "abslogdiff"]) + histograms[name]["quantiles_per_estimation"] = group_quantiles(df, "estim", ["absdiff", "abslogdiff"]) + + # PLOTTING + colors = plt.rcParams['axes.prop_cycle'].by_key()['color'] + # First plot, general insight for dataset + fig1, axes = plt.subplots(2, 1, sharex=True) + GT_distrib = None + for name, df in dataframes.items(): + if GT_distrib is None: + GT_distrib = np.histogram(df["GT"], bins=100) + plot_distribution(*GT_distrib, axes[0], label="groundtruth depth") + estim_distrib = np.histogram(df["estim"], bins=100) + plot_distribution(*estim_distrib, axes[1], label=name) + axes[0].set_title("Ground Truth distribution") + axes[0].legend() + axes[1].set_title("depth estimation distribution from {}".format(name)) + axes[1].legend() + if args.output_figures is not None: + fig1.savefig(args.output_figures / "depth_distrib.{}".format(savefig_ext)) + + # Second plot, GT-wise difference one figure per algorithm + for name, h in histograms.items(): + fig2, axes = plt.subplots(1, 2, sharey=True) + for i, (k, v) in enumerate(h["estim_per_GT"].items()): + plot_distribution( + *v["normal"], axes[0], label="$GT \\in [{:.1f},{:.1f}]$".format(*v["bins"]) + ) + plot_distribution( + *v["log_normal"], + axes[1], + label="$GT \\in [{:.1f},{:.1f}]$".format(*v["bins"]), + log_bins=True + ) + # axes[0, 0].set_title("distribution of estimation around GT = {:.2f}".format(k)) + # axes[0, 1].set_title("distribution of log estimation around log GT = {:.2f}".format(np.log(k))) + axes[0].legend() + axes[0].set_title("GT - estimation difference") + axes[1].legend() + axes[1].set_title("logt GT - log estimation difference") + fig2.tight_layout() + if args.output_figures is not None: + fig2.savefig(args.output_figures / "GTwise_depth_diff_{}.{}".format(name, savefig_ext)) 
+ + # Third plot, global diff histogram + fig, axes = plt.subplots(2, 1) + for name, h in histograms.items(): + plot_distribution(*h["global_diff"], axes[0], name) + plot_distribution(*h["global_log_diff"], axes[1], name, log_bins=True) + axes[1].set_title("Global log difference distribution from GT") + axes[0].set_title("Global difference distribution from GT") + axes[1].legend() + axes[0].legend() + plt.tight_layout() + if args.output_figures is not None: + fig.savefig(args.output_figures / "global_depth_diff.{}".format(savefig_ext)) + + def plot_quartile(axes, color, algo_name, df): + index = df.index + diff = df["absdiff"] + logdiff = df["abslogdiff"] + axes[0].fill_between( + index, diff[0.25], diff[0.75], color=c, alpha=0.1 + ) + axes[0].plot(diff[0.5].index, diff[0.5], color=c, label=algo_name) + axes[1].fill_between( + index, logdiff[0.25], logdiff[0.75], color=c, alpha=0.1 + ) + axes[1].plot(logdiff[0.5], label=algo_name) + axes[0].legend() + axes[1].legend() + + # Fourth plot, error wrt distance to fpv + fig, axes = plt.subplots(2, 1, sharex=True) + for c, (name, h) in zip(colors, histograms.items()): + plot_quartile(axes, c, name, h["quantiles_per_fpv"]) + axes[0].set_title("Error wrt to distance to fpv (in px)") + axes[1].set_title("Log error wrt to distance to fpv (in px)") + axes[1].set_yscale('log') + axes[1].set_xlabel("Distance to flight path vector (in px)") + plt.tight_layout() + if args.output_figures is not None: + fig.savefig(args.output_figures / "fpv_error_quantiles.{}".format(savefig_ext)) + + # Fifth plot, another way of plotting GT-wise error: + # For each GT depth, we show 3 points : median, and 50% confidence intervale (2 points) + # We have less info than the full histogram but we can show more GT values + fig, axes = plt.subplots(2, 1, sharex=True) + for c, (name, h) in zip(colors, histograms.items()): + plot_quartile(axes, c, name, h["quantiles_per_gt"]) + axes[0].set_title("Error wrt to groundtruth depth") + axes[1].set_title("Log 
error wrt to groundtruth depth") + axes[1].set_yscale('log') + axes[1].set_xlabel("Estimated depth (in meters)") + plt.tight_layout() + if args.output_figures is not None: + fig.savefig(args.output_figures / "gt_error_quantiles.{}".format(savefig_ext)) + + # Last plot, error with respect to estimated depth + + fig, axes = plt.subplots(2, 1, sharex=True) + for c, (name, h) in zip(colors, histograms.items()): + plot_quartile(axes, c, name, h["quantiles_per_estimation"]) + axes[0].set_title("Error wrt to estimated depth") + axes[1].set_title("Log error wrt to estimated depth") + axes[1].set_yscale('log') + axes[1].set_xlabel("Estimated depth (in meters)") + plt.tight_layout() + if args.output_figures is not None: + fig.savefig(args.output_figures / "est_error_quantiles.{}".format(savefig_ext)) + + # Last plot, pixelwise error + for name, h in histograms.items(): + fig, axes = plt.subplots(2, 1) + pl = axes[0].imshow(h["mean_diff_per_px"].T) + axes[0].set_title("Mean error for each pixel") + divider = make_axes_locatable(axes[0]) + cax = divider.append_axes("right", size="5%", pad=0.05) + cbar = fig.colorbar(pl, cax=cax) + cbar.ax.tick_params(axis="y", direction="in") + pl = axes[1].imshow(h["mean_log_diff_per_px"].T) + axes[1].set_title("Mean Log error for each pixel") + divider = make_axes_locatable(axes[1]) + cax = divider.append_axes("right", size="5%", pad=0.05) + cbar = fig.colorbar(pl, cax=cax) + cbar.ax.tick_params(axis="y", direction="in") + plt.tight_layout() + if args.output_figures is not None: + fig.savefig(args.output_figures / "pixel_error_map_{}.{}".format(name, savefig_ext)) + if args.output_figures is None: + plt.show() + + +if __name__ == "__main__": + main() diff --git a/evaluation_toolkit/evaluation_toolkit/inference_toolkit.py b/evaluation_toolkit/evaluation_toolkit/inference_toolkit.py new file mode 100644 index 0000000..831fc13 --- /dev/null +++ b/evaluation_toolkit/evaluation_toolkit/inference_toolkit.py @@ -0,0 +1,328 @@ +import numpy as np 
+from path import Path
+from imageio import imread
+import functools
+import time
+from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
+from scipy.spatial.transform import Rotation
+from tqdm import tqdm
+
+
+class Timer:
+    """
+    Timer class is used to measure elapsed time, while being able to
+    pause it when needed. This is useful to measure algorithm inference
+    time without measuring time spent retrieving wanted images
+    """
+    def __init__(self):
+        self._start_time = None
+        self._elapsed_time = 0
+
+    def running(self):
+        """Tell whether the timer is currently started (True) or paused (False)"""
+        return self._start_time is not None
+
+    def start(self):
+        """Start or resume the timer. Does nothing if it is already running"""
+        if self._start_time is None:
+            self._start_time = time.perf_counter()
+
+    def stop(self):
+        """Pause the timer and add the time spent since the last start to the elapsed time"""
+        if self._start_time is not None:
+            self._elapsed_time += time.perf_counter() - self._start_time
+            self._start_time = None
+
+    def get_elapsed(self):
+        """Return the time, in seconds, accumulated between all the start/stop pairs so far"""
+        return self._elapsed_time
+
+    def reset(self):
+        """Put the timer back in its initial state: not running, with no elapsed time"""
+        self.__init__()
+
+
+class inferenceFramework(object):
+    """Inference Framework used for simulating navigation conditions
+    for depth algorithms on a dataset created by RDC. It also comes with a way to measure your inference time
+    and to record your estimated depths.
+    The framework is iterable, and each iteration gives an Inference Sample Object from which you can get images
+    to compute depth on.
+
+    Attributes:
+        root (Path): Root directory where the Final output of RDC is stored.
+
+        max_shift (int): Max number of frames the algorithm is allowed to search in the past. If the algorithm
+        eg. wants to get a frame that was at a particular distance from the last frame, with a barely moving camera,
+        the frame can only be as anterior as {max_shift} frames before, even if it means the movement won't be enough.
+
+        estimated_depth_maps (dict): Dictionary for estimated depth maps, as numpy arrays. Key is image path
+        of image on which we estimated depth.
+
+        inference_time (List): List of time spent by your algorithm for inference.
+        Will be used at the end of the evaluation to compute the mean inference time
+
+        frame_transform (function): function which will be used to transform images
+        before returning them to the algorithm. The function takes a numpy array as an argument and can return
+        anything your algorithm wants, eg. a pytorch tensor. Pass it by keyword: it comes after max_shift.
+    """
+    def __init__(self, root, test_files, max_shift=50, frame_transform=None):
+        self.root = Path(root)
+        self.test_files = test_files
+        self.max_shift = max_shift
+        self.frame_transform = frame_transform
+        self.inference_time = []
+        self.estimated_depth_maps = {}
+
+    def __getitem__(self, i):
+        """Get item routine. Before returning the sample, the timer is triggered to measure inference time.
+
+        Args:
+            i (int): Position of the sample in the test_files list, which has been created with RDC
+
+        Returns:
+            InferenceSample: Object to compute depth
+        """
+        # With a list of test files, an out-of-range i raises IndexError, which ends a `for` loop on the framework
+        self.i = i
+        self.current_sample = inferenceSample(self.root, self.test_files[i], self.max_shift,
+                                              Timer(), self.frame_transform)
+        self.current_sample.timer.start()
+        return self.current_sample
+
+    def finish_frame(self, estimated_depth):
+        """Finish Frame routine: This method needs to be called each time your algorithm has
+        finished the depth inference. It also stops the timer and stores the time elapsed for this
+        sample to compute a mean inference time at the end of the evaluation.
+
+        Args:
+            estimated_depth (np.array): The output of your depth algorithm. It is stored in a dict,
+            with the sample file as key, which is saved by finalize once it is completely populated.
+
+        Returns:
+            float: time elapsed for inference for this sample
+        """
+        self.current_sample.timer.stop()
+        elapsed = self.current_sample.timer.get_elapsed()
+        self.inference_time.append(elapsed)
+        self.estimated_depth_maps[self.current_sample.file] = estimated_depth
+        return elapsed
+
+    def finalize(self, output_path=None):
+        """Finalize: this method needs to be called at the end of the whole evaluation,
+        when there is no sample left to estimate depth on.
+
+        Args:
+            output_path (Path, optional): Where to save all the estimated depth. It will
+            be saved with np.savez, i.e. in a npz file which is not compressed.
+
+        Returns:
+            (float, dict): Return the mean inference time and the computed depth maps in a dictionary
+        """
+        if output_path is not None:
+            np.savez(output_path, **self.estimated_depth_maps)
+        return np.mean(self.inference_time), self.estimated_depth_maps
+
+    def __len__(self):
+        """Number of samples in the test_files list"""
+        return len(self.test_files)
+
+
+class inferenceSample(object):
+    """Inference Sample class. Is used to get a particular frame with displacement constraints
+    For example, you can take the last frame (of which you need to compute the depth map),
+    and then want the frame that was 0.3 meters from the last one to ensure a sufficient parallax
+
+    Attributes:
+        root (Path): Same as inferenceFramework. Root directory where the Final output of RDC is stored.
+
+        file (Path): image path of image of which we want to estimate depth.
+
+        frame_transform (function) : Same as InferenceFramework. function used to transform loaded image
+        into the data format of your choice.
+
+        timer (Timer): timer used to measure time spent computing depth. All the frame gathering and transformation
+        are not taken into account in order to only measure inference time.
+
+        valid_frames (List of Path): Ordered list of frame paths representing the frame sequence that is going
+        to be used to get the optimal frame pair/set for the algorithm you want to evaluate.
+        The order is descending: last frame is first and oldest frames are last.
+
+        poses (np.array): Array of all the poses of the valid_frames list in the R,T format (3x4 matrix).
+        They are computed relative to the last frame, and as such, first pose is identity
+
+        rotation_angles (1D np.array): computed from poses, the angle magnitude between last frame and any given frame.
+        This is useful when you don't want rotation to be too large.
+
+        displacements (1D np.array): computed from poses, displacement magnitude between last frame and any given frame.
+        Useful when you don't want frames to be too close to each other.
+
+        intrinsics (np.array): Intrinsics for each frame of valid_frames, each stored in a 3x3 matrix.
+    """
+    def __init__(self, root, file, max_shift, timer, frame_transform=None):
+        """Load the sample frame path and the max_shift frames before it in its scene, with poses and intrinsics"""
+        self.root = root
+        self.file = file
+        self.frame_transform = frame_transform
+        self.timer = timer
+        full_filepath = self.root / file
+        scene = full_filepath.parent
+        # Get all frames in the scene folder. Normally, there should be more than "max_shift" frames.
+        scene_files = sorted(scene.files("*.jpg"))
+        poses = np.genfromtxt(scene / "poses.txt").reshape((-1, 3, 4))
+        sample_id = scene_files.index(full_filepath)
+        assert sample_id >= max_shift, "{} has less than {} frames before it".format(file, max_shift)
+        start_id = sample_id - max_shift
+        # Get all frames between start_id (oldest frame) and sample_id.
+        # Revert the list so that oldest frames are in the end, like in a buffer
+        self.valid_frames = scene_files[start_id:sample_id + 1][::-1]
+        # Flip_ud is equivalent to reverting the row and thus the same as [::-1]
+        valid_poses = np.flipud(poses[start_id:sample_id + 1])
+        # All poses in the sequence should be valid
+        assert not np.isnan(valid_poses.sum()), "Some poses before {} are not valid (NaN)".format(file)
+        # Change the pose array so that instead of 3x4 matrices, we have 4x4 matrices, which we can invert
+        last_line = np.broadcast_to(np.array([0, 0, 0, 1]), (valid_poses.shape[0], 1, 4))
+        valid_poses_full = np.concatenate([valid_poses, last_line], axis=1)
+        self.poses = (np.linalg.inv(valid_poses_full[0]) @ valid_poses_full)[:, :3]
+        R = self.poses[:, :3, :3]
+        self.rotation_angles = Rotation.from_matrix(R).magnitude()
+        self.displacements = np.linalg.norm(self.poses[:, :, -1], axis=-1)
+
+        # Case 1 for intrinsics : Zoom level never changed and thus there's only one intrinsics matrix for the
+        # whole video, stored in intrinsics.txt. It is repeated for each valid frame. This is the most usual case
+        # Case 2 : Each frame has its own intrinsics file (frame name without extension)_intrinsics.txt
+        # Case is only here for later compatibility, but it has not been tested thoroughly
+        if (scene / "intrinsics.txt").isfile():
+            self.intrinsics = np.stack([np.genfromtxt(scene / "intrinsics.txt")] * len(self.valid_frames))
+        else:
+            intrinsics_files = [f.stripext() + "_intrinsics.txt" for f in self.valid_frames]
+            self.intrinsics = np.stack([np.genfromtxt(i) for i in intrinsics_files])
+
+    def timer_decorator(func, *args, **kwargs):
+        """Decorator used to pause the timer and only restart it when returning the result.
+        This is used to not penalize the inference algorithm when frame retrieving is slow,
+        because in real conditions, it's possible you get the wanted frames immediately instead
+        of searching for them in the memory. The timer is restarted even when the decorated function raises.
+        """
+        @functools.wraps(func)
+        def wrapper(self, *args, **kwargs):
+            was_running = self.timer.running()
+            self.timer.stop()  # does nothing when the timer is already paused
+            try:
+                return func(self, *args, **kwargs)
+            finally:
+                if was_running:
+                    self.timer.start()
+        return wrapper
+
+    @timer_decorator
+    def get_frame(self, shift=0):
+        """Basic function to get frame within a fixed shift. When used without parameters, it returns
+        the sample frame.
+
+        Args:
+            shift (int, optional): Position relative to sample frame of the frame we want to get.
+            Defaults to 0.
+
+        Returns a tuple of 3:
+            [Unknown type]: Output of the frame_transform function, used on the desired frame, loaded in a np array
+            np.array: 3x3 intrinsics matrix of returned frame
+            np.array: 3x4 pose matrix of returned frame
+        """
+        file = self.valid_frames[shift]
+        img = imread(file)
+        if self.frame_transform is not None:
+            img = self.frame_transform(img)
+        return img, self.intrinsics[shift], self.poses[shift]
+
+    @timer_decorator
+    def get_previous_frame(self, shift=1, displacement=None, max_rot=1):
+        """More advanced function, to get a frame within shift, displacement and rotation constraints. Timer is paused when this
+        function is running.
+
+        Args:
+            shift (int, optional): As above. Position relative to sample frame of the frame we want to get.
+            Defaults to 1.
+
+            displacement (Float, optional): Desired displacement (in meters) between sample frame and
+            the frame we want to get. This parameter overwrites the shift parameter. Defaults to None.
+
+            max_rot (int, optional): Maximum Rotation, in radians. The function cannot return a frame
+            with a higher rotation than max_rot. If the wanted frame has too high a rotation, the closest
+            more recent frame with a rotation below this threshold is returned instead, and an AssertionError
+            is raised when there is none (the sample frame itself is never returned). Defaults to 1.
+
+        Returns a tuple of 3:
+            [Unknown type]: Output of the frame_transform function,
+            used on the frame that best represents the different constraints.
+            np.array: 3x3 intrinsics matrix of returned frame
+            np.array: 3x4 pose matrix of returned frame
+        """
+        if displacement is not None:
+            shift = max(1, np.abs(self.displacements - displacement).argmin())
+        rot_valid = self.rotation_angles < max_rot
+        assert rot_valid[1:shift + 1].any(), "Rotation is always higher than {}".format(max_rot)
+        # Highest shift, not above the wanted one, that has rotation below max_rot threshold
+        final_shift = np.where(rot_valid[:shift + 1])[0][-1]
+        return self.get_frame(final_shift)
+
+    @timer_decorator
+    def get_previous_frames(self, shifts=(1,), displacements=None, max_rot=1):
+        """Helper function to get multiple frames at the same time, with the previous function.
+
+        Args:
+            shifts (List): list of wanted shifts
+            displacements (List): List of wanted displacements, overwrite shifts
+            max_rot (int, optional): Maximum Rotation, see previous function
+
+        Returns an iterator (zip) over 3 tuples:
+            Tuple: Outputs of the frame_transform function for each desired frame
+            Tuple: 3x3 intrinsics matrices of returned frames
+            Tuple: 3x4 pose matrices of returned frames
+        """
+        if displacements is not None:
+            frames = zip(*[self.get_previous_frame(displacement=d, max_rot=max_rot) for d in displacements])
+        else:
+            frames = zip(*[self.get_previous_frame(shift=s, max_rot=max_rot) for s in shifts])
+        return frames
+
+
+def inference_toolkit_example():
+    """Example script: run a mock up depth algorithm on each sample of an evaluation list and save the result"""
+    parser = ArgumentParser(description='Example usage of Inference toolkit',
+                            formatter_class=ArgumentDefaultsHelpFormatter)
+
+    parser.add_argument('--dataset_root', metavar='DIR', type=Path)
+    parser.add_argument('--depth_output', metavar='FILE', type=Path,
+                        help='where to store the estimated depth maps, must be a npz file')
+    parser.add_argument('--evaluation_list_path', metavar='PATH', type=Path,
+                        help='File with list of images to test for depth evaluation')
+    parser.add_argument('--scale-invariant', action='store_true',
+                        help='If selected, will rescale depth map with ratio of medians')
+    args = parser.parse_args()
+
+    with open(args.evaluation_list_path) as f:
+        # One image path per line. Only the line ending is removed, so a last line without it is not truncated
+        evaluation_list = [line.rstrip("\n") for line in f if line.strip()]
+
+    def my_model(frame, previous, pose):
+        # Mock up using two frames and translation magnitude. Replace it with your algorithm, eg. DepthNet model
+        return np.linalg.norm(pose[:, -1]) * np.linalg.norm(frame - previous, axis=-1)
+
+    # This is our transform function. It converts the uint8 array into a float array,
+    # divides it by 255 to have values in [0,1] and adds the batch dimensions
+    def my_transform(img):
+        return img.transpose(2, 0, 1).astype(np.float32)[None] / 255
+
+    engine = inferenceFramework(args.dataset_root, evaluation_list, frame_transform=my_transform)
+    for sample in tqdm(engine):
+        latest_frame, latest_intrinsics, _ = sample.get_frame()
+        previous_frame, previous_intrinsics, previous_pose = sample.get_previous_frame(displacement=0.3)
+        engine.finish_frame(my_model(latest_frame, previous_frame, previous_pose))
+
+    mean_time, _ = engine.finalize(args.depth_output)
+    print("Mean time per sample : {:.2f}us".format(1e6 * mean_time))
+
+
+if __name__ == '__main__':
+    inference_toolkit_example()
diff --git a/evaluation_toolkit/setup.py b/evaluation_toolkit/setup.py
new file mode 100644
index 0000000..37a2de1
--- /dev/null
+++ b/evaluation_toolkit/setup.py
@@ -0,0 +1,33 @@
+from setuptools import setup
+
+with open("README.md", "r", encoding="utf-8") as fh:
+    long_description = fh.read()
+
+setup(name='inference toolkit',
+      license='MIT',
+      author='Clément Pinard',
+      author_email='clempinard@gmail.com',
+      description='Inference and evaluation routines to test on a dataset constructed with validation set constructor',
+      long_description=long_description,
+      long_description_content_type="text/markdown",
+      packages=["evaluation_toolkit"],
+      entry_points={
+          'console_scripts': [
+              'depth_evaluation = evaluation_toolkit.depth_evaluation:main'
+          ]
+      },
+      install_requires=[
+          'matplotlib',
+          'numpy',
+          'pandas',
+          'path',
+          'imageio',
+          'scikit-image',
+          'scipy',
+          'tqdm'
+      ],
+      classifiers=[
+          "Programming Language :: Python :: 3",
+          "License :: OSI Approved :: MIT License",
+          "Intended Audience :: Science/Research"
+      ])