From a8284af443bce6e16def3fbafce93850c77cf12d Mon Sep 17 00:00:00 2001
From: Abdelaziz Bouzidi <abdelaziz.bouzidi@xxii.fr>
Date: Thu, 15 Jun 2023 15:53:26 +0200
Subject: [PATCH 1/2] Bug fixes

---
 evaluation_toolkit/README.md                  |  33 -
 .../evaluation_toolkit/__init__.py            |   0
 .../evaluation_toolkit/depth_evaluation.py    | 564 ------------------
 .../evaluation_toolkit/inference_toolkit.py   | 328 ----------
 evaluation_toolkit/setup.py                   |  33 -
 generate_sky_masks.py                         |   3 +-
 main_pipeline_no_lidar.py                     |   1 +
 pcl_util/pointcloud_subsampler.h              |   2 +-
 8 files changed, 3 insertions(+), 961 deletions(-)
 delete mode 100644 evaluation_toolkit/README.md
 delete mode 100644 evaluation_toolkit/evaluation_toolkit/__init__.py
 delete mode 100644 evaluation_toolkit/evaluation_toolkit/depth_evaluation.py
 delete mode 100644 evaluation_toolkit/evaluation_toolkit/inference_toolkit.py
 delete mode 100644 evaluation_toolkit/setup.py

diff --git a/evaluation_toolkit/README.md b/evaluation_toolkit/README.md
deleted file mode 100644
index 74301c8..0000000
--- a/evaluation_toolkit/README.md
+++ /dev/null
@@ -1,33 +0,0 @@
-# Evaluation Toolkit
-
-Set of tools to run a particular algorithm on a dataset constructed with the validation set constructor, and evaluate it, along with advanced statistics regarding depth value and pixel position in image with respect to flight path vector.
-
-## Inference Example
-
-Get the last frame and a previous frame such that the displacement magnitude is as close to 30cm as possible, with the condition of having a rotation of less that 1 radian. Each frame is preprocessed so that it is of shape `[C, H, W]` and with a range `[0, 1]` instead of `[0, 255]`.
-
-```python
-from evaluation_toolkit import inferenceFramework
-
-engine = inferenceFramework(dataset_root, evaluation_list, lambda x: x.transpose(2, 0, 1).astype(np.float32)[None]/255)
-
-for sample in tqdm(engine):
-    latest_frame, latest_intrinsics, _ = sample.get_frame()
-    previous_frame, previous_intrinsics, previous_pose = sample.get_previous_frame(displacement=0.3)
-    estimated_depth_map = my_model(latest_frame, previous_frame, previous_pose)
-    engine.finish_frame(estimated_depth_map)
-mean_inference_time, output_depth_maps = engine.finalize(output_path='output.npz')
-```
-
-You can find an example usage of this Inference Framework for SfmLearner [here](https://github.com/ClementPinard/SfmLearner-Pytorch/tree/validation_set_constructor)
-
-## Evaluation
-
-The evaluation step is a simple script that takes into input the computed depth maps (here in the file `output.npz`). You can combine multiple computed depth maps to compare algorithms.
-
-```
-depth_evaluation --dataset_root /path/to/dataset/root --est_depth output1.npz output2.npz --algorithm_names name1 name2 --evaluation_list_path /path/to/evaluation_list.txt --flight_path_vector_list /path/to/fligt_path_vector_list.txt <--scale_invariant> <--mask_path /path/to/mask.npy> --output_figures /path/to/figures/folder
-```
-
-It will output typical metrics and plot advanced statistics regarding the dataset and the depth estimations.
-Note that if you want to save the figures, you will need `xelatex` installed in your system. Otherwise, don't specify a parameter to `--output_figures` and it will use `plt.show`
\ No newline at end of file
diff --git a/evaluation_toolkit/evaluation_toolkit/__init__.py b/evaluation_toolkit/evaluation_toolkit/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/evaluation_toolkit/evaluation_toolkit/depth_evaluation.py b/evaluation_toolkit/evaluation_toolkit/depth_evaluation.py
deleted file mode 100644
index 5040abb..0000000
--- a/evaluation_toolkit/evaluation_toolkit/depth_evaluation.py
+++ /dev/null
@@ -1,564 +0,0 @@
-from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
-from path import Path
-import numpy as np
-import pandas as pd
-import matplotlib
-import matplotlib.pyplot as plt
-from mpl_toolkits.axes_grid1 import make_axes_locatable
-from matplotlib.colors import LinearSegmentedColormap
-from matplotlib import cm
-from tqdm import tqdm
-from imageio import imwrite
-
-parser = ArgumentParser(
-    description="Evaluate depth maps with respect to ground truth depth and FPV position",
-    formatter_class=ArgumentDefaultsHelpFormatter,
-)
-
-parser.add_argument("--dataset_root", metavar="DIR", type=Path)
-parser.add_argument(
-    "--est_depth",
-    metavar="PATH",
-    type=Path,
-    nargs='+',
-    help="where the estimated depth maps are stored, must be a 3D npz file",
-)
-parser.add_argument(
-    "--algorithm_names",
-    "--names",
-    metavar="STR",
-    type=str,
-    nargs='+',
-    help="name of the algorithms corresponding to estimated depth arrays")
-
-parser.add_argument(
-    "--evaluation_list_path",
-    "--eval",
-    metavar="PATH",
-    type=Path,
-    help="File with list of images to test for depth evaluation",
-)
-parser.add_argument(
-    "--flight_path_vector_list",
-    "--fpv",
-    metavar="PATH",
-    type=Path,
-    help="File with list of speed vectors, used to compute error wrt direction",
-)
-parser.add_argument(
-    "--scale_invariant",
-    action="store_true",
-    help="If selected, will rescale depth map with ratio of medians",
-)
-parser.add_argument(
-    "--min_depth",
-    metavar="D",
-    default=1e-2,
-    type=float,
-    help="threshold below which GT is discarded",
-)
-parser.add_argument(
-    "--max_depth",
-    metavar="D",
-    default=250,
-    type=float,
-    help="threshold above which GT is discarded",
-)
-parser.add_argument(
-    "--depth_mask",
-    metavar="PATH",
-    default=None,
-    type=Path,
-    help="path to boolean numpy array. Should be the same size as ground truth. "
-    "False value will discard the corresponding pixel location for every ground truth",
-)
-
-parser.add_argument(
-    "--output_figures",
-    metavar="DIR",
-    default=None,
-    type=Path,
-    help="where to save the figures, in pgf format. If not set, will show them with plt.show()"
-)
-
-parser.add_argument(
-    "--output_samples",
-    type=int,
-    default=0,
-    metavar='N',
-    help="Outputs N Gt and estimation vizualisation sampels"
-)
-
-coords = None
-
-
-def get_values(
-    gt_depth, estim_depth, fpv, scale_invariant=False, mask=None, min_depth=1e-2, max_depth=250
-):
-    """Given a depth maps and depth estimation, return a table of all valid depth points with
-    additional metadata
-
-    Args:
-        gt_depth (np.array): ground truth depth computed by RDC
-        estim_depth (np.array): Depth estimated with inference toolkit
-        fpv (np.array): array of 2 floats, representing the fpv coordinates, in pixels
-        scale_invariant (bool, optional): If set to True, will multiply estimated depth with
-                                          ratio between medians. This is representative of how
-                                          depth was evaluated in Eigen et al.
-        mask (np.array, optional): Boolean array of same shape as depth maps. Discard from evaluation
-                                   image points where mask[u,v] == False
-        min_depth (float, optional): Minimal depth below which ground truth is discarded and estimation is clipped.*
-                                     Defaults to 1e-2.
-        max_depth (float, optional): Maximal depth above which ground truth is discarded estimation is clipped.
-                                     Defaults to 250.
-
-    Returns:
-        [type]: [description]
-    """
-    global coords
-    if coords is None:
-        coords = np.stack(
-            np.meshgrid(np.arange(gt_depth.shape[1]), np.arange(gt_depth.shape[0])), axis=-1
-        )
-
-    # TODO : For now, fpv distance is given in pixel distance.
-    # A more accurate way would be to use angular distance.
-    fpv_dist = np.linalg.norm(coords - fpv, axis=-1)
-    estim_depth = np.clip(estim_depth, min_depth, max_depth)
-    valid = (gt_depth > min_depth) & (gt_depth < max_depth)
-    if mask is not None:
-        valid = valid & mask
-    if valid.sum() == 0:
-        return
-    valid_gt, valid_estim = gt_depth[valid], estim_depth[valid]
-    if scale_invariant:
-        valid_estim = valid_estim * np.median(valid_gt) / np.median(valid_estim)
-    fpv_dist = fpv_dist[valid]
-    valid_coords = coords[valid]
-    values = np.stack([valid_gt, valid_estim, *valid_coords.T, fpv_dist], axis=-1)
-
-    return pd.DataFrame(values, columns=["GT", "estim", "x", "y", "fpv_dist"])
-
-
-def plot_distribution(values, bins, ax, label=None, log_bins=False):
-    """Distribution plotting function, will plot with lines instead of bars
-
-    Args:
-        values: histogram to plot
-        bins: Corresponding bins delimitting histogram values
-        ax: Matplotlib ax to plot on
-        label (str): Plot label name. Defaults to None.
-        log_bins (bool): If set to True, will set the scale to log.
-                         Useful for Mean Log Error. Defaults to False.
-    """
-    bin_dists = bins[1:] - bins[:-1]
-    total = sum(bin_dists)
-    normalized_values = (values / sum(values)) * bin_dists / total
-    bin_centers = 0.5 * (bins[1:] + bins[:-1])
-    if log_bins:
-        bin_centers = np.exp(bin_centers)
-    ax.plot(bin_centers, normalized_values, label=label)
-    if log_bins:
-        ax.set_xscale("log")
-
-
-def group_quantiles(df, to_group, columns, quantiles=[0.25, 0.5, 0.75]):
-    if isinstance(columns, str):
-        columns = [columns]
-    grouped_df = df.groupby(by=np.round(df[to_group]))
-    return grouped_df[columns].quantile(quantiles).unstack()
-
-
-def error_map(error_per_px):
-    """Compute Image with pixelwise mean depth error
-
-    Args:
-        error_per_px (pd.Series): Table of errors with pixels as index
-
-    Returns:
-        np.array: Array with the shape of the image, with mean error at each pixel
-    """
-    x, y = np.stack(error_per_px.index.values, axis=-1).astype(int)
-    error_map = np.full((int(x.max() + 1), int(y.max() + 1)), np.NaN)
-    error_map[x, y] = error_per_px.values
-    return error_map
-
-
-def error_metrics(df, algo_name, suffix=''):
-    """Compute error metrics from a dataframe.
-
-    Args:
-        df (pd.DataFrame): Table containing the metrics we are interested in. It can be
-        constructed with a groupby to have mean computed over a particular value insead of a global mean.
-        algo_name (str): Algorithm for which we compute the metrics
-        suffix (str, optional): Precision for the particular metric we are computing,
-        depending on how the dataframe was constructed and grouped by. Defaults to ''.
-    """
-    error_names = ["AbsDiff", "AbsRel", "AbsLog", "StdDiff", "StdRel", "StdLog", "a1", "a2", "a3"]
-    errors = [
-        df["absdiff"].mean(),
-        df["reldiff"].mean(),
-        df["abslogdiff"].mean(),
-        np.sqrt(df["absdiff2"].mean()),
-        np.sqrt(df["reldiff2"].mean()),
-        np.sqrt(df["logdiff2"].mean()),
-        df["a1"].mean(),
-        df["a2"].mean(),
-        df["a3"].mean(),
-    ]
-
-    # Print the results
-    # TODO : save the result in latex tab format ?
-    print("Results for usual metrics for algorithm {}, {}".format(algo_name, suffix))
-    print(
-        "{:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}".format(
-            *error_names
-        )
-    )
-    print(
-        "{:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}".format(
-            *errors
-        )
-    )
-
-
-def viz_depth(depth, max_depth):
-    """Convert depth to a colored vizualisation. Infinity is black
-
-    Args:
-        depth (np.array): 2D array of depth values
-        max_depth (float): max_depth will correspond to the end of the colormap spectrum.
-        Every value above this will be the same color, expect infinity which will be black.
-
-    Returns:
-        np.array: np.uint8 array of colorized depth, ready to be saved
-    """
-    opencv_rainbow_data = (
-        (0.000, (1.00, 0.00, 0.00)),
-        (0.400, (1.00, 1.00, 0.00)),
-        (0.600, (0.00, 1.00, 0.00)),
-        (0.800, (0.00, 0.00, 1.00)),
-        (1.000, (0.60, 0.00, 1.00))
-    )
-    rainbow_cmap = LinearSegmentedColormap.from_list('opencv_rainbow', opencv_rainbow_data, 1000)
-    bone = cm.get_cmap('bone', 10000)
-    depth_norm = depth / max_depth
-    depth_viz = rainbow_cmap(depth_norm, bytes=True)[..., :3]
-    depth_viz[depth == np.inf] = 0
-    return depth_viz
-
-
-def visualize_sample(img_path, gt_path, estimations, algo_names, max_depth, output_folder):
-    """Visualize a sample and save to output_folder.
-    A sample consists in the image, the ground truth depth, and the different estimations
-
-    Args:
-        img_path (Path): Where to load the image
-        gt_path (Path): Where to load the Ground truth depth map (usually same name as img but with npy extension)
-        estimations (List[np.array]): List of the estimations from all the algos we are testing
-        algo_names (List[str]): List of algorithm names, corresponding to the estimations given above
-        max_depth (float): depth saturation value above which every thing will be the same color
-        output_folder (Path): Where to save all the different vizualisations
-    """
-    img_path.copy(output_folder)
-    img_name = img_path.stem
-
-    gt_depth = np.load(gt_path)
-    max_gt = np.max(gt_depth[gt_depth < np.inf])
-    max_depth = min(max_gt, max_depth)
-    imwrite(output_folder / "{}_GT.png".format(img_name), viz_depth(gt_depth, max_depth))
-    for n, e in zip(algo_names, estimations):
-        imwrite(output_folder / "{}_{}.png".format(img_name, n), viz_depth(e, max_depth))
-
-
-def main():
-    args = parser.parse_args()
-    assert (len(args.est_depth) == len(args.algorithm_names))
-
-    if args.output_figures is not None:
-        matplotlib.use("pgf")
-        pgf_with_xelatex = {
-            'text.usetex': True,
-            "pgf.texsystem": "xelatex",
-            "pgf.preamble": r"\usepackage{amssymb} "
-                            r"\usepackage{amsmath} "
-                            r"\usepackage{fontspec} "
-                            r"\usepackage{unicode-math}"
-        }
-        # Change to pgf if needed
-        savefig_ext = "pdf"
-        matplotlib.rcParams.update(pgf_with_xelatex)
-
-    with open(args.evaluation_list_path, "r") as f:
-        test_img_path = [line[:-1] for line in f.readlines()]
-    fpv_list = np.loadtxt(args.flight_path_vector_list)
-    dataframes = {}
-
-    if args.output_samples > 0 and args.output_figures is not None:
-        np.random.seed(1)
-        to_sample = np.random.choice(len(test_img_path), args.output_samples)
-        for i in to_sample:
-            estimated_depth_maps = []
-            img_path = test_img_path[i]
-            for p in args.est_depth:
-                depth = np.load(p, allow_pickle=True)[img_path]
-                estimated_depth_maps.append(depth)
-            visualize_sample(args.dataset_root / img_path,
-                             (args.dataset_root / img_path).stripext() + ".npy",
-                             estimated_depth_maps,
-                             args.algorithm_names,
-                             args.max_depth,
-                             args.output_figures)
-
-    for p, name in zip(args.est_depth, args.algorithm_names):
-        estimated_depth = np.load(p, allow_pickle=True)
-        values_df = []
-        assert len(test_img_path) == len(estimated_depth)
-        if args.depth_mask is not None:
-            mask = np.load(args.depth_mask)
-        else:
-            mask = None
-
-        # Load each GT-estimation pair and extract data in a pandas dataframe
-        # values_df is at first a list of dataframes which we then concatenate
-        print("getting results for {} algorithm (file : {})".format(name, p))
-        for filepath, fpv in tqdm(zip(test_img_path, fpv_list), total=len(fpv_list)):
-            GT = np.load((args.dataset_root / filepath).stripext() + ".npy")
-            new_values = get_values(
-                GT,
-                estimated_depth[filepath],
-                fpv,
-                args.scale_invariant,
-                mask,
-                args.min_depth,
-                args.max_depth,
-            )
-            if new_values is not None:
-                values_df.append(new_values)
-        values_df = pd.concat(values_df)
-
-        # Additional values to the Dataframe
-        # Note that no mean is computed here, each row in the dataframe is ONE pixel
-        # The dataframe is thus potentially thousands rows long
-        values_df["log_GT"] = np.log(values_df["GT"])
-        values_df["log_estim"] = np.log(values_df["estim"])
-        values_df["diff"] = values_df["estim"] - values_df["GT"]
-        values_df["absdiff"] = values_df["diff"].abs()
-        values_df["absdiff2"] = np.power(values_df["diff"], 2)
-        values_df["reldiff"] = values_df["absdiff"] / values_df["GT"]
-        values_df["reldiff2"] = np.power(values_df["reldiff"], 2)
-        values_df["logdiff"] = values_df["log_estim"] - values_df["log_GT"]
-        values_df["logdiff2"] = np.power(values_df["logdiff"], 2)
-        values_df["abslogdiff"] = values_df["logdiff"].abs()
-        values_df["a1"] = (values_df["abslogdiff"] < np.log(1.25)).astype(float)
-        values_df["a2"] = (values_df["abslogdiff"] < 2 * np.log(1.25)).astype(float)
-        values_df["a3"] = (values_df["abslogdiff"] < 3 * np.log(1.25)).astype(float)
-        dataframes[name] = values_df
-
-    for name, df in dataframes.items():
-        print()
-        print("---------------------------")
-        print("Results for {}".format(name))
-        print("---------------------------")
-        print()
-        # Compute mean erros, a la Eigen et al.
-        error_metrics(df, name, "averaged over all points")
-        # Get mean values per ground truth values, and then mean them
-        # This way, we have the same weight for each ground truth value
-        values_df_per_gt = df.groupby(by=np.round(values_df["GT"])).mean()
-        error_metrics(values_df_per_gt, name, "averaged over gt values")
-        values_df_per_log_gt = df.groupby(by=0.1 * np.round(10 * values_df["log_GT"])).mean()
-        error_metrics(values_df_per_log_gt, name, "averaged over log(gt) values")
-
-    # TODO better handling of the parameters, maybe just add it to argparse
-    plot = True
-    n_bins = 4
-    if plot:
-        # COMPUTING HISTOGRAMS
-
-        # GT-wise difference with estimation distributions.
-        # Useful to see if we perform well
-        # in the depth range we are actually interested in
-        # Construct the bins for GT-wise error
-        # You can see this as 3D histogram
-        # Note that if the assumptions of gaussian difference
-        # for log values, the log_normal error should be roughly
-        # the same for all bins
-
-        min_gt = values_df["GT"].min()
-        max_gt = values_df["GT"].max()
-        bins = np.linspace(min_gt, max_gt, n_bins + 1)
-
-        histograms = {}
-        for name, df in dataframes.items():
-            histograms[name] = {}
-            estim_per_GT = {}
-            for b1, b2 in zip(bins[:-1], bins[1:]):
-                per_gt = df[(df["GT"] > b1) & (df["GT"] < b2)]
-                estim_per_GT[(b1 + b2) / 2] = {
-                    "normal": np.histogram(per_gt["diff"], bins=100),
-                    "log_normal": np.histogram(per_gt["logdiff"], bins=100),
-                    "bins": [b1, b2],
-                }
-            histograms[name]["estim_per_GT"] = estim_per_GT
-
-            # Global histograms
-            # Same as above, but with one bin, and thus not GT-wise
-
-            histograms[name]["global_diff"] = np.histogram(df["estim"] - df["GT"], bins=100)
-            histograms[name]["global_log_diff"] = np.histogram(df["log_estim"] - df["log_GT"], bins=100)
-
-            # Depth error per pixel
-            # Useful to identify if a region in the screen is particualrly faulty.
-            # Can help spot dataset inconsistency (eg sky is always in the same place)
-            # Can also help find calibration artefacts ?
-
-            metric_per_px = df.groupby(by=["x", "y"]).mean()
-            histograms[name]["mean_diff_per_px"] = error_map(metric_per_px["absdiff"])
-            histograms[name]["mean_log_diff_per_px"] = error_map(metric_per_px["logdiff"])
-
-            # Depth error wrt pixelwise distance to FPV. For SFM, the closer we are to FPV,
-            # The harde it is to deduce depth. But in the same time, the more
-            # usefule depth becomes, because it indicates distances of obstacles where
-            # we are headed to.
-
-            # Note : if fpv is too far, it means it is not on the image
-            # And thus this metric is not really interesting.
-            histograms[name]["quantiles_per_fpv"] = group_quantiles(
-                df[df["fpv_dist"] < 1000], "fpv_dist", ["absdiff", "abslogdiff"]
-            )
-            histograms[name]["quantiles_per_gt"] = group_quantiles(df, "GT", ["absdiff", "abslogdiff"])
-            histograms[name]["quantiles_per_estimation"] = group_quantiles(df, "estim", ["absdiff", "abslogdiff"])
-
-        # PLOTTING
-        colors = plt.rcParams['axes.prop_cycle'].by_key()['color']
-        # First plot, general insight for dataset
-        fig1, axes = plt.subplots(2, 1, sharex=True)
-        GT_distrib = None
-        for name, df in dataframes.items():
-            if GT_distrib is None:
-                GT_distrib = np.histogram(df["GT"], bins=100)
-                plot_distribution(*GT_distrib, axes[0], label="groundtruth depth")
-            estim_distrib = np.histogram(df["estim"], bins=100)
-            plot_distribution(*estim_distrib, axes[1], label=name)
-        axes[0].set_title("Ground Truth distribution")
-        axes[0].legend()
-        axes[1].set_title("depth estimation distribution from {}".format(name))
-        axes[1].legend()
-        if args.output_figures is not None:
-            fig1.savefig(args.output_figures / "depth_distrib.{}".format(savefig_ext))
-
-        # Second plot, GT-wise difference one figure per algorithm
-        for name, h in histograms.items():
-            fig2, axes = plt.subplots(1, 2, sharey=True)
-            for i, (k, v) in enumerate(h["estim_per_GT"].items()):
-                plot_distribution(
-                    *v["normal"], axes[0], label="$GT \\in [{:.1f},{:.1f}]$".format(*v["bins"])
-                )
-                plot_distribution(
-                    *v["log_normal"],
-                    axes[1],
-                    label="$GT \\in [{:.1f},{:.1f}]$".format(*v["bins"]),
-                    log_bins=True
-                )
-                # axes[0, 0].set_title("distribution of estimation around GT = {:.2f}".format(k))
-                # axes[0, 1].set_title("distribution of log estimation around log GT = {:.2f}".format(np.log(k)))
-            axes[0].legend()
-            axes[0].set_title("GT - estimation difference")
-            axes[1].legend()
-            axes[1].set_title("logt GT - log estimation difference")
-            fig2.tight_layout()
-            if args.output_figures is not None:
-                fig2.savefig(args.output_figures / "GTwise_depth_diff_{}.{}".format(name, savefig_ext))
-
-        # Third plot, global diff histogram
-        fig, axes = plt.subplots(2, 1)
-        for name, h in histograms.items():
-            plot_distribution(*h["global_diff"], axes[0], name)
-            plot_distribution(*h["global_log_diff"], axes[1], name, log_bins=True)
-        axes[1].set_title("Global log difference distribution from GT")
-        axes[0].set_title("Global difference distribution from GT")
-        axes[1].legend()
-        axes[0].legend()
-        plt.tight_layout()
-        if args.output_figures is not None:
-            fig.savefig(args.output_figures / "global_depth_diff.{}".format(savefig_ext))
-
-        def plot_quartile(axes, color, algo_name, df):
-            index = df.index
-            diff = df["absdiff"]
-            logdiff = df["abslogdiff"]
-            axes[0].fill_between(
-                index, diff[0.25], diff[0.75], color=c, alpha=0.1
-            )
-            axes[0].plot(diff[0.5].index, diff[0.5], color=c, label=algo_name)
-            axes[1].fill_between(
-                index, logdiff[0.25], logdiff[0.75], color=c, alpha=0.1
-            )
-            axes[1].plot(logdiff[0.5], label=algo_name)
-            axes[0].legend()
-            axes[1].legend()
-
-        # Fourth plot, error wrt distance to fpv
-        fig, axes = plt.subplots(2, 1, sharex=True)
-        for c, (name, h) in zip(colors, histograms.items()):
-            plot_quartile(axes, c, name, h["quantiles_per_fpv"])
-        axes[0].set_title("Error wrt to distance to fpv (in px)")
-        axes[1].set_title("Log error wrt to distance to fpv (in px)")
-        axes[1].set_yscale('log')
-        axes[1].set_xlabel("Distance to flight path vector (in px)")
-        plt.tight_layout()
-        if args.output_figures is not None:
-            fig.savefig(args.output_figures / "fpv_error_quantiles.{}".format(savefig_ext))
-
-        # Fifth plot, another way of plotting GT-wise error:
-        # For each GT depth, we show 3 points : median, and 50% confidence intervale (2 points)
-        # We have less info than the full histogram but we can show more GT values
-        fig, axes = plt.subplots(2, 1, sharex=True)
-        for c, (name, h) in zip(colors, histograms.items()):
-            plot_quartile(axes, c, name, h["quantiles_per_gt"])
-        axes[0].set_title("Error wrt to groundtruth depth")
-        axes[1].set_title("Log error wrt to groundtruth depth")
-        axes[1].set_yscale('log')
-        axes[1].set_xlabel("Estimated depth (in meters)")
-        plt.tight_layout()
-        if args.output_figures is not None:
-            fig.savefig(args.output_figures / "gt_error_quantiles.{}".format(savefig_ext))
-
-        # Last plot, error with respect to estimated depth
-
-        fig, axes = plt.subplots(2, 1, sharex=True)
-        for c, (name, h) in zip(colors, histograms.items()):
-            plot_quartile(axes, c, name, h["quantiles_per_estimation"])
-        axes[0].set_title("Error wrt to estimated depth")
-        axes[1].set_title("Log error wrt to estimated depth")
-        axes[1].set_yscale('log')
-        axes[1].set_xlabel("Estimated depth (in meters)")
-        plt.tight_layout()
-        if args.output_figures is not None:
-            fig.savefig(args.output_figures / "est_error_quantiles.{}".format(savefig_ext))
-
-        # Last plot, pixelwise error
-        for name, h in histograms.items():
-            fig, axes = plt.subplots(2, 1)
-            pl = axes[0].imshow(h["mean_diff_per_px"].T)
-            axes[0].set_title("Mean error for each pixel")
-            divider = make_axes_locatable(axes[0])
-            cax = divider.append_axes("right", size="5%", pad=0.05)
-            cbar = fig.colorbar(pl, cax=cax)
-            cbar.ax.tick_params(axis="y", direction="in")
-            pl = axes[1].imshow(h["mean_log_diff_per_px"].T)
-            axes[1].set_title("Mean Log error for each pixel")
-            divider = make_axes_locatable(axes[1])
-            cax = divider.append_axes("right", size="5%", pad=0.05)
-            cbar = fig.colorbar(pl, cax=cax)
-            cbar.ax.tick_params(axis="y", direction="in")
-            plt.tight_layout()
-            if args.output_figures is not None:
-                fig.savefig(args.output_figures / "pixel_error_map_{}.{}".format(name, savefig_ext))
-        if args.output_figures is None:
-            plt.show()
-
-
-if __name__ == "__main__":
-    main()
diff --git a/evaluation_toolkit/evaluation_toolkit/inference_toolkit.py b/evaluation_toolkit/evaluation_toolkit/inference_toolkit.py
deleted file mode 100644
index 831fc13..0000000
--- a/evaluation_toolkit/evaluation_toolkit/inference_toolkit.py
+++ /dev/null
@@ -1,328 +0,0 @@
-import numpy as np
-from path import Path
-from imageio import imread
-import time
-from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
-from scipy.spatial.transform import Rotation
-from tqdm import tqdm
-
-
-class Timer:
-    """
-    Timer class is used to measure elapsed time, while being able to
-    pause it when needed. This is useful to measure algorithm inference
-    time without measuring time spent retrieving wanted images
-    """
-    def __init__(self):
-        self._start_time = None
-        self._elapsed_time = 0
-
-    def running(self):
-        return self._start_time is not None
-
-    def start(self):
-        """Start a new timer"""
-        if self._start_time is not None:
-            return
-
-        self._start_time = time.perf_counter()
-
-    def stop(self):
-        """Stop the timer, and report the elapsed time"""
-        if self._start_time is None:
-            return
-
-        self._elapsed_time += time.perf_counter() - self._start_time
-        self._start_time = None
-
-    def get_elapsed(self):
-        return self._elapsed_time
-
-    def reset(self):
-        self.__init__()
-
-
-class inferenceFramework(object):
-    """Inference Framework used for simulating navigation conditions
-    for depth algorithms on a dataset created by RDC. It also comes with a way to measure your inference time
-    and to record your estimated depths.
-    The framework is iterable, and each iteration gives an Inference Sample Object from which you can get images
-    to compute depth on.
-
-    Attributes:
-        root (Path): Root directory where the Final output of RDC is stored.
-
-        max_shift (float): Max number of frames the algorithm is allowed to search in the past. If the algorithm
-        eg. wants to get a frame that was at a particular distance from the last frame, with a barely moving camera,
-        the frame can only be as anterior as {max_shift} frames before, even if it means the movement won't be enough.
-
-        estimated_depth_maps (dict): Dictionnary for estimated depth maps, as numpy arrays. Key is image path
-        of image on which we estimated depth.
-
-        inference_time (List): List of time spent by your algorithm for inference.
-        Will be used at the end of the evaluation to compute the mean inference time
-
-        frame_transform (function): function which will be used to transform images
-        before returning them to the algorithm. The function takes a numpy array as
-        an argument and can return anything your algorithm want, eg. a pytorch tensor.
-    """
-    def __init__(self, root, test_files, max_shift=50, frame_transform=None):
-        self.root = Path(root)
-        self.test_files = test_files
-        self.max_shift = max_shift
-        self.frame_transform = frame_transform
-        self.inference_time = []
-        self.estimated_depth_maps = {}
-
-    def __getitem__(self, i):
-        """Get item routine. Before returning the sample, the timer is triggered to measure inference time.
-
-        Args:
-            i (int): Position of the sample in the test_files list, which has been created with RDC
-
-        Returns:
-            InferenceSample: Object to compute depth
-        """
-        timer = Timer()
-        self.i = i
-        self.current_sample = inferenceSample(self.root,
-                                              self.test_files[i],
-                                              self.max_shift,
-                                              timer, self.frame_transform)
-        self.current_sample.timer.start()
-        return self.current_sample
-
-    def finish_frame(self, estimated_depth):
-        """Finish Frame routine: This method needs to be called each time your algorithm has
-        finished the depth inference. It also stops the timer and stores the time elapsed for this
-        sample to compute a mean inference time at the end of the evaluation.
-
-        Args:
-            estimated_depth (np.array): The output of your depth algorithm. It will then be stored in
-            a dict, and then saved after when it will be completely populated.
-
-        Returns:
-            float: time elapsed for inference for this sample
-        """
-        self.current_sample.timer.stop()
-        elapsed = self.current_sample.timer.get_elapsed()
-        self.inference_time.append(elapsed)
-        self.estimated_depth_maps[self.current_sample.file] = estimated_depth
-        return elapsed
-
-    def finalize(self, output_path=None):
-        """Finalize: this methods needs to be called at the end of the whole evaluation,
-        when there is no sample left to estimate depth on.
-
-        Args:
-            output_path (Path, optional): Where to save all the estimated depth. It will
-            be saved in a compressed numpy file.
-
-        Returns:
-            (float, dict): Return the mean inference time and the compute depth maps in a dictionnary
-        """
-        if output_path is not None:
-            np.savez(output_path, **self.estimated_depth_maps)
-        mean_inference_time = np.mean(self.inference_time)
-        return mean_inference_time, self.estimated_depth_maps
-
-    def __len__(self):
-        return len(self.test_files)
-
-
-class inferenceSample(object):
-    """Inferance Sample class. Is used to get a particular frame with displacement constraints
-    For example, you can take the last frame (of which you need to compute the depth map),
-    and then want the frame that was 0.3 meters from the last one to ensure a sufficient parallax
-
-    Attributes:
-        root (Path): Same as inferenceFramework. Root directory where the Final output of RDC is stored.
-
-        file (Path): image path of image of which we want to estimate depth.
-
-        frame_transform (function) : Same as InferenceFramework. function used to transform loaded image
-        into the data format of your choice.
-
-        timer (Timer): timer used to measure time spent computing depth. All the frame gathering and transformation
-        are not taken into account in order to only measure inference time.
-
-        valid_frames (List of Path): Ordered list of frame paths representing the frame sequence that is going
-        to be used to get the optimal frame pair/set for the algotihm you want to evaluate.
-        The order is descending: last frame is first and oldest frames are last.
-
-        poses (np.array): Array of all the poses of the valid_frames list in the R,T format (3x4 matrix).
-        They are computed relative to the last frame, and as such, first pose is identity
-
-        rotation_angles (1D np.array): computed from poses, the angle magnitude between last frame and any given frame.
-        This is useful when you don't want rotation to be too large.
-
-        displacement (1D np.array): compute from poses, displacement magnitude between last frame and any given frame.
-        Useful when you don't want frames to be too close to each other.
-
-        intrinsics (np.array): Intrinsics for each frame, stored in a 3x3 matrix.
-    """
-    def __init__(self, root, file, max_shift, timer, frame_transform=None):
-        self.root = root
-        self.file = file
-        self.frame_transform = frame_transform
-        self.timer = timer
-        full_filepath = self.root / file
-        scene = full_filepath.parent
-        # Get all frames in the scene folder. Normally, there should be more than "max_shift" frames.
-        scene_files = sorted(scene.files("*.jpg"))
-        poses = np.genfromtxt(scene / "poses.txt").reshape((-1, 3, 4))
-        sample_id = scene_files.index(full_filepath)
-        assert(sample_id >= max_shift)
-        start_id = sample_id - max_shift
-        # Get all frames between start_id (oldest frame) and sample_id.
-        # Revert the list so that oldest frames are in the end, like in a buffer
-        self.valid_frames = scene_files[start_id:sample_id + 1][::-1]
-        # Flip_ud is equivalent to reverting the row and thus the same as [::-1]
-        valid_poses = np.flipud(poses[start_id:sample_id + 1])
-        # All poses in the sequence should be valid
-        assert not np.isnan(valid_poses.sum())
-        # Change the pose array so that instead of 3x4 matrices, we have 4x4 matrices, which we can invert
-        last_line = np.broadcast_to(np.array([0, 0, 0, 1]), (valid_poses.shape[0], 1, 4))
-        valid_poses_full = np.concatenate([valid_poses, last_line], axis=1)
-        self.poses = (np.linalg.inv(valid_poses_full[0]) @  valid_poses_full)[:, :3]
-        R = self.poses[:, :3, :3]
-        self.rotation_angles = Rotation.from_matrix(R).magnitude()
-        self.displacements = np.linalg.norm(self.poses[:, :, -1], axis=-1)
-
-        # Case 1 for intrinsics : Zoom level never changed and thus there's only one intrinsics
-        # matrix for the whole video, stored in intrinsics.txt This is the most usual case
-        # Case 2 : Each frame has its own intrinsics file <name>_intrinsics.txt
-        # Case is only here for later compatibility, but it has not been tested thoroughly
-        if (scene / "intrinsics.txt").isfile():
-            self.intrinsics = np.stack([np.genfromtxt(scene / "intrinsics.txt")] * max_shift)
-        else:
-            intrinsics_files = [f.stripext() + "_intrinsics.txt" for f in self.valid_frames]
-            self.intrinsics = np.stack([np.genfromtxt(i) for i in intrinsics_files])
-
-    def timer_decorator(func, *args, **kwargs):
-        """
-        Decorator used to pause the timer and only restart it when returning the result.
-        This is used to not penalize the inference algorithm when frame retrieving is slow,
-        because in real conditions, it's possible you get the wanted frames immediately instead
-        of searching for them in the memory.
-        """
-        def wrapper(self, *args, **kwargs):
-            if self.timer.running():
-                self.timer.stop()
-                res = func(self, *args, **kwargs)
-                self.timer.start()
-            else:
-                res = func(self, *args, **kwargs)
-            return res
-        return wrapper
-
-    @timer_decorator
-    def get_frame(self, shift=0):
-        """Basic function to get frame within a fixed shift. When used without parameters, it returns
-        the sample frame.
-
-        Args:
-            shift (int, optional): Position relative to sample frame of the frame we want to get.
-            Defaults to 0.
-
-        Returns a tuple of 3:
-            [Unknown type]: Output of the frame_transform function, used on the desired frame, loaded in a np array
-            np.array: 3x3 intrinsics matrix of returned frame
-            np.array: 3x4 pose matrix of returned frame
-        """
-        file = self.valid_frames[shift]
-        img = imread(file)
-        if self.frame_transform is not None:
-            img = self.frame_transform(img)
-        return img, self.intrinsics[shift], self.poses[shift]
-
-    @timer_decorator
-    def get_previous_frame(self, shift=1, displacement=None, max_rot=1):
-        """More advanced function, to get a frame within shift, displacement and rotation constraints. Timer is paused when this
-        function is running.
-
-        Args:
-            shift (int, optional): As above. Position relative to sample frame of the frame we want to get.
-            Defaults to 1.
-
-            displacement (Float, optional): Desired displacement (in meters) between sample frame and
-            the frame we want to get. This parameter overwrite the shift parameter. Defaults to None.
-
-            max_rot (int, optional): Maximum Rotation, in radians. The function cannot return a frame
-            with a higher rotation than max_rot. It assumes rotation is growing with time
-            (only true for the first frames). The maximum shift of the returned frame corresponds to
-            the first frame with a rotation above this threshold. Defaults to 1.
-
-        Returns a tuple of 3:
-            [Unknow type]: Output of the frame_transform function,
-            used on the frame that best represent the different constrains.
-            np.array: 3x3 intrinsics matrix of returned frame
-            np.array: 3x4 pose matrix of returned frame
-        """
-        if displacement is not None:
-            shift = max(1, np.abs(self.displacements - displacement).argmin())
-        rot_valid = self.rotation_angles < max_rot
-        assert sum(rot_valid[1: shift + 1] > 0), "Rotation is always higher than {}".format(max_rot)
-        # Highest shift that has rotation below max_rot thresold
-        final_shift = np.where(rot_valid[-1 - shift:])[0][-1]
-        return self.get_frame(final_shift)
-
-    @timer_decorator
-    def get_previous_frames(self, shifts=[1], displacements=None, max_rot=1):
-        """Helper function to get multiple frames at the same time. with the previous function.
-
-        Args:
-            shifts (List): list of wanted shifts
-            displacements (List): List of wanted displacements, overwrite shifts
-            max_rot (int, optional): Maximum Rotation, see previous function
-
-        Returns a tuple of 3:
-            List: Outputs of the frame_transform function for each desired frame
-            List: 3x3 intrinsics matrices of returned frames
-            List: 3x4 pose matrices of returned frames
-        """
-        if displacements is not None:
-            frames = zip(*[self.get_previous_frame(displacement=d, max_rot=max_rot) for d in displacements])
-        else:
-            frames = zip(*[self.get_previous_frame(shift=s, max_rot=max_rot) for s in shifts])
-        return frames
-
-
-def inference_toolkit_example():
-    parser = ArgumentParser(description='Example usage of Inference toolkit',
-                            formatter_class=ArgumentDefaultsHelpFormatter)
-
-    parser.add_argument('--dataset_root', metavar='DIR', type=Path)
-    parser.add_argument('--depth_output', metavar='FILE', type=Path,
-                        help='where to store the estimated depth maps, must be a npy file')
-    parser.add_argument('--evaluation_list_path', metavar='PATH', type=Path,
-                        help='File with list of images to test for depth evaluation')
-    parser.add_argument('--scale-invariant', action='store_true',
-                        help='If selected, will rescale depth map with ratio of medians')
-    args = parser.parse_args()
-
-    with open(args.evaluation_list_path) as f:
-        evaluation_list = [line[:-1] for line in f.readlines()]
-
-    def my_model(frame, previous, pose):
-        # Mock up function that uses two frames and translation magnitude
-        # Replace it with your algorithm, eg. DepthNet model
-        return np.linalg.norm(pose[:, -1]) * np.linalg.norm(frame - previous, axis=-1)
-
-    # This is our transform function. It converts the uint8 array into a float array,
-    # divides it by 255 to have values in [0,1] and adds the batch dimensions
-    def my_transform(img):
-        return img.transpose(2, 0, 1).astype(np.float32)[None] / 255
-
-    engine = inferenceFramework(args.dataset_root, evaluation_list, my_transform)
-    for sample in tqdm(engine):
-        latest_frame, latest_intrinsics, _ = sample.get_frame()
-        previous_frame, previous_intrinsics, previous_pose = sample.get_previous_frame(displacement=0.3)
-        engine.finish_frame(my_model(latest_frame, previous_frame, previous_pose))
-
-    mean_time, _ = engine.finalize(args.depth_output)
-    print("Mean time per sample : {:.2f}us".format(1e6 * mean_time))
-
-
-if __name__ == '__main__':
-    inference_toolkit_example()
diff --git a/evaluation_toolkit/setup.py b/evaluation_toolkit/setup.py
deleted file mode 100644
index 37a2de1..0000000
--- a/evaluation_toolkit/setup.py
+++ /dev/null
@@ -1,33 +0,0 @@
-from setuptools import setup
-
-with open("README.md", "r") as fh:
-    long_description = fh.read()
-
-setup(name='inference toolkit',
-      license='MIT',
-      author='Clément Pinard',
-      author_email='clempinard@gmail.com',
-      description='Inference and evaluation routines to test on a dataset constructed with validation set constructor',
-      long_description=long_description,
-      long_description_content_type="text/markdown",
-      packages=["evaluation_toolkit"],
-      entry_points={
-          'console_scripts': [
-              'depth_evaluation = evaluation_toolkit.depth_evaluation:main'
-          ]
-      },
-      install_requires=[
-          'numpy',
-          'pandas',
-          'path',
-          'imageio',
-          'scikit-image',
-          'scipy',
-          'tqdm'
-      ],
-      classifiers=[
-          "Programming Language :: Python :: 3",
-          "License :: OSI Approved :: MIT License",
-          "Intended Audience :: Science/Research"
-      ]
-      )
diff --git a/generate_sky_masks.py b/generate_sky_masks.py
index 6726ad3..6370f30 100644
--- a/generate_sky_masks.py
+++ b/generate_sky_masks.py
@@ -61,7 +61,6 @@ def extract_sky_mask(network, image_paths, mask_folder):
 def process_folder(folder_to_process, colmap_img_root, mask_path, pic_ext, verbose=False, batchsize=8, **env):
     network = prepare_network()
     folders = [folder_to_process] + list(folder_to_process.walkdirs())
-
     for folder in folders:
 
         mask_folder = mask_path/colmap_img_root.relpathto(folder)
@@ -84,7 +83,6 @@ def process_folder(folder_to_process, colmap_img_root, mask_path, pic_ext, verbo
     del network
     torch.cuda.empty_cache()
 
-
 parser = ArgumentParser(description='sky mask generator using ENet trained on cityscapes',
                         formatter_class=ArgumentDefaultsHelpFormatter)
 
@@ -105,3 +103,4 @@ def process_folder(folder_to_process, colmap_img_root, mask_path, pic_ext, verbo
     file_exts = ['jpg', 'JPG']
 
     process_folder(args.img_dir, args.colmap_img_root, args.mask_root, file_exts, True, args.batchsize)
+
diff --git a/main_pipeline_no_lidar.py b/main_pipeline_no_lidar.py
index 2dc0e62..2ad4f27 100644
--- a/main_pipeline_no_lidar.py
+++ b/main_pipeline_no_lidar.py
@@ -74,6 +74,7 @@ def main():
     if i not in args.skip_step:
         print_step(i, "First thorough photogrammetry")
         env["thorough_recon"].makedirs_p()
+        print(env["video_frame_list_thorough"])
         colmap.extract_features(image_list=env["video_frame_list_thorough"], more=args.more_sift_features)
         colmap.index_images(vocab_tree_output=env["indexed_vocab_tree"], vocab_tree_input=args.vocab_tree)
         if env["match_method"] == "vocab_tree":
diff --git a/pcl_util/pointcloud_subsampler.h b/pcl_util/pointcloud_subsampler.h
index 68abcec..f98e0c1 100644
--- a/pcl_util/pointcloud_subsampler.h
+++ b/pcl_util/pointcloud_subsampler.h
@@ -19,7 +19,7 @@ typename pcl::PointCloud<PointT>::Ptr filter(typename pcl::PointCloud<PointT>::P
   pcl::VoxelGrid<PointT> vox;
   vox.setLeafSize (resolution, resolution, resolution);
 
-  for (auto it = octree.leaf_begin(); it != octree.leaf_end(); ++it) {
+  for (auto it = octree.leaf_depth_begin(); it != octree.leaf_depth_end(); ++it) {
     pcl::octree::OctreeContainerPointIndices& container = it.getLeafContainer();
 
     pcl::IndicesPtr indexVector(new std::vector<int>);

From 6323021f57a6355f0af2bdc2d364ae7d9e0c6d72 Mon Sep 17 00:00:00 2001
From: Abdelaziz Bouzidi <abdelaziz.bouzidi@xxii.fr>
Date: Thu, 15 Jun 2023 17:38:11 +0200
Subject: [PATCH 2/2] Added evaluation toolkit

---
 evaluation_toolkit/README.md                  |  33 +
 .../evaluation_toolkit/__init__.py            |   0
 .../evaluation_toolkit/depth_evaluation.py    | 564 ++++++++++++++++++
 .../evaluation_toolkit/inference_toolkit.py   | 328 ++++++++++
 evaluation_toolkit/setup.py                   |  33 +
 5 files changed, 958 insertions(+)
 create mode 100644 evaluation_toolkit/README.md
 create mode 100644 evaluation_toolkit/evaluation_toolkit/__init__.py
 create mode 100644 evaluation_toolkit/evaluation_toolkit/depth_evaluation.py
 create mode 100644 evaluation_toolkit/evaluation_toolkit/inference_toolkit.py
 create mode 100644 evaluation_toolkit/setup.py

diff --git a/evaluation_toolkit/README.md b/evaluation_toolkit/README.md
new file mode 100644
index 0000000..74301c8
--- /dev/null
+++ b/evaluation_toolkit/README.md
@@ -0,0 +1,33 @@
+# Evaluation Toolkit
+
+Set of tools to run a particular algorithm on a dataset constructed with the validation set constructor, and evaluate it, along with advanced statistics regarding depth value and pixel position in image with respect to flight path vector.
+
+## Inference Example
+
+Get the last frame and a previous frame such that the displacement magnitude is as close to 30 cm as possible, under the condition that the rotation is less than 1 radian. Each frame is preprocessed so that it has shape `[1, C, H, W]` (a leading batch dimension is added) and values in the range `[0, 1]` instead of `[0, 255]`.
+
+```python
+from evaluation_toolkit.inference_toolkit import inferenceFramework
+
+engine = inferenceFramework(dataset_root, evaluation_list, frame_transform=lambda x: x.transpose(2, 0, 1).astype(np.float32)[None]/255)
+
+for sample in tqdm(engine):
+    latest_frame, latest_intrinsics, _ = sample.get_frame()
+    previous_frame, previous_intrinsics, previous_pose = sample.get_previous_frame(displacement=0.3)
+    estimated_depth_map = my_model(latest_frame, previous_frame, previous_pose)
+    engine.finish_frame(estimated_depth_map)
+mean_inference_time, output_depth_maps = engine.finalize(output_path='output.npz')
+```
+
+You can find an example usage of this Inference Framework for SfmLearner [here](https://github.com/ClementPinard/SfmLearner-Pytorch/tree/validation_set_constructor)
+
+## Evaluation
+
+The evaluation step is a simple script that takes as input the computed depth maps (here in the file `output.npz`). You can combine multiple computed depth maps to compare algorithms.
+
+```
+depth_evaluation --dataset_root /path/to/dataset/root --est_depth output1.npz output2.npz --algorithm_names name1 name2 --evaluation_list_path /path/to/evaluation_list.txt --flight_path_vector_list /path/to/flight_path_vector_list.txt <--scale_invariant> <--depth_mask /path/to/mask.npy> --output_figures /path/to/figures/folder
+```
+
+It will output typical metrics and plot advanced statistics regarding the dataset and the depth estimations.
+Note that if you want to save the figures, you will need `xelatex` installed in your system. Otherwise, don't specify a parameter to `--output_figures` and it will use `plt.show`
\ No newline at end of file
diff --git a/evaluation_toolkit/evaluation_toolkit/__init__.py b/evaluation_toolkit/evaluation_toolkit/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/evaluation_toolkit/evaluation_toolkit/depth_evaluation.py b/evaluation_toolkit/evaluation_toolkit/depth_evaluation.py
new file mode 100644
index 0000000..5040abb
--- /dev/null
+++ b/evaluation_toolkit/evaluation_toolkit/depth_evaluation.py
@@ -0,0 +1,564 @@
+from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
+from path import Path
+import numpy as np
+import pandas as pd
+import matplotlib
+import matplotlib.pyplot as plt
+from mpl_toolkits.axes_grid1 import make_axes_locatable
+from matplotlib.colors import LinearSegmentedColormap
+from matplotlib import cm
+from tqdm import tqdm
+from imageio import imwrite
+
+parser = ArgumentParser(
+    description="Evaluate depth maps with respect to ground truth depth and FPV position",
+    formatter_class=ArgumentDefaultsHelpFormatter,
+)
+
+parser.add_argument("--dataset_root", metavar="DIR", type=Path)
+parser.add_argument(
+    "--est_depth",
+    metavar="PATH",
+    type=Path,
+    nargs='+',
+    help="where the estimated depth maps are stored, must be a 3D npz file",
+)
+parser.add_argument(
+    "--algorithm_names",
+    "--names",
+    metavar="STR",
+    type=str,
+    nargs='+',
+    help="name of the algorithms corresponding to estimated depth arrays")
+
+parser.add_argument(
+    "--evaluation_list_path",
+    "--eval",
+    metavar="PATH",
+    type=Path,
+    help="File with list of images to test for depth evaluation",
+)
+parser.add_argument(
+    "--flight_path_vector_list",
+    "--fpv",
+    metavar="PATH",
+    type=Path,
+    help="File with list of speed vectors, used to compute error wrt direction",
+)
+parser.add_argument(
+    "--scale_invariant",
+    action="store_true",
+    help="If selected, will rescale depth map with ratio of medians",
+)
+parser.add_argument(
+    "--min_depth",
+    metavar="D",
+    default=1e-2,
+    type=float,
+    help="threshold below which GT is discarded",
+)
+parser.add_argument(
+    "--max_depth",
+    metavar="D",
+    default=250,
+    type=float,
+    help="threshold above which GT is discarded",
+)
+parser.add_argument(
+    "--depth_mask",
+    metavar="PATH",
+    default=None,
+    type=Path,
+    help="path to boolean numpy array. Should be the same size as ground truth. "
+    "False value will discard the corresponding pixel location for every ground truth",
+)
+
+parser.add_argument(
+    "--output_figures",
+    metavar="DIR",
+    default=None,
+    type=Path,
+    help="where to save the figures, in pgf format. If not set, will show them with plt.show()"
+)
+
+parser.add_argument(
+    "--output_samples",
+    type=int,
+    default=0,
+    metavar='N',
+    help="Outputs N Gt and estimation vizualisation sampels"
+)
+
+coords = None
+
+
+def get_values(
+    gt_depth, estim_depth, fpv, scale_invariant=False, mask=None, min_depth=1e-2, max_depth=250
+):
+    """Given a ground truth depth map and a depth estimation, return a table of all valid depth
+    points with additional metadata
+
+    Args:
+        gt_depth (np.array): ground truth depth computed by RDC
+        estim_depth (np.array): Depth estimated with inference toolkit
+        fpv (np.array): array of 2 floats, representing the fpv coordinates, in pixels
+        scale_invariant (bool, optional): If set to True, will multiply estimated depth with
+                                          ratio between medians. This is representative of how
+                                          depth was evaluated in Eigen et al.
+        mask (np.array, optional): Boolean array of same shape as depth maps. Discard from evaluation
+                                   image points where mask[u,v] == False
+        min_depth (float, optional): Minimal depth below which ground truth is discarded and estimation is clipped.
+                                     Defaults to 1e-2.
+        max_depth (float, optional): Maximal depth above which ground truth is discarded and estimation is clipped.
+                                     Defaults to 250.
+
+    Returns:
+        pd.DataFrame: One row per valid pixel (columns GT, estim, x, y, fpv_dist), or None if no pixel is valid
+    """
+    global coords
+    if coords is None:
+        coords = np.stack(
+            np.meshgrid(np.arange(gt_depth.shape[1]), np.arange(gt_depth.shape[0])), axis=-1
+        )
+
+    # TODO : For now, fpv distance is given in pixel distance.
+    # A more accurate way would be to use angular distance.
+    fpv_dist = np.linalg.norm(coords - fpv, axis=-1)
+    estim_depth = np.clip(estim_depth, min_depth, max_depth)
+    valid = (gt_depth > min_depth) & (gt_depth < max_depth)
+    if mask is not None:
+        valid = valid & mask
+    if valid.sum() == 0:
+        return
+    valid_gt, valid_estim = gt_depth[valid], estim_depth[valid]
+    if scale_invariant:
+        valid_estim = valid_estim * np.median(valid_gt) / np.median(valid_estim)
+    fpv_dist = fpv_dist[valid]
+    valid_coords = coords[valid]
+    values = np.stack([valid_gt, valid_estim, *valid_coords.T, fpv_dist], axis=-1)
+
+    return pd.DataFrame(values, columns=["GT", "estim", "x", "y", "fpv_dist"])
+
+
+def plot_distribution(values, bins, ax, label=None, log_bins=False):
+    """Distribution plotting function, will plot with lines instead of bars
+
+    Args:
+        values: histogram to plot
+        bins: Corresponding bins delimiting histogram values
+        ax: Matplotlib ax to plot on
+        label (str): Plot label name. Defaults to None.
+        log_bins (bool): If set to True, will set the scale to log.
+                         Useful for Mean Log Error. Defaults to False.
+    """
+    bin_dists = bins[1:] - bins[:-1]
+    total = sum(bin_dists)
+    normalized_values = (values / sum(values)) * bin_dists / total
+    bin_centers = 0.5 * (bins[1:] + bins[:-1])
+    if log_bins:
+        bin_centers = np.exp(bin_centers)
+    ax.plot(bin_centers, normalized_values, label=label)
+    if log_bins:
+        ax.set_xscale("log")
+
+
+def group_quantiles(df, to_group, columns, quantiles=[0.25, 0.5, 0.75]):
+    if isinstance(columns, str):
+        columns = [columns]
+    grouped_df = df.groupby(by=np.round(df[to_group]))
+    return grouped_df[columns].quantile(quantiles).unstack()
+
+
+def error_map(error_per_px):
+    """Compute Image with pixelwise mean depth error
+
+    Args:
+        error_per_px (pd.Series): Table of errors with pixels as index
+
+    Returns:
+        np.array: Array with the shape of the image, with mean error at each pixel
+    """
+    x, y = np.stack(error_per_px.index.values, axis=-1).astype(int)
+    error_map = np.full((int(x.max() + 1), int(y.max() + 1)), np.NaN)
+    error_map[x, y] = error_per_px.values
+    return error_map
+
+
+def error_metrics(df, algo_name, suffix=''):
+    """Compute error metrics from a dataframe.
+
+    Args:
+        df (pd.DataFrame): Table containing the metrics we are interested in. It can be
+        constructed with a groupby to have mean computed over a particular value instead of a global mean.
+        algo_name (str): Algorithm for which we compute the metrics
+        suffix (str, optional): Precision for the particular metric we are computing,
+        depending on how the dataframe was constructed and grouped by. Defaults to ''.
+    """
+    error_names = ["AbsDiff", "AbsRel", "AbsLog", "StdDiff", "StdRel", "StdLog", "a1", "a2", "a3"]
+    errors = [
+        df["absdiff"].mean(),
+        df["reldiff"].mean(),
+        df["abslogdiff"].mean(),
+        np.sqrt(df["absdiff2"].mean()),
+        np.sqrt(df["reldiff2"].mean()),
+        np.sqrt(df["logdiff2"].mean()),
+        df["a1"].mean(),
+        df["a2"].mean(),
+        df["a3"].mean(),
+    ]
+
+    # Print the results
+    # TODO : save the result in latex tab format ?
+    print("Results for usual metrics for algorithm {}, {}".format(algo_name, suffix))
+    print(
+        "{:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}".format(
+            *error_names
+        )
+    )
+    print(
+        "{:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}".format(
+            *errors
+        )
+    )
+
+
+def viz_depth(depth, max_depth):
+    """Convert depth to a colored visualization. Infinity is black
+
+    Args:
+        depth (np.array): 2D array of depth values
+        max_depth (float): max_depth will correspond to the end of the colormap spectrum.
+        Every value above this will be the same color, except infinity, which will be black.
+
+    Returns:
+        np.array: np.uint8 array of colorized depth, ready to be saved
+    """
+    opencv_rainbow_data = (
+        (0.000, (1.00, 0.00, 0.00)),
+        (0.400, (1.00, 1.00, 0.00)),
+        (0.600, (0.00, 1.00, 0.00)),
+        (0.800, (0.00, 0.00, 1.00)),
+        (1.000, (0.60, 0.00, 1.00))
+    )
+    rainbow_cmap = LinearSegmentedColormap.from_list('opencv_rainbow', opencv_rainbow_data, 1000)
+    bone = cm.get_cmap('bone', 10000)
+    depth_norm = depth / max_depth
+    depth_viz = rainbow_cmap(depth_norm, bytes=True)[..., :3]
+    depth_viz[depth == np.inf] = 0
+    return depth_viz
+
+
+def visualize_sample(img_path, gt_path, estimations, algo_names, max_depth, output_folder):
+    """Visualize a sample and save to output_folder.
+    A sample consists in the image, the ground truth depth, and the different estimations
+
+    Args:
+        img_path (Path): Where to load the image
+        gt_path (Path): Where to load the Ground truth depth map (usually same name as img but with npy extension)
+        estimations (List[np.array]): List of the estimations from all the algos we are testing
+        algo_names (List[str]): List of algorithm names, corresponding to the estimations given above
+        max_depth (float): depth saturation value above which every thing will be the same color
+        output_folder (Path): Where to save all the different visualizations
+    """
+    img_path.copy(output_folder)
+    img_name = img_path.stem
+
+    gt_depth = np.load(gt_path)
+    finite_gt = gt_depth[gt_depth < np.inf]  # may be empty when the whole map is at infinity
+    max_depth = min(finite_gt.max(), max_depth) if finite_gt.size else max_depth
+    imwrite(output_folder / "{}_GT.png".format(img_name), viz_depth(gt_depth, max_depth))
+    for n, e in zip(algo_names, estimations):
+        imwrite(output_folder / "{}_{}.png".format(img_name, n), viz_depth(e, max_depth))
+
+
+def main():
+    """Evaluate estimated depth maps against ground truth, from command line arguments.
+
+    Prints the usual error metrics for each algorithm, then plots error statistics.
+    Figures are saved to args.output_figures when it is given, and shown on screen otherwise.
+    """
+    args = parser.parse_args()
+    assert (len(args.est_depth) == len(args.algorithm_names))
+
+    if args.output_figures is not None:
+        matplotlib.use("pgf")
+        pgf_with_xelatex = {
+            'text.usetex': True,
+            "pgf.texsystem": "xelatex",
+            "pgf.preamble": r"\usepackage{amssymb} "
+                            r"\usepackage{amsmath} "
+                            r"\usepackage{fontspec} "
+                            r"\usepackage{unicode-math}"
+        }
+        # Change to pgf if needed
+        savefig_ext = "pdf"
+        matplotlib.rcParams.update(pgf_with_xelatex)
+
+    with open(args.evaluation_list_path, "r") as f:
+        test_img_path = [line.rstrip("\n") for line in f]
+    fpv_list = np.loadtxt(args.flight_path_vector_list)
+    dataframes = {}
+
+    if args.output_samples > 0 and args.output_figures is not None:
+        np.random.seed(1)
+        to_sample = np.random.choice(len(test_img_path), args.output_samples)
+        for i in to_sample:
+            estimated_depth_maps = []
+            img_path = test_img_path[i]
+            for p in args.est_depth:
+                depth = np.load(p, allow_pickle=True)[img_path]
+                estimated_depth_maps.append(depth)
+            visualize_sample(args.dataset_root / img_path,
+                             (args.dataset_root / img_path).stripext() + ".npy",
+                             estimated_depth_maps,
+                             args.algorithm_names,
+                             args.max_depth,
+                             args.output_figures)
+
+    for p, name in zip(args.est_depth, args.algorithm_names):
+        # NOTE(review): allow_pickle=True can execute arbitrary code, only load trusted estimation files
+        estimated_depth = np.load(p, allow_pickle=True)
+        values_df = []
+        assert len(test_img_path) == len(estimated_depth)
+        if args.depth_mask is not None:
+            mask = np.load(args.depth_mask)
+        else:
+            mask = None
+
+        # Load each GT-estimation pair and extract data in a pandas dataframe
+        # values_df is at first a list of dataframes which we then concatenate
+        print("getting results for {} algorithm (file : {})".format(name, p))
+        for filepath, fpv in tqdm(zip(test_img_path, fpv_list), total=len(fpv_list)):
+            GT = np.load((args.dataset_root / filepath).stripext() + ".npy")
+            new_values = get_values(
+                GT,
+                estimated_depth[filepath],
+                fpv,
+                args.scale_invariant,
+                mask,
+                args.min_depth,
+                args.max_depth,
+            )
+            if new_values is not None:
+                values_df.append(new_values)
+        values_df = pd.concat(values_df)
+
+        # Additional values to the Dataframe
+        # Note that no mean is computed here, each row in the dataframe is ONE pixel
+        # The dataframe is thus potentially thousands rows long
+        values_df["log_GT"] = np.log(values_df["GT"])
+        values_df["log_estim"] = np.log(values_df["estim"])
+        values_df["diff"] = values_df["estim"] - values_df["GT"]
+        values_df["absdiff"] = values_df["diff"].abs()
+        values_df["absdiff2"] = np.power(values_df["diff"], 2)
+        values_df["reldiff"] = values_df["absdiff"] / values_df["GT"]
+        values_df["reldiff2"] = np.power(values_df["reldiff"], 2)
+        values_df["logdiff"] = values_df["log_estim"] - values_df["log_GT"]
+        values_df["logdiff2"] = np.power(values_df["logdiff"], 2)
+        values_df["abslogdiff"] = values_df["logdiff"].abs()
+        values_df["a1"] = (values_df["abslogdiff"] < np.log(1.25)).astype(float)
+        values_df["a2"] = (values_df["abslogdiff"] < 2 * np.log(1.25)).astype(float)
+        values_df["a3"] = (values_df["abslogdiff"] < 3 * np.log(1.25)).astype(float)
+        dataframes[name] = values_df
+
+    for name, df in dataframes.items():
+        print()
+        print("---------------------------")
+        print("Results for {}".format(name))
+        print("---------------------------")
+        print()
+        # Compute mean errors, a la Eigen et al.
+        error_metrics(df, name, "averaged over all points")
+        # Get mean values per ground truth values (taken from df itself), and then mean them
+        # This way, we have the same weight for each ground truth value
+        values_df_per_gt = df.groupby(by=np.round(df["GT"])).mean()
+        error_metrics(values_df_per_gt, name, "averaged over gt values")
+        values_df_per_log_gt = df.groupby(by=0.1 * np.round(10 * df["log_GT"])).mean()
+        error_metrics(values_df_per_log_gt, name, "averaged over log(gt) values")
+
+    # TODO better handling of the parameters, maybe just add it to argparse
+    plot = True
+    n_bins = 4
+    if plot:
+        # COMPUTING HISTOGRAMS
+
+        # GT-wise difference with estimation distributions.
+        # Useful to see if we perform well
+        # in the depth range we are actually interested in
+        # Construct the bins for GT-wise error
+        # You can see this as 3D histogram
+        # Note that if the assumptions of gaussian difference
+        # for log values, the log_normal error should be roughly
+        # the same for all bins
+        min_gt = values_df["GT"].min()
+        max_gt = values_df["GT"].max()
+        bins = np.linspace(min_gt, max_gt, n_bins + 1)
+
+        histograms = {}
+        for name, df in dataframes.items():
+            histograms[name] = {}
+            estim_per_GT = {}
+            for b1, b2 in zip(bins[:-1], bins[1:]):
+                per_gt = df[(df["GT"] > b1) & (df["GT"] < b2)]
+                estim_per_GT[(b1 + b2) / 2] = {
+                    "normal": np.histogram(per_gt["diff"], bins=100),
+                    "log_normal": np.histogram(per_gt["logdiff"], bins=100),
+                    "bins": [b1, b2],
+                }
+            histograms[name]["estim_per_GT"] = estim_per_GT
+
+            # Global histograms
+            # Same as above, but with one bin, and thus not GT-wise
+            histograms[name]["global_diff"] = np.histogram(df["estim"] - df["GT"], bins=100)
+            histograms[name]["global_log_diff"] = np.histogram(df["log_estim"] - df["log_GT"], bins=100)
+
+            # Depth error per pixel
+            # Useful to identify if a region in the screen is particularly faulty.
+            # Can help spot dataset inconsistency (eg sky is always in the same place)
+            # Can also help find calibration artefacts ?
+            metric_per_px = df.groupby(by=["x", "y"]).mean()
+            histograms[name]["mean_diff_per_px"] = error_map(metric_per_px["absdiff"])
+            histograms[name]["mean_log_diff_per_px"] = error_map(metric_per_px["logdiff"])
+
+            # Depth error wrt pixelwise distance to FPV. For SFM, the closer we are to FPV,
+            # the harder it is to deduce depth. But at the same time, the more
+            # useful depth becomes, because it indicates distances of obstacles where
+            # we are headed to.
+            # Note : if fpv is too far, it means it is not on the image
+            # And thus this metric is not really interesting.
+            histograms[name]["quantiles_per_fpv"] = group_quantiles(
+                df[df["fpv_dist"] < 1000], "fpv_dist", ["absdiff", "abslogdiff"]
+            )
+            histograms[name]["quantiles_per_gt"] = group_quantiles(df, "GT", ["absdiff", "abslogdiff"])
+            histograms[name]["quantiles_per_estimation"] = group_quantiles(df, "estim", ["absdiff", "abslogdiff"])
+
+        # PLOTTING
+        colors = plt.rcParams['axes.prop_cycle'].by_key()['color']
+        # First plot, general insight for dataset
+        fig1, axes = plt.subplots(2, 1, sharex=True)
+        GT_distrib = None
+        for name, df in dataframes.items():
+            if GT_distrib is None:
+                GT_distrib = np.histogram(df["GT"], bins=100)
+                plot_distribution(*GT_distrib, axes[0], label="groundtruth depth")
+            estim_distrib = np.histogram(df["estim"], bins=100)
+            plot_distribution(*estim_distrib, axes[1], label=name)
+        axes[0].set_title("Ground Truth distribution")
+        axes[0].legend()
+        axes[1].set_title("depth estimation distribution from {}".format(name))
+        axes[1].legend()
+        if args.output_figures is not None:
+            fig1.savefig(args.output_figures / "depth_distrib.{}".format(savefig_ext))
+
+        # Second plot, GT-wise difference one figure per algorithm
+        for name, h in histograms.items():
+            fig2, axes = plt.subplots(1, 2, sharey=True)
+            for v in h["estim_per_GT"].values():
+                plot_distribution(
+                    *v["normal"], axes[0], label="$GT \\in [{:.1f},{:.1f}]$".format(*v["bins"])
+                )
+                plot_distribution(
+                    *v["log_normal"],
+                    axes[1],
+                    label="$GT \\in [{:.1f},{:.1f}]$".format(*v["bins"]),
+                    log_bins=True
+                )
+            axes[0].legend()
+            axes[0].set_title("GT - estimation difference")
+            axes[1].legend()
+            axes[1].set_title("logt GT - log estimation difference")
+            fig2.tight_layout()
+            if args.output_figures is not None:
+                fig2.savefig(args.output_figures / "GTwise_depth_diff_{}.{}".format(name, savefig_ext))
+
+        # Third plot, global diff histogram
+        fig, axes = plt.subplots(2, 1)
+        for name, h in histograms.items():
+            plot_distribution(*h["global_diff"], axes[0], name)
+            plot_distribution(*h["global_log_diff"], axes[1], name, log_bins=True)
+        axes[1].set_title("Global log difference distribution from GT")
+        axes[0].set_title("Global difference distribution from GT")
+        axes[1].legend()
+        axes[0].legend()
+        plt.tight_layout()
+        if args.output_figures is not None:
+            fig.savefig(args.output_figures / "global_depth_diff.{}".format(savefig_ext))
+
+        def plot_quartile(axes, color, algo_name, df):
+            """Plot median and interquartile band of absdiff (axes[0]) and abslogdiff (axes[1])."""
+            index = df.index
+            diff = df["absdiff"]
+            logdiff = df["abslogdiff"]
+            axes[0].fill_between(
+                index, diff[0.25], diff[0.75], color=color, alpha=0.1
+            )
+            axes[0].plot(diff[0.5].index, diff[0.5], color=color, label=algo_name)
+            axes[1].fill_between(
+                index, logdiff[0.25], logdiff[0.75], color=color, alpha=0.1
+            )
+            axes[1].plot(logdiff[0.5], color=color, label=algo_name)
+            axes[0].legend()
+            axes[1].legend()
+
+        # Fourth plot, error wrt distance to fpv
+        fig, axes = plt.subplots(2, 1, sharex=True)
+        for c, (name, h) in zip(colors, histograms.items()):
+            plot_quartile(axes, c, name, h["quantiles_per_fpv"])
+        axes[0].set_title("Error wrt to distance to fpv (in px)")
+        axes[1].set_title("Log error wrt to distance to fpv (in px)")
+        axes[1].set_yscale('log')
+        axes[1].set_xlabel("Distance to flight path vector (in px)")
+        plt.tight_layout()
+        if args.output_figures is not None:
+            fig.savefig(args.output_figures / "fpv_error_quantiles.{}".format(savefig_ext))
+
+        # Fifth plot, another way of plotting GT-wise error:
+        # For each GT depth, we show 3 points : median, and 50% confidence interval (2 points)
+        # We have less info than the full histogram but we can show more GT values
+        fig, axes = plt.subplots(2, 1, sharex=True)
+        for c, (name, h) in zip(colors, histograms.items()):
+            plot_quartile(axes, c, name, h["quantiles_per_gt"])
+        axes[0].set_title("Error wrt to groundtruth depth")
+        axes[1].set_title("Log error wrt to groundtruth depth")
+        axes[1].set_yscale('log')
+        axes[1].set_xlabel("Groundtruth depth (in meters)")
+        plt.tight_layout()
+        if args.output_figures is not None:
+            fig.savefig(args.output_figures / "gt_error_quantiles.{}".format(savefig_ext))
+
+        # Sixth plot, error with respect to estimated depth
+        fig, axes = plt.subplots(2, 1, sharex=True)
+        for c, (name, h) in zip(colors, histograms.items()):
+            plot_quartile(axes, c, name, h["quantiles_per_estimation"])
+        axes[0].set_title("Error wrt to estimated depth")
+        axes[1].set_title("Log error wrt to estimated depth")
+        axes[1].set_yscale('log')
+        axes[1].set_xlabel("Estimated depth (in meters)")
+        plt.tight_layout()
+        if args.output_figures is not None:
+            fig.savefig(args.output_figures / "est_error_quantiles.{}".format(savefig_ext))
+
+        # Last plot, pixelwise error
+        for name, h in histograms.items():
+            fig, axes = plt.subplots(2, 1)
+            pl = axes[0].imshow(h["mean_diff_per_px"].T)
+            axes[0].set_title("Mean error for each pixel")
+            divider = make_axes_locatable(axes[0])
+            cax = divider.append_axes("right", size="5%", pad=0.05)
+            cbar = fig.colorbar(pl, cax=cax)
+            cbar.ax.tick_params(axis="y", direction="in")
+            pl = axes[1].imshow(h["mean_log_diff_per_px"].T)
+            axes[1].set_title("Mean Log error for each pixel")
+            divider = make_axes_locatable(axes[1])
+            cax = divider.append_axes("right", size="5%", pad=0.05)
+            cbar = fig.colorbar(pl, cax=cax)
+            cbar.ax.tick_params(axis="y", direction="in")
+            plt.tight_layout()
+            if args.output_figures is not None:
+                fig.savefig(args.output_figures / "pixel_error_map_{}.{}".format(name, savefig_ext))
+        if args.output_figures is None:
+            plt.show()
+
+
+if __name__ == "__main__":
+    main()  # only run the evaluation when executed as a script, not on import
diff --git a/evaluation_toolkit/evaluation_toolkit/inference_toolkit.py b/evaluation_toolkit/evaluation_toolkit/inference_toolkit.py
new file mode 100644
index 0000000..831fc13
--- /dev/null
+++ b/evaluation_toolkit/evaluation_toolkit/inference_toolkit.py
@@ -0,0 +1,328 @@
+import numpy as np
+from path import Path
+from imageio import imread
+import time
+from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
+from scipy.spatial.transform import Rotation
+from tqdm import tqdm
+
+
+class Timer:
+    """
+    Pausable stopwatch accumulating the time spent between start and stop calls.
+    Pausing it is useful to measure algorithm inference time only,
+    without measuring the time spent retrieving the wanted images
+    """
+    def __init__(self):
+        self._elapsed_time = 0
+        self._start_time = None
+
+    def running(self):
+        return self._start_time is not None
+
+    def start(self):
+        """Resume timing.
+
+        Does nothing when the timer is already running."""
+        if not self.running():
+            self._start_time = time.perf_counter()
+
+    def stop(self):
+        """Pause timing and add the time run since the last start to the total.
+
+        Does nothing when the timer is not running."""
+        if self.running():
+            self._elapsed_time += time.perf_counter() - self._start_time
+            self._start_time = None
+
+    def get_elapsed(self):
+        return self._elapsed_time
+
+    def reset(self):
+        self._start_time, self._elapsed_time = None, 0
+
+
+class inferenceFramework(object):
+    """Inference Framework simulating navigation conditions for depth algorithms
+    on a dataset created by RDC. It also measures your inference time
+    and records your estimated depths.
+    The framework is iterable, and each iteration gives an inferenceSample object from which
+    you can get the images to compute depth on.
+
+    Attributes:
+        root (Path): Root directory where the Final output of RDC is stored.
+
+        max_shift (int): Max number of frames the algorithm is allowed to search in the past. If the algorithm
+        eg. wants a frame that was at a particular distance from the last frame, with a barely moving camera,
+        the frame can only be as old as {max_shift} frames before, even if the movement is then too small.
+
+        estimated_depth_maps (dict): Dictionary of estimated depth maps, as given to finish_frame.
+        Key is the path of the image on which depth was estimated.
+
+        inference_time (List): List of time spent by your algorithm for inference.
+        Used at the end of the evaluation to compute the mean inference time
+
+        frame_transform (function): function which will be used to transform images
+        before returning them to the algorithm. The function takes a numpy array as
+        an argument and can return anything your algorithm wants, eg. a pytorch tensor.
+    """
+    def __init__(self, root, test_files, max_shift=50, frame_transform=None):
+        self.root = Path(root)
+        self.test_files = test_files
+        self.max_shift = max_shift
+        self.frame_transform = frame_transform
+        self.estimated_depth_maps = {}
+        self.inference_time = []
+
+    def __getitem__(self, i):
+        """Build the i-th sample and start its timer, so that inference time is measured from now.
+
+        Args:
+            i (int): Position of the sample in the test_files list, which has been created with RDC
+
+        Returns:
+            inferenceSample: Object to compute depth
+        """
+        self.i = i
+        sample = inferenceSample(self.root,
+                                 self.test_files[i],
+                                 self.max_shift,
+                                 Timer(), self.frame_transform)
+        self.current_sample = sample
+        sample.timer.start()
+        return sample
+
+    def finish_frame(self, estimated_depth):
+        """Finish Frame routine: call this method each time your algorithm has finished
+        the depth inference. It stops the timer of the current sample and stores its elapsed
+        time, so that a mean inference time can be computed at the end of the evaluation.
+
+        Args:
+            estimated_depth (np.array): The output of your depth algorithm. It is stored in
+            a dict under the file of the current sample, to be saved later by finalize.
+
+        Returns:
+            float: time elapsed for inference for this sample
+        """
+        timer = self.current_sample.timer
+        timer.stop()
+        self.inference_time.append(timer.get_elapsed())
+        self.estimated_depth_maps[self.current_sample.file] = estimated_depth
+        return self.inference_time[-1]
+
+    def finalize(self, output_path=None):
+        """Finalize: call this method at the end of the whole evaluation,
+        when there is no sample left to estimate depth on.
+
+        Args:
+            output_path (Path, optional): Where to save all the estimated depths. They are
+            written with np.savez, one array per image path.
+
+        Returns:
+            (float, dict): The mean inference time and the dictionary of computed depth maps
+        """
+        save_requested = output_path is not None
+        if save_requested:
+            np.savez(output_path, **self.estimated_depth_maps)
+        return np.mean(self.inference_time), self.estimated_depth_maps
+
+    def __len__(self):
+        return len(self.test_files)
+
+
+class inferenceSample(object):
+    """Inference Sample class. Is used to get a particular frame with displacement constraints
+    For example, you can take the last frame (of which you need to compute the depth map),
+    and then want the frame that was 0.3 meters from the last one to ensure a sufficient parallax
+
+    Attributes:
+        root (Path): Same as inferenceFramework. Root directory where the Final output of RDC is stored.
+
+        file (Path): image path of image of which we want to estimate depth.
+
+        frame_transform (function) : Same as inferenceFramework. function used to transform loaded image
+        into the data format of your choice.
+
+        timer (Timer): timer used to measure time spent computing depth. All the frame gathering and transformation
+        are not taken into account in order to only measure inference time.
+
+        valid_frames (List of Path): Ordered list of frame paths representing the frame sequence that is going
+        to be used to get the optimal frame pair/set for the algorithm you want to evaluate.
+        The order is descending: last frame is first and oldest frames are last.
+
+        poses (np.array): Array of all the poses of the valid_frames list in the R,T format (3x4 matrix).
+        They are computed relative to the last frame, and as such, first pose is identity
+
+        rotation_angles (1D np.array): computed from poses, the angle magnitude between last frame and any given frame.
+        This is useful when you don't want rotation to be too large.
+
+        displacements (1D np.array): computed from poses, displacement magnitude between last frame and any given frame.
+        Useful when you don't want frames to be too close to each other.
+
+        intrinsics (np.array): Intrinsics for each frame of valid_frames, stored in 3x3 matrices.
+    """
+    def __init__(self, root, file, max_shift, timer, frame_transform=None):
+        self.root = root
+        self.file = file
+        self.frame_transform = frame_transform
+        self.timer = timer
+        full_filepath = self.root / file
+        scene = full_filepath.parent
+        # Get all frames in the scene folder. Normally, there should be more than "max_shift" frames.
+        scene_files = sorted(scene.files("*.jpg"))
+        poses = np.genfromtxt(scene / "poses.txt").reshape((-1, 3, 4))
+        sample_id = scene_files.index(full_filepath)
+        assert(sample_id >= max_shift)
+        start_id = sample_id - max_shift
+        # Get all frames between start_id (oldest frame) and sample_id, ie. max_shift + 1 frames.
+        # Revert the list so that oldest frames are in the end, like in a buffer
+        self.valid_frames = scene_files[start_id:sample_id + 1][::-1]
+        # Flip_ud is equivalent to reverting the row and thus the same as [::-1]
+        valid_poses = np.flipud(poses[start_id:sample_id + 1])
+        # All poses in the sequence should be valid
+        assert not np.isnan(valid_poses.sum())
+        # Change the pose array so that instead of 3x4 matrices, we have 4x4 matrices, which we can invert
+        last_line = np.broadcast_to(np.array([0, 0, 0, 1]), (valid_poses.shape[0], 1, 4))
+        valid_poses_full = np.concatenate([valid_poses, last_line], axis=1)
+        self.poses = (np.linalg.inv(valid_poses_full[0]) @ valid_poses_full)[:, :3]
+        R = self.poses[:, :3, :3]
+        self.rotation_angles = Rotation.from_matrix(R).magnitude()
+        self.displacements = np.linalg.norm(self.poses[:, :, -1], axis=-1)
+
+        # Case 1 for intrinsics : Zoom level never changed and thus there's only one intrinsics
+        # matrix for the whole video, stored in intrinsics.txt, repeated for every valid frame. Most usual case
+        # Case 2 : Each frame has its own intrinsics file <name>_intrinsics.txt
+        # Case 2 is only here for later compatibility, but it has not been tested thoroughly
+        if (scene / "intrinsics.txt").isfile():
+            self.intrinsics = np.stack([np.genfromtxt(scene / "intrinsics.txt")] * len(self.valid_frames))
+        else:
+            intrinsics_files = [f.stripext() + "_intrinsics.txt" for f in self.valid_frames]
+            self.intrinsics = np.stack([np.genfromtxt(i) for i in intrinsics_files])
+
+    def timer_decorator(func, *args, **kwargs):
+        """
+        Decorator used to pause the timer and only restart it when returning the result.
+        This is used to not penalize the inference algorithm when frame retrieving is slow,
+        because in real conditions, it's possible you get the wanted frames immediately instead
+        of searching for them in the memory.
+        """
+        def wrapper(self, *args, **kwargs):
+            if self.timer.running():
+                self.timer.stop()
+                res = func(self, *args, **kwargs)
+                self.timer.start()
+            else:
+                res = func(self, *args, **kwargs)
+            return res
+        return wrapper
+
+    @timer_decorator
+    def get_frame(self, shift=0):
+        """Basic function to get frame within a fixed shift. When used without parameters, it returns
+        the sample frame.
+
+        Args:
+            shift (int, optional): Position relative to sample frame of the frame we want to get.
+            Defaults to 0.
+
+        Returns a tuple of 3:
+            [Unknown type]: Output of the frame_transform function, used on the desired frame, loaded in a np array
+            np.array: 3x3 intrinsics matrix of returned frame
+            np.array: 3x4 pose matrix of returned frame
+        """
+        file = self.valid_frames[shift]
+        img = imread(file)
+        if self.frame_transform is not None:
+            img = self.frame_transform(img)
+        return img, self.intrinsics[shift], self.poses[shift]
+
+    @timer_decorator
+    def get_previous_frame(self, shift=1, displacement=None, max_rot=1):
+        """More advanced function, to get a frame within shift, displacement and rotation constraints. Timer is paused when this
+        function is running.
+
+        Args:
+            shift (int, optional): As above. Position relative to sample frame of the frame we want to get.
+            Defaults to 1.
+
+            displacement (Float, optional): Desired displacement (in meters) between sample frame and
+            the frame we want to get. This parameter overwrite the shift parameter. Defaults to None.
+
+            max_rot (int, optional): Maximum Rotation, in radians. The function cannot return a frame
+            with a higher rotation than max_rot. It assumes rotation is growing with time
+            (only true for the first frames). The returned frame is the oldest one, within the wanted
+            shift, whose rotation is below this threshold. Defaults to 1.
+
+        Returns a tuple of 3:
+            [Unknown type]: Output of the frame_transform function,
+            used on the frame that best represent the different constrains.
+            np.array: 3x3 intrinsics matrix of returned frame
+            np.array: 3x4 pose matrix of returned frame
+        """
+        if displacement is not None:
+            shift = max(1, np.abs(self.displacements - displacement).argmin())
+        rot_valid = self.rotation_angles < max_rot
+        assert rot_valid[1:shift + 1].any(), "Rotation is always higher than {}".format(max_rot)
+        # Highest shift in [0, shift] with rotation below max_rot; indices count from the sample frame
+        final_shift = np.where(rot_valid[:shift + 1])[0][-1]
+        return self.get_frame(final_shift)
+
+    @timer_decorator
+    def get_previous_frames(self, shifts=(1,), displacements=None, max_rot=1):
+        """Helper function to get multiple frames at the same time. with the previous function.
+
+        Args:
+            shifts (List): list of wanted shifts
+            displacements (List): List of wanted displacements, overwrite shifts
+            max_rot (int, optional): Maximum Rotation, see previous function
+
+        Returns an iterator (zip) over 3 tuples:
+            Tuple: Outputs of the frame_transform function for each desired frame
+            Tuple: 3x3 intrinsics matrices of returned frames
+            Tuple: 3x4 pose matrices of returned frames
+        """
+        if displacements is not None:
+            frames = zip(*[self.get_previous_frame(displacement=d, max_rot=max_rot) for d in displacements])
+        else:
+            frames = zip(*[self.get_previous_frame(shift=s, max_rot=max_rot) for s in shifts])
+        return frames
+
+
+def inference_toolkit_example():
+    """Example usage: run a mock depth model on every sample of an evaluation list and time it."""
+    parser = ArgumentParser(description='Example usage of Inference toolkit',
+                            formatter_class=ArgumentDefaultsHelpFormatter)
+    parser.add_argument('--dataset_root', metavar='DIR', type=Path)
+    parser.add_argument('--depth_output', metavar='FILE', type=Path,
+                        help='where to store the estimated depth maps, must be a npz file')
+    parser.add_argument('--evaluation_list_path', metavar='PATH', type=Path,
+                        help='File with list of images to test for depth evaluation')
+    parser.add_argument('--scale-invariant', action='store_true',
+                        help='If selected, will rescale depth map with ratio of medians')
+    args = parser.parse_args()
+
+    with open(args.evaluation_list_path) as f:
+        evaluation_list = [line.rstrip("\n") for line in f]
+
+    def my_model(frame, previous, pose):
+        # Mock up function that uses two frames and translation magnitude
+        # Replace it with your algorithm, eg. DepthNet model
+        return np.linalg.norm(pose[:, -1]) * np.linalg.norm(frame - previous, axis=-1)
+
+    # This is our transform function. It moves the last axis first, converts the uint8 array
+    # into a float array, divides it by 255 to have values in [0,1] and adds the batch dimension
+    def my_transform(img):
+        return img.transpose(2, 0, 1).astype(np.float32)[None] / 255
+
+    engine = inferenceFramework(args.dataset_root, evaluation_list, frame_transform=my_transform)
+    for sample in tqdm(engine):
+        latest_frame, latest_intrinsics, _ = sample.get_frame()
+        previous_frame, previous_intrinsics, previous_pose = sample.get_previous_frame(displacement=0.3)
+        engine.finish_frame(my_model(latest_frame, previous_frame, previous_pose))
+
+    mean_time, _ = engine.finalize(args.depth_output)
+    print("Mean time per sample : {:.2f}us".format(1e6 * mean_time))
+
+
+if __name__ == '__main__':
+    inference_toolkit_example()  # only run the example when executed as a script, not on import
diff --git a/evaluation_toolkit/setup.py b/evaluation_toolkit/setup.py
new file mode 100644
index 0000000..37a2de1
--- /dev/null
+++ b/evaluation_toolkit/setup.py
@@ -0,0 +1,33 @@
+from setuptools import setup
+
+with open("README.md", "r", encoding="utf-8") as fh:  # explicit encoding: do not depend on the platform default
+    long_description = fh.read()
+
+setup(name='inference toolkit',
+      license='MIT',
+      author='Clément Pinard',
+      author_email='clempinard@gmail.com',
+      description='Inference and evaluation routines to test on a dataset constructed with validation set constructor',
+      long_description=long_description,
+      long_description_content_type="text/markdown",
+      packages=["evaluation_toolkit"],
+      entry_points={
+          'console_scripts': [
+              'depth_evaluation = evaluation_toolkit.depth_evaluation:main'
+          ]
+      },
+      install_requires=[
+          'numpy',
+          'pandas',
+          'path',
+          'imageio',
+          'scikit-image',
+          'scipy',
+          'tqdm',
+          'matplotlib'  # used by depth_evaluation for colormaps and figures
+      ],
+      classifiers=[
+          "Programming Language :: Python :: 3",
+          "License :: OSI Approved :: MIT License",
+          "Intended Audience :: Science/Research"
+      ])