9 changes: 3 additions & 6 deletions .devcontainer/devcontainer.json
@@ -1,10 +1,7 @@
{"image": "mcr.microsoft.com/devcontainers/base:ubuntu",
{"image": "mcr.microsoft.com/devcontainers/python:3.13",
"features": {
"ghcr.io/rocker-org/devcontainer-features/quarto-cli":
{"installChromium": true, "installTinyTex": true},
"ghcr.io/rocker-org/devcontainer-features/apt-packages:1":
{"packages": "ca-certificates,fonts-liberation,libasound2,libatk-bridge2.0-0,libatk1.0-0,libc6,libcairo2,libcups2,libdbus-1-3,libexpat1,libfontconfig1,libgbm1,libgcc1,libglib2.0-0,libgtk-3-0,libnspr4,libnss3,libpango-1.0-0,libpangocairo-1.0-0,libstdc++6,libx11-6,libx11-xcb1,libxcb1,libxcomposite1,libxcursor1,libxdamage1,libxext6,libxfixes3,libxi6,libxrandr2,libxrender1,libxss1,libxtst6,lsb-release,wget,xdg-utils"},
"ghcr.io/rocker-org/devcontainer-features/miniforge:2": {}
{"installChromium": true, "installTinyTex": true}
},
"postCreateCommand": "conda env create --file environment.yml"
"postCreateCommand": "python -m pip install -r requirements.txt"
}
3 changes: 2 additions & 1 deletion .gitignore
@@ -160,4 +160,5 @@ cython_debug/
#.idea/

.vscode/settings.json
quarto/CompStats_files/
quarto/CompStats_files/
quarto/
2 changes: 1 addition & 1 deletion CompStats/__init__.py
@@ -11,7 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
__version__ = '0.1.13'
__version__ = '0.1.14'
from CompStats.bootstrap import StatisticSamples
from CompStats.measurements import CI, SE, difference_p_value
from CompStats.performance import performance, difference, all_differences, plot_performance, plot_difference
61 changes: 54 additions & 7 deletions CompStats/interface.py
@@ -19,7 +19,7 @@
from CompStats.bootstrap import StatisticSamples
from CompStats.utils import progress_bar
from CompStats import measurements
from CompStats.measurements import SE
from CompStats.measurements import SE, CI
from CompStats.utils import dataframe


@@ -270,7 +270,7 @@ def best(self):
best = data.argmin()
self._best = keys[best]
return self._best

@best.setter
def best(self, value):
self._best = value
@@ -279,7 +279,7 @@ def best(self, value):
def sorting_func(self):
"""Rank systems when multiple performances are used"""
return self._sorting_func

@sorting_func.setter
def sorting_func(self, value):
self._sorting_func = value
@@ -315,7 +315,7 @@ def statistic(self):
else:
self._statistic = dict(data)
return self._statistic

@statistic.setter
def statistic(self, value):
"""statistic setter"""
@@ -346,6 +346,30 @@ def se(self):
return list(output.values())[0]
return output

@property
def ci(self):
"""Confidence interval

>>> from sklearn.svm import LinearSVC
>>> from sklearn.datasets import load_iris
>>> from sklearn.model_selection import train_test_split
>>> from CompStats.interface import Perf
>>> X, y = load_iris(return_X_y=True)
>>> _ = train_test_split(X, y, test_size=0.3)
>>> X_train, X_val, y_train, y_val = _
>>> m = LinearSVC().fit(X_train, y_train)
>>> hy = m.predict(X_val)
>>> perf = Perf(y_val, hy, name='LinearSVC')
>>> perf.ci
(np.float64(0.9333333333333332), np.float64(1.0))
"""

output = CI(self.statistic_samples)
if len(output) == 1:
return list(output.values())[0]
return output

def plot(self, value_name:str=None,
var_name:str='Performance',
alg_legend:str='Algorithm',
@@ -359,6 +383,7 @@
winner_legend:str='Best',
tie_legend:str='Equivalent',
loser_legend:str='Different',
palette:object=None,
**kwargs):
"""plot with seaborn

@@ -403,10 +428,17 @@ def plot(self, value_name:str=None,
ci = lambda x: measurements.CI(x, alpha=CI)
if comparison:
kwargs.update(dict(hue=comp_legend))
if palette is None:
pal = sns.color_palette("Paired")
palette = {winner_legend:pal[1],
tie_legend:pal[3],
loser_legend: pal[5]}
f_grid = sns.catplot(df, x=value_name, errorbar=ci,
y=alg_legend, col=var_name,
kind=kind, linestyle=linestyle,
col_wrap=col_wrap, capsize=capsize, **kwargs)
col_wrap=col_wrap, capsize=capsize,
palette=palette,
**kwargs)
return f_grid

def dataframe(self, comparison:bool=False,
@@ -420,7 +452,22 @@ def dataframe(self, comparison:bool=False,
tie_legend:str='Equivalent',
loser_legend:str='Different',
perf_names:str=None):
"""Dataframe"""
"""Dataframe

>>> from sklearn.svm import LinearSVC
>>> from sklearn.ensemble import RandomForestClassifier
>>> from sklearn.datasets import load_iris
>>> from sklearn.model_selection import train_test_split
>>> from CompStats.interface import Perf
>>> X, y = load_iris(return_X_y=True)
>>> _ = train_test_split(X, y, test_size=0.3)
>>> X_train, X_val, y_train, y_val = _
>>> m = LinearSVC().fit(X_train, y_train)
>>> hy = m.predict(X_val)
>>> ens = RandomForestClassifier().fit(X_train, y_train)
>>> perf = Perf(y_val, hy, forest=ens.predict(X_val))
>>> df = perf.dataframe()
"""
if perf_names is None and isinstance(self.best, np.ndarray):
func_name = self.statistic_func.__name__
perf_names = [f'{func_name}({i})'
@@ -624,7 +671,7 @@ def _delta_best(self):
return self.statistic[self.best]
keys = np.unique(self.best)
statistic = np.array([self.statistic[k]
for k in keys])
for k in keys])
m = {v: k for k, v in enumerate(keys)}
best = np.array([m[x] for x in self.best])
return statistic[best, np.arange(best.shape[0])]
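
A minimal sketch, assuming the iris setup from the doctests above, of how the new `ci` property and the `palette` override in `plot` can be exercised together; the variable names `svc` and `forest` are illustrative assumptions, not part of the change:

```python
# Minimal sketch, assuming the iris setup used in the doctests above;
# `svc` and `forest` are illustrative names, not part of the change.
from sklearn.svm import LinearSVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from CompStats.interface import Perf

X, y = load_iris(return_X_y=True)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.3)
svc = LinearSVC().fit(X_train, y_train)
forest = RandomForestClassifier().fit(X_train, y_train)
perf = Perf(y_val, svc.predict(X_val), forest=forest.predict(X_val))

# With several systems, `ci` returns a dict of (low, high) bootstrap
# intervals, one per system; with a single system it returns the tuple.
print(perf.ci)

# `palette=None` (the default) now falls back to a "Paired"-based mapping
# for the Best/Equivalent/Different hues in comparison plots.
grid = perf.plot()
```
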
6 changes: 3 additions & 3 deletions CompStats/metrics.py
@@ -307,7 +307,7 @@ def macro_f1(y_true, *y_pred, labels=None,
sample_weight=None, zero_division='warn',
num_samples: int=500, n_jobs: int=-1, use_tqdm=True,
**kwargs):
""":py:class:`~CompStats.interface.Perf` with :py:func:`~sklearn.metrics.f1_score` (as :py:attr:`score_func`) with the parameteres needed to compute the macro score. The parameters not described can be found in :py:func:`~sklearn.metrics.macro_f1`
""":py:class:`~CompStats.interface.Perf` with :py:func:`~sklearn.metrics.f1_score` (as :py:attr:`score_func`) with the parameteres needed to compute the macro score. The parameters not described can be found in :py:func:`~sklearn.metrics.f1_score`

:param y_true: True measurement or could be a pandas.DataFrame where column label 'y' corresponds to the true measurement.
:type y_true: numpy.ndarray or pandas.DataFrame
@@ -332,7 +332,7 @@ def macro_recall(y_true, *y_pred, labels=None,
sample_weight=None, zero_division='warn',
num_samples: int=500, n_jobs: int=-1, use_tqdm=True,
**kwargs):
""":py:class:`~CompStats.interface.Perf` with :py:func:`~sklearn.metrics.recall_score` (as :py:attr:`score_func`) with the parameteres needed to compute the macro score. The parameters not described can be found in :py:func:`~sklearn.metrics.macro_recall`
""":py:class:`~CompStats.interface.Perf` with :py:func:`~sklearn.metrics.recall_score` (as :py:attr:`score_func`) with the parameteres needed to compute the macro score. The parameters not described can be found in :py:func:`~sklearn.metrics.recall_score`

:param y_true: True measurement or could be a pandas.DataFrame where column label 'y' corresponds to the true measurement.
:type y_true: numpy.ndarray or pandas.DataFrame
@@ -357,7 +357,7 @@ def macro_precision(y_true, *y_pred, labels=None,
sample_weight=None, zero_division='warn',
num_samples: int=500, n_jobs: int=-1, use_tqdm=True,
**kwargs):
""":py:class:`~CompStats.interface.Perf` with :py:func:`~sklearn.metrics.precision_score` (as :py:attr:`score_func`) with the parameteres needed to compute the macro score. The parameters not described can be found in :py:func:`~sklearn.metrics.macro_precision`
""":py:class:`~CompStats.interface.Perf` with :py:func:`~sklearn.metrics.precision_score` (as :py:attr:`score_func`) with the parameteres needed to compute the macro score. The parameters not described can be found in :py:func:`~sklearn.metrics.precision_score`

:param y_true: True measurement or could be a pandas.DataFrame where column label 'y' corresponds to the true measurement.
:type y_true: numpy.ndarray or pandas.DataFrame
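
Each corrected reference now points back to the scikit-learn scorer the wrapper actually uses. A short sketch, assuming the digits setup used elsewhere in the repository, of calling one of these wrappers:

```python
# Sketch, assuming the digits setup used in the package's other examples;
# macro_f1 wraps sklearn.metrics.f1_score computed with average='macro'.
from sklearn.naive_bayes import GaussianNB
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from CompStats.metrics import macro_f1

X, y = load_digits(return_X_y=True)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.3)
nb = GaussianNB().fit(X_train, y_train)

# Returns a Perf instance; num_samples controls the bootstrap size.
score = macro_f1(y_val, nb.predict(X_val), num_samples=500)
print(score.se)  # bootstrap standard error of the macro f1
```
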
8 changes: 6 additions & 2 deletions CompStats/tests/test_interface.py
@@ -81,7 +81,7 @@ def test_Perf_dataframe():
from CompStats.metrics import f1_score

X, y = load_digits(return_X_y=True)
_ = train_test_split(X, y, test_size=0.3)
_ = train_test_split(X, y, test_size=0.3, random_state=0)
X_train, X_val, y_train, y_val = _
ens = RandomForestClassifier().fit(X_train, y_train)
nb = GaussianNB().fit(X_train, y_train)
@@ -121,6 +121,7 @@ def test_Perf_plot_multi():
f_grid = score.plot()
assert f_grid is not None


def test_Perf_statistic_one():
"""Test Perf statistic one alg"""
from CompStats.metrics import f1_score
@@ -142,6 +143,9 @@ def test_Perf_statistic_one():
assert isinstance(score.statistic, float)
assert isinstance(str(score), str)
assert isinstance(score.se, float)
assert isinstance(score.ci, tuple)
assert len(score.ci) == 2


def test_Perf_best():
"""Test Perf best"""
@@ -191,7 +195,7 @@ def test_difference_best():
score(svm.predict(X_val), name='svm')
diff = score.difference()
assert isinstance(diff.best, str)


def test_difference_str__():
"""Test f1_score"""
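
To exercise the new `ci` assertions locally, a sketch using pytest's Python entry point (equivalent to running `pytest -k test_Perf_statistic_one` from the shell):

```python
# Sketch: run only the test that covers the new `ci` assertions.
import pytest

raise SystemExit(pytest.main([
    "CompStats/tests/test_interface.py",
    "-k", "test_Perf_statistic_one",
]))
```
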
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -2,6 +2,7 @@
name = 'CompStats'
description = 'CompStats implements an evaluation methodology for statistically analyzing competition results'
readme = "README.rst"
license = "Apache-2.0"
dependencies = [
'numpy',
'scikit-learn>=1.3.0',
@@ -17,7 +18,6 @@ classifiers = [
"Intended Audience :: Developers",
"Intended Audience :: Information Technology",
"Intended Audience :: Science/Research",
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
"Programming Language :: Python",
"Topic :: Scientific/Engineering :: Artificial Intelligence",
32 changes: 23 additions & 9 deletions quarto/CompStats.qmd
@@ -18,7 +18,7 @@ execute:
Collaborative competitions have gained popularity in the scientific and technological fields. These competitions involve defining tasks, selecting evaluation scores, and devising result verification methods. In the standard scenario, participants receive a training set and are expected to provide a solution for a held-out dataset kept by organizers. An essential challenge for organizers arises when comparing algorithms' performance, assessing multiple participants, and ranking them. Statistical tools are often used for this purpose; however, traditional statistical methods often fail to capture decisive differences between systems' performance. CompStats implements an evaluation methodology for statistically analyzing competition results. CompStats offers several advantages, including off-the-shelf comparisons with correction mechanisms and the inclusion of confidence intervals.
:::

::: {.card title='Installing using conda'}
::: {.card title='Installing using conda' .flow}

`CompStats` can be installed using the conda package manager with the following instruction.

@@ -27,7 +27,7 @@ conda install --channel conda-forge CompStats
```
:::

::: {.card title='Installing using pip'}
::: {.card title='Installing using pip' .flow}
A more general approach to installing `CompStats` is through pip, as illustrated in the following instruction.

```{sh}
@@ -41,8 +41,12 @@ pip install CompStats

To illustrate the use of `CompStats`, the following snippets show an example. The instructions load the necessary libraries, including the one used to obtain the problem (e.g., digits) and four different classifiers; the last line imports the score used to measure performance and compare the algorithms.

Below the imports is the code that loads the digits problem and splits the dataset into training and validation sets.

::: {.card title="Dataset and libraries" .flow}
```{python}
#| echo: true
#| code-fold: true

from sklearn.svm import LinearSVC
from sklearn.naive_bayes import GaussianNB
@@ -52,42 +56,51 @@ from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.base import clone
from CompStats.metrics import f1_score

X, y = load_digits(return_X_y=True)
_ = train_test_split(X, y, test_size=0.3)
X_train, X_val, y_train, y_val = _
```
:::

The first step is to load the digits problem and split the dataset into training and validation sets. The second step is to estimate the parameters of a linear Support Vector Machine and predict the validation set's classes. The predictions are stored in the variable `hy`.
The first line estimates the parameters of a linear Support Vector Machine, and the second predicts the validation set's classes. The predictions are stored in the variable `hy`.

::: {.card title="Linear SVM" .flow}
```{python}
#| echo: true

X, y = load_digits(return_X_y=True)
_ = train_test_split(X, y, test_size=0.3)
X_train, X_val, y_train, y_val = _
m = LinearSVC().fit(X_train, y_train)
hy = m.predict(X_val)
```
:::

Once the predictions are available, it is time to measure the algorithm's performance, as seen in the following code. It is essential to note that the API of `sklearn.metrics` is followed; the difference is that the function returns an instance whose methods estimate various performance statistics and compare algorithms.

## Column

::: {.card title="Score" .flow}
```{python}
#| echo: true

score = f1_score(y_val, hy, average='macro')
score
```
:::

## Column

Continuing with the example, let us assume that one wants to test another classifier on the same problem, in this case, a random forest, as can be seen in the following two lines. The second line predicts the validation set and adds the predictions to the analysis.

::: {.card title="Random Forest" .flow}
```{python}
#| echo: true

ens = RandomForestClassifier().fit(X_train, y_train)
score(ens.predict(X_val), name='Random Forest')
```
:::

Let us incorporate more predictions, now with a Naive Bayes classifier and a Histogram Gradient Boosting classifier, as seen below.

::: {.card title="Rest of the classifiers" .flow}
```{python}
#| echo: true

@@ -96,4 +109,5 @@ _ = score(nb.predict(X_val), name='Naive Bayes')
hist = HistGradientBoostingClassifier().fit(X_train, y_train)
_ = score(hist.predict(X_val), name='Hist. Grad. Boost. Tree')
score.plot()
```
```
:::
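
Beyond the plot that closes the page, the same analysis can be read numerically; a follow-on sketch, assuming the `score` instance built in the snippets above:

```python
# Follow-on sketch using the `score` Perf instance from the page's snippets.
df = score.dataframe()  # bootstrap performance samples arranged as a table
print(df.head())
print(score.se)  # standard error per system
print(score.ci)  # bootstrap confidence interval per system
```
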
8 changes: 8 additions & 0 deletions requirements.txt
@@ -0,0 +1,8 @@
numpy
scikit-learn
seaborn
jupyter
pyyaml
sphinx
pytest
statsmodels