From 3d4c5a3837a6540a769baf5f4684f0e764ec14a4 Mon Sep 17 00:00:00 2001 From: EC2 Default User Date: Tue, 18 Jul 2023 19:02:41 +0000 Subject: [PATCH 01/18] Added the bbbc-download-plugin:0.1.0-dev1 --- utils/bbbc-download-plugin/.bumpversion. | 0 utils/bbbc-download-plugin/.bumpversion.cfg | 27 + utils/bbbc-download-plugin/README.md | 0 utils/bbbc-download-plugin/VERSION | 1 + utils/bbbc-download-plugin/plugin.json | 40 + utils/bbbc-download-plugin/pyproject.toml | 29 + utils/bbbc-download-plugin/run-plugin.sh | 19 + .../plugins/utils/bbbc_download/BBBC_model.py | 708 ++++++++++++++++++ .../plugins/utils/bbbc_download/__init__.py | 2 + .../plugins/utils/bbbc_download/__main__.py | 100 +++ .../plugins/utils/bbbc_download/download.py | 144 ++++ .../plugins/utils/bbbc_download/mapping.py | 9 + utils/bbbc-download-plugin/tests/__init__.py | 0 13 files changed, 1079 insertions(+) create mode 100644 utils/bbbc-download-plugin/.bumpversion. create mode 100644 utils/bbbc-download-plugin/.bumpversion.cfg create mode 100644 utils/bbbc-download-plugin/README.md create mode 100644 utils/bbbc-download-plugin/VERSION create mode 100644 utils/bbbc-download-plugin/plugin.json create mode 100644 utils/bbbc-download-plugin/pyproject.toml create mode 100644 utils/bbbc-download-plugin/run-plugin.sh create mode 100644 utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/BBBC_model.py create mode 100644 utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/__init__.py create mode 100644 utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/__main__.py create mode 100644 utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/download.py create mode 100644 utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/mapping.py create mode 100644 utils/bbbc-download-plugin/tests/__init__.py diff --git a/utils/bbbc-download-plugin/.bumpversion. b/utils/bbbc-download-plugin/.bumpversion. 
new file mode 100644 index 000000000..e69de29bb diff --git a/utils/bbbc-download-plugin/.bumpversion.cfg b/utils/bbbc-download-plugin/.bumpversion.cfg new file mode 100644 index 000000000..8cc773f0b --- /dev/null +++ b/utils/bbbc-download-plugin/.bumpversion.cfg @@ -0,0 +1,27 @@ +[bumpversion] +current_version = 0.1.0-dev0 +commit = True +tag = False +parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<dev>\d+))? +serialize = + {major}.{minor}.{patch}-{release}{dev} + {major}.{minor}.{patch} + +[bumpversion:part:release] +optional_value = _ +first_value = dev +values = + dev + _ + +[bumpversion:part:dev] + +[bumpversion:file:pyproject.toml] +search = version = "{current_version}" +replace = version = "{new_version}" + +[bumpversion:file:plugin.json] + +[bumpversion:file:VERSION] + +[bumpversion:file:src/polus/plugins/utils/bbbc_download/__init__.py] \ No newline at end of file diff --git a/utils/bbbc-download-plugin/README.md b/utils/bbbc-download-plugin/README.md new file mode 100644 index 000000000..e69de29bb diff --git a/utils/bbbc-download-plugin/VERSION b/utils/bbbc-download-plugin/VERSION new file mode 100644 index 000000000..15a06bec5 --- /dev/null +++ b/utils/bbbc-download-plugin/VERSION @@ -0,0 +1 @@ +0.1.0-dev0 \ No newline at end of file diff --git a/utils/bbbc-download-plugin/plugin.json b/utils/bbbc-download-plugin/plugin.json new file mode 100644 index 000000000..ba0c6ec5b --- /dev/null +++ b/utils/bbbc-download-plugin/plugin.json @@ -0,0 +1,40 @@ +{ + "name": "BBBC Download", + "version": "0.1.0-dev0", + "title": "BBBC Download", + "description": "Downloads the datasets on the BBBC website", + "author": "Saket Prem(saket.prem@axleinfo.com), Matthew", + "institution": "National Center for Advancing Translational Sciences, National Institutes of Health", + "repository": "https://github.com/PolusAI/polus-plugins", + "website": "https://ncats.nih.gov/preclinical/core/informatics", + "citation": "", + "containerId": "polusai/bbbc-download-plugin:0.1.0-dev0", + 
"baseCommand": [ + "python3", + "-m", + "polus.plugins.utils.bbbc_download" + ], + "inputs": [ + { + "name": "name", + "type": "string", + "description": "The name of the datasets to be downloaded(spereate the datasets with a comma. eg: BBBC001,BBBC002,BBBC003 )", + "required": true + } + + ], + "outputs": [ + { + "name": "outDir", + "type": "genericData", + "description": "Output collection" + } + ], + "ui": [ + { + "key": "inputs.name", + "title": "Input name of datasets as string", + "description": "Input the name of the datasets to be downloaded as a string" + } + ] + } \ No newline at end of file diff --git a/utils/bbbc-download-plugin/pyproject.toml b/utils/bbbc-download-plugin/pyproject.toml new file mode 100644 index 000000000..c2b76de04 --- /dev/null +++ b/utils/bbbc-download-plugin/pyproject.toml @@ -0,0 +1,29 @@ +[tool.poetry] +name = "polus-plugins-utils-bbbc-download-plugin" +version = "0.1.0" +description = "" +authors = ["Your Name "] +readme = "README.md" +packages = [{include = "polus", from = "src"}] + +[tool.poetry.dependencies] +python = "^3.9.16" +typer = "^0.9.0" +pyarrow = "11.0.0" +scikit-image = "0.20.0" +vaex = "4.16.0" +bfio = "2.3.1.dev0" +beautifulsoup4 = "4.12.0" +numpy = "1.24.2" +pandas = "1.5.3" +requests = "2.28.2" +pydantic = "1.10.7" +mapping = "^0.1.6" +bump2version = "1.0.1" +mypy = "1.0.1" +tqdm = "^4.65.0" + + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" diff --git a/utils/bbbc-download-plugin/run-plugin.sh b/utils/bbbc-download-plugin/run-plugin.sh new file mode 100644 index 000000000..637544c99 --- /dev/null +++ b/utils/bbbc-download-plugin/run-plugin.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +version=$( dict: + if not values["path"].exists(): + raise ValueError("No metadata") + + return values + + @property + def size(self) -> int: + """Returns the size of the dataset's metadata in bytes.""" + + raw_path = root.joinpath(self.name, "raw/Metadata") + standard_path = root.joinpath(self.name, 
"standard/Metadata") + raw_sum = sum(os.path.getsize(file) for file in raw_path.rglob("*")) + standard_sum = sum(os.path.getsize(file) for file in standard_path.rglob("*")) + + return raw_sum + standard_sum + + +class GroundTruth(pydantic.BaseModel): + """Class that contains information about a dataset's ground truth.""" + + path: Path + name: str + + @pydantic.root_validator() + @classmethod + def valid_data(cls, values: dict) -> dict: + if not values["path"].exists(): + raise ValueError("No ground truth") + + return values + + @property + def size(self) -> int: + """Returns the size of the dataset's ground truth in bytes.""" + + raw_path = root.joinpath(self.name, "raw/Ground Truth") + standard_path = root.joinpath(self.name, "standard/Ground Truth") + raw_sum = sum(os.path.getsize(file) for file in raw_path.rglob("*")) + standard_sum = sum(os.path.getsize(file) for file in standard_path.rglob("*")) + + return raw_sum + standard_sum + + +class Images(pydantic.BaseModel): + """Class that contains information about a dataset's images.""" + + path: Path + name: str + + @pydantic.root_validator() + @classmethod + def valid_data(cls, values: dict) -> dict: + if not values["path"].exists(): + raise ValueError("No images") + + return values + + @property + def size(self) -> int: + """Returns the size of the dataset's images in bytes.""" + + raw_path = root.joinpath(self.name, "raw/Images") + standard_path = root.joinpath(self.name, "standard/Images") + raw_sum = sum(os.path.getsize(file) for file in raw_path.rglob("*")) + standard_sum = sum(os.path.getsize(file) for file in standard_path.rglob("*")) + + return raw_sum + standard_sum + + +class BBBCDataset(pydantic.BaseModel): + """Class that models a BBBC dataset. + + Attributes: + name: The name of the dataset. 
+ images: An Images object that contains information about the dataset's images + ground_truth: A GroundTruth object that contains information about the dataset's ground truth + metadata: A Metadata object that contains information about the dataset's metadata + """ + + name: str + images: Optional[Images] = None + ground_truth: Optional[GroundTruth] = None + metadata: Optional[Metadata] = None + + @pydantic.validator("name") + @classmethod + def valid_name(cls, v: str) -> str: + """Validates the name of the dataset. + + Args: + v: The name of the dataset to be downloaded. + + Returns: + The name provided if validation is successful. + """ + + if v not in list(BBBC.combined_table["Accession"]): + raise ValueError( + v + + " is an invalid dataset name. Valid dataset names belong to an existing BBBC dataset." + ) + + return v + + @classmethod + def create_dataset(cls, name: str) -> Union["BBBCDataset", None]: + """Creates a dataset. + + Args: + name: The name of the dataset to be created. + + Returns: + A new instance of a Dataset object or None if the validation fails. + """ + + try: + if name in exception_sets: + dataset_class = globals()[name] + + return dataset_class(name=name) + else: + return BBBCDataset(name=name) + except ValueError as e: + print(e) + + return None + + @property + def info(self) -> Dict[str, Union[str, np.int64]]: + """Provides information about the dataset such as its description and total images. + + Returns: + A dictionary that contains information about the dataset. 
+ """ + + table = BBBC.combined_table + + row = table.loc[table["Accession"] == self.name] + + info = { + "Description": row["Description"].values[0], + "Mode": row["Mode"].values[0], + "Fields per sample": row["Fields per sample"].values[0], + "Total Fields": row["Total Fields"].values[0], + "Total Images": row["Total Images"].values[0], + "Ground truth types": self._ground_truth_types(), + } + + return info + + @property + def size(self) -> int: + """Returns the size of the dataset in bytes.""" + + dataset_path = root.joinpath(self.name) + + return sum(os.path.getsize(file) for file in dataset_path.rglob("*")) + + def _ground_truth_types(self) -> List[str]: + """Provides the types of ground truth used by the dataset. + + Returns: + A list of strings where each string is a type of ground truth. + """ + + res = requests.get("https://bbbc.broadinstitute.org/image_sets") + soup = bs4.BeautifulSoup(res.content, "html.parser") + types = [] + + for t in soup.find_all("table")[:3]: + for row in t.find_all("tr"): + cols = row.find_all("td") + + if len(cols) > 0 and cols[0].text == self.name: + for link in cols[6].find_all("a"): + types.append(link.attrs["href"].split("#")[-1]) + + return types + + def _init_data(self,download_path:Path) -> None: + """Initializes the images, ground_truth, and metadata attributes of the dataset.""" + download_path=download_path.joinpath("BBBC") + + images_path = download_path.joinpath(self.name, "raw/Images") + truth_path = download_path.joinpath(self.name, "raw/Ground Truth") + meta_path = download_path.joinpath(self.name, "raw/Metadata") + + try: + self.images = Images(path=images_path, name=self.name) + except ValueError: + pass + + try: + self.ground_truth = GroundTruth(path=truth_path, name=self.name) + except ValueError: + pass + + try: + self.metadata = Metadata(path=meta_path, name=self.name) + except ValueError: + pass + + if self.images == None: + print(self.name + " has no images.") + + if self.ground_truth == None and 
self.metadata == None: + print(self.name + " has no ground truth or metadata.") + + return + + def raw(self,download_path: Path) -> None: + """Download the dataset's raw data.""" + + download(self.name,download_path) + self._init_data(download_path) + + return + + def standard(self, extension: str) -> None: + """Standardize the dataset's raw data. + + Args: + extension: The extension of the standard image. Can be ".ome.tif" or ".ome.zarr". + """ + + if extension not in [".ome.tif", ".ome.zarr"]: + print( + f"ERROR: {extension} is an invalid extension for standardization. Must be .ome.tif or .ome.zarr." + ) + return + + if self.images == None: + print( + f"ERROR: Images for {self.name} have not been downloaded so they cannot be standardized." + ) + return + + standard_folder = Path(root, self.name, "standard") + arrow_file = Path("arrow", self.name + ".arrow") + arrow_table = pq.read_table(arrow_file) + df = vaex.from_arrow_table(arrow_table) + + if not standard_folder.exists(): + standard_folder.mkdir(parents=True, exist_ok=True) + + for i, row in df.iterrows(): + func = globals()[self.name + "_mapping"] + out_file = func(row, extension) + raw_image = io.imread(row["Path"]) + num_channels = 1 if len(raw_image.shape) == 2 else raw_image.shape[2] + + if row["Image Type"] == "Intensity": + sub_folder = "Images" + elif row["Image Type"] == "Ground Truth": + sub_folder = "Ground Truth" + elif row["Image Type"] == "Metadata": + sub_folder = "Metadata" + else: + print("ERROR: Invalid value for attribute Image Type") + return + + save_path = standard_folder.joinpath(sub_folder) + + if not save_path.exists(): + save_path.mkdir(parents=True, exist_ok=True) + + with BioWriter(save_path.joinpath(out_file)) as bw: + bw.X, bw.Y, bw.Z, bw.C = ( + raw_image.shape[1], + raw_image.shape[0], + num_channels, + 1, + ) + bw.dtype = raw_image.dtype + bw[:] = raw_image + + print(f"Finished standardizing {self.name}") + + return + + +class BBBC019(BBBCDataset): + def 
raw(self,download_path:Path) -> None: + download(self.name) + download_path=download_path.joinpath("BBBC") + + # Separate images from ground truth + save_location = download_path.joinpath("BBBC019") + images_folder = save_location.joinpath("raw/Images") + truth_folder = save_location.joinpath("raw/Ground Truth") + + for set in [ + x + for x in images_folder.iterdir() + if x.name not in [".DS_Store", "__MACOSX"] + ]: + for obj in [ + x + for x in set.iterdir() + if x.name not in ["images", "measures.mat", "desktop.ini", ".DS_Store"] + ]: + src = images_folder.joinpath(set.name, obj.name) + dst = truth_folder.joinpath(set.name, obj.name) + + if dst.exists(): + try: + shutil.rmtree(src) + except NotADirectoryError as e: + print(e) + else: + shutil.move(src, dst) + + self._init_data(download_path) + + return + + +class BBBC029(BBBCDataset): + def raw(self,download_path:Path) -> None: + print("Started downloading BBBC029") + download_path=download_path.joinpath("BBBC") + + save_location = download_path.joinpath("BBBC029", "raw") + + if not save_location.exists(): + save_location.mkdir(parents=True, exist_ok=True) + + file_path = save_location.joinpath("Images") + get_url( + "https://data.broadinstitute.org/bbbc/BBBC029/images.zip", + file_path, + "BBBC029", + ) + + file_path = save_location.joinpath("Ground Truth") + get_url( + "https://data.broadinstitute.org/bbbc/BBBC029/ground_truth.zip", + file_path, + "BBBC029", + ) + + print("BBBC029 has finished downloading") + + self._init_data(download_path) + + return + + +class BBBC041(BBBCDataset): + def raw(self,download_path:Path) -> None: + download(self.name) + download_path=download_path.joinpath("BBBC") + + # Separate images from ground truth + save_location = download_path.joinpath("BBBC041") + file_names = ["test.json", "training.json"] + + if not save_location.joinpath("raw/Ground Truth").exists(): + save_location.joinpath("raw/Ground Truth").mkdir( + parents=True, exist_ok=True + ) + + for file in file_names: + src 
= save_location.joinpath("raw/Images/malaria", file) + dst = save_location.joinpath("raw/Ground Truth") + + if dst.joinpath(file).exists(): + os.remove(src) + else: + shutil.move(src, dst) + + self._init_data(download_path) + + return + + +class BBBC042(BBBCDataset): + def raw(self,download_path:Path) -> None: + print("Started downloading BBBC042") + download_path=download_path.joinpath("BBBC") + + save_location = download_path.joinpath("BBBC042", "raw") + + if not save_location.exists(): + save_location.mkdir(parents=True, exist_ok=True) + + file_path = save_location.joinpath("Images") + get_url( + "https://data.broadinstitute.org/bbbc/BBBC042/images.zip", + file_path, + "BBBC042", + ) + + file_path = save_location.joinpath("Ground Truth") + get_url( + "https://data.broadinstitute.org/bbbc/BBBC042/positions.zip", + file_path, + "BBBC042", + ) + + print("BBBC042 has finished downloading") + + self._init_data(download_path) + + return + + +class BBBC046(BBBCDataset): + def raw(self, download_path: Path) -> None: + download(self.name) + download_path=download_path.joinpath("BBBC") + + # Separate images from ground truth + try: + save_location = download_path.joinpath(self.name) + images_folder = save_location.joinpath("raw/Images") + truth_folder = save_location.joinpath("raw/Ground Truth") + + # Extract these files because they do not extract automatically + for file in ["OE-ID350-AR-1.zip", "OE-ID350-AR-2.zip", "OE-ID350-AR-4.zip", "OE-ID350-AR-8.zip"]: + with ZipFile(images_folder.joinpath(file), "r") as zfile: + zfile.extractall(images_folder) + + os.remove(images_folder.joinpath(file)) + + if not truth_folder.exists(): + truth_folder.mkdir(parents=True, exist_ok=True) + + # Iterate over folders in the images folder + for folder in images_folder.iterdir(): + if not truth_folder.joinpath(folder.name).exists(): + truth_folder.joinpath(folder.name).mkdir( + parents=True, exist_ok=True + ) + + # Move ground truth data to Ground Truth folder + for obj in 
folder.iterdir(): + if obj.name.endswith((".txt", ".tif")): + src = obj + dst = truth_folder.joinpath(folder.name, obj.name) + + if dst.exists(): + os.remove(src) + else: + shutil.move(src, dst) + + self._init_data(download_path) + except Exception as e: + print( + "BBBC046 downloaded successfully but an error occurred when organizing raw data." + ) + print("ERROR: " + str(e)) + + return + + +class BBBC054(BBBCDataset): + def raw(self, download_path:Path) -> None: + download(self.name) + download_path=download_path.joinpath("BBBC") + + # Separate images from ground truth + save_location = download_path.joinpath(self.name) + src = save_location.joinpath("raw/Images", "Replicate1annotation.csv") + dst = save_location.joinpath("raw/Ground Truth", "Replicate1annotation.csv") + + if not dst.exists(): + dst.mkdir(parents=True, exist_ok=True) + + if dst.exists(): + os.remove(src) + else: + shutil.move(src, dst) + + self._init_data(download_path) + + return + + +class IDAndSegmentation: + """Class that models the Identification and segmentation table on https://bbbc.broadinstitute.org/image_sets. + + Attributes: + name: The name of the table as seen on the BBBC image set webpage + table: The Identification and segmentation table as a pandas DataFrame + """ + + name: str = "Identification and segmentation" + table: pd.DataFrame = tables[0] + + @classmethod + @property + def datasets(cls) -> List[BBBCDataset]: + """Returns a list of all datasets in the table. + + Returns: + A list containing a Dataset object for each dataset in the table. 
+ """ + + return [BBBCDataset.create_dataset(name) for name in cls.table["Accession"]] + + @classmethod + def raw(cls,download_path:Path) -> None: + """Downloads raw data for every dataset in this table""" + + num_workers = max(cpu_count(), 2) + threads = [] + + with ThreadPoolExecutor(max_workers=num_workers) as executor: + for dataset in IDAndSegmentation.datasets: + threads.append(executor.submit(dataset.raw(download_path))) + + for f in tqdm( + as_completed(threads), desc=f"Downloading data", total=len(threads) + ): + f.result() + + +class PhenotypeClassification: + """Class that models the Phenotype classification table on https://bbbc.broadinstitute.org/image_sets. + + Attributes: + name: The name of the table as seen on the BBBC image set webpage + table: The Phenotype classification table as a pandas DataFrame + """ + + name: str = "Phenotype classification" + table: pd.DataFrame = tables[1] + + @classmethod + @property + def datasets(cls) -> List[BBBCDataset]: + """Returns a list of all datasets in the table. + + Returns: + A list containing a Dataset object for each dataset in the table. + """ + + return [BBBCDataset.create_dataset(name) for name in cls.table["Accession"]] + + @classmethod + def raw(cls,download_path:Path) -> None: + """Downloads raw data for every dataset in this table""" + + num_workers = max(cpu_count(), 2) + threads = [] + + with ThreadPoolExecutor(max_workers=num_workers) as executor: + for dataset in PhenotypeClassification.datasets: + threads.append(executor.submit(dataset.raw(download_path))) + + for f in tqdm( + as_completed(threads), desc=f"Downloading data", total=len(threads) + ): + f.result() + + +class ImageBasedProfiling: + """Class that models the Image-based Profiling table on https://bbbc.broadinstitute.org/image_sets. 
+ + Attributes: + name: The name of the table as seen on the BBBC image set webpage + table: The Image-based Profiling table as a pandas DataFrame + """ + + name: str = "Image-based Profiling" + table: pd.DataFrame = tables[2] + + @classmethod + @property + def datasets(cls) -> List[BBBCDataset]: + """Returns a list of all datasets in the table. + + Returns: + A list containing a Dataset object for each dataset in the table. + """ + + return [BBBCDataset.create_dataset(name) for name in cls.table["Accession"]] + + @classmethod + def raw(cls,download_path:Path) -> None: + """Downloads raw data for every dataset in this table""" + + num_workers = max(cpu_count(), 2) + threads = [] + + with ThreadPoolExecutor(max_workers=num_workers) as executor: + for dataset in ImageBasedProfiling.datasets: + threads.append(executor.submit(dataset.raw(download_path))) + + for f in tqdm( + as_completed(threads), desc=f"Downloading data", total=len(threads) + ): + f.result() + + +class BBBC: + """Class that models the Broad Bioimage Benchmark Collection (BBBC). + + BBBC has tables that contain datasets. Datasets are separated into tables + based on how they can be used. Each dataset has images and ground truth. + Read more about BBBC here: https://bbbc.broadinstitute.org. + """ + + @classmethod + @property + def datasets(cls) -> List[BBBCDataset]: + """Returns a list of all datasets in BBBC. + + Returns: + A list containing a Dataset object for each dataset in BBBC. + """ + + table = BBBC.combined_table + + return [BBBCDataset.create_dataset(name) for name in table["Accession"]] + + @classmethod + @property + def combined_table(cls) -> pd.DataFrame: + """Combines each table on https://bbbc.broadinstitute.org/image_sets into a single table. + + Returns: + A pandas DataFrame representation of the combined table. 
+ """ + + # Combine each table into one table + combined_table = ( + pd.concat(tables) + .drop(columns=["Ground truth"]) + .drop_duplicates("Accession") + ) + + return combined_table + + @classmethod + def raw(cls,download_path:Path) -> None: + """Downloads raw data for every dataset.""" + + num_workers = max(cpu_count(), 2) + threads = [] + + with ThreadPoolExecutor(max_workers=num_workers) as executor: + for dataset in BBBC.datasets: + threads.append(executor.submit(dataset.raw(download_path))) + + for f in tqdm( + as_completed(threads), desc=f"Downloading data", total=len(threads) + ): + f.result() diff --git a/utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/__init__.py b/utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/__init__.py new file mode 100644 index 000000000..3a98e7a8a --- /dev/null +++ b/utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/__init__.py @@ -0,0 +1,2 @@ +"""Bbbc Download.""" +__version__ = "0.1.0-dev0" \ No newline at end of file diff --git a/utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/__main__.py b/utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/__main__.py new file mode 100644 index 000000000..79439a500 --- /dev/null +++ b/utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/__main__.py @@ -0,0 +1,100 @@ +import json +import os +import logging +from pathlib import Path +from concurrent.futures import ProcessPoolExecutor, as_completed +from typing import Any, Optional + +import typer +from tqdm import tqdm +from polus.plugins.utils.bbbc_download.BBBC_model import BBBC, BBBCDataset, IDAndSegmentation, PhenotypeClassification, ImageBasedProfiling +from sys import platform +from multiprocessing import cpu_count + + + +if platform == "linux" or platform == "linux2": + NUM_THREADS = len(os.sched_getaffinity(0)) # type: ignore +else: + NUM_THREADS = max(cpu_count() // 2, 2) + +app = typer.Typer() + +# Initialize the logger +logging.basicConfig( + 
format="%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s", + datefmt="%d-%b-%y %H:%M:%S", +) +logger = logging.getLogger("polus.plugins.utils.bbbc_download") +logger.setLevel(os.environ.get("POLUS_LOG", logging.INFO)) + +@app.command() +def main( + name: str= typer.Option( + ..., "--name", help="The name of the dataset that is to be downloaded" + ), + out_dir: Path= typer.Option( + ...,"--outDir", help="The path for downloading the dataset" + ) + +)-> None: + """Download the required dataset from the BBBC dataaset.""" + logger.info(f"name = {name}") + logger.info(f"outDir = {out_dir}") + """Checking if output directory exists. If it does not exist then a designated path is created.""" + if not out_dir.exists(): + out_dir.mkdir() + logger.info(f"{out_dir} did not exists. Creating new path.") + + with ProcessPoolExecutor(max_workers=NUM_THREADS) as executor: + threads=[] + names=name.split(",") + for n in names: + if(n=='IDAndSegmentation'): + threads.append( + executor.submit(IDAndSegmentation.raw,out_dir) + ) + + elif(n=='PhenotypeClassification'): + threads.append( + executor.submit(PhenotypeClassification.raw,out_dir) + ) + + + + elif(n=='ImageBasedProfiling'): + threads.append( + executor.submit(ImageBasedProfiling.raw,out_dir) + ) + + elif(n=='BBBC'): + threads.append( + executor.submit(BBBC.raw,out_dir) + ) + + + else: + d=executor.submit(BBBCDataset.create_dataset, n) + d_name=d.result() + threads.append( + executor.submit(d_name.raw,out_dir) + ) + + + for f in tqdm( + as_completed(threads), + total=len(threads), + mininterval=5, + desc=f"donwloading the dataset", + initial=0, + unit_scale=True, + colour="cyan", + ): + f.result() + + + + + +if __name__ == "__main__": + app() \ No newline at end of file diff --git a/utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/download.py b/utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/download.py new file mode 100644 index 000000000..b27493bde --- /dev/null +++ 
b/utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/download.py @@ -0,0 +1,144 @@ +from pathlib import Path +import re +from urllib.request import urlretrieve +from urllib.error import URLError +from zipfile import ZipFile + +import bs4 +import shutil +import requests + +match_str = ( + "Images|Ground truth|Ground Truth|Metadata|Hand-annotated Ground Truth Images" +) +endings = (".txt", ".csv", ".tif", ".xlsx", ".xls", ".lst") + + +def get_lower_tags(tag: bs4.element.Tag) -> list: + """Get all tags between the tag argument and the next tag of the same type. + Args: + tag: Get tags between this tag and the next tag of the same type + """ + + tags = [] + + for sib in tag.find_next_siblings(): + if sib.name == tag.name: + break + else: + tags.append(sib) + + return tags + + +def get_url(url: str, save_location: Path, name: str) -> None: + """Get the given url and save it. + Args: + url: The url to get + save_location: The path where the files will be saved + name: The name of the dataset that the url is associated with + """ + + file_name = url.split("/")[-1] + + for download_attempts in range(10): + if url.endswith(endings): + try: + if not save_location.exists(): + save_location.mkdir(parents=True, exist_ok=True) + + urlretrieve(url, save_location.joinpath(file_name)) + except URLError as e: + if download_attempts == 9: + print("FAILED TO DOWNLOAD: " + url + " for " + name) + print("ERROR: " + str(e)) + + continue + elif url.endswith(".zip"): + try: + zip_path, _ = urlretrieve(url) + + with ZipFile(zip_path, "r") as zfile: + zfile.extractall(save_location) + except URLError as e: + if download_attempts == 9: + print("FAILED TO DOWNLOAD: " + url + " for " + name) + print("ERROR: " + str(e)) + + continue + except Exception as e: + print(e) + + continue + + break + + return + +def remove_macosx(name:str, save_location:Path)-> None: + images_path=save_location.joinpath("Images") + folders=[folders for folders in images_path.iterdir() if folders.is_dir()] 
+ for f in folders: + if f.name=="__MACOSX": + shutil.rmtree(f) + print("Deleted the __MACOSX folder in " + name) + + + + +def download(name: str,download_path:Path) -> None: + """Download a single dataset. + Args: + name: The name of the dataset to be downloaded + """ + + print("Started downloading " + name) + download_path=download_path.joinpath("BBBC") + + save_location = download_path.joinpath(name, "raw") + + if not save_location.exists(): + save_location.mkdir(parents=True, exist_ok=True) + + dataset_url = "https://bbbc.broadinstitute.org/" + name + + dataset_page = requests.get(dataset_url) + soup = bs4.BeautifulSoup(dataset_page.content, "html.parser") + + for heading in soup.find_all("h3"): + # Ignore headings that we aren't interested in + if re.match(match_str, heading.text.strip()) == None: + continue + + if heading.text.strip() == "Images": + sub_folder = "Images" + elif heading.text.strip() == "Metadata": + sub_folder = "Metadata" + else: + sub_folder = "Ground Truth" + + # Iterate over every tag under the current heading and above the next heading + for tag in get_lower_tags(heading): + links = tag.find_all("a") + data_links = [ + l for l in links if l.attrs["href"].endswith((".zip", *endings)) + ] + + for link in data_links: + data_url = link.attrs["href"] + file_path = save_location.joinpath(sub_folder) + + get_url(data_url, file_path, name) + + # Manually download BBBC018 ground truth because its webpage structure is incorrect + if name == "BBBC018" and re.match("Ground truth", heading.text.strip()): + url = "https://data.broadinstitute.org/bbbc/BBBC018/BBBC018_v1_outlines.zip" + + file_path = save_location.joinpath(sub_folder) + + get_url(url, file_path, "BBBC018") + + print(name + " has finished downloading") + remove_macosx(name,save_location) + + return diff --git a/utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/mapping.py b/utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/mapping.py new file mode 100644 index 
000000000..6650c6c55 --- /dev/null +++ b/utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/mapping.py @@ -0,0 +1,9 @@ + + +def BBBC001_mapping(row: dict, extension: str) -> str: + # important attributes: plate, well, wel num, control, field, channel, treatment, image type + + return f"a01_w01_n01_p01_f0{row['Field'] + 1}_c01_t00_i01{extension}" + + +__all__ = ["BBBC001_mapping"] \ No newline at end of file diff --git a/utils/bbbc-download-plugin/tests/__init__.py b/utils/bbbc-download-plugin/tests/__init__.py new file mode 100644 index 000000000..e69de29bb From 5b056b8aaeb3976a1b679cb510de0f4279cb0542 Mon Sep 17 00:00:00 2001 From: EC2 Default User Date: Fri, 21 Jul 2023 21:41:39 +0000 Subject: [PATCH 02/18] Tested bbbc plugin --- utils/bbbc-download-plugin/Dockerfile | 20 +++ utils/bbbc-download-plugin/README.md | 145 ++++++++++++++++++ utils/bbbc-download-plugin/build-docker.sh | 4 + utils/bbbc-download-plugin/plugin.json | 2 +- utils/bbbc-download-plugin/pyproject.toml | 8 +- utils/bbbc-download-plugin/run-plugin.sh | 2 +- .../plugins/utils/bbbc_download/BBBC_model.py | 65 +++++--- .../plugins/utils/bbbc_download/__main__.py | 26 +++- .../plugins/utils/bbbc_download/download.py | 11 +- utils/bbbc-download-plugin/tests/__init__.py | 1 + utils/bbbc-download-plugin/tests/test_main.py | 84 ++++++++++ 11 files changed, 334 insertions(+), 34 deletions(-) create mode 100644 utils/bbbc-download-plugin/Dockerfile create mode 100644 utils/bbbc-download-plugin/build-docker.sh create mode 100644 utils/bbbc-download-plugin/tests/test_main.py diff --git a/utils/bbbc-download-plugin/Dockerfile b/utils/bbbc-download-plugin/Dockerfile new file mode 100644 index 000000000..da89ce48c --- /dev/null +++ b/utils/bbbc-download-plugin/Dockerfile @@ -0,0 +1,20 @@ +FROM polusai/bfio:2.1.9 + +# environment variables defined in polusai/bfio +ENV EXEC_DIR="/opt/executables" +ENV POLUS_IMG_EXT=".ome.tif" +ENV POLUS_TAB_EXT=".csv" +ENV POLUS_LOG="INFO" + +# Work directory 
defined in the base container +WORKDIR ${EXEC_DIR} + +COPY pyproject.toml ${EXEC_DIR} +COPY VERSION ${EXEC_DIR} +COPY README.md ${EXEC_DIR} +COPY src ${EXEC_DIR}/src + +RUN pip3 install ${EXEC_DIR} --no-cache-dir + +ENTRYPOINT ["python3", "-m", "polus.plugins.utils.bbbc_download"] +CMD ["--help"] \ No newline at end of file diff --git a/utils/bbbc-download-plugin/README.md b/utils/bbbc-download-plugin/README.md index e69de29bb..9e9e4b10c 100644 --- a/utils/bbbc-download-plugin/README.md +++ b/utils/bbbc-download-plugin/README.md @@ -0,0 +1,145 @@ +#BBBC Download (0.1.0-dev0) + +This plugin is designed to download the necessary datasets from the Broad Bioimage Benchmark Collection(BBBC) website. + +For information on the BBBC dataset, visit +[BBBC dataset information](https://bbbc.broadinstitute.org/image_sets/). + +## Building + +To build the Docker image for the conversion plugin, run +`./build-docker.sh`. + +## Options + +This plugin takes 1 input arguments and +1 output argument: + +| Name | Description | I/O | Type | +| --------------- | ------------------------------------------------------------ | ------ | ----------- | +| `--name ` | The name of the datasets to be downloaded | Input | String | +| `--outDir` | Directory to store the downloaded datasets | Output | genericData | + +The Following are valid names for datasets: +"all"- To download all the datasets from the bbbc website +"IDAndSegmentation"- To download the datasets from the Identification and segmentation table +"PhenotypeClassification"- To download the datasets from the Phenotype classification table +"ImageBasedProfiling"- To download the datasets from the Image-based Profiling table + +To download specific datasets from the website, give the name of each dataset in the input argument seperated by a comma. 
eg: --name="BBBC001,BBBC002,BBBC003" + + +# BBBC Model +The classes in BBBC_model.py model the data from the [Broad Bioimage Benchmark Collection (BBBC)](https://bbbc.broadinstitute.org/image_sets). The tables on this webpage classify datasets by their biological application. Each dataset has a webpage that contains links to the data and describes information about the dataset. Almost every dataset has image data and ground truth data. There are a few datasets that have metadata rather than ground truth data. + +# Classes +This section describes the classes and functions used to model the BBBC. + +## BBBC +The `BBBC` class contains functions used for interacting with every dataset in the BBBC. + +### Functions +`datasets()`: Returns a list of all the datasets in the collection. + +`combined_table()`: Combines each table on the BBBC image set webpage into a single pandas DataFrame. + +`raw()`: Downloads all of the datasets in the collection. + +## Table Classes +There is a class for each table on the BBBC image set webpage. The classes are `IDAndSegmentation`, `PhenotypeClassification`, and `ImageBasedProfiling`. They have the same attributes and functions. + +### Attributes +`name`: The name of the table as it appears on the BBBC image set webpage. + +`table`: A pandas DataFrame representation of the table. + +### Functions +`datasets()`: Returns a list of all the datasets in the table. + +`raw()`: Downloads all of the datasets in the table. + +## BBBCDataset +The `BBBCDataset` class models individual datasets. + +*Note*: some datasets need specialized functionality so they cannot be modeled by the general BBBCDataset class. These datasets have their own classes with the specialized functionality implemented there. + +### Attributes +`name`: A string that represents the dataset's name. The provided name must be the name of an existing dataset or else an exception will be raised. + +`images`: An Images object that contains information about the dataset's images. 
Set to `None` until raw data is downloaded. + +`ground_truth`: A GroundTruth object that contains information about the dataset's ground truth. Set to `None` until raw data is downloaded. + +`metadata`: A Metadata object that contains information about the dataset's metadata. Set to `None` until raw data is downloaded. + +*Note*: The `images`, `ground_truth`, or `metadata` attributes will be `None` after downloading raw data if the dataset has no images, ground truth, or metadata. + +### Functions +`create_dataset(name)`: Takes in a name as a string and returns a BBBCDataset object for the dataset with that name. If there is no dataset with this name, then an error message is displayed and `None` is returned. + +`info()`: Returns a dictionary containing information about the dataset. The information includes: + +- A description of the dataset +- The microscopy technique used for the dataset +- The number of fields per sample +- The total number of fields +- The total number of images +- The types of ground truth used for the dataset + +`size()`: Computes and returns the total size of the dataset in bytes. + +`raw()`: Downloads the raw data for the dataset. Initializes the `images`, `ground_truth`, and `metadata` attributes. + +`standard(extension)`: Standardizes the dataset's raw data. The extension argument indicates which file format to save to. It can be `".ome.tif"` or `".ome.zarr"`. + +## Data Classes +Each dataset has image and ground truth data. A few datasets have metadata rather than ground truth. The `Images`, `GroundTruth`, and `Metadata` classes contain information about the dataset's images, ground truth, and metadata respectively. They have the same attributes and functions. + +### Attributes +`path`: The path to the folder where the data is stored. +`name`: The name of the dataset that the data belongs to. + +### Functions +`size()`: Computes and returns the size of the data in bytes. 
+ +# Example Workflow +This section provides an example of how to use these classes and functions. + +```python + from BBBC_model import BBBC, BBBCDataset, IDAndSegmentation + + # Print all datasets + for d in BBBC.datasets: + print(d.name) + + # Print all datasets in the Identification and segmentation table + print(IDAndSegmentation.name) + for d in IDAndSegmentation.datasets: + print(d.name) + + # Create a dataset + d = BBBCDataset.create_dataset("BBBC001") + + # Print some information about the dataset + print(d.name) + print(d.info) + + # Download dataset's raw data + d.raw() + + # Print information about the dataset after downloading its raw data + print(d.size) + print(d.images.size) + print(d.ground_truth.size) + + # This will print None because this dataset has no metadata + print(d.metadata) + + # Standardize the raw data + d.standard(".ome.tif") + + # Print information about the dataset after standardizing + print(d.size) + print(d.images.size) + print(d.ground_truth.size) +``` \ No newline at end of file diff --git a/utils/bbbc-download-plugin/build-docker.sh b/utils/bbbc-download-plugin/build-docker.sh new file mode 100644 index 000000000..3c751e602 --- /dev/null +++ b/utils/bbbc-download-plugin/build-docker.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +version=$("] +authors = [ + "Saket Prem ", + "Matthew McIntyre " + ] readme = "README.md" packages = [{include = "polus", from = "src"}] @@ -22,6 +25,7 @@ mapping = "^0.1.6" bump2version = "1.0.1" mypy = "1.0.1" tqdm = "^4.65.0" +pytest = "^7.4.0" [build-system] diff --git a/utils/bbbc-download-plugin/run-plugin.sh b/utils/bbbc-download-plugin/run-plugin.sh index 637544c99..57408ac85 100644 --- a/utils/bbbc-download-plugin/run-plugin.sh +++ b/utils/bbbc-download-plugin/run-plugin.sh @@ -10,7 +10,7 @@ name="BBBC001" outDir=/data/output # Show the help options -docker run polusai/bbbc-download-plugin:${version} +#docker run polusai/bbbc-download-plugin:${version} # Run the plugin docker run --mount 
type=bind,source=${datapath},target=/data/ \ diff --git a/utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/BBBC_model.py b/utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/BBBC_model.py index de6d20fd2..617e33fd3 100644 --- a/utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/BBBC_model.py +++ b/utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/BBBC_model.py @@ -6,7 +6,7 @@ from pathlib import Path from zipfile import ZipFile -from polus.plugins.utils.bbbc_download.download import download, get_url +from polus.plugins.utils.bbbc_download.download import download, get_url, remove_macosx from polus.plugins.utils.bbbc_download.mapping import * import pydantic @@ -139,6 +139,7 @@ class BBBCDataset(pydantic.BaseModel): images: Optional[Images] = None ground_truth: Optional[GroundTruth] = None metadata: Optional[Metadata] = None + output_path: Optional[Path]= None @pydantic.validator("name") @classmethod @@ -210,7 +211,7 @@ def info(self) -> Dict[str, Union[str, np.int64]]: def size(self) -> int: """Returns the size of the dataset in bytes.""" - dataset_path = root.joinpath(self.name) + dataset_path = self.output_path.joinpath("BBBC",self.name) return sum(os.path.getsize(file) for file in dataset_path.rglob("*")) @@ -268,6 +269,7 @@ def _init_data(self,download_path:Path) -> None: def raw(self,download_path: Path) -> None: """Download the dataset's raw data.""" + self.output_path=download_path download(self.name,download_path) self._init_data(download_path) @@ -339,14 +341,14 @@ def standard(self, extension: str) -> None: class BBBC019(BBBCDataset): def raw(self,download_path:Path) -> None: - download(self.name) - download_path=download_path.joinpath("BBBC") + download(self.name,download_path) + self.output_path=download_path + save_location=download_path.joinpath("BBBC") # Separate images from ground truth - save_location = download_path.joinpath("BBBC019") + save_location = save_location.joinpath("BBBC019") 
images_folder = save_location.joinpath("raw/Images") truth_folder = save_location.joinpath("raw/Ground Truth") - for set in [ x for x in images_folder.iterdir() @@ -368,6 +370,7 @@ def raw(self,download_path:Path) -> None: else: shutil.move(src, dst) + self._init_data(download_path) return @@ -376,9 +379,10 @@ def raw(self,download_path:Path) -> None: class BBBC029(BBBCDataset): def raw(self,download_path:Path) -> None: print("Started downloading BBBC029") - download_path=download_path.joinpath("BBBC") + self.output_path=download_path + save_location=download_path.joinpath("BBBC") - save_location = download_path.joinpath("BBBC029", "raw") + save_location = save_location.joinpath("BBBC029", "raw") if not save_location.exists(): save_location.mkdir(parents=True, exist_ok=True) @@ -398,6 +402,21 @@ def raw(self,download_path:Path) -> None: ) print("BBBC029 has finished downloading") + images_folder=save_location.joinpath("Images") + truth_folder=save_location.joinpath("Ground Truth") + remove_macosx("BBBC029",images_folder) + remove_macosx("BBBC029",truth_folder) + source_directory=images_folder.joinpath("images") + for source_file in source_directory.glob("*"): + destination_file = images_folder / source_file.name + shutil.move(source_file, destination_file) + shutil.rmtree(source_directory) + + source_directory=truth_folder.joinpath("ground_truth") + for source_file in source_directory.glob("*"): + destination_file = truth_folder / source_file.name + shutil.move(source_file, destination_file) + shutil.rmtree(source_directory) self._init_data(download_path) @@ -406,11 +425,12 @@ def raw(self,download_path:Path) -> None: class BBBC041(BBBCDataset): def raw(self,download_path:Path) -> None: - download(self.name) - download_path=download_path.joinpath("BBBC") + download(self.name,download_path) + self.output_path=download_path + save_location=download_path.joinpath("BBBC") # Separate images from ground truth - save_location = download_path.joinpath("BBBC041") + 
save_location = save_location.joinpath("BBBC041") file_names = ["test.json", "training.json"] if not save_location.joinpath("raw/Ground Truth").exists(): @@ -435,9 +455,10 @@ def raw(self,download_path:Path) -> None: class BBBC042(BBBCDataset): def raw(self,download_path:Path) -> None: print("Started downloading BBBC042") - download_path=download_path.joinpath("BBBC") + self.output_path=download_path + save_location=download_path.joinpath("BBBC") - save_location = download_path.joinpath("BBBC042", "raw") + save_location = save_location.joinpath("BBBC042", "raw") if not save_location.exists(): save_location.mkdir(parents=True, exist_ok=True) @@ -457,6 +478,10 @@ def raw(self,download_path:Path) -> None: ) print("BBBC042 has finished downloading") + images_folder=save_location.joinpath("Images") + truth_folder=save_location.joinpath("Ground Truth") + remove_macosx("BBBC029",images_folder) + remove_macosx("BBBC029",truth_folder) self._init_data(download_path) @@ -465,12 +490,13 @@ def raw(self,download_path:Path) -> None: class BBBC046(BBBCDataset): def raw(self, download_path: Path) -> None: - download(self.name) - download_path=download_path.joinpath("BBBC") + download(self.name,download_path) + self.output_path=download_path + save_location=download_path.joinpath("BBBC") # Separate images from ground truth try: - save_location = download_path.joinpath(self.name) + save_location = save_location.joinpath(self.name) images_folder = save_location.joinpath("raw/Images") truth_folder = save_location.joinpath("raw/Ground Truth") @@ -514,11 +540,12 @@ def raw(self, download_path: Path) -> None: class BBBC054(BBBCDataset): def raw(self, download_path:Path) -> None: - download(self.name) - download_path=download_path.joinpath("BBBC") + download(self.name,download_path) + self.output_path=download_path + save_location=download_path.joinpath("BBBC") # Separate images from ground truth - save_location = download_path.joinpath(self.name) + save_location = 
save_location.joinpath(self.name) src = save_location.joinpath("raw/Images", "Replicate1annotation.csv") dst = save_location.joinpath("raw/Ground Truth", "Replicate1annotation.csv") diff --git a/utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/__main__.py b/utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/__main__.py index 79439a500..84b023170 100644 --- a/utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/__main__.py +++ b/utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/__main__.py @@ -2,7 +2,7 @@ import os import logging from pathlib import Path -from concurrent.futures import ProcessPoolExecutor, as_completed +from concurrent.futures import ThreadPoolExecutor, as_completed from typing import Any, Optional import typer @@ -10,6 +10,7 @@ from polus.plugins.utils.bbbc_download.BBBC_model import BBBC, BBBCDataset, IDAndSegmentation, PhenotypeClassification, ImageBasedProfiling from sys import platform from multiprocessing import cpu_count +import time @@ -36,6 +37,7 @@ def main( out_dir: Path= typer.Option( ...,"--outDir", help="The path for downloading the dataset" ) + )-> None: """Download the required dataset from the BBBC dataaset.""" @@ -43,10 +45,15 @@ def main( logger.info(f"outDir = {out_dir}") """Checking if output directory exists. If it does not exist then a designated path is created.""" if not out_dir.exists(): - out_dir.mkdir() logger.info(f"{out_dir} did not exists. 
Creating new path.") + out_dir.mkdir() + if(not out_dir.exists): + raise ValueError("Directory does not exist") + + - with ProcessPoolExecutor(max_workers=NUM_THREADS) as executor: + with ThreadPoolExecutor(max_workers=NUM_THREADS) as executor: + start_time = time.time() threads=[] names=name.split(",") for n in names: @@ -67,7 +74,7 @@ def main( executor.submit(ImageBasedProfiling.raw,out_dir) ) - elif(n=='BBBC'): + elif(n=='All'): threads.append( executor.submit(BBBC.raw,out_dir) ) @@ -91,10 +98,15 @@ def main( colour="cyan", ): f.result() - + end_time = time.time() + execution_time = (end_time - start_time) + execution_time_min=execution_time/60 + logger.info(f"The execution time is {execution_time} in seconds") + logger.info(f"The execution time is {execution_time_min} in minutes") +if __name__ == "__main__": + app() + -if __name__ == "__main__": - app() \ No newline at end of file diff --git a/utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/download.py b/utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/download.py index b27493bde..d77c4fde1 100644 --- a/utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/download.py +++ b/utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/download.py @@ -75,9 +75,8 @@ def get_url(url: str, save_location: Path, name: str) -> None: return -def remove_macosx(name:str, save_location:Path)-> None: - images_path=save_location.joinpath("Images") - folders=[folders for folders in images_path.iterdir() if folders.is_dir()] +def remove_macosx(name:str, location:Path)-> None: + folders=[folders for folders in location.iterdir() if folders.is_dir()] for f in folders: if f.name=="__MACOSX": shutil.rmtree(f) @@ -139,6 +138,10 @@ def download(name: str,download_path:Path) -> None: get_url(url, file_path, "BBBC018") print(name + " has finished downloading") - remove_macosx(name,save_location) + images_path=save_location.joinpath("Images") + remove_macosx(name,images_path) + 
ground_path=save_location.joinpath("Ground Truth") + if ground_path.exists(): + remove_macosx(name,ground_path) return diff --git a/utils/bbbc-download-plugin/tests/__init__.py b/utils/bbbc-download-plugin/tests/__init__.py index e69de29bb..fa93c893c 100644 --- a/utils/bbbc-download-plugin/tests/__init__.py +++ b/utils/bbbc-download-plugin/tests/__init__.py @@ -0,0 +1 @@ +"""bbbc download plugin.""" \ No newline at end of file diff --git a/utils/bbbc-download-plugin/tests/test_main.py b/utils/bbbc-download-plugin/tests/test_main.py new file mode 100644 index 000000000..852e2fb64 --- /dev/null +++ b/utils/bbbc-download-plugin/tests/test_main.py @@ -0,0 +1,84 @@ +import pathlib +import shutil +import tempfile +import numpy as np +import pytest +import requests +import skimage +from bfio import BioReader +from skimage import io +from typer.testing import CliRunner + +from polus.plugins.utils.bbbc_download.__main__ import app as app +from polus.plugins.utils.bbbc_download import BBBC_model,mapping,download + +runner = CliRunner() + +@pytest.fixture +def output_directory(): + """Generate random output directory.""" + out_dir = pathlib.Path(tempfile.mkdtemp(dir=pathlib.Path.cwd())) + yield out_dir + shutil.rmtree(out_dir) + +@pytest.fixture +def macosx_directory(): + """Generate random directory.""" + test_dir = pathlib.Path(tempfile.mkdtemp(dir=pathlib.Path.cwd())) + macosx_dir=test_dir.joinpath("Images","__MACOSX") + macosx_dir.mkdir(parents=True) + yield macosx_dir + shutil.rmtree(macosx_dir.parents[1]) + + +def test_delete_macosx(macosx_directory) -> None: + + mac_dir=macosx_directory + mac_dir=pathlib.Path(mac_dir) + + mac_dir_test= mac_dir.parent + macosx_test_name="testname" + download.remove_macosx(macosx_test_name,mac_dir_test) + assert mac_dir.exists()==False + + +def test_bbbc_datasets()->None: + d_test=BBBC_model.BBBC.datasets + assert len(d_test)==50 + +def test_raw(output_directory)->None: + d=BBBC_model.BBBCDataset.create_dataset("BBBC054") + 
output_dir=pathlib.Path(output_directory) + d.raw(output_dir) + assert d.size >0 + +def test_IDAndSegmentation()-> None: + d_test_IDAndSegmentation= BBBC_model.IDAndSegmentation.datasets + assert len(d_test_IDAndSegmentation)==32 + +def test_PhenotypeClassification()-> None: + d_test_PhenotypeClassification= BBBC_model.PhenotypeClassification.datasets + assert len(d_test_PhenotypeClassification)==14 + +def test_ImageBasedProfiling()-> None: + d_test_ImageBasedProfiling= BBBC_model.ImageBasedProfiling.datasets + assert len(d_test_ImageBasedProfiling)==6 + +def test_cli(output_directory) -> None: + """Test Cli.""" + name="BBBC001,BBBC002" + output_dir=pathlib.Path(output_directory) + + result = runner.invoke( + app, + [ + "--name", + name, + "--outDir", + output_dir, + ], + ) + + assert result.exit_code == 0 + + From 0b88e5e1ddeab4e962c8553fe5fde414f3e97206 Mon Sep 17 00:00:00 2001 From: EC2 Default User Date: Mon, 24 Jul 2023 17:04:11 +0000 Subject: [PATCH 03/18] modified the readme file and the check the docker container files --- utils/bbbc-download-plugin/README.md | 130 ++---------------- utils/bbbc-download-plugin/pyproject.toml | 3 +- utils/bbbc-download-plugin/run-plugin.sh | 18 +-- .../plugins/utils/bbbc_download/download.py | 5 + 4 files changed, 29 insertions(+), 127 deletions(-) diff --git a/utils/bbbc-download-plugin/README.md b/utils/bbbc-download-plugin/README.md index 9e9e4b10c..cdbf1b8cb 100644 --- a/utils/bbbc-download-plugin/README.md +++ b/utils/bbbc-download-plugin/README.md @@ -1,14 +1,20 @@ -#BBBC Download (0.1.0-dev0) +# BBBC Download (0.1.0-dev0) This plugin is designed to download the necessary datasets from the Broad Bioimage Benchmark Collection(BBBC) website. For information on the BBBC dataset, visit [BBBC dataset information](https://bbbc.broadinstitute.org/image_sets/). +The tables on this webpage classify datasets by their biological application. 
Each dataset has a webpage that contains links to the data and describes information about the dataset. Almost every dataset has image data and ground truth data. There are a few datasets that have metadata rather than ground truth data. ## Building -To build the Docker image for the conversion plugin, run -`./build-docker.sh`. +To build the Docker image for the download plugin, run +`bash build-docker.sh`. + +## Executing + +To execute the build docker image for the download plugin, run +'bash run-plugin.sh' ## Options @@ -28,118 +34,8 @@ The Following are valid names for datasets: To download specific datasets from the website, give the name of each dataset in the input argument seperated by a comma. eg: --name="BBBC001,BBBC002,BBBC003" +### NOTE +There may be some errors while running th plugin for BBBC046 dataset. -# BBBC Model -The classes in BBBC_model.py model the data from the [Broad Bioimage Benchmark Collection (BBBC)](https://bbbc.broadinstitute.org/image_sets). The tables on this webpage classify datasets by their biological application. Each dataset has a webpage that contains links to the data and describes information about the dataset. Almost every dataset has image data and ground truth data. There are a few datasets that have metadata rather than ground truth data. - -# Classes -This section describes the classes and functions used to model the BBBC. - -## BBBC -The `BBBC` class contains functions used for interacting with every dataset in the BBBC. - -### Functions -`datasets()`: Returns a list of all the datasets in the collection. - -`combined_table()`: Combines each table on the BBBC image set webpage into a single pandas DataFrame. - -`raw()`: Downloads all of the datasets in the collection. - -## Table Classes -There is a class for each table on the BBBC image set webpage. The classes are `IDAndSegmentation`, `PhenotypeClassification`, and `ImageBasedProfiling`. They have the same attributes and functions. 
- -### Attributes -`name`: The name of the table as it appears on the BBBC image set webpage. - -`table`: A pandas DataFrame representation of the table. - -### Functions -`datasets()`: Returns a list of all the datasets in the table. - -`raw()`: Downloads all of the datasets in the table. - -## BBBCDataset -The `BBBCDataset` class models individual datasets. - -*Note*: some datasets need specialized functionality so they cannot be modeled by the general BBBCDataset class. These datasets have their own classes with the specialized functionality implemented there. - -### Attributes -`name`: A string that represents the dataset's name. The provided name must be the name of an existing dataset or else an exception will be raised. - -`images`: An Images object that contains information about the dataset's images. Set to `None` until raw data is downloaded. - -`ground_truth`: A GroundTruth object that contains information about the dataset's ground truth. Set to `None` until raw data is downloaded. - -`metadata`: A Metadata object that contains information about the dataset's metadata. Set to `None` until raw data is downloaded. - -*Note*: The `images`, `ground_truth`, or `metadata` attributes will be `None` after downloading raw data if the dataset has no images, ground truth, or metadata. - -### Functions -`create_dataset(name)`: Takes in a name as a string and returns a BBBCDataset object for the dataset with that name. If there is no dataset with this name, then an error message is displayed and `None` is returned. - -`info()`: Returns a dictionary containing information about the dataset. The information includes: - -- A description of the dataset -- The microscopy technique used for the dataset -- The number of fields per sample -- The total number of fields -- The total number of images -- The types of ground truth used for the dataset - -`size()`: Computes and returns the total size of the dataset in bytes. - -`raw()`: Downloads the raw data for the dataset. 
Initializes the `images`, `ground_truth`, and `metadata` attributes. - -`standard(extension)`: Standardizes the dataset's raw data. The extension argument indicates which file format to save to. It can be `".ome.tif"` or `".ome.zarr"`. - -## Data Classes -Each dataset has image and ground truth data. A few datasets have metadata rather than ground truth. The `Images`, `GroundTruth`, and `Metadata` classes contain information about the dataset's images, ground truth, and metadata respectively. They have the same attributes and functions. - -### Attributes -`path`: The path to the folder where the data is stored. -`name`: The name of the dataset that the data belongs to. - -### Functions -`size()`: Computes and returns the size of the data in bytes. - -# Example Workflow -This section provides an example of how to use these classes and functions. - -```python - from BBBC_model import BBBC, BBBCDataset, IDAndSegmentation - - # Print all datasets - for d in BBBC.datasets: - print(d.name) - - # Print all datasets in the Identification and segmentation table - print(IDAndSegmentation.name) - for d in IDAndSegmentation.datasets: - print(d.name) - - # Create a dataset - d = BBBCDataset.create_dataset("BBBC001") - - # Print some information about the dataset - print(d.name) - print(d.info) - - # Download dataset's raw data - d.raw() - - # Print information about the dataset after downloading its raw data - print(d.size) - print(d.images.size) - print(d.ground_truth.size) - - # This will print None because this dataset has no metadata - print(d.metadata) - - # Standardize the raw data - d.standard(".ome.tif") - - # Print information about the dataset after standardizing - print(d.size) - print(d.images.size) - print(d.ground_truth.size) -``` \ No newline at end of file +## Sample docker command: +docker run -v /home/ec2-user/polus-plugins/utils/bbbc-download-plugin/data/:/home/ec2-user/polus-plugins/utils/bbbc-download-plugin/data/ polusai/bbbc-download-plugin:0.1.0-dev0 
--name="BBBC001" --outDir=/home/ec2-user/polus-plugins/utils/bbbc-download-plugin/data \ No newline at end of file diff --git a/utils/bbbc-download-plugin/pyproject.toml b/utils/bbbc-download-plugin/pyproject.toml index 061163daa..006521d58 100644 --- a/utils/bbbc-download-plugin/pyproject.toml +++ b/utils/bbbc-download-plugin/pyproject.toml @@ -10,7 +10,7 @@ readme = "README.md" packages = [{include = "polus", from = "src"}] [tool.poetry.dependencies] -python = "^3.9.16" +python = ">=3.9,<4" typer = "^0.9.0" pyarrow = "11.0.0" scikit-image = "0.20.0" @@ -26,6 +26,7 @@ bump2version = "1.0.1" mypy = "1.0.1" tqdm = "^4.65.0" pytest = "^7.4.0" +xmlschema = "^2.3.1" [build-system] diff --git a/utils/bbbc-download-plugin/run-plugin.sh b/utils/bbbc-download-plugin/run-plugin.sh index 57408ac85..c78c4cb52 100644 --- a/utils/bbbc-download-plugin/run-plugin.sh +++ b/utils/bbbc-download-plugin/run-plugin.sh @@ -2,18 +2,18 @@ version=$( None: return def remove_macosx(name:str, location:Path)-> None: + """ Remove the __MACOSX folder from the downlpoaded dataset. + Args: + name: The name of the dataset + location: The partent directory of the __MACOSX folder. + """ folders=[folders for folders in location.iterdir() if folders.is_dir()] for f in folders: if f.name=="__MACOSX": From 7abf6ec22147fdc8aa23180ffc70af6f049e5559 Mon Sep 17 00:00:00 2001 From: EC2 Default User Date: Mon, 24 Jul 2023 17:51:22 +0000 Subject: [PATCH 04/18] sample dockeer command in readme file updated --- utils/bbbc-download-plugin/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/bbbc-download-plugin/README.md b/utils/bbbc-download-plugin/README.md index cdbf1b8cb..0970c74bb 100644 --- a/utils/bbbc-download-plugin/README.md +++ b/utils/bbbc-download-plugin/README.md @@ -38,4 +38,4 @@ To download specific datasets from the website, give the name of each dataset in There may be some errors while running th plugin for BBBC046 dataset. 
## Sample docker command: -docker run -v /home/ec2-user/polus-plugins/utils/bbbc-download-plugin/data/:/home/ec2-user/polus-plugins/utils/bbbc-download-plugin/data/ polusai/bbbc-download-plugin:0.1.0-dev0 --name="BBBC001" --outDir=/home/ec2-user/polus-plugins/utils/bbbc-download-plugin/data \ No newline at end of file +``` docker run -v /home/ec2-user/polus-plugins/utils/bbbc-download-plugin/data/:/home/ec2-user/polus-plugins/utils/bbbc-download-plugin/data/ polusai/bbbc-download-plugin:0.1.0-dev0 --name="BBBC001" --outDir=/home/ec2-user/polus-plugins/utils/bbbc-download-plugin/data ``` \ No newline at end of file From 9e6b7d1b58e14664166ba8e07c6378c2d41b853b Mon Sep 17 00:00:00 2001 From: EC2 Default User Date: Mon, 24 Jul 2023 18:23:34 +0000 Subject: [PATCH 05/18] removed mapping.py adn the mapping dependency in project.toml --- utils/bbbc-download-plugin/README.md | 2 +- utils/bbbc-download-plugin/pyproject.toml | 1 - utils/bbbc-download-plugin/run-plugin.sh | 2 +- .../src/polus/plugins/utils/bbbc_download/BBBC_model.py | 2 +- .../src/polus/plugins/utils/bbbc_download/mapping.py | 9 --------- 5 files changed, 3 insertions(+), 13 deletions(-) delete mode 100644 utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/mapping.py diff --git a/utils/bbbc-download-plugin/README.md b/utils/bbbc-download-plugin/README.md index 0970c74bb..12d35d059 100644 --- a/utils/bbbc-download-plugin/README.md +++ b/utils/bbbc-download-plugin/README.md @@ -35,7 +35,7 @@ The Following are valid names for datasets: To download specific datasets from the website, give the name of each dataset in the input argument seperated by a comma. eg: --name="BBBC001,BBBC002,BBBC003" ### NOTE -There may be some errors while running th plugin for BBBC046 dataset. 
+BBBC046 dataset download is not supported by this plugin ## Sample docker command: ``` docker run -v /home/ec2-user/polus-plugins/utils/bbbc-download-plugin/data/:/home/ec2-user/polus-plugins/utils/bbbc-download-plugin/data/ polusai/bbbc-download-plugin:0.1.0-dev0 --name="BBBC001" --outDir=/home/ec2-user/polus-plugins/utils/bbbc-download-plugin/data ``` \ No newline at end of file diff --git a/utils/bbbc-download-plugin/pyproject.toml b/utils/bbbc-download-plugin/pyproject.toml index 006521d58..5bcf26c72 100644 --- a/utils/bbbc-download-plugin/pyproject.toml +++ b/utils/bbbc-download-plugin/pyproject.toml @@ -21,7 +21,6 @@ numpy = "1.24.2" pandas = "1.5.3" requests = "2.28.2" pydantic = "1.10.7" -mapping = "^0.1.6" bump2version = "1.0.1" mypy = "1.0.1" tqdm = "^4.65.0" diff --git a/utils/bbbc-download-plugin/run-plugin.sh b/utils/bbbc-download-plugin/run-plugin.sh index c78c4cb52..1b85d5652 100644 --- a/utils/bbbc-download-plugin/run-plugin.sh +++ b/utils/bbbc-download-plugin/run-plugin.sh @@ -4,7 +4,7 @@ version=$( str: - # important attributes: plate, well, wel num, control, field, channel, treatment, image type - - return f"a01_w01_n01_p01_f0{row['Field'] + 1}_c01_t00_i01{extension}" - - -__all__ = ["BBBC001_mapping"] \ No newline at end of file From 787e409bf994f8f438ef802e28924a1d81e91e21 Mon Sep 17 00:00:00 2001 From: EC2 Default User Date: Mon, 24 Jul 2023 19:18:34 +0000 Subject: [PATCH 06/18] removed mapping import from test_main --- utils/bbbc-download-plugin/tests/test_main.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/utils/bbbc-download-plugin/tests/test_main.py b/utils/bbbc-download-plugin/tests/test_main.py index 852e2fb64..5a1443690 100644 --- a/utils/bbbc-download-plugin/tests/test_main.py +++ b/utils/bbbc-download-plugin/tests/test_main.py @@ -10,7 +10,7 @@ from typer.testing import CliRunner from polus.plugins.utils.bbbc_download.__main__ import app as app -from polus.plugins.utils.bbbc_download import 
BBBC_model,mapping,download +from polus.plugins.utils.bbbc_download import BBBC_model,download runner = CliRunner() @@ -47,7 +47,7 @@ def test_bbbc_datasets()->None: assert len(d_test)==50 def test_raw(output_directory)->None: - d=BBBC_model.BBBCDataset.create_dataset("BBBC054") + d=BBBC_model.BBBCDataset.create_dataset("BBBC001") output_dir=pathlib.Path(output_directory) d.raw(output_dir) assert d.size >0 From 15218e44589abdb86e4ddcfc34bb5654d2ea55f6 Mon Sep 17 00:00:00 2001 From: EC2 Default User Date: Thu, 27 Jul 2023 14:05:35 +0000 Subject: [PATCH 07/18] Changed the folder name of Ground Truth to Ground_Truth. --- .../plugins/utils/bbbc_download/BBBC_model.py | 26 +++++++++---------- .../plugins/utils/bbbc_download/download.py | 4 +-- utils/bbbc-download-plugin/tests/test_main.py | 9 +++++-- 3 files changed, 22 insertions(+), 17 deletions(-) diff --git a/utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/BBBC_model.py b/utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/BBBC_model.py index 7ecd6f86d..9b71d04aa 100644 --- a/utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/BBBC_model.py +++ b/utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/BBBC_model.py @@ -91,8 +91,8 @@ def valid_data(cls, values: dict) -> dict: def size(self) -> int: """Returns the size of the dataset's ground truth in bytes.""" - raw_path = root.joinpath(self.name, "raw/Ground Truth") - standard_path = root.joinpath(self.name, "standard/Ground Truth") + raw_path = root.joinpath(self.name, "raw/Ground_Truth") + standard_path = root.joinpath(self.name, "standard/Ground_Truth") raw_sum = sum(os.path.getsize(file) for file in raw_path.rglob("*")) standard_sum = sum(os.path.getsize(file) for file in standard_path.rglob("*")) @@ -241,7 +241,7 @@ def _init_data(self,download_path:Path) -> None: download_path=download_path.joinpath("BBBC") images_path = download_path.joinpath(self.name, "raw/Images") - truth_path = 
download_path.joinpath(self.name, "raw/Ground Truth") + truth_path = download_path.joinpath(self.name, "raw/Ground_Truth") meta_path = download_path.joinpath(self.name, "raw/Metadata") try: @@ -312,7 +312,7 @@ def standard(self, extension: str) -> None: if row["Image Type"] == "Intensity": sub_folder = "Images" elif row["Image Type"] == "Ground Truth": - sub_folder = "Ground Truth" + sub_folder = "Ground_Truth" elif row["Image Type"] == "Metadata": sub_folder = "Metadata" else: @@ -348,7 +348,7 @@ def raw(self,download_path:Path) -> None: # Separate images from ground truth save_location = save_location.joinpath("BBBC019") images_folder = save_location.joinpath("raw/Images") - truth_folder = save_location.joinpath("raw/Ground Truth") + truth_folder = save_location.joinpath("raw/Ground_Truth") for set in [ x for x in images_folder.iterdir() @@ -394,7 +394,7 @@ def raw(self,download_path:Path) -> None: "BBBC029", ) - file_path = save_location.joinpath("Ground Truth") + file_path = save_location.joinpath("Ground_Truth") get_url( "https://data.broadinstitute.org/bbbc/BBBC029/ground_truth.zip", file_path, @@ -403,7 +403,7 @@ def raw(self,download_path:Path) -> None: print("BBBC029 has finished downloading") images_folder=save_location.joinpath("Images") - truth_folder=save_location.joinpath("Ground Truth") + truth_folder=save_location.joinpath("Ground_Truth") remove_macosx("BBBC029",images_folder) remove_macosx("BBBC029",truth_folder) source_directory=images_folder.joinpath("images") @@ -433,14 +433,14 @@ def raw(self,download_path:Path) -> None: save_location = save_location.joinpath("BBBC041") file_names = ["test.json", "training.json"] - if not save_location.joinpath("raw/Ground Truth").exists(): - save_location.joinpath("raw/Ground Truth").mkdir( + if not save_location.joinpath("raw/Ground_Truth").exists(): + save_location.joinpath("raw/Ground_Truth").mkdir( parents=True, exist_ok=True ) for file in file_names: src = save_location.joinpath("raw/Images/malaria", 
file) - dst = save_location.joinpath("raw/Ground Truth") + dst = save_location.joinpath("raw/Ground_Truth") if dst.joinpath(file).exists(): os.remove(src) @@ -479,7 +479,7 @@ def raw(self,download_path:Path) -> None: print("BBBC042 has finished downloading") images_folder=save_location.joinpath("Images") - truth_folder=save_location.joinpath("Ground Truth") + truth_folder=save_location.joinpath("Ground_Truth") remove_macosx("BBBC029",images_folder) remove_macosx("BBBC029",truth_folder) @@ -498,7 +498,7 @@ def raw(self, download_path: Path) -> None: try: save_location = save_location.joinpath(self.name) images_folder = save_location.joinpath("raw/Images") - truth_folder = save_location.joinpath("raw/Ground Truth") + truth_folder = save_location.joinpath("raw/Ground_Truth") # Extract these files because they do not extract automatically for file in ["OE-ID350-AR-1.zip", "OE-ID350-AR-2.zip", "OE-ID350-AR-4.zip", "OE-ID350-AR-8.zip"]: @@ -547,7 +547,7 @@ def raw(self, download_path:Path) -> None: # Separate images from ground truth save_location = save_location.joinpath(self.name) src = save_location.joinpath("raw/Images", "Replicate1annotation.csv") - dst = save_location.joinpath("raw/Ground Truth", "Replicate1annotation.csv") + dst = save_location.joinpath("raw/Ground_Truth", "Replicate1annotation.csv") if not dst.exists(): dst.mkdir(parents=True, exist_ok=True) diff --git a/utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/download.py b/utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/download.py index 1ee02a44e..d9da36e11 100644 --- a/utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/download.py +++ b/utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/download.py @@ -119,7 +119,7 @@ def download(name: str,download_path:Path) -> None: elif heading.text.strip() == "Metadata": sub_folder = "Metadata" else: - sub_folder = "Ground Truth" + sub_folder = "Ground_Truth" # Iterate over every tag under the current 
heading and above the next heading for tag in get_lower_tags(heading): @@ -145,7 +145,7 @@ def download(name: str,download_path:Path) -> None: print(name + " has finished downloading") images_path=save_location.joinpath("Images") remove_macosx(name,images_path) - ground_path=save_location.joinpath("Ground Truth") + ground_path=save_location.joinpath("Ground_Truth") if ground_path.exists(): remove_macosx(name,ground_path) diff --git a/utils/bbbc-download-plugin/tests/test_main.py b/utils/bbbc-download-plugin/tests/test_main.py index 5a1443690..3514f15e0 100644 --- a/utils/bbbc-download-plugin/tests/test_main.py +++ b/utils/bbbc-download-plugin/tests/test_main.py @@ -23,7 +23,7 @@ def output_directory(): @pytest.fixture def macosx_directory(): - """Generate random directory.""" + """Generate random directory named __MACOSX.""" test_dir = pathlib.Path(tempfile.mkdtemp(dir=pathlib.Path.cwd())) macosx_dir=test_dir.joinpath("Images","__MACOSX") macosx_dir.mkdir(parents=True) @@ -32,7 +32,7 @@ def macosx_directory(): def test_delete_macosx(macosx_directory) -> None: - + """Testing the delete_macosx function in download.py""" mac_dir=macosx_directory mac_dir=pathlib.Path(mac_dir) @@ -43,24 +43,29 @@ def test_delete_macosx(macosx_directory) -> None: def test_bbbc_datasets()->None: + """Test to check if all the datasets on the BBBC website are recognized.""" d_test=BBBC_model.BBBC.datasets assert len(d_test)==50 def test_raw(output_directory)->None: + """A function to test the download functionality.""" d=BBBC_model.BBBCDataset.create_dataset("BBBC001") output_dir=pathlib.Path(output_directory) d.raw(output_dir) assert d.size >0 def test_IDAndSegmentation()-> None: + """Test to check if all the datasets on the Identification and segmentation table are recognized.""" d_test_IDAndSegmentation= BBBC_model.IDAndSegmentation.datasets assert len(d_test_IDAndSegmentation)==32 def test_PhenotypeClassification()-> None: + """Test to check if all the datasets on the Phenotype 
CLassification table are recognized.""" d_test_PhenotypeClassification= BBBC_model.PhenotypeClassification.datasets assert len(d_test_PhenotypeClassification)==14 def test_ImageBasedProfiling()-> None: + """Test to check if all the datasets on the Image based profiling table are recognized.""" d_test_ImageBasedProfiling= BBBC_model.ImageBasedProfiling.datasets assert len(d_test_ImageBasedProfiling)==6 From e67503f18a25431994f5ad623b51673aad5bbb56 Mon Sep 17 00:00:00 2001 From: EC2 Default User Date: Thu, 27 Jul 2023 16:27:50 +0000 Subject: [PATCH 08/18] Changes to readme and added comments to test_main.py --- utils/bbbc-download-plugin/README.md | 22 +++++++++---------- .../plugins/utils/bbbc_download/BBBC_model.py | 2 +- utils/bbbc-download-plugin/tests/test_main.py | 2 +- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/utils/bbbc-download-plugin/README.md b/utils/bbbc-download-plugin/README.md index 12d35d059..cb4472eb8 100644 --- a/utils/bbbc-download-plugin/README.md +++ b/utils/bbbc-download-plugin/README.md @@ -11,10 +11,10 @@ The tables on this webpage classify datasets by their biological application. Ea To build the Docker image for the download plugin, run `bash build-docker.sh`. -## Executing +## Run the Docker image To execute the build docker image for the download plugin, run -'bash run-plugin.sh' +`bash run-plugin.sh`. 
## Options @@ -23,19 +23,19 @@ This plugin takes 1 input arguments and | Name | Description | I/O | Type | | --------------- | ------------------------------------------------------------ | ------ | ----------- | -| `--name ` | The name of the datasets to be downloaded | Input | String | +| `--name ` | The name of the datasets to be downloaded | Input | String | | `--outDir` | Directory to store the downloaded datasets | Output | genericData | -The Following are valid names for datasets: -"all"- To download all the datasets from the bbbc website -"IDAndSegmentation"- To download the datasets from the Identification and segmentation table -"PhenotypeClassification"- To download the datasets from the Phenotype classification table -"ImageBasedProfiling"- To download the datasets from the Image-based Profiling table +The following are valid names for datasets: +`"all"`- To download all the datasets from the bbbc website +`"IDAndSegmentation"`- To download the datasets from the Identification and segmentation table +`"PhenotypeClassification"`- To download the datasets from the Phenotype classification table +`"ImageBasedProfiling"`- To download the datasets from the Image-based Profiling table -To download specific datasets from the website, give the name of each dataset in the input argument seperated by a comma. eg: --name="BBBC001,BBBC002,BBBC003" +To download specific datasets from the website, give the name of each dataset in the input argument seperated by a comma. example: `--name="BBBC001,BBBC002,BBBC003"` ### NOTE -BBBC046 dataset download is not supported by this plugin +BBBC046 dataset download is not supported by this plugin. 
## Sample docker command: -``` docker run -v /home/ec2-user/polus-plugins/utils/bbbc-download-plugin/data/:/home/ec2-user/polus-plugins/utils/bbbc-download-plugin/data/ polusai/bbbc-download-plugin:0.1.0-dev0 --name="BBBC001" --outDir=/home/ec2-user/polus-plugins/utils/bbbc-download-plugin/data ``` \ No newline at end of file +```docker run -v /home/ec2-user/data/:/home/ec2-user/data/ polusai/bbbc-download-plugin:0.1.0-dev0 --name="BBBC001" --outDir=/home/ec2-user/data/output``` \ No newline at end of file diff --git a/utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/BBBC_model.py b/utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/BBBC_model.py index 9b71d04aa..c0109d4ee 100644 --- a/utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/BBBC_model.py +++ b/utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/BBBC_model.py @@ -470,7 +470,7 @@ def raw(self,download_path:Path) -> None: "BBBC042", ) - file_path = save_location.joinpath("Ground Truth") + file_path = save_location.joinpath("Ground_Truth") get_url( "https://data.broadinstitute.org/bbbc/BBBC042/positions.zip", file_path, diff --git a/utils/bbbc-download-plugin/tests/test_main.py b/utils/bbbc-download-plugin/tests/test_main.py index 3514f15e0..24be86ae6 100644 --- a/utils/bbbc-download-plugin/tests/test_main.py +++ b/utils/bbbc-download-plugin/tests/test_main.py @@ -49,7 +49,7 @@ def test_bbbc_datasets()->None: def test_raw(output_directory)->None: """A function to test the download functionality.""" - d=BBBC_model.BBBCDataset.create_dataset("BBBC001") + d=BBBC_model.BBBCDataset.create_dataset("BBBC054") #change dataset name to test output_dir=pathlib.Path(output_directory) d.raw(output_dir) assert d.size >0 From badf30cc9b7f28d0745ba9b1c17a664706b165c7 Mon Sep 17 00:00:00 2001 From: EC2 Default User Date: Thu, 27 Jul 2023 16:29:53 +0000 Subject: [PATCH 09/18] Changes all to All in readme --- utils/bbbc-download-plugin/README.md | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/utils/bbbc-download-plugin/README.md b/utils/bbbc-download-plugin/README.md index cb4472eb8..029d12793 100644 --- a/utils/bbbc-download-plugin/README.md +++ b/utils/bbbc-download-plugin/README.md @@ -27,7 +27,7 @@ This plugin takes 1 input arguments and | `--outDir` | Directory to store the downloaded datasets | Output | genericData | The following are valid names for datasets: -`"all"`- To download all the datasets from the bbbc website +`"All"`- To download all the datasets from the bbbc website `"IDAndSegmentation"`- To download the datasets from the Identification and segmentation table `"PhenotypeClassification"`- To download the datasets from the Phenotype classification table `"ImageBasedProfiling"`- To download the datasets from the Image-based Profiling table From dfd65e976eb11098bc2ec6981aa40dca54326da8 Mon Sep 17 00:00:00 2001 From: EC2 Default User Date: Thu, 27 Jul 2023 16:49:29 +0000 Subject: [PATCH 10/18] Changed spelling error in readme file --- utils/bbbc-download-plugin/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/bbbc-download-plugin/README.md b/utils/bbbc-download-plugin/README.md index 029d12793..d1a2c39bc 100644 --- a/utils/bbbc-download-plugin/README.md +++ b/utils/bbbc-download-plugin/README.md @@ -13,7 +13,7 @@ To build the Docker image for the download plugin, run ## Run the Docker image -To execute the build docker image for the download plugin, run +To execute the built docker image for the download plugin, run `bash run-plugin.sh`. 
## Options From 52f10fb080983daa32da7b5564eecaf81537dbdb Mon Sep 17 00:00:00 2001 From: EC2 Default User Date: Fri, 28 Jul 2023 18:59:26 +0000 Subject: [PATCH 11/18] Changes made after checcking with pre-commit --- utils/bbbc-download-plugin/.bumpversion.cfg | 2 +- utils/bbbc-download-plugin/Dockerfile | 4 +- utils/bbbc-download-plugin/README.md | 18 +-- utils/bbbc-download-plugin/VERSION | 2 +- utils/bbbc-download-plugin/build-docker.sh | 2 +- utils/bbbc-download-plugin/plugin.json | 77 +++++++------ utils/bbbc-download-plugin/run-plugin.sh | 1 + .../plugins/utils/bbbc_download/__init__.py | 2 +- .../plugins/utils/bbbc_download/__main__.py | 106 ++++++++---------- utils/bbbc-download-plugin/tests/__init__.py | 2 +- utils/bbbc-download-plugin/tests/test_main.py | 63 ++++++----- 11 files changed, 136 insertions(+), 143 deletions(-) diff --git a/utils/bbbc-download-plugin/.bumpversion.cfg b/utils/bbbc-download-plugin/.bumpversion.cfg index 8cc773f0b..182a51988 100644 --- a/utils/bbbc-download-plugin/.bumpversion.cfg +++ b/utils/bbbc-download-plugin/.bumpversion.cfg @@ -24,4 +24,4 @@ replace = version = "{new_version}" [bumpversion:file:VERSION] -[bumpversion:file:src/polus/plugins/utils/bbbc_download/__init__.py] \ No newline at end of file +[bumpversion:file:src/polus/plugins/utils/bbbc_download/__init__.py] diff --git a/utils/bbbc-download-plugin/Dockerfile b/utils/bbbc-download-plugin/Dockerfile index da89ce48c..d89987fd6 100644 --- a/utils/bbbc-download-plugin/Dockerfile +++ b/utils/bbbc-download-plugin/Dockerfile @@ -3,7 +3,7 @@ FROM polusai/bfio:2.1.9 # environment variables defined in polusai/bfio ENV EXEC_DIR="/opt/executables" ENV POLUS_IMG_EXT=".ome.tif" -ENV POLUS_TAB_EXT=".csv" +ENV POLUS_TAB_EXT=".csv" ENV POLUS_LOG="INFO" # Work directory defined in the base container @@ -17,4 +17,4 @@ COPY src ${EXEC_DIR}/src RUN pip3 install ${EXEC_DIR} --no-cache-dir ENTRYPOINT ["python3", "-m", "polus.plugins.utils.bbbc_download"] -CMD ["--help"] \ No newline at end 
of file +CMD ["--help"] diff --git a/utils/bbbc-download-plugin/README.md b/utils/bbbc-download-plugin/README.md index d1a2c39bc..4218fc9f9 100644 --- a/utils/bbbc-download-plugin/README.md +++ b/utils/bbbc-download-plugin/README.md @@ -2,7 +2,7 @@ This plugin is designed to download the necessary datasets from the Broad Bioimage Benchmark Collection(BBBC) website. -For information on the BBBC dataset, visit +For information on the BBBC dataset, visit [BBBC dataset information](https://bbbc.broadinstitute.org/image_sets/). The tables on this webpage classify datasets by their biological application. Each dataset has a webpage that contains links to the data and describes information about the dataset. Almost every dataset has image data and ground truth data. There are a few datasets that have metadata rather than ground truth data. @@ -13,7 +13,7 @@ To build the Docker image for the download plugin, run ## Run the Docker image -To execute the built docker image for the download plugin, run +To execute the built docker image for the download plugin, run `bash run-plugin.sh`. 
## Options @@ -26,16 +26,16 @@ This plugin takes 1 input arguments and | `--name ` | The name of the datasets to be downloaded | Input | String | | `--outDir` | Directory to store the downloaded datasets | Output | genericData | -The following are valid names for datasets: -`"All"`- To download all the datasets from the bbbc website -`"IDAndSegmentation"`- To download the datasets from the Identification and segmentation table -`"PhenotypeClassification"`- To download the datasets from the Phenotype classification table +The following are valid names for datasets: +`"All"`- To download all the datasets from the bbbc website +`"IDAndSegmentation"`- To download the datasets from the Identification and segmentation table +`"PhenotypeClassification"`- To download the datasets from the Phenotype classification table `"ImageBasedProfiling"`- To download the datasets from the Image-based Profiling table -To download specific datasets from the website, give the name of each dataset in the input argument seperated by a comma. example: `--name="BBBC001,BBBC002,BBBC003"` +To download specific datasets from the website, give the name of each dataset in the input argument seperated by a comma. example: `--name="BBBC001,BBBC002,BBBC003"` ### NOTE -BBBC046 dataset download is not supported by this plugin. +BBBC046 dataset download is not supported by this plugin. 
## Sample docker command: -```docker run -v /home/ec2-user/data/:/home/ec2-user/data/ polusai/bbbc-download-plugin:0.1.0-dev0 --name="BBBC001" --outDir=/home/ec2-user/data/output``` \ No newline at end of file +```docker run -v /home/ec2-user/data/:/home/ec2-user/data/ polusai/bbbc-download-plugin:0.1.0-dev0 --name="BBBC001" --outDir=/home/ec2-user/data/output``` diff --git a/utils/bbbc-download-plugin/VERSION b/utils/bbbc-download-plugin/VERSION index 15a06bec5..206c0852b 100644 --- a/utils/bbbc-download-plugin/VERSION +++ b/utils/bbbc-download-plugin/VERSION @@ -1 +1 @@ -0.1.0-dev0 \ No newline at end of file +0.1.0-dev0 diff --git a/utils/bbbc-download-plugin/build-docker.sh b/utils/bbbc-download-plugin/build-docker.sh index 3c751e602..3bfcb041b 100644 --- a/utils/bbbc-download-plugin/build-docker.sh +++ b/utils/bbbc-download-plugin/build-docker.sh @@ -1,4 +1,4 @@ #!/bin/bash version=$( None: + out_dir: Path = typer.Option( + ..., + "--outDir", + help="The path for downloading the dataset", + ), +) -> None: """Download the required dataset from the BBBC dataaset.""" logger.info(f"name = {name}") logger.info(f"outDir = {out_dir}") - """Checking if output directory exists. If it does not exist then a designated path is created.""" + """Checking if output directory exists. + If it does not exist then a designated path is created.""" if not out_dir.exists(): logger.info(f"{out_dir} did not exists. 
Creating new path.") out_dir.mkdir() - if(not out_dir.exists): - raise ValueError("Directory does not exist") - - + if not out_dir.exists(): + msg = "Directory does not exist" + raise ValueError(msg) with ThreadPoolExecutor(max_workers=NUM_THREADS) as executor: start_time = time.time() - threads=[] - names=name.split(",") + threads = [] + names = name.split(",") for n in names: - if(n=='IDAndSegmentation'): - threads.append( - executor.submit(IDAndSegmentation.raw,out_dir) - ) - - elif(n=='PhenotypeClassification'): - threads.append( - executor.submit(PhenotypeClassification.raw,out_dir) - ) + if n == "IDAndSegmentation": + threads.append(executor.submit(IDAndSegmentation.raw, out_dir)) + elif n == "PhenotypeClassification": + threads.append(executor.submit(PhenotypeClassification.raw, out_dir)) + elif n == "ImageBasedProfiling": + threads.append(executor.submit(ImageBasedProfiling.raw, out_dir)) - elif(n=='ImageBasedProfiling'): - threads.append( - executor.submit(ImageBasedProfiling.raw,out_dir) - ) - - elif(n=='All'): - threads.append( - executor.submit(BBBC.raw,out_dir) - ) - + elif n == "All": + threads.append(executor.submit(BBBC.raw, out_dir)) else: - d=executor.submit(BBBCDataset.create_dataset, n) - d_name=d.result() - threads.append( - executor.submit(d_name.raw,out_dir) - ) + d = executor.submit(BBBCDataset.create_dataset, n) + d_name = d.result() + threads.append(executor.submit(d_name.raw, out_dir)) - for f in tqdm( as_completed(threads), total=len(threads), mininterval=5, - desc=f"donwloading the dataset", + desc="donwloading the dataset", initial=0, unit_scale=True, colour="cyan", ): f.result() end_time = time.time() - execution_time = (end_time - start_time) - execution_time_min=execution_time/60 + execution_time = end_time - start_time + execution_time_min = execution_time / 60 logger.info(f"The execution time is {execution_time} in seconds") - logger.info(f"The execution time is {execution_time_min} in minutes") - - -if __name__ == "__main__": - 
app() - + logger.info(f"The execution time is {execution_time_min} in minutes") +if __name__ == "__main__": + app() diff --git a/utils/bbbc-download-plugin/tests/__init__.py b/utils/bbbc-download-plugin/tests/__init__.py index fa93c893c..437dfbef1 100644 --- a/utils/bbbc-download-plugin/tests/__init__.py +++ b/utils/bbbc-download-plugin/tests/__init__.py @@ -1 +1 @@ -"""bbbc download plugin.""" \ No newline at end of file +"""bbbc download plugin.""" diff --git a/utils/bbbc-download-plugin/tests/test_main.py b/utils/bbbc-download-plugin/tests/test_main.py index 24be86ae6..6ea042992 100644 --- a/utils/bbbc-download-plugin/tests/test_main.py +++ b/utils/bbbc-download-plugin/tests/test_main.py @@ -10,10 +10,11 @@ from typer.testing import CliRunner from polus.plugins.utils.bbbc_download.__main__ import app as app -from polus.plugins.utils.bbbc_download import BBBC_model,download +from polus.plugins.utils.bbbc_download import BBBC_model, download runner = CliRunner() + @pytest.fixture def output_directory(): """Generate random output directory.""" @@ -21,11 +22,12 @@ def output_directory(): yield out_dir shutil.rmtree(out_dir) + @pytest.fixture def macosx_directory(): """Generate random directory named __MACOSX.""" test_dir = pathlib.Path(tempfile.mkdtemp(dir=pathlib.Path.cwd())) - macosx_dir=test_dir.joinpath("Images","__MACOSX") + macosx_dir = test_dir.joinpath("Images", "__MACOSX") macosx_dir.mkdir(parents=True) yield macosx_dir shutil.rmtree(macosx_dir.parents[1]) @@ -33,46 +35,51 @@ def macosx_directory(): def test_delete_macosx(macosx_directory) -> None: """Testing the delete_macosx function in download.py""" - mac_dir=macosx_directory - mac_dir=pathlib.Path(mac_dir) - - mac_dir_test= mac_dir.parent - macosx_test_name="testname" - download.remove_macosx(macosx_test_name,mac_dir_test) - assert mac_dir.exists()==False + mac_dir = macosx_directory + mac_dir = pathlib.Path(mac_dir) + + mac_dir_test = mac_dir.parent + macosx_test_name = "testname" + 
download.remove_macosx(macosx_test_name, mac_dir_test) + assert mac_dir.exists() == False -def test_bbbc_datasets()->None: +def test_bbbc_datasets() -> None: """Test to check if all the datasets on the BBBC website are recognized.""" - d_test=BBBC_model.BBBC.datasets - assert len(d_test)==50 + d_test = BBBC_model.BBBC.datasets + assert len(d_test) == 50 -def test_raw(output_directory)->None: + +def test_raw(output_directory) -> None: """A function to test the download functionality.""" - d=BBBC_model.BBBCDataset.create_dataset("BBBC054") #change dataset name to test - output_dir=pathlib.Path(output_directory) + d = BBBC_model.BBBCDataset.create_dataset("BBBC054") # change dataset name to test + output_dir = pathlib.Path(output_directory) d.raw(output_dir) - assert d.size >0 + assert d.size > 0 + -def test_IDAndSegmentation()-> None: +def test_IDAndSegmentation() -> None: """Test to check if all the datasets on the Identification and segmentation table are recognized.""" - d_test_IDAndSegmentation= BBBC_model.IDAndSegmentation.datasets - assert len(d_test_IDAndSegmentation)==32 + d_test_IDAndSegmentation = BBBC_model.IDAndSegmentation.datasets + assert len(d_test_IDAndSegmentation) == 32 -def test_PhenotypeClassification()-> None: + +def test_PhenotypeClassification() -> None: """Test to check if all the datasets on the Phenotype CLassification table are recognized.""" - d_test_PhenotypeClassification= BBBC_model.PhenotypeClassification.datasets - assert len(d_test_PhenotypeClassification)==14 + d_test_PhenotypeClassification = BBBC_model.PhenotypeClassification.datasets + assert len(d_test_PhenotypeClassification) == 14 + -def test_ImageBasedProfiling()-> None: +def test_ImageBasedProfiling() -> None: """Test to check if all the datasets on the Image based profiling table are recognized.""" - d_test_ImageBasedProfiling= BBBC_model.ImageBasedProfiling.datasets - assert len(d_test_ImageBasedProfiling)==6 + d_test_ImageBasedProfiling = 
BBBC_model.ImageBasedProfiling.datasets + assert len(d_test_ImageBasedProfiling) == 6 + def test_cli(output_directory) -> None: """Test Cli.""" - name="BBBC001,BBBC002" - output_dir=pathlib.Path(output_directory) + name = "BBBC001,BBBC002" + output_dir = pathlib.Path(output_directory) result = runner.invoke( app, @@ -85,5 +92,3 @@ def test_cli(output_directory) -> None: ) assert result.exit_code == 0 - - From e000834f804f129e2851862f05875b5e386394fd Mon Sep 17 00:00:00 2001 From: EC2 Default User Date: Mon, 21 Aug 2023 15:35:30 +0000 Subject: [PATCH 12/18] Added nested zip file extraction --- utils/bbbc-download-plugin/.bumpversion.cfg | 2 +- utils/bbbc-download-plugin/README.md | 2 +- utils/bbbc-download-plugin/VERSION | 2 +- utils/bbbc-download-plugin/plugin.json | 2 +- utils/bbbc-download-plugin/pyproject.toml | 2 +- .../plugins/utils/bbbc_download/__init__.py | 2 +- .../plugins/utils/bbbc_download/download.py | 33 +++++++++++++++++++ 7 files changed, 39 insertions(+), 6 deletions(-) diff --git a/utils/bbbc-download-plugin/.bumpversion.cfg b/utils/bbbc-download-plugin/.bumpversion.cfg index 182a51988..2495ab3c5 100644 --- a/utils/bbbc-download-plugin/.bumpversion.cfg +++ b/utils/bbbc-download-plugin/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.1.0-dev0 +current_version = 0.1.0-dev1 commit = True tag = False parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\-(?P[a-z]+)(?P\d+))? diff --git a/utils/bbbc-download-plugin/README.md b/utils/bbbc-download-plugin/README.md index 4218fc9f9..d10719a2c 100644 --- a/utils/bbbc-download-plugin/README.md +++ b/utils/bbbc-download-plugin/README.md @@ -1,4 +1,4 @@ -# BBBC Download (0.1.0-dev0) +# BBBC Download (0.1.0-dev1) This plugin is designed to download the necessary datasets from the Broad Bioimage Benchmark Collection(BBBC) website. 
diff --git a/utils/bbbc-download-plugin/VERSION b/utils/bbbc-download-plugin/VERSION index 206c0852b..6b1a238a7 100644 --- a/utils/bbbc-download-plugin/VERSION +++ b/utils/bbbc-download-plugin/VERSION @@ -1 +1 @@ -0.1.0-dev0 +0.1.0-dev1 diff --git a/utils/bbbc-download-plugin/plugin.json b/utils/bbbc-download-plugin/plugin.json index dbe8689b2..355bc21ab 100644 --- a/utils/bbbc-download-plugin/plugin.json +++ b/utils/bbbc-download-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "BBBC Download", - "version": "0.1.0-dev0", + "version": "0.1.0-dev1", "title": "BBBC Download", "description": "Downloads the datasets on the Broad Bioimage Benchmark Collection website", "author": "Saket Prem(saket.prem@axleinfo.com), Matthew McIntyre(Matthew.McIntyre@axleinfo.com)", diff --git a/utils/bbbc-download-plugin/pyproject.toml b/utils/bbbc-download-plugin/pyproject.toml index 5bcf26c72..ad00e70f8 100644 --- a/utils/bbbc-download-plugin/pyproject.toml +++ b/utils/bbbc-download-plugin/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "polus-plugins-utils-bbbc-download-plugin" -version = "0.1.0-dev0" +version = "0.1.0-dev1" description = "" authors = [ "Saket Prem ", diff --git a/utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/__init__.py b/utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/__init__.py index 6b548254d..223215dcf 100644 --- a/utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/__init__.py +++ b/utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/__init__.py @@ -1,2 +1,2 @@ """Bbbc Download.""" -__version__ = "0.1.0-dev0" +__version__ = "0.1.0-dev1" diff --git a/utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/download.py b/utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/download.py index d9da36e11..89823680f 100644 --- a/utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/download.py +++ b/utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/download.py 
@@ -30,6 +30,29 @@ def get_lower_tags(tag: bs4.element.Tag) -> list: return tags +def extract_nested_zips(name: str,zip_path:Path, extract_path:Path): + """Unzip nested zip files. + Args: + name: Name of the dataset + zip_path: Path to the zip file + extract_path: The path where the unzipped files will be saved + """ + + with ZipFile(zip_path, 'r') as zip_ref: + zip_ref.extractall(extract_path) + zip_path.unlink() + print(zip_path) + print(zip_path.exists()) + extracted_folder_name = zip_path.stem # Name with .zip extension + extracted_folder_name = extract_path.joinpath(extracted_folder_name.replace('.zip','')) + remove_macosx(name,extract_path) + + nested_zip_files = list(extracted_folder_name.glob("*.zip")) + print(nested_zip_files) + for nested_zip_file in nested_zip_files: + nested_extract_path = nested_zip_file.parent + extract_nested_zips(nested_zip_file, nested_extract_path) + def get_url(url: str, save_location: Path, name: str) -> None: """Get the given url and save it. @@ -60,6 +83,9 @@ def get_url(url: str, save_location: Path, name: str) -> None: with ZipFile(zip_path, "r") as zfile: zfile.extractall(save_location) + + + except URLError as e: if download_attempts == 9: print("FAILED TO DOWNLOAD: " + url + " for " + name) @@ -148,5 +174,12 @@ def download(name: str,download_path:Path) -> None: ground_path=save_location.joinpath("Ground_Truth") if ground_path.exists(): remove_macosx(name,ground_path) + + # unzip nested zip files + zip_files = list(images_path.glob("**/*.zip")) + print(zip_files) + for zip_file in zip_files: + extract_path = zip_file.parent + extract_nested_zips(name,zip_file, extract_path) return From 09a3611d4ec5c68282e1422c0b0ce7015e1038fc Mon Sep 17 00:00:00 2001 From: EC2 Default User Date: Mon, 21 Aug 2023 15:40:43 +0000 Subject: [PATCH 13/18] Modified doc string in download function --- .../src/polus/plugins/utils/bbbc_download/download.py | 1 + 1 file changed, 1 insertion(+) diff --git 
a/utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/download.py b/utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/download.py index 89823680f..d98186b56 100644 --- a/utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/download.py +++ b/utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/download.py @@ -120,6 +120,7 @@ def download(name: str,download_path:Path) -> None: """Download a single dataset. Args: name: The name of the dataset to be downloaded + downlaod_path: Path to donwload the dataset """ print("Started downloading " + name) From 8f23b43f00190b3b6b02cbabba955b763276edf6 Mon Sep 17 00:00:00 2001 From: EC2 Default User Date: Thu, 24 Aug 2023 20:27:20 +0000 Subject: [PATCH 14/18] plugin.json contaianerId modified to dev1 --- utils/bbbc-download-plugin/plugin.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/bbbc-download-plugin/plugin.json b/utils/bbbc-download-plugin/plugin.json index 355bc21ab..a4f8af711 100644 --- a/utils/bbbc-download-plugin/plugin.json +++ b/utils/bbbc-download-plugin/plugin.json @@ -8,7 +8,7 @@ "repository": "https://github.com/PolusAI/polus-plugins", "website": "https://ncats.nih.gov/preclinical/core/informatics", "citation": "", - "containerId": "polusai/bbbc-download-plugin:0.1.0-dev0", + "containerId": "polusai/bbbc-download-plugin:0.1.0-dev1", "baseCommand": [ "python3", "-m", From 08f74b36474e61269646462c125ca276e86cff6f Mon Sep 17 00:00:00 2001 From: EC2 Default User Date: Fri, 25 Aug 2023 18:49:52 +0000 Subject: [PATCH 15/18] Changes the print statements to logger in BBBC_model.py and download.py --- utils/bbbc-download-plugin/plugin.json | 2 +- .../plugins/utils/bbbc_download/BBBC_model.py | 33 ++++++++++--------- .../plugins/utils/bbbc_download/download.py | 28 +++++++--------- 3 files changed, 30 insertions(+), 33 deletions(-) diff --git a/utils/bbbc-download-plugin/plugin.json b/utils/bbbc-download-plugin/plugin.json index 
a4f8af711..8246dd3ac 100644 --- a/utils/bbbc-download-plugin/plugin.json +++ b/utils/bbbc-download-plugin/plugin.json @@ -33,7 +33,7 @@ { "key": "inputs.name", "title": "Input name of datasets as string", - "description": "Input the name of the datasets to be downloaded as a string" + "description": "Input the name of the datasets to be downloaded." } ] } diff --git a/utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/BBBC_model.py b/utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/BBBC_model.py index c0109d4ee..d4e755422 100644 --- a/utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/BBBC_model.py +++ b/utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/BBBC_model.py @@ -20,7 +20,8 @@ from skimage import io import pyarrow as pa import pyarrow.parquet as pq - +import logging +logger = logging.getLogger(__name__) BASE_URL = "https://bbbc.broadinstitute.org/" @@ -180,7 +181,7 @@ def create_dataset(cls, name: str) -> Union["BBBCDataset", None]: else: return BBBCDataset(name=name) except ValueError as e: - print(e) + logger.info(f"{e}") return None @@ -260,10 +261,10 @@ def _init_data(self,download_path:Path) -> None: pass if self.images == None: - print(self.name + " has no images.") + logger.info(f"{self.name} has no images") if self.ground_truth == None and self.metadata == None: - print(self.name + " has no ground truth or metadata.") + logger.info(f"{self.name} has no ground truth or metadata") return @@ -284,13 +285,13 @@ def standard(self, extension: str) -> None: """ if extension not in [".ome.tif", ".ome.zarr"]: - print( + logger.info( f"ERROR: {extension} is an invalid extension for standardization. Must be .ome.tif or .ome.zarr." ) return if self.images == None: - print( + logger.info( f"ERROR: Images for {self.name} have not been downloaded so they cannot be standardized." 
) return @@ -316,7 +317,7 @@ def standard(self, extension: str) -> None: elif row["Image Type"] == "Metadata": sub_folder = "Metadata" else: - print("ERROR: Invalid value for attribute Image Type") + logger.info(f"ERROR: Invalid value for attribute Image Type") return save_path = standard_folder.joinpath(sub_folder) @@ -334,7 +335,7 @@ def standard(self, extension: str) -> None: bw.dtype = raw_image.dtype bw[:] = raw_image - print(f"Finished standardizing {self.name}") + logger.info(f"Finished standardizing {self.name}") return @@ -366,7 +367,7 @@ def raw(self,download_path:Path) -> None: try: shutil.rmtree(src) except NotADirectoryError as e: - print(e) + logger.info(f"{e}") else: shutil.move(src, dst) @@ -378,7 +379,7 @@ def raw(self,download_path:Path) -> None: class BBBC029(BBBCDataset): def raw(self,download_path:Path) -> None: - print("Started downloading BBBC029") + logger.info(f"Started downloading BBBC029") self.output_path=download_path save_location=download_path.joinpath("BBBC") @@ -401,7 +402,7 @@ def raw(self,download_path:Path) -> None: "BBBC029", ) - print("BBBC029 has finished downloading") + logger.info(f"BBBC029 has finished downloading") images_folder=save_location.joinpath("Images") truth_folder=save_location.joinpath("Ground_Truth") remove_macosx("BBBC029",images_folder) @@ -454,7 +455,7 @@ def raw(self,download_path:Path) -> None: class BBBC042(BBBCDataset): def raw(self,download_path:Path) -> None: - print("Started downloading BBBC042") + logger.info(f"Started downloading BBBC042") self.output_path=download_path save_location=download_path.joinpath("BBBC") @@ -477,7 +478,7 @@ def raw(self,download_path:Path) -> None: "BBBC042", ) - print("BBBC042 has finished downloading") + logger.info(f"BBBC042 has finished downloading") images_folder=save_location.joinpath("Images") truth_folder=save_location.joinpath("Ground_Truth") remove_macosx("BBBC029",images_folder) @@ -530,10 +531,10 @@ def raw(self, download_path: Path) -> None: 
self._init_data(download_path) except Exception as e: - print( - "BBBC046 downloaded successfully but an error occurred when organizing raw data." + logger.info( + f"BBBC046 downloaded successfully but an error occurred when organizing raw data." ) - print("ERROR: " + str(e)) + logger.info(f"ERROR: {str(e)}") return diff --git a/utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/download.py b/utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/download.py index d98186b56..7936a8817 100644 --- a/utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/download.py +++ b/utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/download.py @@ -3,6 +3,7 @@ from urllib.request import urlretrieve from urllib.error import URLError from zipfile import ZipFile +import logging import bs4 import shutil @@ -12,7 +13,7 @@ "Images|Ground truth|Ground Truth|Metadata|Hand-annotated Ground Truth Images" ) endings = (".txt", ".csv", ".tif", ".xlsx", ".xls", ".lst") - +logger = logging.getLogger(__name__) def get_lower_tags(tag: bs4.element.Tag) -> list: """Get all tags between the tag argument and the next tag of the same type. 
@@ -41,14 +42,12 @@ def extract_nested_zips(name: str,zip_path:Path, extract_path:Path): with ZipFile(zip_path, 'r') as zip_ref: zip_ref.extractall(extract_path) zip_path.unlink() - print(zip_path) - print(zip_path.exists()) extracted_folder_name = zip_path.stem # Name with .zip extension extracted_folder_name = extract_path.joinpath(extracted_folder_name.replace('.zip','')) remove_macosx(name,extract_path) nested_zip_files = list(extracted_folder_name.glob("*.zip")) - print(nested_zip_files) + for nested_zip_file in nested_zip_files: nested_extract_path = nested_zip_file.parent extract_nested_zips(nested_zip_file, nested_extract_path) @@ -73,8 +72,8 @@ def get_url(url: str, save_location: Path, name: str) -> None: urlretrieve(url, save_location.joinpath(file_name)) except URLError as e: if download_attempts == 9: - print("FAILED TO DOWNLOAD: " + url + " for " + name) - print("ERROR: " + str(e)) + logger.info(f"FAILED TO DOWNLOAD {url} for {name}") + logger.info(f"ERROR {str(e)}") continue elif url.endswith(".zip"): @@ -88,12 +87,12 @@ def get_url(url: str, save_location: Path, name: str) -> None: except URLError as e: if download_attempts == 9: - print("FAILED TO DOWNLOAD: " + url + " for " + name) - print("ERROR: " + str(e)) + logger.info(f"FAILED TO DOWNLOAD {url} for {name}") + logger.info(f"ERROR {str(e)}") continue except Exception as e: - print(e) + logger.info(f"{e}") continue @@ -111,10 +110,7 @@ def remove_macosx(name:str, location:Path)-> None: for f in folders: if f.name=="__MACOSX": shutil.rmtree(f) - print("Deleted the __MACOSX folder in " + name) - - - + logger.info(f"Deleted the __MACOSX folder in {name}") def download(name: str,download_path:Path) -> None: """Download a single dataset. 
@@ -123,7 +119,7 @@ def download(name: str,download_path:Path) -> None: downlaod_path: Path to donwload the dataset """ - print("Started downloading " + name) + logger.info(f"Started downloading {name}") download_path=download_path.joinpath("BBBC") save_location = download_path.joinpath(name, "raw") @@ -169,7 +165,8 @@ def download(name: str,download_path:Path) -> None: get_url(url, file_path, "BBBC018") - print(name + " has finished downloading") + logger.info(f"{name} has finished downloading") + images_path=save_location.joinpath("Images") remove_macosx(name,images_path) ground_path=save_location.joinpath("Ground_Truth") @@ -178,7 +175,6 @@ def download(name: str,download_path:Path) -> None: # unzip nested zip files zip_files = list(images_path.glob("**/*.zip")) - print(zip_files) for zip_file in zip_files: extract_path = zip_file.parent extract_nested_zips(name,zip_file, extract_path) From c7f019c162f42a4e97bcea89749bf1b0a3c0a556 Mon Sep 17 00:00:00 2001 From: Jane Van Lam <75lam@cua.edu> Date: Mon, 16 Mar 2026 12:21:15 -0400 Subject: [PATCH 16/18] update packeages to work with cp3.13,remove vaex update test_main.py, BBBC_model.py --- utils/bbbc-download-plugin/.bumpversion. | 0 utils/bbbc-download-plugin/Dockerfile | 2 +- utils/bbbc-download-plugin/pyproject.toml | 30 +++++----- .../plugins/utils/bbbc_download/BBBC_model.py | 58 ++++++++----------- utils/bbbc-download-plugin/tests/test_main.py | 8 +-- 5 files changed, 44 insertions(+), 54 deletions(-) delete mode 100644 utils/bbbc-download-plugin/.bumpversion. diff --git a/utils/bbbc-download-plugin/.bumpversion. b/utils/bbbc-download-plugin/.bumpversion. 
deleted file mode 100644 index e69de29bb..000000000 diff --git a/utils/bbbc-download-plugin/Dockerfile b/utils/bbbc-download-plugin/Dockerfile index d89987fd6..00653c1bc 100644 --- a/utils/bbbc-download-plugin/Dockerfile +++ b/utils/bbbc-download-plugin/Dockerfile @@ -1,4 +1,4 @@ -FROM polusai/bfio:2.1.9 +FROM python:3.13-slim # environment variables defined in polusai/bfio ENV EXEC_DIR="/opt/executables" diff --git a/utils/bbbc-download-plugin/pyproject.toml b/utils/bbbc-download-plugin/pyproject.toml index ad00e70f8..48275df4c 100644 --- a/utils/bbbc-download-plugin/pyproject.toml +++ b/utils/bbbc-download-plugin/pyproject.toml @@ -10,22 +10,22 @@ readme = "README.md" packages = [{include = "polus", from = "src"}] [tool.poetry.dependencies] -python = ">=3.9,<4" -typer = "^0.9.0" -pyarrow = "11.0.0" -scikit-image = "0.20.0" -vaex = "4.16.0" -bfio = "2.3.1.dev0" -beautifulsoup4 = "4.12.0" -numpy = "1.24.2" -pandas = "1.5.3" -requests = "2.28.2" -pydantic = "1.10.7" +python = ">=3.12" +typer = ">=0.24.0" +pyarrow = ">=23.0.0" +scikit-image = ">=0.25.0" +bfio = ">=2.5.0" +beautifulsoup4 = ">=4.14.3" +numpy = ">=1.26.0" +pandas = ">=2.2.3" +requests = ">=2.32.5" +pydantic = ">=2.12.5" bump2version = "1.0.1" -mypy = "1.0.1" -tqdm = "^4.65.0" -pytest = "^7.4.0" -xmlschema = "^2.3.1" +mypy = ">=1.19.1" +tqdm = ">=4.67.0" +pytest = ">=9.0.0" +xmlschema = ">=4.3.1" +lxml = ">=6.0.2" [build-system] diff --git a/utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/BBBC_model.py b/utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/BBBC_model.py index d4e755422..11839b8e8 100644 --- a/utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/BBBC_model.py +++ b/utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/BBBC_model.py @@ -1,10 +1,11 @@ -from typing import List, Dict, Union, Optional +from typing import List, Dict, Union, Optional, Self import shutil import os from multiprocessing import cpu_count from concurrent.futures 
import ThreadPoolExecutor, as_completed from pathlib import Path from zipfile import ZipFile +from pydantic import model_validator, field_validator from polus.plugins.utils.bbbc_download.download import download, get_url, remove_macosx @@ -16,7 +17,6 @@ from tqdm import tqdm import bs4 from bfio import BioWriter -import vaex from skimage import io import pyarrow as pa import pyarrow.parquet as pq @@ -54,14 +54,12 @@ class Metadata(pydantic.BaseModel): path: Path name: str - @pydantic.root_validator() - @classmethod - def valid_data(cls, values: dict) -> dict: - if not values["path"].exists(): + @model_validator(mode="after") + def validate_data(self) -> Self: + if not self.path.exists(): raise ValueError("No metadata") - - return values - + return self + @property def size(self) -> int: """Returns the size of the dataset's metadata in bytes.""" @@ -80,13 +78,12 @@ class GroundTruth(pydantic.BaseModel): path: Path name: str - @pydantic.root_validator() - @classmethod - def valid_data(cls, values: dict) -> dict: - if not values["path"].exists(): + @model_validator(mode="after") + def validate_data(self) -> Self: + if not self.path.exists(): raise ValueError("No ground truth") - return values + return self @property def size(self) -> int: @@ -106,13 +103,12 @@ class Images(pydantic.BaseModel): path: Path name: str - @pydantic.root_validator() - @classmethod - def valid_data(cls, values: dict) -> dict: - if not values["path"].exists(): + + def validate_data(self) -> Self: + if not self.path.exists(): raise ValueError("No images") - return values + return self @property def size(self) -> int: @@ -142,7 +138,7 @@ class BBBCDataset(pydantic.BaseModel): metadata: Optional[Metadata] = None output_path: Optional[Path]= None - @pydantic.validator("name") + @field_validator("name") @classmethod def valid_name(cls, v: str) -> str: """Validates the name of the dataset. @@ -154,7 +150,7 @@ def valid_name(cls, v: str) -> str: The name provided if validation is successful. 
""" - if v not in list(BBBC.combined_table["Accession"]): + if v not in list(BBBC.combined_table()["Accession"]): raise ValueError( v + " is an invalid dataset name. Valid dataset names belong to an existing BBBC dataset." @@ -193,7 +189,7 @@ def info(self) -> Dict[str, Union[str, np.int64]]: A dictionary that contains information about the dataset. """ - table = BBBC.combined_table + table = BBBC.combined_table() row = table.loc[table["Accession"] == self.name] @@ -299,8 +295,7 @@ def standard(self, extension: str) -> None: standard_folder = Path(root, self.name, "standard") arrow_file = Path("arrow", self.name + ".arrow") arrow_table = pq.read_table(arrow_file) - df = vaex.from_arrow_table(arrow_table) - + df = arrow_table.to_pandas() if not standard_folder.exists(): standard_folder.mkdir(parents=True, exist_ok=True) @@ -575,7 +570,6 @@ class IDAndSegmentation: table: pd.DataFrame = tables[0] @classmethod - @property def datasets(cls) -> List[BBBCDataset]: """Returns a list of all datasets in the table. @@ -593,7 +587,7 @@ def raw(cls,download_path:Path) -> None: threads = [] with ThreadPoolExecutor(max_workers=num_workers) as executor: - for dataset in IDAndSegmentation.datasets: + for dataset in IDAndSegmentation.datasets(): threads.append(executor.submit(dataset.raw(download_path))) for f in tqdm( @@ -614,7 +608,6 @@ class PhenotypeClassification: table: pd.DataFrame = tables[1] @classmethod - @property def datasets(cls) -> List[BBBCDataset]: """Returns a list of all datasets in the table. 
@@ -632,7 +625,7 @@ def raw(cls,download_path:Path) -> None: threads = [] with ThreadPoolExecutor(max_workers=num_workers) as executor: - for dataset in PhenotypeClassification.datasets: + for dataset in PhenotypeClassification.datasets(): threads.append(executor.submit(dataset.raw(download_path))) for f in tqdm( @@ -653,7 +646,6 @@ class ImageBasedProfiling: table: pd.DataFrame = tables[2] @classmethod - @property def datasets(cls) -> List[BBBCDataset]: """Returns a list of all datasets in the table. @@ -671,7 +663,7 @@ def raw(cls,download_path:Path) -> None: threads = [] with ThreadPoolExecutor(max_workers=num_workers) as executor: - for dataset in ImageBasedProfiling.datasets: + for dataset in ImageBasedProfiling.datasets(): threads.append(executor.submit(dataset.raw(download_path))) for f in tqdm( @@ -689,7 +681,6 @@ class BBBC: """ @classmethod - @property def datasets(cls) -> List[BBBCDataset]: """Returns a list of all datasets in BBBC. @@ -697,12 +688,11 @@ def datasets(cls) -> List[BBBCDataset]: A list containing a Dataset object for each dataset in BBBC. """ - table = BBBC.combined_table + table = BBBC.combined_table() return [BBBCDataset.create_dataset(name) for name in table["Accession"]] @classmethod - @property def combined_table(cls) -> pd.DataFrame: """Combines each table on https://bbbc.broadinstitute.org/image_sets into a single table. 
@@ -727,7 +717,7 @@ def raw(cls,download_path:Path) -> None: threads = [] with ThreadPoolExecutor(max_workers=num_workers) as executor: - for dataset in BBBC.datasets: + for dataset in BBBC.datasets(): threads.append(executor.submit(dataset.raw(download_path))) for f in tqdm( diff --git a/utils/bbbc-download-plugin/tests/test_main.py b/utils/bbbc-download-plugin/tests/test_main.py index 6ea042992..fd65f7cb1 100644 --- a/utils/bbbc-download-plugin/tests/test_main.py +++ b/utils/bbbc-download-plugin/tests/test_main.py @@ -46,7 +46,7 @@ def test_delete_macosx(macosx_directory) -> None: def test_bbbc_datasets() -> None: """Test to check if all the datasets on the BBBC website are recognized.""" - d_test = BBBC_model.BBBC.datasets + d_test = BBBC_model.BBBC.datasets() assert len(d_test) == 50 @@ -60,19 +60,19 @@ def test_raw(output_directory) -> None: def test_IDAndSegmentation() -> None: """Test to check if all the datasets on the Identification and segmentation table are recognized.""" - d_test_IDAndSegmentation = BBBC_model.IDAndSegmentation.datasets + d_test_IDAndSegmentation = BBBC_model.IDAndSegmentation.datasets() assert len(d_test_IDAndSegmentation) == 32 def test_PhenotypeClassification() -> None: """Test to check if all the datasets on the Phenotype CLassification table are recognized.""" - d_test_PhenotypeClassification = BBBC_model.PhenotypeClassification.datasets + d_test_PhenotypeClassification = BBBC_model.PhenotypeClassification.datasets() assert len(d_test_PhenotypeClassification) == 14 def test_ImageBasedProfiling() -> None: """Test to check if all the datasets on the Image based profiling table are recognized.""" - d_test_ImageBasedProfiling = BBBC_model.ImageBasedProfiling.datasets + d_test_ImageBasedProfiling = BBBC_model.ImageBasedProfiling.datasets() assert len(d_test_ImageBasedProfiling) == 6 From aa561ff09f07685bef2c17bf645c4b2062fd6798 Mon Sep 17 00:00:00 2001 From: Jane Van Lam <75lam@cua.edu> Date: Mon, 16 Mar 2026 12:54:53 -0400 Subject: 
[PATCH 17/18] update gitignore and Dockerfile --- .gitignore | 1 + utils/bbbc-download-plugin/Dockerfile | 9 +++++---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index 9649798c4..975d1c690 100644 --- a/.gitignore +++ b/.gitignore @@ -169,3 +169,4 @@ data src/polus/plugins/_plugins/manifests/* # allow python scripts insied manifests dir !src/polus/plugins/_plugins/manifests/*.py +uv.lock diff --git a/utils/bbbc-download-plugin/Dockerfile b/utils/bbbc-download-plugin/Dockerfile index 00653c1bc..4f10e8d3b 100644 --- a/utils/bbbc-download-plugin/Dockerfile +++ b/utils/bbbc-download-plugin/Dockerfile @@ -9,10 +9,11 @@ ENV POLUS_LOG="INFO" # Work directory defined in the base container WORKDIR ${EXEC_DIR} -COPY pyproject.toml ${EXEC_DIR} -COPY VERSION ${EXEC_DIR} -COPY README.md ${EXEC_DIR} -COPY src ${EXEC_DIR}/src +# When building from repo root: -f utils/bbbc-download-plugin/Dockerfile . +COPY utils/bbbc-download-plugin/pyproject.toml ${EXEC_DIR} +COPY utils/bbbc-download-plugin/VERSION ${EXEC_DIR} +COPY utils/bbbc-download-plugin/README.md ${EXEC_DIR} +COPY utils/bbbc-download-plugin/src ${EXEC_DIR}/src RUN pip3 install ${EXEC_DIR} --no-cache-dir From bd1ab204952616f97fc74795e88886ff7eaaa077 Mon Sep 17 00:00:00 2001 From: Jane Van Lam <75lam@cua.edu> Date: Mon, 16 Mar 2026 12:55:31 -0400 Subject: [PATCH 18/18] =?UTF-8?q?Bump=20version:=200.1.0-dev1=20=E2=86=92?= =?UTF-8?q?=200.1.1-dev0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- utils/bbbc-download-plugin/.bumpversion.cfg | 2 +- utils/bbbc-download-plugin/VERSION | 2 +- utils/bbbc-download-plugin/plugin.json | 4 ++-- utils/bbbc-download-plugin/pyproject.toml | 2 +- .../src/polus/plugins/utils/bbbc_download/__init__.py | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/utils/bbbc-download-plugin/.bumpversion.cfg b/utils/bbbc-download-plugin/.bumpversion.cfg index 2495ab3c5..ba5924e46 100644 --- 
a/utils/bbbc-download-plugin/.bumpversion.cfg +++ b/utils/bbbc-download-plugin/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.1.0-dev1 +current_version = 0.1.1-dev0 commit = True tag = False parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\-(?P[a-z]+)(?P\d+))? diff --git a/utils/bbbc-download-plugin/VERSION b/utils/bbbc-download-plugin/VERSION index 6b1a238a7..44bf4db83 100644 --- a/utils/bbbc-download-plugin/VERSION +++ b/utils/bbbc-download-plugin/VERSION @@ -1 +1 @@ -0.1.0-dev1 +0.1.1-dev0 diff --git a/utils/bbbc-download-plugin/plugin.json b/utils/bbbc-download-plugin/plugin.json index 8246dd3ac..493442d4b 100644 --- a/utils/bbbc-download-plugin/plugin.json +++ b/utils/bbbc-download-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "BBBC Download", - "version": "0.1.0-dev1", + "version": "0.1.1-dev0", "title": "BBBC Download", "description": "Downloads the datasets on the Broad Bioimage Benchmark Collection website", "author": "Saket Prem(saket.prem@axleinfo.com), Matthew McIntyre(Matthew.McIntyre@axleinfo.com)", @@ -8,7 +8,7 @@ "repository": "https://github.com/PolusAI/polus-plugins", "website": "https://ncats.nih.gov/preclinical/core/informatics", "citation": "", - "containerId": "polusai/bbbc-download-plugin:0.1.0-dev1", + "containerId": "polusai/bbbc-download-plugin:0.1.1-dev0", "baseCommand": [ "python3", "-m", diff --git a/utils/bbbc-download-plugin/pyproject.toml b/utils/bbbc-download-plugin/pyproject.toml index 48275df4c..f1b4ac73f 100644 --- a/utils/bbbc-download-plugin/pyproject.toml +++ b/utils/bbbc-download-plugin/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "polus-plugins-utils-bbbc-download-plugin" -version = "0.1.0-dev1" +version = "0.1.1-dev0" description = "" authors = [ "Saket Prem ", diff --git a/utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/__init__.py b/utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/__init__.py index 223215dcf..1e5dffd13 100644 --- 
a/utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/__init__.py +++ b/utils/bbbc-download-plugin/src/polus/plugins/utils/bbbc_download/__init__.py @@ -1,2 +1,2 @@ """Bbbc Download.""" -__version__ = "0.1.0-dev1" +__version__ = "0.1.1-dev0"