diff --git a/requirements-dev.txt b/requirements-dev.txt
index d87777773..f5a647ec3 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -1,6 +1,6 @@
 -r requirements.txt
-gradio
-pytest
-pytest-cov
-pre-commit
-tensorboard
+gradio==6.5.1
+pytest==9.0.2
+pytest-cov==7.0.0
+pre-commit==4.5.1
+tensorboard==2.20.0
diff --git a/requirements.txt b/requirements.txt
index f6d336cbb..d5c1ece2d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,15 +1,15 @@
-einops
-faster-coco-eval
-graphviz
-hydra-core
-lightning
-loguru
-numpy
-opencv-python
-Pillow
-pycocotools
-requests
-rich
-torch
-torchvision
-wandb
+einops==0.8.2
+faster-coco-eval==1.7.1
+graphviz==0.21
+hydra-core==1.3.2
+lightning==2.6.1
+loguru==0.7.3
+numpy==2.4.2
+opencv-python==4.13.0.92
+Pillow==12.1.1
+pycocotools==2.0.11
+requests==2.32.5
+rich==14.3.2
+torch==2.10.0
+torchvision==0.25.0
+wandb==0.24.2
diff --git a/tests/conftest.py b/tests/conftest.py
index 3be303bc2..8316273e9 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -73,7 +73,7 @@ def solver(train_cfg: Config) -> Trainer:
     trainer = Trainer(
         accelerator="auto",
         max_epochs=getattr(train_cfg.task, "epoch", None),
-        precision="16-mixed",
+        precision="32",
         callbacks=callbacks,
         logger=loggers,
         log_every_n_steps=1,
diff --git a/yolo/utils/logging_utils.py b/yolo/utils/logging_utils.py
index b687608ce..58f5bd407 100644
--- a/yolo/utils/logging_utils.py
+++ b/yolo/utils/logging_utils.py
@@ -21,7 +21,12 @@
 import torch
 import wandb
 from lightning import LightningModule, Trainer, seed_everything
-from lightning.pytorch.callbacks import Callback, RichModelSummary, RichProgressBar
+from lightning.pytorch.callbacks import (
+    Callback,
+    ModelCheckpoint,
+    RichModelSummary,
+    RichProgressBar,
+)
 from lightning.pytorch.callbacks.progress.rich_progress import CustomProgress
 from lightning.pytorch.loggers import TensorBoardLogger, WandbLogger
 from lightning.pytorch.utilities import rank_zero_only
@@ -157,7 +162,7 @@ def on_validation_end(self, trainer: "Trainer", pl_module: "LightningModule") ->
         self.past_results.append((trainer.current_epoch, ap_main))

     @override
-    def refresh(self) -> None:
+    def refresh(self, *args, **kwargs) -> None:
         if self.progress:
             self.progress.refresh()

@@ -272,9 +277,27 @@ def custom_wandb_log(string="", level=int, newline=True, repeat=True, prefix=Tru

     progress, loggers = [], []

-    if cfg.task.task == "train" and hasattr(cfg.task.data, "equivalent_batch_size"):
-        progress.append(GradientAccumulation(data_cfg=cfg.task.data, scheduler_cfg=cfg.task.scheduler))
+    if cfg.task.task == "train":
+        if hasattr(cfg.task.data, "equivalent_batch_size"):
+            progress.append(GradientAccumulation(data_cfg=cfg.task.data, scheduler_cfg=cfg.task.scheduler))
+
+        progress.append(
+            ModelCheckpoint(
+                dirpath=save_path,
+                filename="best-{epoch}-{map:.3f}",
+                monitor="map",
+                save_last=True,
+                save_top_k=3,
+                mode="max",
+            )
+        )
+        progress.append(
+            ModelCheckpoint(
+                dirpath=save_path,
+                save_weights_only=True,
+            )
+        )

     if hasattr(cfg.task, "ema") and cfg.task.ema.enable:
         progress.append(EMA(cfg.task.ema.decay))
     if quiet:
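
The two ModelCheckpoint callbacks added in yolo/utils/logging_utils.py only fire if the LightningModule logs a metric under the exact key "map". Below is a minimal, self-contained sketch of that interaction; TinyModule, the random "map" value, and save_path are illustrative stand-ins for this repo's real model, mAP computation, and configured save directory, not part of the PR.

# Minimal sketch (not part of the diff) of how the two ModelCheckpoint
# callbacks added above behave. TinyModule and save_path are assumptions.
import torch
from torch.utils.data import DataLoader, TensorDataset
from lightning import LightningModule, Trainer
from lightning.pytorch.callbacks import ModelCheckpoint


class TinyModule(LightningModule):
    def __init__(self):
        super().__init__()
        self.layer = torch.nn.Linear(4, 1)

    def training_step(self, batch, batch_idx):
        x, y = batch
        return torch.nn.functional.mse_loss(self.layer(x), y)

    def validation_step(self, batch, batch_idx):
        # monitor="map" below only works if a metric is logged under the
        # exact key "map"; a random scalar stands in for a real mAP here.
        self.log("map", torch.rand(()))

    def configure_optimizers(self):
        return torch.optim.SGD(self.parameters(), lr=1e-2)


save_path = "runs/example"  # assumed; the PR passes its own save_path

best_ckpt = ModelCheckpoint(
    dirpath=save_path,
    filename="best-{epoch}-{map:.3f}",  # e.g. best-epoch=1-map=0.412.ckpt
    monitor="map",
    save_last=True,  # also maintain last.ckpt for easy resuming
    save_top_k=3,    # keep the three highest-mAP checkpoints
    mode="max",      # higher mAP is better
)
weights_ckpt = ModelCheckpoint(
    dirpath=save_path,
    save_weights_only=True,  # state_dict only: no optimizer/loop state
)

data = TensorDataset(torch.randn(32, 4), torch.randn(32, 1))
loader = DataLoader(data, batch_size=8)
trainer = Trainer(max_epochs=2, callbacks=[best_ckpt, weights_ckpt])
trainer.fit(TinyModule(), loader, loader)

Because the second callback sets no monitor, it simply keeps the latest epoch's weights, so the run directory ends up with the top-3 best-by-mAP checkpoints, last.ckpt, and a small weights-only file that is convenient for inference or export but cannot resume optimizer state.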
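
The widened refresh(self, *args, **kwargs) override in the same file is a compatibility pattern: it keeps the override callable if the base progress class's refresh ever gains parameters. A tiny illustrative sketch, with hypothetical class names:

# Hypothetical classes showing why widening an override's signature helps:
# callers that pass new arguments to refresh() still hit the subclass cleanly.
class BaseBar:
    def refresh(self, soft: bool = False) -> None:  # imagine upstream adds `soft`
        print(f"refreshed (soft={soft})")


class CustomBar(BaseBar):
    def refresh(self, *args, **kwargs) -> None:  # absorbs any new parameters
        super().refresh(*args, **kwargs)


CustomBar().refresh()            # works
CustomBar().refresh(soft=True)   # still works; an exact `def refresh(self)` would raise TypeError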