Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion physionet/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
from .api import PhysioNetClient
from physionet.api import PhysioNetClient

# Expose the installed distribution version. The original caught bare
# Exception, which would also mask unrelated failures (e.g. a typo-ed
# call); catch only the two expected cases instead.
try:
    from importlib.metadata import PackageNotFoundError, version
except ImportError:  # Python < 3.8: importlib.metadata does not exist
    __version__ = "unknown"
else:
    try:
        __version__ = version("physionet")
    except PackageNotFoundError:  # running from a source checkout, not installed
        __version__ = "unknown"

__all__ = ["PhysioNetClient"]
7 changes: 7 additions & 0 deletions physionet/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
"""Allow running the CLI as a module: python -m physionet."""

import sys
from physionet.cli import main

if __name__ == "__main__":
sys.exit(main())
142 changes: 142 additions & 0 deletions physionet/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
"""Command-line interface for physionet package."""

import argparse
import json
import sys
from pathlib import Path

from physionet.validate import validate_dataset, ValidationConfig


def main():
    """Entry point for the ``physionet`` command-line tool.

    Parses command-line arguments and dispatches to the matching
    subcommand handler.

    Returns:
        Integer process exit code (0 on success, 1 on failure).
    """
    parser = _build_parser()
    args = parser.parse_args()

    if args.command is None:
        # No subcommand given: show usage and exit successfully.
        parser.print_help()
        return 0
    if args.command == "validate":
        return _handle_validate(args)
    # Defensive fallback; argparse normally rejects unknown subcommands itself.
    print(f"Unknown command: {args.command}", file=sys.stderr)
    return 1


def _build_parser():
    """Construct the top-level argument parser and its subcommands."""
    parser = argparse.ArgumentParser(
        prog="physionet",
        description="Tools for working with PhysioNet datasets",
    )
    subparsers = parser.add_subparsers(dest="command", help="Available commands")

    # "validate" subcommand: pre-submission dataset checks.
    validate_parser = subparsers.add_parser(
        "validate",
        help="Validate a dataset before submission to PhysioNet",
    )
    validate_parser.add_argument(
        "path",
        help="Path to the dataset directory to validate",
    )
    validate_parser.add_argument(
        "--report",
        metavar="FILE",
        help="Generate detailed JSON report and save to FILE",
    )
    validate_parser.add_argument(
        "--checks",
        metavar="CATEGORIES",
        help="Comma-separated list of check categories to run (filesystem,documentation,integrity,quality,privacy)",
    )
    validate_parser.add_argument(
        "--level",
        choices=["error", "warning", "info"],
        default="info",
        help="Minimum severity level to display (default: info)",
    )
    validate_parser.add_argument(
        "--no-sampling",
        action="store_true",
        help="Disable sampling for large files (scan all rows, slower but more thorough)",
    )
    validate_parser.add_argument(
        "--max-rows",
        type=int,
        metavar="N",
        help="Maximum rows to scan per CSV file (default: 10000)",
    )

    return parser


def _handle_validate(args):
"""Handle the validate subcommand."""
# Validate path
dataset_path = Path(args.path)
if not dataset_path.exists():
print(f"Error: Path does not exist: {args.path}", file=sys.stderr)
return 1

if not dataset_path.is_dir():
print(f"Error: Path is not a directory: {args.path}", file=sys.stderr)
return 1

# Configure validation
config = ValidationConfig()

# Parse check categories if specified
if args.checks:
categories = [c.strip().lower() for c in args.checks.split(",")]
config.check_filesystem = "filesystem" in categories
config.check_documentation = "documentation" in categories
config.check_integrity = "integrity" in categories
config.check_quality = "quality" in categories
config.check_phi = "privacy" in categories

# Configure sampling options
if args.no_sampling:
config.sample_large_files = False
if args.max_rows:
config.max_rows_to_scan = args.max_rows

# Run validation
try:
print(f"Validating dataset: {dataset_path}")
result = validate_dataset(str(dataset_path), config, show_progress=True)
print()

print(result.summary())

# Save validation report - either to specified path or default location
if args.report:
report_path = Path(args.report)
# Determine format based on file extension
if report_path.suffix.lower() == '.json':
# Save as JSON
with open(report_path, "w", encoding="utf-8") as f:
json.dump(result.to_dict(), f, indent=2)
else:
# Save as Markdown
with open(report_path, "w", encoding="utf-8") as f:
f.write(result.summary())
else:
# Default: save as Markdown in the root of the dataset folder
report_path = dataset_path / "PHYSIONET_REPORT.md"
with open(report_path, "w", encoding="utf-8") as f:
f.write(result.summary())

print()
print(f"Validation report saved to: {report_path}")

if result.status == "error":
return 1
elif result.status == "warning" and args.level == "error":
return 0 # Warnings don't fail if level is error
return 0

except Exception as e:
print(f"Error during validation: {str(e)}", file=sys.stderr)
import traceback
traceback.print_exc()
return 1


if __name__ == "__main__":
sys.exit(main())
7 changes: 7 additions & 0 deletions physionet/validate/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
"""Dataset validation module for PhysioNet submissions."""

from physionet.validate.validator import validate_dataset
from physionet.validate.config import ValidationConfig
from physionet.validate.models import ValidationResult

__all__ = ["validate_dataset", "ValidationConfig", "ValidationResult"]
15 changes: 15 additions & 0 deletions physionet/validate/checks/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
"""Validation check modules."""

from physionet.validate.checks.filesystem import check_filesystem
from physionet.validate.checks.documentation import check_documentation
from physionet.validate.checks.integrity import check_integrity
from physionet.validate.checks.quality import check_quality
from physionet.validate.checks.privacy import check_privacy

__all__ = [
"check_filesystem",
"check_documentation",
"check_integrity",
"check_quality",
"check_privacy",
]
48 changes: 48 additions & 0 deletions physionet/validate/checks/documentation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
"""Documentation validation checks."""

from pathlib import Path

from physionet.validate.models import CheckResult, ValidationIssue, CheckCategory, Severity
from physionet.validate.config import ValidationConfig


def check_documentation(path: Path, config: ValidationConfig) -> CheckResult:
    """
    Check documentation completeness.

    Validates:
    - Required files exist (if any are specified in config)

    Args:
        path: Path to dataset directory
        config: Validation configuration

    Returns:
        CheckResult with any documentation issues found
    """
    result = CheckResult(category=CheckCategory.DOCUMENTATION)

    # Collect every configured file that is absent from the dataset root.
    missing = [name for name in config.required_files if not (path / name).exists()]

    for name in missing:
        if name == "README.md":
            # README gets a more specific, actionable suggestion.
            suggestion = (
                "Add README.md to your dataset. At minimum, the file should include "
                "a title and a brief description of the package content."
            )
        else:
            suggestion = f"Add {name} to your dataset"

        result.issues.append(
            ValidationIssue(
                severity=Severity.ERROR,
                category=CheckCategory.DOCUMENTATION,
                file=name,
                message=f"Required file not found: {name}",
                suggestion=suggestion,
            )
        )

    return result
Loading