diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml deleted file mode 100644 index d6e089f..0000000 --- a/.github/workflows/docs.yml +++ /dev/null @@ -1,35 +0,0 @@ -name: Build and Deploy Docs - -on: - push: - branches: [main] - pull_request: - branches: [main] - -jobs: - build-docs: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: '3.11' - - - name: Install dependencies - run: | - pip install sphinx sphinx-rtd-theme sphinx-autodoc-typehints - pip install -e . # Install your package - - - name: Build documentation - run: | - cd docs - make html - - - name: Deploy to GitHub Pages - if: github.event_name == 'push' && github.ref == 'refs/heads/main' - uses: peaceiris/actions-gh-pages@v3 - with: - github_token: ${{ secrets.GITHUB_TOKEN }} - publish_dir: ./docs/_build/html \ No newline at end of file diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 0000000..fc64a37 --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,22 @@ +# Read the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required +version: 2 + +# Set the OS, Python version, and other tools you might need +build: + os: ubuntu-24.04 + tools: + python: "3.13" + +# Build documentation in the "docs/" directory with Sphinx +sphinx: + configuration: docs/conf.py + +# Optionally, but recommended, +# declare the Python requirements required to build your documentation +# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html +python: + install: + - requirements: docs/requirements.txt \ No newline at end of file diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 0000000..8bddd90 --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1,12 @@ +sphinx +piccolo_theme +sphinx-autodoc-typehints +sphinx-copybutton +myst-parser +numpy +polars +tqdm +statsmodels +matplotlib +pyarrow 
+lifelines \ No newline at end of file diff --git a/docs/source/conf.py b/docs/source/conf.py index 31dec20..f02a4fd 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -5,16 +5,36 @@ # -- Project information ----------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information +import os +import sys +from datetime import date +import importlib.metadata + +version = importlib.metadata.version("pySEQTarget") +sys.path.insert(0, os.path.abspath("../")) project = "pySEQTarget" -copyright = "2025, Ryan O'Dea, Alejandro Szmulewicz, Tom Palmer, Miguel Hernan" +copyright = f"{date.today().year}, Ryan O'Dea, Alejandro Szmulewicz, Tom Palmer, Miguel Hernan" author = "Ryan O'Dea, Alejandro Szmulewicz, Tom Palmer, Miguel Hernan" -release = "0.9.0" +release = version # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration -extensions = [] +extensions = [ + "sphinx.ext.duration", + "sphinx.ext.doctest", + "sphinx.ext.autodoc", + "sphinx.ext.autosummary", + "sphinx.ext.intersphinx", + "sphinx_copybutton", + "sphinx_autodoc_typehints", + "myst_parser" +] + +intersphinx_mapping = { + "py": ("https://docs.python.org/3", None) +} templates_path = ["_templates"] exclude_patterns = [] @@ -23,5 +43,5 @@ # -- Options for HTML output ------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output -html_theme = "alabaster" +html_theme = "piccolo_theme" html_static_path = ["_static"] diff --git a/docs/source/index.rst b/docs/source/index.rst index cee549f..dffb816 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -1,17 +1,12 @@ -.. pySEQTarget documentation master file, created by - sphinx-quickstart on Mon Nov 24 20:43:34 2025. 
- You can adapt this file completely to your liking, but it should at least - contain the root `toctree` directive. - -pySEQTarget documentation -========================= - -Add your content using ``reStructuredText`` syntax. See the -`reStructuredText `_ -documentation for details. +.. include:: ../../README.md + :parser: myst_parser.sphinx_ .. toctree:: :maxdepth: 2 + :hidden: :caption: Contents: + sequential/seqopts + sequential/sequential + sequential/seqoutput diff --git a/docs/source/sequential/seqopts.rst b/docs/source/sequential/seqopts.rst new file mode 100644 index 0000000..52e9b2d --- /dev/null +++ b/docs/source/sequential/seqopts.rst @@ -0,0 +1,4 @@ +SEQopts +=========== +.. autoclass:: pySEQTarget.SEQopts + :members: \ No newline at end of file diff --git a/docs/source/sequential/seqoutput.rst b/docs/source/sequential/seqoutput.rst new file mode 100644 index 0000000..ef4604a --- /dev/null +++ b/docs/source/sequential/seqoutput.rst @@ -0,0 +1,5 @@ +SEQoutput +================ + +.. autoclass:: pySEQTarget.SEQoutput + :members: diff --git a/docs/source/sequential/sequential.rst b/docs/source/sequential/sequential.rst new file mode 100644 index 0000000..6066b77 --- /dev/null +++ b/docs/source/sequential/sequential.rst @@ -0,0 +1,5 @@ +SEQuential +==================== + +.. 
autoclass:: pySEQTarget.SEQuential + :members: \ No newline at end of file diff --git a/pySEQTarget/SEQopts.py b/pySEQTarget/SEQopts.py index 4a59de6..688dedb 100644 --- a/pySEQTarget/SEQopts.py +++ b/pySEQTarget/SEQopts.py @@ -5,6 +5,97 @@ @dataclass class SEQopts: + """ + Parameter builder for ``pySEQTarget.SEQuential`` analysis + + :param bootstrap_nboot: Number of bootstraps to perform + :type bootstrap_nboot: int + :param bootstrap_sample: Subsampling proportion of ID-Trials gathered for each bootstrapping iteration + :type bootstrap_sample: float + :param bootstrap_CI: If bootstrapped, confidence interval level + :type bootstrap_CI: float + :param bootstrap_CI_method: If bootstrapped, confidence interval generation method ['SE' or 'percentile'] + :type bootstrap_CI_method: str + :param cense_colname: Column name for censoring effect (LTFU, etc.) + :type cense_colname: str + :param cense_denominator: Override to specify denominator patsy formula for censoring models + :type cense_denominator: Optional[str] or None + :param cense_numerator: Override to specify numerator patsy formula for censoring models + :type cense_numerator: Optional[str] or None + :param cense_eligible_colname: Column name to identify which rows are eligible for censoring model fitting + :type cense_eligible_colname: Optional[str] or None + :param compevent_colname: Column name specifying a competing event to the outcome + :type compevent_colname: str + :param covariates: Override to specify the outcome patsy formula for outcome model fitting + :type covariates: Optional[str] or None + :param denominator: Override to specify the outcome patsy formula for denominator model fitting + :type denominator: Optional[str] or None + :param excused: Boolean to allow excused conditions when method is censoring + :type excused: bool + :param excused_colnames: Column names (of the same length as treatment_level) specifying excused conditions + :type excused_colnames: List[str] or [] + :param 
followup_class: Boolean to force followup values to be treated as classes + :type followup_class: bool + :param followup_include: Boolean to force regular followup values into model covariates + :type followup_include: bool + :param followup_spline: Boolean to force followup values to be fit to a cubic spline + :type followup_spline: bool + :param followup_max: Maximum allowed followup in analysis + :type followup_max: int or None + :param followup_min: Minimum allowed followup in analysis + :type followup_min: int + :param hazard_estimate: Boolean to create hazard estimates + :type hazard_estimate: bool + :param indicator_baseline: How to indicate baseline columns in models + :type indicator_baseline: str + :param indicator_squared: How to indicate squared columns in models + :type indicator_squared: str + :param km_curves: Boolean to create survival, risk, and incidence (if applicable) estimates + :type km_curves: bool + :param ncores: Number of cores to use if running in parallel + :type ncores: int + :param numerator: Override to specify the outcome patsy formula for numerator models + :type numerator: str + :param parallel: Boolean to run model fitting in parallel + :type parallel: bool + :param plot_colors: List of colors for KM plots, if applicable + :type plot_colors: List[str] + :param plot_labels: List of length treatment_level to specify treatment labeling + :type plot_labels: List[str] + :param plot_title: Plot title + :type plot_title: str + :param plot_type: Type of plot to show ["risk", "survival" or "incidence" if compevent is specified] + :type plot_type: str + :param seed: RNG seed + :type seed: int + :param selection_first_trial: Boolean to only use first trial for analysis (similar to non-expanded) + :type selection_first_trial: bool + :param selection_sample: Subsampling proportion of ID-trials which did not initiate a treatment + :type selection_sample: float + :param selection_random: Boolean to randomly downsample ID-trials which did not initiate 
a treatment + :type selection_random: bool + :param subgroup_colname: Column name for subgroups to share the same weighting but different outcome model fits + :type subgroup_colname: str + :param treatment_level: List of eligible treatment levels within treatment_col + :type treatment_level: List[int] + :param trial_include: Boolean to force trial values into model covariates + :type trial_include: bool + :param weight_eligible_colnames: List of column names of length treatment_level to identify which rows are eligible for weight fitting + :type weight_eligible_colnames: List[str] + :param weight_min: Minimum weight + :type weight_min: float + :param weight_max: Maximum weight + :type weight_max: float or None + :param weight_lag_condition: Boolean to fit weights based on their treatment lag + :type weight_lag_condition: bool + :param weight_p99: Boolean to force weight min and max to be 1st and 99th percentile respectively + :type weight_p99: bool + :param weight_preexpansion: Boolean to fit weights on preexpanded data + :type weight_preexpansion: bool + :param weighted: Boolean to weight analysis + :type weighted: bool + """ + bootstrap_nboot: int = 0 bootstrap_sample: float = 0.8 bootstrap_CI: float = 0.95 @@ -38,7 +129,7 @@ class SEQopts: plot_type: Literal["risk", "survival", "incidence"] = "risk" seed: Optional[int] = None selection_first_trial: bool = False - selection_probability: float = 0.8 + selection_sample: float = 0.8 selection_random: bool = False subgroup_colname: str = None treatment_level: List[int] = field(default_factory=lambda: [0, 1]) diff --git a/pySEQTarget/SEQoutput.py b/pySEQTarget/SEQoutput.py index ed1ea74..8919fc3 100644 --- a/pySEQTarget/SEQoutput.py +++ b/pySEQTarget/SEQoutput.py @@ -10,6 +10,37 @@ @dataclass class SEQoutput: + """ + Collector class for results from ``SEQuential`` + + :param options: Options used in the SEQuential process + :type options: SEQopts or None + :param method: Method of analysis ['ITT', 'dose-response', or 
'censoring'] + :type method: str + :param numerator_models: Numerator models, if applicable, from the weighting process + :type numerator_models: List[ResultsWrapper] or None + :param denominator_models: Denominator models, if applicable, from the weighting process + :type denominator_models: List[ResultsWrapper] or None + :param compevent_models: Competing event models, if applicable + :type compevent_models: List[ResultsWrapper] or None + :param weight_statistics: Weight statistics once returned back to the expanded dataset + :type weight_statistics: dict or None + :param hazard: Hazard ratio if applicable + :type hazard: pl.DataFrame or None + :param km_data: Dataframe of risk, survival, and incidence data if applicable at all followups + :type km_data: pl.DataFrame or None + :param km_graph: Figure of survival, risk, or incidence over followup times + :type km_graph: matplotlib.figure.Figure or None + :param risk_ratio: Dataframe of risk ratios, compared between treatments and subgroups + :type risk_ratio: pl.DataFrame or None + :param risk_difference: Dataframe of risk differences, compared between treatments and subgroups + :type risk_difference: pl.DataFrame or None + :param time: Timings for every step of the process completed thus far + :type time: dict or None + :param diagnostic_tables: Diagnostic tables for unique and nonunique outcome events and treatment switches + :type diagnostic_tables: dict or None + """ + options: SEQopts = None method: str = None numerator_models: List[ResultsWrapper] = None @@ -25,12 +56,20 @@ class SEQoutput: time: dict = None diagnostic_tables: dict = None - def plot(self): + def plot(self) -> None: + """ + Prints the kaplan-meier graph + """ print(self.km_graph) def summary( self, type=Optional[Literal["numerator", "denominator", "outcome", "compevent"]] - ): + ) -> List: + """ + Returns a list of model summaries of either the numerator, denominator, outcome, or competing event models + :param type: Indicator for which model 
list you would like returned + :type type: str + """ match type: case "numerator": models = self.numerator_models @@ -57,7 +96,12 @@ def retrieve_data( "nonunique_switches", ] ], - ): + ) -> pl.DataFrame: + """ + Getter for data stored within ``SEQoutput`` + :param type: Data which you would like to access, ['km_data', 'hazard', 'risk_ratio', 'risk_difference', 'unique_outcomes', 'nonunique_outcomes', 'unique_switches', 'nonunique_switches'] + :type type: str + """ match type: case "hazard": data = self.hazard diff --git a/pySEQTarget/SEQuential.py b/pySEQTarget/SEQuential.py index 0079891..d13685f 100644 --- a/pySEQTarget/SEQuential.py +++ b/pySEQTarget/SEQuential.py @@ -7,22 +7,64 @@ import numpy as np import polars as pl -from .analysis import (_calculate_hazard, _calculate_survival, _outcome_fit, - _pred_risk, _risk_estimates, _subgroup_fit) +from .analysis import ( + _calculate_hazard, + _calculate_survival, + _outcome_fit, + _pred_risk, + _risk_estimates, + _subgroup_fit, +) from .error import _datachecker, _param_checker from .expansion import _binder, _diagnostics, _dynamic, _random_selection from .helpers import _col_string, _format_time, bootstrap_loop -from .initialization import (_cense_denominator, _cense_numerator, - _denominator, _numerator, _outcome) +from .initialization import ( + _cense_denominator, + _cense_numerator, + _denominator, + _numerator, + _outcome, +) from .plot import _survival_plot from .SEQopts import SEQopts from .SEQoutput import SEQoutput -from .weighting import (_fit_denominator, _fit_LTFU, _fit_numerator, - _weight_bind, _weight_predict, _weight_setup, - _weight_stats) +from .weighting import ( + _fit_denominator, + _fit_LTFU, + _fit_numerator, + _weight_bind, + _weight_predict, + _weight_setup, + _weight_stats, +) class SEQuential: + """ + Primary class initializer for SEQuentially nested target trial emulation + + :param data: Data for analysis + :type data: pl.DataFrame + :param id_col: Column name for unique patient IDs + 
:type id_col: str + :param time_col: Column name for observational time points + :type time_col: str + :param eligible_col: Column name for analytical eligibility + :type eligible_col: str + :param treatment_col: Column name specifying treatment per time_col + :type treatment_col: str + :param outcome_col: Column name specifying outcome per time_col + :type outcome_col: str + :param time_varying_cols: Time-varying column names as covariates (BMI, Age, etc.) + :type time_varying_cols: Optional[List[str]] or None + :param fixed_cols: Fixed column names as covariates (Sex, YOB, etc.) + :type fixed_cols: Optional[List[str]] or None + :param method: Method for analysis ['ITT', 'dose-response', or 'censoring'] + :type method: str + :param parameters: Parameters to augment analysis, specified with ``pySEQTarget.SEQopts`` + :type parameters: Optional[SEQopts] or None + """ + def __init__( self, data: pl.DataFrame, @@ -78,7 +120,10 @@ def __init__( _param_checker(self) _datachecker(self) - def expand(self): + def expand(self) -> None: + """ + Creates the sequentially nested, emulated target trial structure + """ start = time.perf_counter() kept = [ self.cense_colname, @@ -136,7 +181,10 @@ def expand(self): end = time.perf_counter() self._expansion_time = _format_time(start, end) - def bootstrap(self, **kwargs): + def bootstrap(self, **kwargs) -> None: + """ + Internally sets up bootstrapping - creating a list of IDs to use per iteration + """ allowed = { "bootstrap_nboot", "bootstrap_sample", @@ -148,7 +196,6 @@ def bootstrap(self, **kwargs): setattr(self, key, value) else: raise ValueError(f"Unknown argument: {key}") - UIDs = self.DT.select(pl.col(self.id_col)).unique().to_series().to_list() NIDs = len(UIDs) @@ -162,7 +209,10 @@ def bootstrap(self, **kwargs): return self @bootstrap_loop - def fit(self): + def fit(self) -> None: + """ + Fits weight models (numerator, denominator, censoring) and outcome models (outcome, competing event) + """ if self.bootstrap_nboot > 0 and 
not hasattr(self, "_boot_samples"): raise ValueError( "Bootstrap sampling not found. Please run the 'bootstrap' method before fitting with bootstrapping." @@ -211,7 +261,17 @@ def fit(self): ) return models - def survival(self): + def survival(self, **kwargs) -> None: + """ + Uses fit outcome models (outcome, competing event) to estimate risk, survival, and incidence curves + """ + allowed = {"bootstrap_CI", "bootstrap_CI_method"} + for key, val in kwargs.items(): + if key in allowed: + setattr(self, key, val) + else: + raise ValueError(f"Unknown or misplaced arugment: {key}") + if not hasattr(self, "outcome_model") or not self.outcome_model: raise ValueError( "Outcome model not found. Please run the 'fit' method before calculating survival." @@ -227,7 +287,10 @@ def survival(self): end = time.perf_counter() self._survival_time = _format_time(start, end) - def hazard(self): + def hazard(self) -> None: + """ + Uses fit outcome models (outcome, competing event) to estimate hazard ratios + """ start = time.perf_counter() if not hasattr(self, "outcome_model") or not self.outcome_model: @@ -239,10 +302,22 @@ def hazard(self): end = time.perf_counter() self._hazard_time = _format_time(start, end) - def plot(self): + def plot(self, **kwargs) -> None: + """ + Shows a plot specific to plot_type + """ + allowed = {"plot_type", "plot_colors", "plot_title", "plot_labels"} + for key, val in kwargs.items(): + if key in allowed: + setattr(self, key, val) + else: + raise ValueError(f"Unknown or misplaced arugment: {key}") self.km_graph = _survival_plot(self) - def collect(self): + def collect(self) -> SEQoutput: + """ + Collects all results current created into ``SEQoutput`` class + """ self._time_collected = datetime.datetime.now() generated = [ diff --git a/pySEQTarget/analysis/__init__.py b/pySEQTarget/analysis/__init__.py index 6799dfd..e35ceb7 100644 --- a/pySEQTarget/analysis/__init__.py +++ b/pySEQTarget/analysis/__init__.py @@ -3,6 +3,5 @@ from ._risk_estimates import 
_risk_estimates as _risk_estimates from ._subgroup_fit import _subgroup_fit as _subgroup_fit from ._survival_pred import _calculate_survival as _calculate_survival -from ._survival_pred import \ - _get_outcome_predictions as _get_outcome_predictions +from ._survival_pred import _get_outcome_predictions as _get_outcome_predictions from ._survival_pred import _pred_risk as _pred_risk diff --git a/pySEQTarget/expansion/_selection.py b/pySEQTarget/expansion/_selection.py index 9f58cd7..2984ad5 100644 --- a/pySEQTarget/expansion/_selection.py +++ b/pySEQTarget/expansion/_selection.py @@ -19,7 +19,7 @@ def _random_selection(self): NIDs = len(UIDs) sample = self._rng.choice( - UIDs, size=int(self.selection_probability * NIDs), replace=False + UIDs, size=int(self.selection_sample * NIDs), replace=False ) self.DT = ( diff --git a/pyproject.toml b/pyproject.toml index 9b83517..6c81048 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -52,8 +52,9 @@ Repository = "https://github.com/CausalInference/pySEQTarget" "University of Bristol (ROR)" = "https://ror.org/0524sp257" "Harvard University (ROR)" = "https://ror.org/03vek6s52" -[tool.setuptools] -packages = ["pySEQTarget", "pySEQTarget.data"] +[tool.setuptools.packages.find] +where = ["."] +include = ["pySEQTarget*"] [tool.setuptools.package-data] SEQdata = ["data/*.csv"]