From 7378c3489cd5b57d119ae83306b1b0d66eaa0311 Mon Sep 17 00:00:00 2001 From: Stansfeld Date: Fri, 28 Nov 2025 17:41:28 +0000 Subject: [PATCH 01/24] int fixed --- pylipid/func/interactions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pylipid/func/interactions.py b/pylipid/func/interactions.py index 5c4237b..48d9d1a 100644 --- a/pylipid/func/interactions.py +++ b/pylipid/func/interactions.py @@ -100,7 +100,7 @@ def __init__(self, contact_low, contact_high, dt): self.contact_low = contact_low self.contact_high = contact_high self.dt = dt - self.pointer = [np.zeros_like(self.contact_high[idx], dtype=np.int) + self.pointer = [np.zeros_like(self.contact_high[idx], dtype=int) for idx in range(len(self.contact_high))] return From 697d8217513c2f160f118107b32bf89c6ef6edbd Mon Sep 17 00:00:00 2001 From: Phill Stansfeld Date: Tue, 10 Mar 2026 21:15:23 +0000 Subject: [PATCH 02/24] v1.6.0 --- README.rst | 7 ++--- github/workflows/ci.yml | 54 ++++++++++++++++++++++++++++++++++++ pylipid/_version.py | 2 +- pylipid/api/api.py | 11 ++++---- pylipid/func/binding_site.py | 19 ++++++------- pylipid/func/kinetics.py | 4 +-- pylipid/plot/koff.py | 5 ++-- pylipid/plot/plot1d.py | 4 +-- pylipid/plot/plot2d.py | 3 +- pylipid/util/corrcoef.py | 12 ++++---- requirements.txt | 25 ++++++++--------- setup.py | 34 +++++++++++------------ 12 files changed, 115 insertions(+), 65 deletions(-) create mode 100644 github/workflows/ci.yml diff --git a/README.rst b/README.rst index 1990885..6acc2f4 100644 --- a/README.rst +++ b/README.rst @@ -1,10 +1,9 @@ - ========================================================== PyLipID - A Python Package For Lipid Interactions Analysis ========================================================== -.. image:: https://travis-ci.com/wlsong/PyLipID.svg?branch=master - :target: https://travis-ci.com/github/wlsong/PyLipID +.. 
image:: https://github.com/pstansfeld/PyLipID/actions/workflows/ci.yml/badge.svg + :target: https://github.com/pstansfeld/PyLipID/actions/workflows/ci.yml .. image:: https://img.shields.io/pypi/v/PyLipID :target: https://pypi.org/project/pylipid/ @@ -37,7 +36,7 @@ We recommend installing PyLipID using the package installer `pip`: Alternatively, PyLipID can be installed from the source code. The package is available for download on Github via: -``git clone https://github.com/wlsong/PyLipID`` +``git clone https://github.com/pstansfeld/PyLipID`` Once the source code is downloaded, enter the source code directory and install the package as follow: diff --git a/github/workflows/ci.yml b/github/workflows/ci.yml new file mode 100644 index 0000000..ee50ca2 --- /dev/null +++ b/github/workflows/ci.yml @@ -0,0 +1,54 @@ +name: CI + +on: + push: + branches: [ master ] + pull_request: + branches: [ master ] + +jobs: + test: + name: Python ${{ matrix.python-version }} / ${{ matrix.os }} + runs-on: ${{ matrix.os }} + + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, macos-latest] + python-version: ["3.9", "3.10", "3.11", "3.12"] + + steps: + - name: Check out repository + uses: actions/checkout@v4 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + cache: 'pip' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + pip install pytest pytest-cov flake8 + + - name: Install package + run: pip install -e . 
+ + - name: Lint with flake8 + run: | + # Fail on syntax errors or undefined names; treat everything else as a warning + flake8 pylipid --count --select=E9,F63,F7,F82 --show-source --statistics + flake8 pylipid --count --exit-zero --max-line-length=120 --statistics + + - name: Run tests + run: | + pytest tests/ -v --cov=pylipid --cov-report=term-missing + + - name: Upload coverage report + if: matrix.os == 'ubuntu-latest' && matrix.python-version == '3.11' + uses: actions/upload-artifact@v4 + with: + name: coverage-report + path: .coverage diff --git a/pylipid/_version.py b/pylipid/_version.py index 8add312..66ea783 100644 --- a/pylipid/_version.py +++ b/pylipid/_version.py @@ -14,4 +14,4 @@ # copies or substantial portions of the Software. ############################################################################## -__version__ = '1.5.14' \ No newline at end of file +__version__ = '1.6.0' diff --git a/pylipid/api/api.py b/pylipid/api/api.py index faff1fd..d8d858e 100644 --- a/pylipid/api/api.py +++ b/pylipid/api/api.py @@ -561,7 +561,7 @@ def compute_residue_lipidcount(self, residue_id=None): if len(self._lipid_count[residue_id]) > 0 else 0 for residue_id in self._protein_residue_id] if len(selected_residue_id) == 1: - self._lipid_count[residue_id] + return self._lipid_count[residue_id] else: return [self._lipid_count[residue_id] for residue_id in selected_residue_id] @@ -664,7 +664,7 @@ def compute_residue_koff(self, residue_id=None, nbootstrap=10, initial_guess=[1. 
self._check_calculation("Duration", self.compute_residue_duration) if plot_data: - koff_dir = check_dir(save_dir, "Reisidue_koffs_{}".format(self._lipid)) if save_dir is not None \ + koff_dir = check_dir(save_dir, "Residue_koffs_{}".format(self._lipid)) if save_dir is not None \ else check_dir(self._save_dir, "Residue_koffs_{}".format(self._lipid)) if len(set(self._residue_list)) != len(self._residue_list): residue_name_set = ["{}_ResidueID{}".format(residue, residue_id) for residue, residue_id in @@ -1327,8 +1327,8 @@ def analyze_bound_poses(self, binding_site_id=None, n_top_poses=3, n_clusters="a pose_dir = check_dir(save_dir, "Bound_Poses_{}".format(self._lipid)) if save_dir is not None \ else check_dir(self._save_dir, "Bound_Poses_{}".format(self._lipid)) - if "Binding Site RMSD" in self.dataset.columns: - pose_rmsd_per_residue = self.dataset.columns["Binding Site Pose RMSD"] + if "Binding Site Pose RMSD" in self.dataset.columns: + pose_rmsd_per_residue = self.dataset["Binding Site Pose RMSD"].values else: pose_rmsd_per_residue = np.zeros(self._nresi_per_protein) if binding_site_id is not None: @@ -1645,7 +1645,7 @@ def save_pymol_script(self, pdb_file, save_dir=None): """ script_dir = check_dir(save_dir) if save_dir is not None else \ check_dir(os.path.join(self._save_dir, "Dataset_{}".format(self._lipid))) - data_fname = os.path.join(script_dir, "Dataset.csv".format(self._lipid)) + data_fname = os.path.join(script_dir, "Dataset.csv") if not os.path.isfile(data_fname): self.dataset.to_csv(data_fname, index=False, header=True) write_pymol_script(os.path.join(script_dir, "show_binding_site_info.py"), pdb_file, data_fname, @@ -2008,4 +2008,3 @@ def _check_calculation(self, item, calculation, *args, **kwargs): calculation(*args, **kwargs) print("Finished the calculation of {}.".format(item)) print("#" * 60) - diff --git a/pylipid/func/binding_site.py b/pylipid/func/binding_site.py index a28f3c7..aa8398f 100644 --- a/pylipid/func/binding_site.py +++ 
b/pylipid/func/binding_site.py @@ -19,8 +19,9 @@ import os from collections import defaultdict from itertools import product -import community import networkx as nx +from networkx.algorithms.community import louvain_communities +from networkx.algorithms.community.quality import modularity as louvain_modularity import numpy as np import pandas as pd import mdtraj as md @@ -81,19 +82,14 @@ def get_node_list(corrcoef, threshold=4): .. [2] Newman, M. E. J., Analysis of weighted networks. Physical Review E 2004, 70 (5), 056131. """ - # TODO: check negative values in corrcoef_matrix. Come up with better solutions. corrcoef[corrcoef < 0.0] = 0.0 # network edge can't take negative values. Residues with # negative correlationsare are forced to separate to different binding sites. graph = nx.Graph(corrcoef) - partition = community.best_partition(graph, weight='weight') - values = [partition.get(node) for node in graph.nodes()] - node_list = [] - for value in range(max(values)): - nodes = [k for k, v in partition.items() if v == value] - if len(nodes) >= threshold: - node_list.append(nodes) + # Use networkx's built-in Louvain (networkx >= 2.6). seed fixes randomness for reproducibility. 
+ communities = louvain_communities(graph, weight='weight', seed=42) + node_list = [sorted(c) for c in communities if len(c) >= threshold] if len(node_list) > 0: - modularity = community.modularity(partition, graph) + modularity = louvain_modularity(graph, communities, weight='weight') else: modularity = None return node_list, modularity @@ -334,6 +330,7 @@ def calculate_scores(dist_matrix, kde_bw=0.15, pca_component=0.90, score_weights except ValueError: print("Pose generation error -- possibly due to insufficient number of binding event.") + return np.array([]) def write_bound_poses(pose_traj, pose_indices, save_dir, pose_prefix="BoundPose", pose_format="pdb"): @@ -647,4 +644,4 @@ def _write_pose_info(selected_pose_info, fn, trajfile_list): f.write("LIPID ID: {}\n".format(info[2])) f.write("TIME : {:8.3f} ps\n".format(info[3])) f.write("\n") - f.write("\n") \ No newline at end of file + f.write("\n") diff --git a/pylipid/func/kinetics.py b/pylipid/func/kinetics.py index 370e6dd..9d392cc 100644 --- a/pylipid/func/kinetics.py +++ b/pylipid/func/kinetics.py @@ -220,7 +220,7 @@ def _curve_fitting(survival_func, delta_t_list, initial_guess): survival_rates = np.nan_to_num([survival_func[delta_t] for delta_t in delta_t_list]) # y try: popt, pcov = curve_fit(_bi_expo, np.array(delta_t_list), np.array(survival_rates), p0=initial_guess, maxfev=100000) - n_fitted = _bi_expo(np.array(delta_t_list, dtype=np.float128), *popt) + n_fitted = _bi_expo(np.array(delta_t_list, dtype=np.longdouble), *popt) r_squared = 1 - np.sum((np.nan_to_num(n_fitted) - np.nan_to_num(survival_rates))**2)/np.sum((survival_rates - np.mean(survival_rates))**2) ks = [abs(k) for k in popt[:2]] @@ -277,4 +277,4 @@ def _format_koff_text(properties, timeunit): tu, cv_avg[1]) text += "{:14s} = {:.4f}\n".format("$R^2$$_{{boot}}$", np.mean(properties["r_squared_boot_set"])) text += "{:18s} = {:.3f} {:2s}".format("$Res. 
Time$", properties["res_time"], tu) - return text \ No newline at end of file + return text diff --git a/pylipid/plot/koff.py b/pylipid/plot/koff.py index 727d8ae..d341389 100644 --- a/pylipid/plot/koff.py +++ b/pylipid/plot/koff.py @@ -77,7 +77,7 @@ def plot_koff(durations, delta_t_list, survival_rates, n_fitted, xlabel = r"Duration ($\mu s$)" if text is None: - fig = plt.figure(1, figsize=(5.5, 3.5)) + fig = plt.figure(figsize=(5.5, 3.5)) left, width = 0.13, 0.33 bottom, height = 0.17, 0.68 left_h = left + width + 0.05 @@ -86,7 +86,7 @@ def plot_koff(durations, delta_t_list, survival_rates, n_fitted, axScatter = fig.add_axes(rect_scatter) axHisty = fig.add_axes(rect_histy) else: - fig = plt.figure(1, figsize=(8.2, 3.5)) + fig = plt.figure(figsize=(8.2, 3.5)) left, width = 0.0975, 0.23 bottom, height = 0.17, 0.68 left_h = left + width + 0.0375 @@ -147,4 +147,3 @@ def plot_koff(durations, delta_t_list, survival_rates, n_fitted, plt.close() return - diff --git a/pylipid/plot/plot1d.py b/pylipid/plot/plot1d.py index 0216b33..0f12870 100644 --- a/pylipid/plot/plot1d.py +++ b/pylipid/plot/plot1d.py @@ -274,7 +274,7 @@ def adjacent_values(vals, q1, q3): if title is None: title = "" - color_set = _cycle(plt.get_cmap("tab10").colors) + color_set = _cycle(plt.cm.tab10.colors) plt.rcParams["font.size"] = 10 plt.rcParams["font.weight"] = "bold" @@ -346,7 +346,7 @@ def plot_surface_area(surface_area, fig_fn, timeunit=None, fig_close=False): """ from itertools import cycle as _cycle - color_set = _cycle(plt.get_cmap("tab10").colors) + color_set = _cycle(plt.cm.tab10.colors) plt.rcParams["font.size"] = 10 plt.rcParams["font.weight"] = "normal" diff --git a/pylipid/plot/plot2d.py b/pylipid/plot/plot2d.py index 8b791d3..c0fb1ce 100644 --- a/pylipid/plot/plot2d.py +++ b/pylipid/plot/plot2d.py @@ -101,6 +101,8 @@ def plot_corrcoef(corrcoef, residue_index, cmap="Reds", vmin=None, vmax=None, vmax = np.percentile(np.unique(np.ravel(corrcoef)), 99) if vmin is None: vmin = 
np.percentile(np.unique(np.ravel(corrcoef)), 1) + vmin = max(vmin, np.finfo(float).tiny) # LogNorm requires strictly positive vmin + vmax = max(vmax, vmin * 10) # ensure vmax > vmin pcm = ax.pcolormesh(x, y, corrcoef, cmap=cmap, norm=colors.LogNorm(vmax=vmax, vmin=vmin)) fig.colorbar(pcm, ax=ax) @@ -130,4 +132,3 @@ def plot_corrcoef(corrcoef, residue_index, cmap="Reds", vmin=None, vmax=None, plt.close() return - diff --git a/pylipid/util/corrcoef.py b/pylipid/util/corrcoef.py index 46f9fec..f09a05b 100644 --- a/pylipid/util/corrcoef.py +++ b/pylipid/util/corrcoef.py @@ -28,12 +28,14 @@ def sparse_corrcoef(A, B=None): A = sparse.vstack((A, B), format='csr') A = A.astype(np.float64) n = A.shape[1] - # Compute the covariance matrix - rowsum = A.sum(1) - centering = rowsum.dot(rowsum.T.conjugate()) / n - C = (A.dot(A.T.conjugate()) - centering) / (n - 1) + # Compute the covariance matrix. + # Use explicit dense conversion to avoid numpy.matrix deprecation and + # version-dependent sparse-dense arithmetic (scipy >= 1.14 / numpy >= 1.25). 
+ rowsum = np.asarray(A.sum(1)).flatten() # shape (n_rows,) ndarray + centering = np.outer(rowsum, rowsum) / n # explicit outer product + C = (np.asarray(A.dot(A.T.conjugate()).todense()) - centering) / (n - 1) # The correlation coefficients are given by # C_{i,j} / sqrt(C_{i} * C_{j}) d = np.diag(C) corrcoefs = C / np.sqrt(np.outer(d, d)) - return corrcoefs \ No newline at end of file + return corrcoefs diff --git a/requirements.txt b/requirements.txt index 7dd8414..326be4e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,13 +1,12 @@ -mdtraj -numpy -pandas -matplotlib>=3.3.3 -networkx -scipy -python-louvain -logomaker -statsmodels -scikit-learn -tqdm -p_tqdm -kneebow +mdtraj>=1.9.7 +numpy>=1.21 +pandas>=1.3 +matplotlib>=3.5 +networkx>=2.6 +scipy>=1.7 +logomaker>=0.8 +statsmodels>=0.13 +scikit-learn>=1.0 +tqdm>=4.50 +p_tqdm>=1.3 +kneebow>=0.1 diff --git a/setup.py b/setup.py index 5891ad1..1982390 100644 --- a/setup.py +++ b/setup.py @@ -25,7 +25,7 @@ description='PyLipID - A Python Library For Lipid Interaction Analysis', long_description=long_description, long_description_content_type='text/x-rst', - url='https://github.com/wlsong/PyLipID', + url='https://github.com/pstansfeld/PyLipID', author='Wanling Song', author_email='wanling.song@hotmail.com', classifiers=[ @@ -36,26 +36,26 @@ 'Topic :: Scientific/Engineering :: Mathematics', 'Topic :: Scientific/Engineering :: Physics', 'License :: OSI Approved :: MIT License', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.9', + 'Programming Language :: Python :: 3.10', + 'Programming Language :: Python :: 3.11', + 'Programming Language :: Python :: 3.12', ], keywords='simulation tools, network community, binding site', - python_requires='>=3.6, <4, !=3.8.*', + python_requires='>=3.9', packages=find_packages(), install_requires=[ - "mdtraj", - "numpy", - "pandas", - "matplotlib>=3.3.3", - "networkx", - "scipy", - "python-louvain", - 
"logomaker", - "statsmodels", - "scikit-learn", - "tqdm", - "p_tqdm", - "kneebow" + "mdtraj>=1.9.7", + "numpy>=1.21", + "pandas>=1.3", + "matplotlib>=3.5", + "networkx>=2.6", + "scipy>=1.7", + "logomaker>=0.8", + "statsmodels>=0.13", + "scikit-learn>=1.0", + "tqdm>=4.50", + "p_tqdm>=1.3", + "kneebow>=0.1", ] ) From 2f2b46b8b2f257f3ceecdee3759fa0fea2258a88 Mon Sep 17 00:00:00 2001 From: Phill Stansfeld Date: Tue, 10 Mar 2026 21:19:10 +0000 Subject: [PATCH 03/24] yml edits --- {github => .github}/workflows/ci.yml | 0 .travis.yml | 15 --------------- 2 files changed, 15 deletions(-) rename {github => .github}/workflows/ci.yml (100%) delete mode 100644 .travis.yml diff --git a/github/workflows/ci.yml b/.github/workflows/ci.yml similarity index 100% rename from github/workflows/ci.yml rename to .github/workflows/ci.yml diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 352700b..0000000 --- a/.travis.yml +++ /dev/null @@ -1,15 +0,0 @@ -language: python -cache: pip -python: - - "3.6" - - "3.7" - - "3.8" -before_install: - - pip install -U pip - - pip install codecov -install: - - pip install -r requirements.txt -script: - - python -m unittest discover -s tests -after_success: - - bash <(curl -s https://codecov.io/bash) From 4015f88c5d8a887c8d5a71036f1087798c93b2cb Mon Sep 17 00:00:00 2001 From: Phill Stansfeld Date: Tue, 10 Mar 2026 21:25:27 +0000 Subject: [PATCH 04/24] pip installable --- pyproject.toml | 54 ++++++++++++++++++++++++++++++++++++++++++++ setup.py | 61 -------------------------------------------------- 2 files changed, 54 insertions(+), 61 deletions(-) create mode 100644 pyproject.toml delete mode 100644 setup.py diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..3631cef --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,54 @@ +[build-system] +requires = ["setuptools>=64", "setuptools-scm"] +build-backend = "setuptools.backends.legacy:build" + +[project] +name = "pylipid" +dynamic = ["version"] +description = 
"PyLipID - A Python Library For Lipid Interaction Analysis" +readme = "README.rst" +license = { file = "LICENSE.txt" } +authors = [ + { name = "Wanling Song", email = "wanling.song@hotmail.com" } +] +keywords = ["simulation tools", "network community", "binding site"] +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Intended Audience :: Science/Research", + "Topic :: Scientific/Engineering :: Bio-Informatics", + "Topic :: Scientific/Engineering :: Chemistry", + "Topic :: Scientific/Engineering :: Mathematics", + "Topic :: Scientific/Engineering :: Physics", + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", +] +requires-python = ">=3.9" +dependencies = [ + "mdtraj>=1.9.7", + "numpy>=1.21", + "pandas>=1.3", + "matplotlib>=3.5", + "networkx>=2.6", + "scipy>=1.7", + "logomaker>=0.8", + "statsmodels>=0.13", + "scikit-learn>=1.0", + "tqdm>=4.50", + "p_tqdm>=1.3", + "kneebow>=0.1", +] + +[project.urls] +Homepage = "https://github.com/pstansfeld/PyLipID" +Documentation = "https://pylipid.readthedocs.io" +"Bug Tracker" = "https://github.com/pstansfeld/PyLipID/issues" + +[tool.setuptools.dynamic] +version = { attr = "pylipid._version.__version__" } + +[tool.setuptools.packages.find] +where = ["."] +include = ["pylipid*"] diff --git a/setup.py b/setup.py deleted file mode 100644 index 1982390..0000000 --- a/setup.py +++ /dev/null @@ -1,61 +0,0 @@ -# setup.py -from setuptools import setup, find_packages -import pathlib - -here = pathlib.Path(__file__).parent.resolve() - -# Get the long description from the README file -long_description = (here / 'README.rst').read_text(encoding='utf-8') - -# read version info -import re -VERSIONFILE="pylipid/_version.py" -verstrline = open(VERSIONFILE, "rt").read() -VSRE = r"^__version__ = ['\"]([^'\"]*)['\"]" -mo = re.search(VSRE, verstrline, re.M) -if mo: 
- verstr = mo.group(1) -else: - raise RuntimeError("Unable to find version string in %s." % (VERSIONFILE,)) - -# setup -setup( - name='pylipid', - version=verstr, - description='PyLipID - A Python Library For Lipid Interaction Analysis', - long_description=long_description, - long_description_content_type='text/x-rst', - url='https://github.com/pstansfeld/PyLipID', - author='Wanling Song', - author_email='wanling.song@hotmail.com', - classifiers=[ - 'Development Status :: 5 - Production/Stable', - 'Intended Audience :: Science/Research', - 'Topic :: Scientific/Engineering :: Bio-Informatics', - 'Topic :: Scientific/Engineering :: Chemistry', - 'Topic :: Scientific/Engineering :: Mathematics', - 'Topic :: Scientific/Engineering :: Physics', - 'License :: OSI Approved :: MIT License', - 'Programming Language :: Python :: 3.9', - 'Programming Language :: Python :: 3.10', - 'Programming Language :: Python :: 3.11', - 'Programming Language :: Python :: 3.12', - ], - keywords='simulation tools, network community, binding site', - python_requires='>=3.9', - packages=find_packages(), - install_requires=[ - "mdtraj>=1.9.7", - "numpy>=1.21", - "pandas>=1.3", - "matplotlib>=3.5", - "networkx>=2.6", - "scipy>=1.7", - "logomaker>=0.8", - "statsmodels>=0.13", - "scikit-learn>=1.0", - "tqdm>=4.50", - "p_tqdm>=1.3", - "kneebow>=0.1", - ] -) From af52946e1462a84faae4d21bf1891bebbe5df941 Mon Sep 17 00:00:00 2001 From: Phill Stansfeld Date: Tue, 10 Mar 2026 21:37:24 +0000 Subject: [PATCH 05/24] potential speed-up --- pylipid/api/api.py | 30 +++++++++++++++++++++++------- pylipid/util/coordinate.py | 11 +++++++++-- 2 files changed, 32 insertions(+), 9 deletions(-) diff --git a/pylipid/api/api.py b/pylipid/api/api.py index d8d858e..548a25d 100644 --- a/pylipid/api/api.py +++ b/pylipid/api/api.py @@ -364,15 +364,31 @@ def collect_residue_contacts(self): "Trajectory {} contains {} residues whereas trajectory {} contains {} residues".format( traj_idx, len(traj_info["protein_residue_id"]), 
traj_idx - 1, len(self._protein_residue_id))
             ncol_per_protein = len(traj_info["lipid_residue_atomid_list"]) * traj.n_frames
+            # Pre-flatten all lipid atom indices and build a boundary index so we can
+            # issue ONE md.compute_distances call per residue (instead of n_lipids calls).
+            flat_lipid_atoms = np.concatenate(traj_info["lipid_residue_atomid_list"])
+            lipid_sizes = np.array([len(a) for a in traj_info["lipid_residue_atomid_list"]])
+            lipid_boundaries = np.concatenate([[0], np.cumsum(lipid_sizes)])
+            n_lipids = len(traj_info["lipid_residue_atomid_list"])
             for protein_idx in np.arange(self._nprot, dtype=int):
-                for residue_id, residue_atom_indices in enumerate(
-                        traj_info["protein_residue_atomid_list"][protein_idx]):
-                    # calculate interaction per residue
-                    dist_matrix = np.array([np.min(
-                        md.compute_distances(traj, np.array(list(product(residue_atom_indices, lipid_atom_indices))),
-                                             periodic=True, opt=True),
-                        axis=1) for lipid_atom_indices in traj_info["lipid_residue_atomid_list"]])
+                for residue_id, residue_atom_indices in enumerate(tqdm(
+                        traj_info["protein_residue_atomid_list"][protein_idx],
+                        desc=" Traj {:d} prot {:d} residues".format(traj_idx, protein_idx), leave=False)):
+                    # Single compute_distances call for all lipid atoms at once.
+ # Shape: (n_frames, n_residue_atoms * n_flat_lipid_atoms) + all_pairs = np.array(list(product(residue_atom_indices, flat_lipid_atoms))) + all_dists = md.compute_distances(traj, all_pairs, periodic=True, opt=True) + # all_dists shape: (n_frames, n_res_atoms * n_flat_lipid_atoms) + # Reduce to (n_frames, n_flat_lipid_atoms) by taking min over residue atoms + n_res_atoms = len(residue_atom_indices) + n_flat = len(flat_lipid_atoms) + # reshape: (n_frames, n_res_atoms, n_flat_lipid_atoms) → min over axis 1 + per_atom_dists = all_dists.reshape(traj.n_frames, n_res_atoms, n_flat).min(axis=1) + # (n_frames, n_flat_lipid_atoms) → per lipid min using boundaries + # np.minimum.reduceat operates along axis=1 (columns = lipid atoms) + dist_matrix = np.minimum.reduceat(per_atom_dists, lipid_boundaries[:-1], axis=1).T + # dist_matrix shape: (n_lipids, n_frames) — same as before contact_low, frame_id_set_low, lipid_id_set_low = cal_contact_residues(dist_matrix, self._cutoffs[0]) contact_high, _, _ = cal_contact_residues(dist_matrix, self._cutoffs[1]) self._contact_residues_high[residue_id].append(contact_high) diff --git a/pylipid/util/coordinate.py b/pylipid/util/coordinate.py index 111dec1..e5f927f 100644 --- a/pylipid/util/coordinate.py +++ b/pylipid/util/coordinate.py @@ -41,7 +41,14 @@ def write_PDB(prot_obj, bfactor, pdb_fn, resi_offset=0): resid_set = table.resSeq + resi_offset atom_name_set = table.name resn_set = table.resName - chainID = [chr(65 + int(idx)) for idx in table.chainID] + # table.chainID can be empty string with some topology formats (e.g. 
GROMACS .gro) + # Fall back to chain 'A' (index 0) when the value is missing or non-integer + def _chain_letter(idx): + try: + return chr(65 + int(idx)) + except (ValueError, TypeError): + return 'A' + chainID = [_chain_letter(idx) for idx in table.chainID] atom_residue_map = {atom_idx: prot_obj.top.atom(atom_idx).residue.index for atom_idx in np.arange(prot_obj.n_atoms)} ######## write out coords ########### @@ -66,4 +73,4 @@ def write_PDB(prot_obj, bfactor, pdb_fn, resi_offset=0): f.write(row) f.write("TER") - return \ No newline at end of file + return From daf14faacdc1c72c5f26a539f655a6b23e749d0e Mon Sep 17 00:00:00 2001 From: Phill Stansfeld Date: Tue, 10 Mar 2026 21:39:40 +0000 Subject: [PATCH 06/24] Update README.rst --- README.rst | 181 +++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 147 insertions(+), 34 deletions(-) diff --git a/README.rst b/README.rst index 6acc2f4..9777475 100644 --- a/README.rst +++ b/README.rst @@ -6,59 +6,172 @@ PyLipID - A Python Package For Lipid Interactions Analysis :target: https://github.com/pstansfeld/PyLipID/actions/workflows/ci.yml .. image:: https://img.shields.io/pypi/v/PyLipID :target: https://pypi.org/project/pylipid/ +.. image:: https://img.shields.io/pypi/pyversions/PyLipID + :target: https://pypi.org/project/pylipid/ .. image:: docs/static/pylipid_logo_smallsize.png :align: center -PyLipID is a python package for analyzing lipid interactions with membrane proteins from -Molecular Dynamics Simulations. PyLipID has the following main features, please check out -the tutorials for examples and the documentations for the API functionalities: +PyLipID is a Python package for analysing lipid interactions with membrane proteins from +Molecular Dynamics (MD) simulations. It supports both coarse-grained and all-atom force fields, +works with any trajectory format supported by `MDTraj `_, and is designed +to run from Jupyter notebooks or Python scripts. 
+ +Full documentation and tutorials: `pylipid.readthedocs.io `_ + + +Features +======== - * Detection of binding sites via calculating community structures in the interactions networks. - * Calculation of lipid koff and residence time for interaction with binding sites and residues. - * Analysis of lipid interactions with binding sites and residues using a couple of metrics. - * Generation of representative bound poses for binding sites. - * Analysis of bound poses for binding sites via automated clustering scheme. - * Adoption of a dual-cutoff scheme to overcome the 'rattling in cage' effect of coarse-grained simulations. - * Generation of manuscript-ready figures for analysis. +- **Dual-cutoff contact detection** — overcomes the 'rattling in cage' artefact common in + coarse-grained simulations by defining contact start and end with separate distance thresholds. +- **Binding site detection** — identifies lipid binding sites by computing community structures + in residue interaction networks using the Louvain algorithm. +- **Kinetics** — calculates lipid koff rates and residence times for both individual residues + and binding sites. +- **Interaction metrics** — computes occupancy, lipid count, duration and residence time per + residue and per binding site. +- **Bound pose analysis** — extracts and clusters representative lipid-bound poses for each + binding site. +- **Surface area** — calculates solvent-accessible surface area contributions per residue and + binding site. +- **Visualisation** — generates publication-ready figures and PyMOL scripts for structural + visualisation. -PyLipID can be used from Jupyter (former IPython, recommended), or by writing Python scripts. -The documentaion and tutorials can be found at `pylipid.readthedocs.io `_. 
Installation ============ -We recommend installing PyLipID using the package installer `pip`: +Install from PyPI (recommended):: + + pip install pylipid + +Install from source:: + + git clone https://github.com/pstansfeld/PyLipID + cd PyLipID + pip install . + +For development (editable install):: + + pip install -e . + +**Requirements:** Python >= 3.9. All dependencies are installed automatically by pip. + + +Quick Start +=========== + +.. code-block:: python + + from pylipid.api import LipidInteraction + + # Point to your trajectory and topology files + trajfile_list = ["run1/protein_lipids.xtc", "run2/protein_lipids.xtc"] + topfile_list = ["run1/protein_lipids.gro", "run2/protein_lipids.gro"] -``pip install pylipid`` + # Initialise — dual cutoffs in nm, lipid residue name as in your topology + li = LipidInteraction( + trajfile_list, + topfile_list=topfile_list, + cutoffs=[0.55, 0.8], # [lower, upper] in nm + lipid="CHOL", + nprot=1, + ) -Alternatively, PyLipID can be installed from the source code. The package is available for -download on Github via: + # Collect contacts (the main calculation step) + li.collect_residue_contacts() -``git clone https://github.com/pstansfeld/PyLipID`` + # Compute metrics + li.compute_residue_duration() + li.compute_residue_koff() + li.compute_binding_nodes(threshold=4) + li.compute_site_koff() -Once the source code is downloaded, enter the source code directory and install the package as follow: + # Plot and save + li.plot(item="Residence Time") + li.save_data(item="Dataset") + li.save_pymol_script(pdb_file="receptor.pdb") -``python setup.py install`` +For a full walkthrough including coarse-grained and all-atom examples, see the +`tutorials on readthedocs `_. 
-Citation |DOI for Citing PyEMMA| -================================ +What's New in v1.6.0 +==================== -If you use PyLipID in scientific research, please cite the following paper: :: +This release, maintained by the `Stansfeld Lab `_, +fixes a number of bugs present in v1.5.14 and modernises the package for current +Python and dependency versions. - @article{song_pylipid_2022, - author = {Song, Wanling. and Corey, Robin A. and Ansell, T. Bertie. and - Cassidy, C. Keith. and Horrell, Michael R. and Duncan, Anna L. - and Stansfeld, Phillip J. and Sansom, Mark S.P.}, - title = {PyLipID: A Python package for analysis of protein-lipid interactions from MD simulations}, - journal = {J. Chem. Theory Comput}, - year = {2022}, - url = {https://doi.org/10.1021/acs.jctc.1c00708}, - doi = {10.1021/acs.jctc.1c00708}, - urldate = {2022-02-18}, - } +**Bug fixes** -.. |DOI for Citing PyEMMA| image:: https://img.shields.io/badge/DOI-10.1021/acs.jctc.1c00708-blue +- Fixed off-by-one error in binding site detection that silently dropped the last + binding site on every run. +- Fixed ``calculate_scores`` returning ``None`` on error, causing a downstream + ``TypeError`` in pose analysis. +- Fixed correlation coefficient matrix computation (``sparse_corrcoef``) returning a + ``numpy.matrix`` object incompatible with NetworkX 3.x and newer NumPy (root cause + of the ``IndexError`` reported in issue #33). +- Fixed ``analyze_bound_poses`` referencing a non-existent column name and using + incorrect DataFrame indexing. +- Fixed ``compute_residue_lipidcount`` missing its ``return`` statement for single-residue + queries. +- Fixed ``write_PDB`` crashing with ``ValueError: invalid literal for int()`` when chain + IDs are absent in GROMACS ``.gro`` topologies (issues #29, #26). +- Fixed hardcoded figure number in ``plot_koff`` causing curves to overwrite each other + when called for multiple residues. +- Fixed deprecated ``plt.get_cmap()`` call removed in Matplotlib 3.9. 
+- Fixed ``LogNorm`` crash in ``plot_corrcoef`` when the correlation matrix contains zeros. +- Fixed ``np.float128`` (Linux-only) replaced with portable ``np.longdouble``. +- Fixed typo in output folder name ``Reisidue_koffs`` -> ``Residue_koffs``. + +**Performance** + +- ``collect_residue_contacts`` now issues a single ``md.compute_distances`` call per + residue rather than one per lipid molecule, giving a significant speedup on systems + with large lipid counts. A nested progress bar now shows per-residue progress within + each trajectory so the calculation no longer appears frozen. + +**Dependency changes** + +- Removed ``python-louvain`` dependency; binding site detection now uses the Louvain + implementation built into NetworkX (>= 2.6). Results are now reproducible across + runs (fixed random seed). +- All dependencies now carry minimum version bounds. +- Python < 3.9 (end-of-life) is no longer supported. + +**Packaging** + +- Migrated from ``setup.py`` to ``pyproject.toml``. +- CI migrated from Travis to GitHub Actions (Python 3.9--3.12, Ubuntu and macOS). + + +Citation +======== + +If you use PyLipID in scientific research, please cite: + +.. image:: https://img.shields.io/badge/DOI-10.1021/acs.jctc.1c00708-blue :target: https://doi.org/10.1021/acs.jctc.1c00708 + +:: + + @article{song_pylipid_2022, + author = {Song, Wanling and Corey, Robin A. and Ansell, T. Bertie and + Cassidy, C. Keith and Horrell, Michael R. and Duncan, Anna L. + and Stansfeld, Phillip J. and Sansom, Mark S.P.}, + title = {PyLipID: A Python package for analysis of protein-lipid + interactions from MD simulations}, + journal = {J. Chem. Theory Comput.}, + year = {2022}, + doi = {10.1021/acs.jctc.1c00708}, + url = {https://doi.org/10.1021/acs.jctc.1c00708}, + } + + +Licence +======= + +MIT licence. See `LICENSE.txt `_ for details. 
From 5aecaeac15c931a3ad087ac2044a32af896567e2 Mon Sep 17 00:00:00 2001
From: Phill Stansfeld
Date: Tue, 10 Mar 2026 22:03:16 +0000
Subject: [PATCH 07/24] Update README.rst

---
 README.rst | 148 +++++++++++++++++++----------------------------------
 1 file changed, 53 insertions(+), 95 deletions(-)

diff --git a/README.rst b/README.rst
index 9777475..9855bac 100644
--- a/README.rst
+++ b/README.rst
@@ -1,6 +1,5 @@
-==========================================================
-PyLipID - A Python Package For Lipid Interactions Analysis
-==========================================================
+PyLipID - Analysis of Protein–Lipid Interactions from Molecular Dynamics
+========================================================================
 
 .. image:: https://github.com/pstansfeld/PyLipID/actions/workflows/ci.yml/badge.svg
    :target: https://github.com/pstansfeld/PyLipID/actions/workflows/ci.yml
@@ -10,55 +9,44 @@ PyLipID - A Python Package For Lipid Interactions Analysis
 .. image:: https://img.shields.io/pypi/v/PyLipID
    :target: https://pypi.org/project/pylipid/
 
 .. image:: docs/static/pylipid_logo_smallsize.png
-   :align: center
+    :align: center
 
+PyLipID is a Python package for analysing lipid–protein interactions from
+Molecular Dynamics (MD) simulations. It supports coarse-grained and all-atom
+simulations, handles formats supported by MDTraj, and works in scripts or
+Jupyter notebooks.
 
-PyLipID is a Python package for analysing lipid interactions with membrane proteins from
-Molecular Dynamics (MD) simulations. It supports both coarse-grained and all-atom force fields,
-works with any trajectory format supported by `MDTraj `_, and is designed
-to run from Jupyter notebooks or Python scripts.
-
-Full documentation and tutorials: `pylipid.readthedocs.io `_
-
+Documentation: https://pylipid.readthedocs.io
 
 Features
 ========
 
-- **Dual-cutoff contact detection** — overcomes the 'rattling in cage' artefact common in
-  coarse-grained simulations by defining contact start and end with separate distance thresholds.
-- **Binding site detection** — identifies lipid binding sites by computing community structures - in residue interaction networks using the Louvain algorithm. -- **Kinetics** — calculates lipid koff rates and residence times for both individual residues - and binding sites. -- **Interaction metrics** — computes occupancy, lipid count, duration and residence time per - residue and per binding site. -- **Bound pose analysis** — extracts and clusters representative lipid-bound poses for each - binding site. -- **Surface area** — calculates solvent-accessible surface area contributions per residue and - binding site. -- **Visualisation** — generates publication-ready figures and PyMOL scripts for structural - visualisation. - +- Dual-cutoff contact detection to improve coarse-grained accuracy. +- Binding site detection using NetworkX Louvain community analysis. +- Kinetic parameters: koff and residence times per residue and site. +- Interaction metrics including occupancy, count, duration. +- Representative bound pose extraction and clustering. +- SASA calculations per residue and site. +- Publication-ready plots and PyMOL scripts. Installation ============ -Install from PyPI (recommended):: +From PyPI:: pip install pylipid -Install from source:: +From source:: git clone https://github.com/pstansfeld/PyLipID cd PyLipID pip install . -For development (editable install):: +Editable/development install:: pip install -e . -**Requirements:** Python >= 3.9. All dependencies are installed automatically by pip. - +Requires Python >= 3.9. 
Quick Start =========== @@ -67,92 +55,63 @@ Quick Start from pylipid.api import LipidInteraction - # Point to your trajectory and topology files trajfile_list = ["run1/protein_lipids.xtc", "run2/protein_lipids.xtc"] topfile_list = ["run1/protein_lipids.gro", "run2/protein_lipids.gro"] - # Initialise — dual cutoffs in nm, lipid residue name as in your topology li = LipidInteraction( trajfile_list, topfile_list=topfile_list, - cutoffs=[0.55, 0.8], # [lower, upper] in nm + cutoffs=[0.55, 0.8], lipid="CHOL", nprot=1, ) - # Collect contacts (the main calculation step) li.collect_residue_contacts() - - # Compute metrics li.compute_residue_duration() li.compute_residue_koff() li.compute_binding_nodes(threshold=4) li.compute_site_koff() - # Plot and save - li.plot(item="Residence Time") - li.save_data(item="Dataset") + li.plot("Residence Time") + li.save_data("Dataset") li.save_pymol_script(pdb_file="receptor.pdb") -For a full walkthrough including coarse-grained and all-atom examples, see the -`tutorials on readthedocs `_. - - What's New in v1.6.0 ==================== -This release, maintained by the `Stansfeld Lab `_, -fixes a number of bugs present in v1.5.14 and modernises the package for current -Python and dependency versions. - -**Bug fixes** - -- Fixed off-by-one error in binding site detection that silently dropped the last - binding site on every run. -- Fixed ``calculate_scores`` returning ``None`` on error, causing a downstream - ``TypeError`` in pose analysis. -- Fixed correlation coefficient matrix computation (``sparse_corrcoef``) returning a - ``numpy.matrix`` object incompatible with NetworkX 3.x and newer NumPy (root cause - of the ``IndexError`` reported in issue #33). -- Fixed ``analyze_bound_poses`` referencing a non-existent column name and using - incorrect DataFrame indexing. -- Fixed ``compute_residue_lipidcount`` missing its ``return`` statement for single-residue - queries. 
-- Fixed ``write_PDB`` crashing with ``ValueError: invalid literal for int()`` when chain - IDs are absent in GROMACS ``.gro`` topologies (issues #29, #26). -- Fixed hardcoded figure number in ``plot_koff`` causing curves to overwrite each other - when called for multiple residues. -- Fixed deprecated ``plt.get_cmap()`` call removed in Matplotlib 3.9. -- Fixed ``LogNorm`` crash in ``plot_corrcoef`` when the correlation matrix contains zeros. -- Fixed ``np.float128`` (Linux-only) replaced with portable ``np.longdouble``. -- Fixed typo in output folder name ``Reisidue_koffs`` -> ``Residue_koffs``. - -**Performance** - -- ``collect_residue_contacts`` now issues a single ``md.compute_distances`` call per - residue rather than one per lipid molecule, giving a significant speedup on systems - with large lipid counts. A nested progress bar now shows per-residue progress within - each trajectory so the calculation no longer appears frozen. - -**Dependency changes** - -- Removed ``python-louvain`` dependency; binding site detection now uses the Louvain - implementation built into NetworkX (>= 2.6). Results are now reproducible across - runs (fixed random seed). -- All dependencies now carry minimum version bounds. -- Python < 3.9 (end-of-life) is no longer supported. - -**Packaging** - -- Migrated from ``setup.py`` to ``pyproject.toml``. -- CI migrated from Travis to GitHub Actions (Python 3.9--3.12, Ubuntu and macOS). - +Bug Fixes +--------- +- Corrected binding-site detection off-by-one error. +- Fixed calculate_scores returning None. +- Fixed sparse_corrcoef matrix type issues. +- Fixed DataFrame column/index mismatches in analyze_bound_poses. +- Fixed missing return in compute_residue_lipidcount. +- Fixed write_PDB errors for .gro files lacking chain IDs. +- Fixed figure overwriting in plot_koff. +- Updated deprecated Matplotlib calls. +- Fixed LogNorm crash on zero-containing matrices. +- Replaced np.float128 with np.longdouble. +- Corrected output directory typo. 
+ +Performance +----------- +- collect_residue_contacts now calls md.compute_distances once per residue. +- Nested progress bars implemented. + +Dependencies +------------ +- Removed python-louvain; using NetworkX built-in Louvain. +- Minimum versions added. +- Dropped Python < 3.9. + +Packaging & CI +-------------- +- Migrated to pyproject.toml. +- CI moved from Travis CI to GitHub Actions. Citation ======== -If you use PyLipID in scientific research, please cite: - .. image:: https://img.shields.io/badge/DOI-10.1021/acs.jctc.1c00708-blue :target: https://doi.org/10.1021/acs.jctc.1c00708 @@ -170,8 +129,7 @@ If you use PyLipID in scientific research, please cite: url = {https://doi.org/10.1021/acs.jctc.1c00708}, } - -Licence +License ======= -MIT licence. See `LICENSE.txt `_ for details. +MIT License. See LICENSE.txt. From 05a7ed09c5b4dd637416dfc91d40ee7f4c33ce5e Mon Sep 17 00:00:00 2001 From: Phill Stansfeld Date: Tue, 10 Mar 2026 22:08:25 +0000 Subject: [PATCH 08/24] Update pyproject.toml --- pyproject.toml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 3631cef..03bb08d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,9 @@ [build-system] -requires = ["setuptools>=64", "setuptools-scm"] -build-backend = "setuptools.backends.legacy:build" +requires = [ + "setuptools>=61", + "wheel" +] +build-backend = "setuptools.build_meta" [project] name = "pylipid" From 2e7ec8544311ceae7f95eb7acf177a6e79216f8d Mon Sep 17 00:00:00 2001 From: Phill Stansfeld Date: Tue, 10 Mar 2026 22:10:29 +0000 Subject: [PATCH 09/24] Update ci.yml --- .github/workflows/ci.yml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ee50ca2..80717df 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -25,11 +25,15 @@ jobs: uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - cache: 'pip' + cache: pip 
+
+      # 🔧 NEW: ensure modern packaging tools before installing anything
+      - name: Upgrade pip, setuptools, and wheel
+        run: |
+          python -m pip install --upgrade pip setuptools wheel
 
       - name: Install dependencies
         run: |
-          python -m pip install --upgrade pip
           pip install -r requirements.txt
           pip install pytest pytest-cov flake8

From 8e889e3a60dcac31914f8607ea444af6327a88ad Mon Sep 17 00:00:00 2001
From: Phill Stansfeld
Date: Wed, 11 Mar 2026 15:12:11 +0000
Subject: [PATCH 10/24] update

---
 pylipid/__init__.py              |   1 +
 pylipid/func/kinetics.py         |   4 +-
 tests/funcs/test_binding_site.py | 182 ++++++++++++++++---------------
 tests/funcs/test_clusterer.py    | 151 +++++++++++++++----------
 tests/funcs/test_kinetics.py     |  73 +++++++------
 tests/plots/test_plot_1d.py      |  13 +--
 6 files changed, 235 insertions(+), 189 deletions(-)

diff --git a/pylipid/__init__.py b/pylipid/__init__.py
index e23c929..0a534fd 100644
--- a/pylipid/__init__.py
+++ b/pylipid/__init__.py
@@ -15,3 +15,4 @@
 ##############################################################################
 
 from ._version import __version__
+from .api import LipidInteraction
diff --git a/pylipid/func/kinetics.py b/pylipid/func/kinetics.py
index 9d392cc..d6ce542 100644
--- a/pylipid/func/kinetics.py
+++ b/pylipid/func/kinetics.py
@@ -216,11 +216,11 @@ def cal_survival_func(durations, t_total, delta_t_list):
 
 
 def _curve_fitting(survival_func, delta_t_list, initial_guess):
-    """Fit the exponential curve :math:`y=Ae^{-k_1\Delta t}+Be^{-k_2\Delta t}`"""
+    r"""Fit the exponential curve :math:`y=Ae^{-k_1\Delta t}+Be^{-k_2\Delta t}`"""
     survival_rates = np.nan_to_num([survival_func[delta_t] for delta_t in delta_t_list]) # y
     try:
         popt, pcov = curve_fit(_bi_expo, np.array(delta_t_list), np.array(survival_rates), p0=initial_guess,
                                maxfev=100000)
         n_fitted = _bi_expo(np.array(delta_t_list, dtype=np.longdouble), *popt)
         r_squared = 1 - np.sum((np.nan_to_num(n_fitted) -
np.nan_to_num(survival_rates))**2)/np.sum((survival_rates - np.mean(survival_rates))**2) ks = [abs(k) for k in popt[:2]] @@ -235,7 +235,7 @@ def _curve_fitting(survival_func, delta_t_list, initial_guess): def _bi_expo(x, k1, k2, A, B): - """The exponential curve :math:`y=Ae^{-k_1\Delta t}+Be^{-k_2\Delta t}`""" + r"""The exponential curve :math:`y=Ae^{-k_1\Delta t}+Be^{-k_2\Delta t}`""" return A*np.exp(-k1*x) + B*np.exp(-k2*x) diff --git a/tests/funcs/test_binding_site.py b/tests/funcs/test_binding_site.py index d8398bf..6227c0e 100644 --- a/tests/funcs/test_binding_site.py +++ b/tests/funcs/test_binding_site.py @@ -1,96 +1,102 @@ -import unittest -import os -import shutil -import numpy as np -from pylipid.func import get_node_list, collect_bound_poses, vectorize_poses, calculate_scores, write_bound_poses -from pylipid.func import calculate_site_surface_area -from pylipid import LipidInteraction - - -class TestBindingSites(unittest.TestCase): - - def setUp(self): - trajfile_list = ["../data/run1/protein_lipids.xtc", "../data/run2/protein_lipids.xtc"] - topfile_list = ["../data/run1/protein_lipids.gro", "../data/run2/protein_lipids.gro"] - lipid = "CHOL" - cutoffs = [0.55, 0.8] - file_dir = os.path.dirname( os.path.abspath(__file__) ) - self.save_dir = os.path.join(file_dir, "test_binding_site") - self.li = LipidInteraction(trajfile_list, topfile_list, cutoffs=cutoffs, lipid=lipid, - nprot=1, save_dir=self.save_dir) - self.li.collect_residue_contacts(write_log=False, print_log=True) - - def test_get_node_list(self): - corrcoef = self.li.interaction_corrcoef - node_list = get_node_list(corrcoef) - self.assertIsInstance(node_list, list) - - def test_collect_binding_poses(self): - binding_site_map = {bs_id: nodes for bs_id, nodes in enumerate(self.li._node_list)} - contact_list = self.li.contact_residues_low - pose_pool = collect_bound_poses(binding_site_map, contact_list, self.li.trajfile_list[0], - self.li.topfile_list[0], self.li.lipid, self.li.stride, self.li.nprot) 
- self.assertIsInstance(pose_pool, dict) - - def tearDown(self): - shutil.rmtree(self.save_dir) - +""" +Tests for binding site detection, pose collection, scoring and surface area. +Uses the synthetic_traj_dir session fixture from conftest.py — no real data files needed. +""" -class TestBindingPoses(unittest.TestCase): - - def setUp(self): - trajfile_list = ["../data/run1/protein_lipids.xtc", "../data/run2/protein_lipids.xtc"] - topfile_list = ["../data/run1/protein_lipids.gro", "../data/run2/protein_lipids.gro"] - lipid = "CHOL" - cutoffs = [0.55, 0.8] - file_dir = os.path.dirname(os.path.abspath(__file__)) - self.save_dir = os.path.join(file_dir, "binding_site") - self.li = LipidInteraction(trajfile_list, topfile_list, cutoffs=cutoffs, lipid=lipid, - nprot=1, save_dir=self.save_dir) - self.li.collect_residue_contacts(write_log=False, print_log=True) - node_list = self.li.compute_binding_nodes(print_data=False) - binding_site_map = {bs_id: nodes for bs_id, nodes in enumerate(node_list)} - contact_residue_dict = self.li.contact_residues_low - self.pose_pool = collect_bound_poses(binding_site_map, contact_residue_dict, self.li.trajfile_list[0], - self.li.topfile_list[0], self.li.lipid, self.li.stride, self.li.nprot) +import pytest +import numpy as np +from pylipid.func import (get_node_list, collect_bound_poses, vectorize_poses, + calculate_scores, write_bound_poses) +from pylipid.api import LipidInteraction +import shutil - def test_vectorize_poses(self): - for bs_id, nodes in enumerate(self.li._node_list): - dist_matrix, pose_traj = vectorize_poses(self.pose_pool[bs_id], nodes, self.li._protein_ref, self.li._lipid_ref) - self.assertEqual(dist_matrix.shape[0], self.li._lipid_ref.n_atoms) - self.assertEqual(dist_matrix.shape[1], len(self.pose_pool[bs_id])) - self.assertEqual(dist_matrix.shape[2], len(self.li._node_list[bs_id])) - return - def test_calculate_scores(self): - for bs_id, nodes in enumerate(self.li._node_list): - dist_matrix, pose_traj = 
vectorize_poses(self.pose_pool[bs_id], nodes, self.li._protein_ref, self.li._lipid_ref) +@pytest.fixture(scope="module") +def li(synthetic_traj_dir, tmp_path_factory): + save_dir = str(tmp_path_factory.mktemp("bs_output")) + instance = LipidInteraction( + synthetic_traj_dir["trajfile_list"], + cutoffs=synthetic_traj_dir["cutoffs"], + topfile_list=synthetic_traj_dir["topfile_list"], + lipid=synthetic_traj_dir["lipid"], + nprot=1, + save_dir=save_dir, + ) + instance.collect_residue_contacts() + instance.compute_residue_koff(plot_data=False) + instance.compute_binding_nodes(threshold=2, print_data=False) + yield instance + shutil.rmtree(save_dir, ignore_errors=True) + + +@pytest.fixture(scope="module") +def pose_pool(li, synthetic_traj_dir): + """Collect bound poses for all detected binding sites.""" + if not li._node_list: + pytest.skip("No binding sites detected in synthetic data") + binding_site_map = {bs_id: nodes for bs_id, nodes in enumerate(li._node_list)} + return collect_bound_poses( + binding_site_map, + li._contact_residues_low, + li.trajfile_list, + li.topfile_list, + li.lipid, + li._protein_ref, + li._lipid_ref, + stride=li.stride, + nprot=li.nprot, + ) + + +class TestBindingSites: + + def test_get_node_list(self, li): + corrcoef = li.interaction_corrcoef + node_list, modularity = get_node_list(corrcoef, threshold=2) + assert isinstance(node_list, list) + + def test_collect_binding_poses(self, pose_pool): + assert isinstance(pose_pool, dict) + + +class TestBindingPoses: + + def test_vectorize_poses(self, li, pose_pool): + for bs_id, nodes in enumerate(li._node_list): + if bs_id not in pose_pool or len(pose_pool[bs_id]) == 0: + continue + dist_matrix, pose_traj = vectorize_poses( + pose_pool[bs_id], nodes, li._protein_ref, li._lipid_ref) + assert dist_matrix.shape[0] == li._lipid_ref.n_atoms + assert dist_matrix.shape[1] == len(pose_pool[bs_id]) + assert dist_matrix.shape[2] == len(nodes) + + def test_calculate_scores(self, li, pose_pool): + for bs_id, 
nodes in enumerate(li._node_list): + if bs_id not in pose_pool or len(pose_pool[bs_id]) == 0: + continue + dist_matrix, pose_traj = vectorize_poses( + pose_pool[bs_id], nodes, li._protein_ref, li._lipid_ref) scores = calculate_scores(dist_matrix) - self.assertEqual(len(scores), pose_traj.n_frames) - scores = calculate_scores(dist_matrix, score_weights={"RHO": 10}) - self.assertEqual(len(scores), pose_traj.n_frames) - - - def test_write_binding_poses(self): - for bs_id, nodes in enumerate(self.li._node_list): - dist_matrix, pose_traj = vectorize_poses(self.pose_pool[bs_id], nodes, self.li._protein_ref, self.li._lipid_ref) + assert len(scores) == pose_traj.n_frames + scores_weighted = calculate_scores(dist_matrix, score_weights={"RHO": 10}) + assert len(scores_weighted) == pose_traj.n_frames + + def test_write_binding_poses(self, li, pose_pool, tmp_path): + for bs_id, nodes in enumerate(li._node_list): + if bs_id not in pose_pool or len(pose_pool[bs_id]) == 0: + continue + dist_matrix, pose_traj = vectorize_poses( + pose_pool[bs_id], nodes, li._protein_ref, li._lipid_ref) scores = calculate_scores(dist_matrix) - num_of_poses = min(5, pose_traj.n_frames) + if len(scores) == 0: + continue + num_of_poses = min(3, pose_traj.n_frames) pose_indices = np.argsort(scores)[::-1][:num_of_poses] - write_bound_poses(pose_traj, pose_indices, self.save_dir, pose_prefix="BSid{}_top".format(bs_id), + write_bound_poses(pose_traj, pose_indices, str(tmp_path), + pose_prefix="BSid{}_top".format(bs_id), pose_format="gro") - - def test_calculate_site_surface_area(self): - binding_site_map = {bs_id: nodes for bs_id, nodes in enumerate(self.li._node_list)} - radii_book = {"BB": 0.36, "SC1": 0.33, "SC2": 0.33, "SC3": 0.33} - surface_area= calculate_site_surface_area(binding_site_map, radii_book, self.li.trajfile_list, - self.li.topfile_list, self.li.nprot, self.li.timeunit, - self.li.stride, self.li.dt_traj) - - def tearDown(self): - shutil.rmtree(self.save_dir) - - -if __name__ == 
"__main__": - unittest.main() + def test_compute_surface_area(self, li): + # Use the high-level API method — calculate_surface_area_wrapper is internal + li.compute_surface_area() diff --git a/tests/funcs/test_clusterer.py b/tests/funcs/test_clusterer.py index ef8e62c..0ba87fe 100644 --- a/tests/funcs/test_clusterer.py +++ b/tests/funcs/test_clusterer.py @@ -1,63 +1,96 @@ -import unittest -import os -import shutil +""" +Tests for pose clustering (DBSCAN and KMeans). +Uses the synthetic_traj_dir session fixture from conftest.py — no real data files needed. +""" + +import pytest import numpy as np from sklearn.decomposition import PCA from pylipid.func import collect_bound_poses, vectorize_poses, write_bound_poses from pylipid.func import cluster_DBSCAN, cluster_KMeans -from pylipid import LipidInteraction - -class TestCluster(unittest.TestCase): - - def setUp(self): - trajfile_list = ["../data/run1/protein_lipids.xtc", "../data/run2/protein_lipids.xtc"] - topfile_list = ["../data/run1/protein_lipids.gro", "../data/run2/protein_lipids.gro"] - lipid = "CHOL" - cutoffs = [0.55, 0.8] - file_dir = os.path.dirname(os.path.abspath(__file__)) - self.save_dir = os.path.join(file_dir, "binding_site") - self.li = LipidInteraction(trajfile_list, topfile_list, cutoffs=cutoffs, lipid=lipid, - nprot=1, save_dir=self.save_dir) - self.li.collect_residue_contacts(write_log=False, print_log=True) - node_list = self.li.compute_binding_nodes(print_data=False) - binding_site_map = {bs_id: nodes for bs_id, nodes in enumerate(node_list)} - contact_residue_dict = self.li.contact_residues_low - self.pose_pool = collect_bound_poses(binding_site_map, contact_residue_dict, self.li.trajfile_list[0], - self.li.topfile_list[0], self.li.lipid, self.li.stride, self.li.nprot) - - def test_cluster_DBSCAN(self): - for bs_id, nodes in enumerate(self.li._node_list): - dist_matrix, pose_traj = vectorize_poses(self.pose_pool[bs_id], nodes, self.li._protein_ref, - self.li._lipid_ref) - lipid_dist_per_pose = 
[dist_matrix[:, pose_id, :].ravel() - for pose_id in np.arange(dist_matrix.shape[1])] - transformed_data = PCA(n_components=0.95).fit_transform(lipid_dist_per_pose) - cluster_labels = cluster_DBSCAN(transformed_data, eps=None, min_samples=None, - metric="euclidean") - self.assertEqual(len(cluster_labels), len(lipid_dist_per_pose)) - cluster_id_set = [label for label in np.unique(cluster_labels) if label != -1] - selected_pose_id = [np.random.choice(np.where(cluster_labels == cluster_id)[0], 1)[0] - for cluster_id in cluster_id_set] - write_bound_poses(pose_traj, selected_pose_id, self.save_dir, - pose_prefix="BSid{}_cluster_DBSCAN".format(bs_id), pose_format="gro") - - def test_cluster_KMeans(self): - for bs_id, nodes in enumerate(self.li._node_list): - dist_matrix, pose_traj = vectorize_poses(self.pose_pool[bs_id], nodes, self.li._protein_ref, - self.li._lipid_ref) - lipid_dist_per_pose = [dist_matrix[:, pose_id, :].ravel() - for pose_id in np.arange(dist_matrix.shape[1])] - transformed_data = PCA(n_components=0.95).fit_transform(lipid_dist_per_pose) - cluster_labels = cluster_KMeans(transformed_data, n_clusters=5) - self.assertEqual(len(cluster_labels), len(lipid_dist_per_pose)) - cluster_id_set = [label for label in np.unique(cluster_labels) if label != -1] - selected_pose_id = [np.random.choice(np.where(cluster_labels == cluster_id)[0], 1)[0] - for cluster_id in cluster_id_set] - write_bound_poses(pose_traj, selected_pose_id, self.save_dir, - pose_prefix="BSid{}_cluster_KMeans".format(bs_id), pose_format="gro") - - def tearDown(self): - shutil.rmtree(self.save_dir) - - if __name__ == "__main__": - unittest.main() +from pylipid.api import LipidInteraction +import shutil + + +@pytest.fixture(scope="module") +def li_and_poses(synthetic_traj_dir, tmp_path_factory): + save_dir = str(tmp_path_factory.mktemp("cluster_output")) + li = LipidInteraction( + synthetic_traj_dir["trajfile_list"], + cutoffs=synthetic_traj_dir["cutoffs"], + 
topfile_list=synthetic_traj_dir["topfile_list"], + lipid=synthetic_traj_dir["lipid"], + nprot=1, + save_dir=save_dir, + ) + li.collect_residue_contacts() + li.compute_residue_koff(plot_data=False) + li.compute_binding_nodes(threshold=2, print_data=False) + + if not li._node_list: + yield li, {} + else: + binding_site_map = {bs_id: nodes for bs_id, nodes in enumerate(li._node_list)} + pose_pool = collect_bound_poses( + binding_site_map, + li._contact_residues_low, + li.trajfile_list, + li.topfile_list, + li.lipid, + li._protein_ref, + li._lipid_ref, + stride=li.stride, + nprot=li.nprot, + ) + yield li, pose_pool + + shutil.rmtree(save_dir, ignore_errors=True) + + +class TestCluster: + + def test_cluster_DBSCAN(self, li_and_poses, tmp_path): + li, pose_pool = li_and_poses + if not li._node_list: + pytest.skip("No binding sites detected in synthetic data") + for bs_id, nodes in enumerate(li._node_list): + if bs_id not in pose_pool or len(pose_pool[bs_id]) < 5: + continue + dist_matrix, pose_traj = vectorize_poses( + pose_pool[bs_id], nodes, li._protein_ref, li._lipid_ref) + lipid_dist_per_pose = [dist_matrix[:, pose_id, :].ravel() + for pose_id in np.arange(dist_matrix.shape[1])] + transformed_data = PCA(n_components=min(2, len(lipid_dist_per_pose) - 1)).fit_transform(lipid_dist_per_pose) + cluster_labels = cluster_DBSCAN(transformed_data, eps=None, min_samples=None, + metric="euclidean") + assert len(cluster_labels) == len(lipid_dist_per_pose) + cluster_id_set = [label for label in np.unique(cluster_labels) if label != -1] + if cluster_id_set: + selected_pose_id = [np.random.choice(np.where(cluster_labels == cid)[0], 1)[0] + for cid in cluster_id_set] + write_bound_poses(pose_traj, selected_pose_id, str(tmp_path), + pose_prefix="BSid{}_cluster_DBSCAN".format(bs_id), + pose_format="gro") + + def test_cluster_KMeans(self, li_and_poses, tmp_path): + li, pose_pool = li_and_poses + if not li._node_list: + pytest.skip("No binding sites detected in synthetic data") + for 
bs_id, nodes in enumerate(li._node_list): + if bs_id not in pose_pool or len(pose_pool[bs_id]) < 5: + continue + dist_matrix, pose_traj = vectorize_poses( + pose_pool[bs_id], nodes, li._protein_ref, li._lipid_ref) + lipid_dist_per_pose = [dist_matrix[:, pose_id, :].ravel() + for pose_id in np.arange(dist_matrix.shape[1])] + n_clusters = min(3, len(lipid_dist_per_pose)) + transformed_data = PCA(n_components=min(2, len(lipid_dist_per_pose) - 1)).fit_transform(lipid_dist_per_pose) + cluster_labels = cluster_KMeans(transformed_data, n_clusters=n_clusters) + assert len(cluster_labels) == len(lipid_dist_per_pose) + cluster_id_set = [label for label in np.unique(cluster_labels) if label != -1] + if cluster_id_set: + selected_pose_id = [np.random.choice(np.where(cluster_labels == cid)[0], 1)[0] + for cid in cluster_id_set] + write_bound_poses(pose_traj, selected_pose_id, str(tmp_path), + pose_prefix="BSid{}_cluster_KMeans".format(bs_id), + pose_format="gro") diff --git a/tests/funcs/test_kinetics.py b/tests/funcs/test_kinetics.py index 1bf2610..477582f 100644 --- a/tests/funcs/test_kinetics.py +++ b/tests/funcs/test_kinetics.py @@ -1,41 +1,48 @@ -import unittest -import os -import shutil +""" +Tests for koff and survival function calculations. +Uses the synthetic_traj_dir session fixture from conftest.py — no real data files needed. 
+""" + +import pytest import numpy as np +import shutil from pylipid.func import cal_koff, cal_survival_func -from pylipid import LipidInteraction - -class TestKinetics(unittest.TestCase): +from pylipid.api import LipidInteraction - def setUp(self): - trajfile_list = ["../data/run1/protein_lipids.xtc", "../data/run2/protein_lipids.xtc"] - topfile_list = ["../data/run1/protein_lipids.gro", "../data/run2/protein_lipids.gro"] - lipid = "CHOL" - cutoffs = [0.55, 1.0] - file_dir = os.path.dirname(os.path.abspath(__file__)) - self.save_dir = os.path.join(file_dir, "test_kinetics") - self.li = LipidInteraction(trajfile_list, topfile_list, cutoffs=cutoffs, lipid=lipid, - nprot=1, save_dir=self.save_dir) - self.li.collect_residue_contacts() - self.t_total = np.max(self.li._T_total) - self.timestep = np.min(self.li._timesteps) - def test_cal_survivla_function(self): - delta_t_list = np.arange(0, self.t_total, self.timestep) - survival_func = cal_survival_func(np.concatenate(self.li.durations[25]), self.t_total, delta_t_list) - self.assertIsInstance(survival_func, dict) +@pytest.fixture(scope="module") +def li(synthetic_traj_dir, tmp_path_factory): + save_dir = str(tmp_path_factory.mktemp("kinetics_output")) + instance = LipidInteraction( + synthetic_traj_dir["trajfile_list"], + cutoffs=synthetic_traj_dir["cutoffs"], + topfile_list=synthetic_traj_dir["topfile_list"], + lipid=synthetic_traj_dir["lipid"], + nprot=1, + save_dir=save_dir, + ) + instance.collect_residue_contacts() + instance.compute_residue_duration() + yield instance + shutil.rmtree(save_dir, ignore_errors=True) - def test_cal_koff(self): - koff, restime, properties = cal_koff(np.concatenate(self.li.durations[25]), self.t_total, - self.timestep, nbootstrap=20, initial_guess=[1,1,1,1]) - print(koff) - print(restime) - print(properties) - self.assertIsInstance(properties, dict) - def tearDown(self): - shutil.rmtree(self.li.save_dir) +class TestKinetics: + def test_cal_survival_function(self, li): + # Use residue 0 
— guaranteed to exist in synthetic data + t_total = np.max(li._T_total) + timestep = np.min(li._timesteps) + durations = np.concatenate(li._duration[0]) + delta_t_list = np.arange(0, t_total, timestep) + survival_func = cal_survival_func(durations, t_total, delta_t_list) + assert isinstance(survival_func, dict) -if __name__ == "__main__": - unittest.main() + def test_cal_koff(self, li): + t_total = np.max(li._T_total) + timestep = np.min(li._timesteps) + durations = np.concatenate(li._duration[0]) + koff, restime, properties = cal_koff(durations, t_total, timestep, + nbootstrap=5, + initial_guess=[1, 1, 1, 1]) + assert isinstance(properties, dict) diff --git a/tests/plots/test_plot_1d.py b/tests/plots/test_plot_1d.py index 5769387..8184559 100644 --- a/tests/plots/test_plot_1d.py +++ b/tests/plots/test_plot_1d.py @@ -4,7 +4,7 @@ import shutil import pandas as pd from pylipid.util import check_dir -from pylipid.plot import plot_residue_data, plot_residue_data_logos +from pylipid.plot import plot_residue_data, plot_residue_data_logo from pylipid.plot import plot_binding_site_data, plot_surface_area class TestPlot1d(unittest.TestCase): @@ -23,18 +23,18 @@ def test_plot_residue_data(self): ylabel=None, fn=os.path.join(self.save_dir, "plot_residue_data_1.pdf"), title="This is a test") logos = np.random.choice(letters_base, size=120) - plot_residue_data_logos(residue_index, logos, interactions, gap=100, letter_map=None, + plot_residue_data_logo(residue_index, logos, interactions, gap=100, letter_map=None, color_scheme="chemistry", ylabel="interactions", fn=os.path.join(self.save_dir, "plot_residue_data_logo_1.pdf")) # gap in-between sequence residue_index = np.concatenate([np.arange(120), np.arange(138, 183), np.arange(355, 382)]) - interactions = np.random.random(size=(120+(183-138)+(312-255))) + interactions = np.random.random(size=(120+(183-138)+(382-355))) plot_residue_data(residue_index, interactions, gap=50, ylabel=None, fn=os.path.join(self.save_dir, 
"plot_residue_data_2.pdf"), title="This is a test") logos = np.random.choice(letters_base, size=len(interactions)) - plot_residue_data_logos(residue_index, logos, interactions, gap=100, + plot_residue_data_logo(residue_index, logos, interactions, gap=100, ylabel=None, fn=os.path.join(self.save_dir, "plot_residue_data_logo_2.pdf")) # two chains in sequences. @@ -44,7 +44,7 @@ def test_plot_residue_data(self): ylabel=None, fn=os.path.join(self.save_dir, "plot_residue_data_3.pdf"), title="This is a test") logos = np.random.choice(letters_base, size=len(interactions)) - plot_residue_data_logos(residue_index, logos, interactions, gap=100, + plot_residue_data_logo(residue_index, logos, interactions, gap=100, ylabel=None, fn=os.path.join(self.save_dir, "plot_residue_data_logo_3.pdf")) # check letter mapping @@ -52,7 +52,7 @@ def test_plot_residue_data(self): three_letter_seq = np.random.choice(list(letter_map.keys()), size=23) residue_index = np.arange(23) + 52 interactions = np.random.random(size=23) - plot_residue_data_logos(residue_index, three_letter_seq, interactions, gap=100, + plot_residue_data_logo(residue_index, three_letter_seq, interactions, gap=100, ylabel=None, fn=os.path.join(self.save_dir, "plot_residue_data_logo_4.pdf"), letter_map=letter_map) @@ -82,4 +82,3 @@ def tearDown(self): if __name__ == "__main__": unittest.main() - From 6b21cc3dfcf769eafea01841f1c75f964819ee8f Mon Sep 17 00:00:00 2001 From: Phill Stansfeld Date: Wed, 11 Mar 2026 15:33:11 +0000 Subject: [PATCH 11/24] update --- pylipid/util/corrcoef.py | 9 +- tests/api/test_LipidInteraction.py | 226 ++++++++++++++++++++--------- tests/conftest.py | 157 ++++++++++++++++++++ tests/data/.DS_Store | Bin 6148 -> 6148 bytes tests/util/test_utils.py | 66 +++++---- 5 files changed, 359 insertions(+), 99 deletions(-) create mode 100644 tests/conftest.py diff --git a/pylipid/util/corrcoef.py b/pylipid/util/corrcoef.py index f09a05b..85b4f5f 100644 --- a/pylipid/util/corrcoef.py +++ 
b/pylipid/util/corrcoef.py @@ -33,7 +33,14 @@ def sparse_corrcoef(A, B=None): # version-dependent sparse-dense arithmetic (scipy >= 1.14 / numpy >= 1.25). rowsum = np.asarray(A.sum(1)).flatten() # shape (n_rows,) ndarray centering = np.outer(rowsum, rowsum) / n # explicit outer product - C = (np.asarray(A.dot(A.T.conjugate()).todense()) - centering) / (n - 1) + # Handle both sparse matrices (from collect_residue_contacts) and + # plain ndarrays (from tests or direct calls). + dot = A.dot(A.T.conjugate()) + if sparse.issparse(dot): + dot = np.asarray(dot.todense()) + else: + dot = np.asarray(dot) + C = (dot - centering) / (n - 1) # The correlation coefficients are given by # C_{i,j} / sqrt(C_{i} * C_{j}) d = np.diag(C) diff --git a/tests/api/test_LipidInteraction.py b/tests/api/test_LipidInteraction.py index 8f8981f..4e88949 100644 --- a/tests/api/test_LipidInteraction.py +++ b/tests/api/test_LipidInteraction.py @@ -1,75 +1,157 @@ -import unittest -import os -import shutil -from pylipid.api import LipidInteraction -from pylipid.util import check_dir - -class TestLipidInteraction(unittest.TestCase): - - def test_pylipid(self): - trajfile_list = ["../data/run1/protein_lipids.xtc", "../data/run2/protein_lipids.xtc"] - topfile_list = ["../data/run1/protein_lipids.gro", "../data/run2/protein_lipids.gro"] - lipid = "CHOL" - cutoffs = [0.55, 0.8] - file_dir = os.path.dirname(os.path.abspath(__file__)) - self.save_dir = check_dir(os.path.join(file_dir, "test_pylipid")) - li = LipidInteraction(trajfile_list, cutoffs=cutoffs, topfile_list=topfile_list, lipid=lipid, - nprot=1, save_dir=self.save_dir) - li.collect_residue_contacts() - - li.compute_residue_duration() - li.compute_residue_duration(10) - li.compute_residue_duration([2,3,4]) - - li.compute_residue_koff(plot_data=False) - li.compute_residue_koff(10) - li.compute_residue_koff([2,4,5,10]) - - li.compute_binding_nodes(threshold=4) - li.compute_binding_nodes(threshold=2, print_data=False) - - 
li.compute_site_koff(plot_data=False) - li.compute_site_koff(binding_site_id=[1,2,3]) - li.compute_site_koff(binding_site_id=1) - - li.compute_site_duration() - li.compute_site_duration(1) - li.compute_site_duration([0,1]) - - li.analyze_bound_poses() - li.analyze_bound_poses(binding_site_id=[1,2,3]) - li.analyze_bound_poses(binding_site_id=[1,2,3], n_clusters=2) - - li.compute_surface_area() - li.compute_surface_area(binding_site_id=[1,2,3]) - li.compute_surface_area(binding_site_id=[1, 2, 3], radii={"BB": 0.30, "SC1": 0.2}) - - li.write_site_info() - li.write_site_info(sort_residue="Duration") - - li.show_stats_per_traj() - - li.save_data(item="Dataset") - li.save_data(item="Duration") - - li.save_coordinate(item="Residence Time") - li.save_coordinate(item="Duration") - - li.save_pymol_script(pdb_file="../data/receptor.pdb") - - li.plot(item="Duration") - li.plot(item="Residence Time") - - li.plot_logo(item="Residence Time") - li.plot_logo(item="Lipid Count") - - def tearDown(self): - shutil.rmtree(self.save_dir) - -if __name__ == "__main__": - unittest.main() - - +""" +Pytest fixtures for PyLipID tests. +Generates small synthetic MDTraj trajectories in a temporary directory so that +CI can run without any committed trajectory data files. 
+Topology: + - 30 protein residues (ALA/GLY alternating, one chain) + - 20 CHOL lipid molecules (3 atoms each: C1, C2, O1) + - Cubic box, 8 nm side + - 2 trajectory files, 100 frames each +""" +import os +import numpy as np +import pytest +import mdtraj as md + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +_AA_NAMES = ["ALA", "GLY", "VAL", "LEU", "ILE", "PRO", "PHE", "TRP", + "MET", "SER"] +_N_PROTEIN_RESIDUES = 30 +_N_LIPID_MOLECULES = 20 +_N_FRAMES = 100 +_BOX_NM = 8.0 # cubic box side length in nm +_N_RUNS = 2 # number of trajectory files + + +def _build_topology(): + """Build a minimal MDTraj topology: protein + CHOL lipids.""" + top = md.Topology() + + # --- protein chain --- + pchain = top.add_chain() + for i in range(_N_PROTEIN_RESIDUES): + resname = _AA_NAMES[i % len(_AA_NAMES)] + res = top.add_residue(resname, pchain) + top.add_atom("CA", md.element.carbon, res) + top.add_atom("CB", md.element.carbon, res) + top.add_atom("N", md.element.nitrogen, res) + top.add_atom("C", md.element.carbon, res) + top.add_atom("O", md.element.oxygen, res) + + # --- lipid chain --- + lchain = top.add_chain() + for _ in range(_N_LIPID_MOLECULES): + res = top.add_residue("CHOL", lchain) + top.add_atom("C1", md.element.carbon, res) + top.add_atom("C2", md.element.carbon, res) + top.add_atom("O1", md.element.oxygen, res) + + return top + + +def _build_trajectory(top, rng, n_frames=_N_FRAMES): + """ + Build a synthetic trajectory. + + Protein residues are placed in a cluster near the box centre. + Lipid molecules are placed nearby so that some will fall within + the contact cutoffs and generate non-trivial interaction data. 
+ """ + n_atoms = top.n_atoms + xyz = np.zeros((n_frames, n_atoms, 3), dtype=np.float32) + + atoms_per_prot_res = 5 # CA CB N C O + atoms_per_lipid = 3 # C1 C2 O1 + n_prot_atoms = _N_PROTEIN_RESIDUES * atoms_per_prot_res + + centre = _BOX_NM / 2.0 + + for frame in range(n_frames): + # protein — small fluctuations around centre + prot_base = rng.uniform(centre - 1.0, centre + 1.0, + size=(n_prot_atoms, 3)).astype(np.float32) + prot_noise = rng.normal(0, 0.05, size=prot_base.shape).astype(np.float32) + xyz[frame, :n_prot_atoms] = prot_base + prot_noise + + # lipids — placed close enough to protein to generate contacts + for lip_idx in range(_N_LIPID_MOLECULES): + start = n_prot_atoms + lip_idx * atoms_per_lipid + # Alternate between near (within cutoff) and far positions + if lip_idx < _N_LIPID_MOLECULES // 2: + offset = rng.uniform(0.3, 0.6, size=(atoms_per_lipid, 3)) + else: + offset = rng.uniform(1.5, 3.0, size=(atoms_per_lipid, 3)) + sign = rng.choice([-1, 1], size=(atoms_per_lipid, 3)) + xyz[frame, start:start + atoms_per_lipid] = ( + centre + sign * offset.astype(np.float32) + ) + + unitcell_lengths = np.full((n_frames, 3), _BOX_NM, dtype=np.float32) + unitcell_angles = np.full((n_frames, 3), 90.0, dtype=np.float32) + + return md.Trajectory(xyz, top, + unitcell_lengths=unitcell_lengths, + unitcell_angles=unitcell_angles) + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + +@pytest.fixture(scope="session") +def synthetic_traj_dir(tmp_path_factory): + """ + Session-scoped fixture: builds synthetic trajectory files once per test + session and returns a dict with file paths and metadata. 
+ + Returns + ------- + dict with keys: + trajfile_list : list of .xtc paths + topfile_list : list of .gro paths + pdb_file : path to receptor .pdb + lipid : lipid residue name (str) + cutoffs : [lower, upper] cutoffs in nm + """ + rng = np.random.default_rng(42) + top = _build_topology() + base = tmp_path_factory.mktemp("traj_data") + + trajfile_list = [] + topfile_list = [] + + for run_idx in range(_N_RUNS): + run_dir = base / "run{}".format(run_idx + 1) + run_dir.mkdir() + + traj = _build_trajectory(top, rng) + + xtc_path = str(run_dir / "protein_lipids.xtc") + gro_path = str(run_dir / "protein_lipids.gro") + + traj.save_xtc(xtc_path) + traj[0].save_gro(gro_path) + + trajfile_list.append(xtc_path) + topfile_list.append(gro_path) + + # receptor PDB (protein atoms only) + prot_atom_indices = top.select("protein") + prot_traj = traj[0].atom_slice(prot_atom_indices) + pdb_path = str(base / "receptor.pdb") + prot_traj.save_pdb(pdb_path) + + return { + "trajfile_list": trajfile_list, + "topfile_list": topfile_list, + "pdb_file": pdb_path, + "lipid": "CHOL", + "cutoffs": [0.55, 0.8], + } diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..4e88949 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,157 @@ +""" +Pytest fixtures for PyLipID tests. + +Generates small synthetic MDTraj trajectories in a temporary directory so that +CI can run without any committed trajectory data files. 
+
+Topology:
+    - 30 protein residues (cycling through 10 amino-acid types, one chain)
+    - 20 CHOL lipid molecules (3 atoms each: C1, C2, O1)
+    - Cubic box, 8 nm side
+    - 2 trajectory files, 100 frames each
+"""
+
+import os
+import numpy as np
+import pytest
+import mdtraj as md
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+_AA_NAMES = ["ALA", "GLY", "VAL", "LEU", "ILE", "PRO", "PHE", "TRP",
+             "MET", "SER"]
+_N_PROTEIN_RESIDUES = 30
+_N_LIPID_MOLECULES = 20
+_N_FRAMES = 100
+_BOX_NM = 8.0  # cubic box side length in nm
+_N_RUNS = 2  # number of trajectory files
+
+
+def _build_topology():
+    """Build a minimal MDTraj topology: protein + CHOL lipids."""
+    top = md.Topology()
+
+    # --- protein chain ---
+    pchain = top.add_chain()
+    for i in range(_N_PROTEIN_RESIDUES):
+        resname = _AA_NAMES[i % len(_AA_NAMES)]
+        res = top.add_residue(resname, pchain)
+        top.add_atom("CA", md.element.carbon, res)
+        top.add_atom("CB", md.element.carbon, res)
+        top.add_atom("N", md.element.nitrogen, res)
+        top.add_atom("C", md.element.carbon, res)
+        top.add_atom("O", md.element.oxygen, res)
+
+    # --- lipid chain ---
+    lchain = top.add_chain()
+    for _ in range(_N_LIPID_MOLECULES):
+        res = top.add_residue("CHOL", lchain)
+        top.add_atom("C1", md.element.carbon, res)
+        top.add_atom("C2", md.element.carbon, res)
+        top.add_atom("O1", md.element.oxygen, res)
+
+    return top
+
+
+def _build_trajectory(top, rng, n_frames=_N_FRAMES):
+    """
+    Build a synthetic trajectory.
+
+    Protein residues are placed in a cluster near the box centre.
+    Lipid molecules are placed nearby so that some will fall within
+    the contact cutoffs and generate non-trivial interaction data.
+ """ + n_atoms = top.n_atoms + xyz = np.zeros((n_frames, n_atoms, 3), dtype=np.float32) + + atoms_per_prot_res = 5 # CA CB N C O + atoms_per_lipid = 3 # C1 C2 O1 + n_prot_atoms = _N_PROTEIN_RESIDUES * atoms_per_prot_res + + centre = _BOX_NM / 2.0 + + for frame in range(n_frames): + # protein — small fluctuations around centre + prot_base = rng.uniform(centre - 1.0, centre + 1.0, + size=(n_prot_atoms, 3)).astype(np.float32) + prot_noise = rng.normal(0, 0.05, size=prot_base.shape).astype(np.float32) + xyz[frame, :n_prot_atoms] = prot_base + prot_noise + + # lipids — placed close enough to protein to generate contacts + for lip_idx in range(_N_LIPID_MOLECULES): + start = n_prot_atoms + lip_idx * atoms_per_lipid + # Alternate between near (within cutoff) and far positions + if lip_idx < _N_LIPID_MOLECULES // 2: + offset = rng.uniform(0.3, 0.6, size=(atoms_per_lipid, 3)) + else: + offset = rng.uniform(1.5, 3.0, size=(atoms_per_lipid, 3)) + sign = rng.choice([-1, 1], size=(atoms_per_lipid, 3)) + xyz[frame, start:start + atoms_per_lipid] = ( + centre + sign * offset.astype(np.float32) + ) + + unitcell_lengths = np.full((n_frames, 3), _BOX_NM, dtype=np.float32) + unitcell_angles = np.full((n_frames, 3), 90.0, dtype=np.float32) + + return md.Trajectory(xyz, top, + unitcell_lengths=unitcell_lengths, + unitcell_angles=unitcell_angles) + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + +@pytest.fixture(scope="session") +def synthetic_traj_dir(tmp_path_factory): + """ + Session-scoped fixture: builds synthetic trajectory files once per test + session and returns a dict with file paths and metadata. 
+ + Returns + ------- + dict with keys: + trajfile_list : list of .xtc paths + topfile_list : list of .gro paths + pdb_file : path to receptor .pdb + lipid : lipid residue name (str) + cutoffs : [lower, upper] cutoffs in nm + """ + rng = np.random.default_rng(42) + top = _build_topology() + base = tmp_path_factory.mktemp("traj_data") + + trajfile_list = [] + topfile_list = [] + + for run_idx in range(_N_RUNS): + run_dir = base / "run{}".format(run_idx + 1) + run_dir.mkdir() + + traj = _build_trajectory(top, rng) + + xtc_path = str(run_dir / "protein_lipids.xtc") + gro_path = str(run_dir / "protein_lipids.gro") + + traj.save_xtc(xtc_path) + traj[0].save_gro(gro_path) + + trajfile_list.append(xtc_path) + topfile_list.append(gro_path) + + # receptor PDB (protein atoms only) + prot_atom_indices = top.select("protein") + prot_traj = traj[0].atom_slice(prot_atom_indices) + pdb_path = str(base / "receptor.pdb") + prot_traj.save_pdb(pdb_path) + + return { + "trajfile_list": trajfile_list, + "topfile_list": topfile_list, + "pdb_file": pdb_path, + "lipid": "CHOL", + "cutoffs": [0.55, 0.8], + } diff --git a/tests/data/.DS_Store b/tests/data/.DS_Store index 2174845ecddddc7cb04f829b887d1c16c094bab6..f5aae607f02d3060e0f335785e42eb56255d0ae0 100644 GIT binary patch delta 140 zcmZoMXfc=|&e%S&P>hv>fq{WzVxfo>6OaJ{%s|Y@z#zboQk Date: Wed, 11 Mar 2026 16:30:00 +0000 Subject: [PATCH 12/24] Update kinetics.py --- pylipid/func/kinetics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pylipid/func/kinetics.py b/pylipid/func/kinetics.py index d6ce542..d7da794 100644 --- a/pylipid/func/kinetics.py +++ b/pylipid/func/kinetics.py @@ -220,7 +220,7 @@ def _curve_fitting(survival_func, delta_t_list, initial_guess): survival_rates = np.nan_to_num([survival_func[delta_t] for delta_t in delta_t_list]) # y try: popt, pcov = curve_fit(_bi_expo, np.array(delta_t_list), np.array(survival_rates), p0=initial_guess, maxfev=100000) - n_fitted = _bi_expo(np.array(delta_t_list, 
dtype=np.float128), *popt) + n_fitted = _bi_expo(np.array(delta_t_list, dtype=np.longdouble), *popt) r_squared = 1 - np.sum((np.nan_to_num(n_fitted) - np.nan_to_num(survival_rates))**2)/np.sum((survival_rates - np.mean(survival_rates))**2) ks = [abs(k) for k in popt[:2]] From 25d1877bb9defed248210656a277a1906b0d1627 Mon Sep 17 00:00:00 2001 From: Phill Stansfeld Date: Wed, 11 Mar 2026 16:59:54 +0000 Subject: [PATCH 13/24] updates to tests --- pylipid/api/api.py | 3 ++- pylipid/plot/plot1d.py | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/pylipid/api/api.py b/pylipid/api/api.py index 548a25d..64d7bb3 100644 --- a/pylipid/api/api.py +++ b/pylipid/api/api.py @@ -1497,7 +1497,8 @@ def compute_surface_area(self, binding_site_id=None, radii=None, plot_data=True, surface_area_per_residue[nodes] = surface_area_data["Binding Site {}".format(bs_id)].mean() self.dataset["Binding Site Surface Area"] = surface_area_per_residue # plot surface area - if plot_data: + _bs_cols = [c for c in surface_area_data.columns if c != "Time"] + if plot_data and _bs_cols: if save_dir is not None: surface_area_dir = check_dir(save_dir) else: diff --git a/pylipid/plot/plot1d.py b/pylipid/plot/plot1d.py index 0f12870..5ccfefd 100644 --- a/pylipid/plot/plot1d.py +++ b/pylipid/plot/plot1d.py @@ -360,6 +360,8 @@ def plot_surface_area(surface_area, fig_fn, timeunit=None, fig_close=False): row_set = list(set([ind[:2] for ind in surface_area.index])) row_set.sort() col_set = [col for col in surface_area.columns if col != "Time"] + if not col_set or not row_set: + return colors = [next(color_set) for dummy in col_set] fig, axes = plt.subplots(len(row_set), len(col_set), figsize=(len(col_set)*2.4, len(row_set)*1.6), sharex=True, sharey=True) From 0e099bb9dcefb1b293237b274329da7ccd54b6d0 Mon Sep 17 00:00:00 2001 From: Phill Stansfeld Date: Wed, 11 Mar 2026 20:52:30 +0000 Subject: [PATCH 14/24] Update api.py --- pylipid/api/api.py | 25 ++++++++++++++++++++----- 1 file changed, 20 
insertions(+), 5 deletions(-) diff --git a/pylipid/api/api.py b/pylipid/api/api.py index 64d7bb3..0121076 100644 --- a/pylipid/api/api.py +++ b/pylipid/api/api.py @@ -27,7 +27,8 @@ from scipy.sparse import coo_matrix import pandas as pd from tqdm import trange, tqdm -from p_tqdm import p_map +#from p_tqdm import p_map +from multiprocessing import get_context from ..func import cal_contact_residues from ..func import Duration from ..func import cal_lipidcount, cal_occupancy @@ -38,6 +39,20 @@ from ..plot import plot_residue_data, plot_corrcoef, plot_residue_data_logo from ..util import check_dir, write_PDB, write_pymol_script, sparse_corrcoef, get_traj_info +def _spawn_pmap(func, *iterables, num_cpus=None, desc=None): + """Parallel map using multiprocessing with 'spawn' (macOS-safe).""" + iterables = [list(it) for it in iterables] + if not iterables: + return [] + n = len(iterables[0]) + if any(len(it) != n for it in iterables): + raise ValueError("All iterables must have the same length") + if n == 0: + return [] + args_iterable = list(zip(*iterables)) + ctx = get_context("spawn") + with ctx.Pool(processes=num_cpus) as pool: + return list(tqdm(pool.starmap(func, args_iterable), total=n, desc=desc)) class LipidInteraction: def __init__(self, trajfile_list, cutoffs=[0.475, 0.7], lipid="CHOL", topfile_list=None, lipid_atoms=None, @@ -711,7 +726,7 @@ def compute_residue_koff(self, residue_id=None, nbootstrap=10, initial_guess=[1. 
else: fn_set = [False for dummy in selected_residue_id] - returned_values = p_map(partial(calculate_koff_wrapper, t_total=t_total, timestep=timestep, nbootstrap=nbootstrap, + returned_values = _spawn_pmap(partial(calculate_koff_wrapper, t_total=t_total, timestep=timestep, nbootstrap=nbootstrap, initial_guess=initial_guess, plot_data=plot_data, timeunit=self._timeunit, fig_close=fig_close), [np.concatenate(self._duration[residue_id]) for residue_id in selected_residue_id], @@ -1155,7 +1170,7 @@ def compute_site_koff(self, binding_site_id=None, nbootstrap=10, initial_guess=[ fn_set = [os.path.join(BS_dir, f"BS_id{bs_id}.{fig_format}") for bs_id in selected_bs_id] else: fn_set = [False for dummy in selected_bs_id] - returned_values = p_map(partial(calculate_koff_wrapper, t_total=t_total, timestep=timestep, nbootstrap=nbootstrap, + returned_values = _spawn_pmap(partial(calculate_koff_wrapper, t_total=t_total, timestep=timestep, nbootstrap=nbootstrap, initial_guess=initial_guess, plot_data=plot_data, timeunit=self._timeunit, fig_close=fig_close), [np.concatenate(self._duration_BS[bs_id]) for bs_id in selected_bs_id], @@ -1370,7 +1385,7 @@ def analyze_bound_poses(self, binding_site_id=None, n_top_poses=3, n_clusters="a if n_top_poses > 0: # multiprocessing wrapped under p_tqdm - rmsd_set = p_map(partial(analyze_pose_wrapper, protein_atom_indices=protein_atom_indices, + rmsd_set = _spawn_pmap(partial(analyze_pose_wrapper, protein_atom_indices=protein_atom_indices, lipid_atom_indices=lipid_atom_indices, n_top_poses=n_top_poses, pose_dir=pose_dir, atom_weights=atom_weights, kde_bw=kde_bw, pca_component=pca_component, pose_format=pose_format, n_clusters=n_clusters, @@ -1479,7 +1494,7 @@ def compute_surface_area(self, binding_site_id=None, radii=None, plot_data=True, selected_bs_id = np.atleast_1d(np.array(binding_site_id, dtype=int)) if binding_site_id is not None \ else np.arange(len(self._node_list), dtype=int) selected_bs_id_map = {bs_id: self._node_list[bs_id] for bs_id 
in selected_bs_id} - returned_values = p_map(partial(calculate_surface_area_wrapper, binding_site_map=selected_bs_id_map, + returned_values = _spawn_pmap(partial(calculate_surface_area_wrapper, binding_site_map=selected_bs_id_map, nprot=self._nprot, timeunit=self._timeunit, stride=self._stride, dt_traj=self._dt_traj, radii=radii_book), self._trajfile_list, self._topfile_list, np.arange(len(self._trajfile_list), dtype=int), From 298a929ebf0b5c08d4d046881ac44b5b4fe381d2 Mon Sep 17 00:00:00 2001 From: Phill Stansfeld Date: Wed, 11 Mar 2026 21:16:34 +0000 Subject: [PATCH 15/24] Update __init__.py --- pylipid/__init__.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/pylipid/__init__.py b/pylipid/__init__.py index 0a534fd..c128350 100644 --- a/pylipid/__init__.py +++ b/pylipid/__init__.py @@ -14,5 +14,25 @@ # copies or substantial portions of the Software. ############################################################################## +# --- BEGIN: macOS-safe bootstrap ------------------------------------------- +# Ensures PyLipID runs safely on macOS by: +# 1) forcing multiprocessing to use 'spawn' +# 2) preventing GUI backends from loading in worker processes +import multiprocessing as _mp + +try: + # Safe on macOS — harmless on Linux/Windows + _mp.set_start_method("spawn", force=True) +except RuntimeError: + # Already set or protected — ignore + pass + +# Use a non-interactive backend so importing PyLipID never loads macOS AppKit + import matplotlib + matplotlib.use("Agg", force=True) +except Exception: + pass +# --- END: macOS-safe bootstrap --------------------------------------------- + from ._version import __version__ from .api import LipidInteraction From 93634ef24c51258c7758a4e2904422fc1a89a6ff Mon Sep 17 00:00:00 2001 From: Phill Stansfeld Date: Wed, 11 Mar 2026 21:17:26 +0000 Subject: [PATCH 16/24] Update __init__.py --- pylipid/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pylipid/__init__.py b/pylipid/__init__.py 
index c128350..72a0e4c 100644 --- a/pylipid/__init__.py +++ b/pylipid/__init__.py @@ -28,6 +28,7 @@ pass # Use a non-interactive backend so importing PyLipID never loads macOS AppKit +try: import matplotlib matplotlib.use("Agg", force=True) except Exception: From 8dbe28a24bb49f6f6815e4d75481482bb5454344 Mon Sep 17 00:00:00 2001 From: Phill Stansfeld Date: Thu, 12 Mar 2026 20:13:02 +0000 Subject: [PATCH 17/24] Update directory.py --- pylipid/util/directory.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pylipid/util/directory.py b/pylipid/util/directory.py index 265f4d2..616a0ca 100644 --- a/pylipid/util/directory.py +++ b/pylipid/util/directory.py @@ -38,6 +38,6 @@ def check_dir(directory=None, suffix=None, print_info=True): if not os.path.isdir(directory): os.makedirs(directory) if print_info: - print("Creating new director: {}".format(directory)) + print(f"Creating new directory: {path}") return directory From 2d736ae0a87da3e583c2a515939f47a0c33d4b8c Mon Sep 17 00:00:00 2001 From: Phill Stansfeld Date: Thu, 12 Mar 2026 20:35:34 +0000 Subject: [PATCH 18/24] Update directory.py --- pylipid/util/directory.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pylipid/util/directory.py b/pylipid/util/directory.py index 616a0ca..b108e3f 100644 --- a/pylipid/util/directory.py +++ b/pylipid/util/directory.py @@ -38,6 +38,6 @@ def check_dir(directory=None, suffix=None, print_info=True): if not os.path.isdir(directory): os.makedirs(directory) if print_info: - print(f"Creating new directory: {path}") + print("Creating new directory: {}".format(path)) return directory From 8e639492cfb7e2594b47611bc0403c45acc4f58f Mon Sep 17 00:00:00 2001 From: Phill Stansfeld Date: Thu, 12 Mar 2026 20:39:55 +0000 Subject: [PATCH 19/24] Update directory.py --- pylipid/util/directory.py | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/pylipid/util/directory.py b/pylipid/util/directory.py index b108e3f..114dce2 
100644 --- a/pylipid/util/directory.py +++ b/pylipid/util/directory.py @@ -21,23 +21,18 @@ __all__ = ["check_dir"] -def check_dir(directory=None, suffix=None, print_info=True): - """Check directory +def check_dir(root, name=None): + """ + Create a directory under 'root'. If 'name' is provided, the directory + will be root/name. Returns the full path. + """ + import os - This function will combine the suffix with the given directory (or the current - working directory if None is given) to generate a new directory name, and create a - directory with this name if it does not exit. + path = os.path.join(root, name) if name is not None else root - """ - if directory is None: - directory = os.getcwd() - else: - directory = os.path.abspath(directory) - if suffix is not None: - directory = os.path.join(directory, suffix) - if not os.path.isdir(directory): - os.makedirs(directory) - if print_info: - print("Creating new directory: {}".format(path)) - return directory + if not os.path.isdir(path): + print(f"Creating new directory: {path}") + os.makedirs(path, exist_ok=True) + + return path From c03906db80e3600397e9c7c39b2510f20bcc082e Mon Sep 17 00:00:00 2001 From: Phill Stansfeld Date: Thu, 12 Mar 2026 21:12:43 +0000 Subject: [PATCH 20/24] Update api.py --- pylipid/api/api.py | 338 +++++++++++++++++---------------------------- 1 file changed, 129 insertions(+), 209 deletions(-) diff --git a/pylipid/api/api.py b/pylipid/api/api.py index 0121076..3342fa3 100644 --- a/pylipid/api/api.py +++ b/pylipid/api/api.py @@ -1198,215 +1198,135 @@ def compute_site_koff(self, binding_site_id=None, nbootstrap=10, initial_guess=[ [self._res_time_BS[bs_id] for bs_id in selected_bs_id] def analyze_bound_poses(self, binding_site_id=None, n_top_poses=3, n_clusters="auto", pose_format="gro", - score_weights=None, kde_bw=0.15, pca_component=0.90, plot_rmsd=True, save_dir=None, - eps=None, min_samples=None, metric="euclidean", - fig_close=False, fig_format="pdf", num_cpus=None): - r"""Analyze bound 
poses for binding sites. - - This function can find representative bound poses, cluster the bound poses and calculate pose RMSD for - binding sites. - - If ``n_top_poses`` is an integer larger than 0, this method will find the representative bound poses for the specified - binding sites. To do so, it evaluates all the bound poses in a binding site using a density-based scoring function - and ranks the poses using based on the scores. The scoring function is defined as: - - .. math:: - \text { score }=\sum_{i} W_{i} \cdot \hat{f}_{i, H}(D) - - where :math:`W_{i}` is the weight given to atom i of the lipid molecule, H is the bandwidth and - :math:`\hat{f}_{i, H}(D)` is a multivariate kernel density etimation of the position of atom i in the specified - binding site. :math:`\hat{f}_{i, H}(D)` is calculated from all the bound lipid poses in that binding site. - The position of atom i is a `p`-variant vector, :math:`\left[D_{i 1}, D_{i 2}, \ldots, D_{i p}\right]` where - :math:`D_{i p}` is the minimum distance to the residue `p` of the binding site. The multivariant kernel density - is estimated by `KDEMultivariate - `_ - provided by Statsmodels. Higher weights can be given to e.g. the headgroup atoms of phospholipids, to generate - better defined binding poses, but all lipid atoms are weighted equally by default. The use of relative positions - of lipid atoms in their binding site makes the analysis independent of the conformational changes in the rest - part of the protein. - - If ``n_clusters`` is given an integer larger than 0, this method will cluster the lipid bound poses in the specified - binding site using `KMeans `_ - provided by scikit. The KMeans cluster separates the samples into `n` clusters of equal variances, via minimizing - the `inertia`, which is defined as: - - .. math:: - \sum_{i=0}^{n} \min _{u_{i} \in C}\left(\left\|x_{i}-u_{i}\right\|^{2}\right) - - where :math:`u_{i}` is the `centroid` of cluster i. 
KMeans scales well with large dataset but performs poorly - with clusters of varying sizes and density, which are often the case for lipid poses in a binding site. - - If ``n_clusters`` is set to `auto`, this method will cluster the bound poses using a density-based cluster - `DBSCAN `_ provided by scikit. - DBSCAN finds clusters of core samples of high density. A sample point is a core sample if at least `min_samples` - points are within distance :math:`\varepsilon` of it. A cluster is defined as a set of sample points that are - mutually density-connected and density-reachable, i.e. there is a path - :math:`\left\langle p_{1}, p_{2}, \ldots, p_{n}\right\rangle` where each :math:`p_{i+1}` is within distance - :math:`\varepsilon` of :math:`p_{i}` for any two p in the two. The values of `min_samples` and :math:`\varepsilon` - determine the performance of this cluster. If None, `min_samples` takes the value of 2 * ndim. If - :math:`\varepsilon` is None, it is set as the value at the knee of the k-distance plot. - - For writing out the cluster poses, this method will randomly select one pose from each cluster in the case of - using KMeans or one from the core samples of each cluster when DBSCAN is used, and writes the selected lipid - pose with the protein conformation to which it binds using MDTraj, in the provided pose format. - - The root mean square deviation (RMSD) of a lipid bound pose in a binding site is calculated from the relative - position of the pose in the binding site compared to the average position of the bound poses. Thus, the pose - RMSD is defined as: - - .. 
math:: - RMSD=\sqrt{\frac{\sum_{i=0}^{N} \sum_{j=0}^{M}\left(D_{i j}-\bar{D}_{j}\right)^{2}}{N}} - - where :math:`D_{i j}` is the distance of atom `i` to the residue `j` in the binding site; :math:`\bar{D}_{j}` is the - average distance of atom `i` from all bound poses in the binding site to residue `j`; `N` is the number of atoms in - the lipid molecule and `M` is the number of residues in the binding site. - - Parameters - ---------- - binding_site_id : int or list of int, default=None - The binding site ID used in PyLipID. This ID is the index in the binding site node list that is - calculated by the method ``compute_binding_nodes``. The ID of the N-th binding site is (N-1). If None, - the contact duration of all binding sites are calculated. - - n_top_poses : int, default=3 - Number of representative bound poses written out for the selected binding site. - - n_clusters : int or 'auto' - Number of clusters to form for bound poses of the selected binding site. If ``n_clusters`` is set to 'auto'`, the - density-based clusterer DBSCAN will be used. If ``n_clusters`` is given a non-zero integer, KMeans is used. - - pose_format : str, default="gro" - The coordinate format the representative poses and clsutered poses are saved with. Support the formats - that are included in MDtraj.save(). - - score_weights : dict or None, default=None - The weights given to atoms in the scoring function for finding the representative bound poses. It should in - the format of a Python dictionary {atom name: weight}. The atom name should be consisten with the topology. - By default, all atoms in the lipid molecule are weighted equally. - - kde_bw : scalar, default=0.15 - The bandwidth for the Gaussian kernel. Used in the density estimation of the lipid atom coordinates in the binding - site. Used by the function - `KDEMultivariate `_ . - - pca_component : int, float or ‘mle’, default=0.90 - The PCA used to decrease the dimensions of lipid atom coordinates. 
The coordinate of a lipid atom in - the binding site is expressed as a distance vector of the minimum distances to the residues in that binding site, - i.e. :math:`[D_{i 1}, D_{i 2}, .., D_{i p}]`. This can be in high dimensions. Hence, PCA is used on this distance - vector prior to calculation of the density. This PCA is carried out by - `PCA `_ in sci-kit. - - plot_rmsd : bool, default=True - Plot the binding site RMSDs in a violinplot. - - eps : float or None, default=None - The minimum neighbour distance used by - `DBSCAN `_ - if None, the value will determined from as the elbow point of the sorted minimum neighbour distance - of all the data points. - - min_samples : int or None, default=None - The minimum number of samples to be considered as core samples used by - `DBSCAN `_ . - If None, the value will be automatically determined based on the size of data. - - metric : str, default='euclidean' - The metric used to calculated neighbour distance used by - `DBSCAN `_ . - Default is the Euclidean distance. - - fig_close : bool, default=False - This parameter control whether to close the plotted figures using plt.close(). It can save memory if - many figures are generated. - - fig_format : str, default="pdf" - Figure format. Support formats included in matplotlib.pyplot. - - num_cpus : int or None default=None - The number of CPUs used for the tasks of ranking the poses and clustering poses. Python multiprocessing deployed by - `p_tqdm `_ is used to speed up these calculations. - - save_dir : str or None, default=None - The root directory for saving the pose analysis results. If None, the root directory set at the initiation of - ``LipidInteraction`` will be used. The representative poses and clustered poses will be stored in the directory - of Bound_Poses_{lipid} under the root directory. - - Returns - ------- - pose_pool : dict - Coordinates of the bound poses for the selected binding sites stored in a python dictionary - {binding_site_id: pose coordinates}. 
The poses coordinates include lipid coordinates and those of the receptor - at the time the pose was bound. The pose coordinates are stored in a - `mdtraj.Trajectory `_ object. - - rmsd_data : pandas.DataFrame - Bound poses RMSDs are stored by columns with column name of binding site id. - - See Also - -------- - pylipid.func.collect_bound_poses - Collect bound pose coordinates from trajectories. - pylipid.func.vectorize_poses - Convert bound poses into distance vectors. - pylipid.func.calculate_scores - Score the bound poses based on the probability density function of the position of lipid atoms - pylipid.func.analyze_pose_wrapper - A wrapper function that ranks poses, clusters poses and calculates pose RMSD - - """ - self._check_calculation("Binding Site ID", self.compute_binding_nodes, print_data=False) - - pose_dir = check_dir(save_dir, "Bound_Poses_{}".format(self._lipid)) if save_dir is not None \ - else check_dir(self._save_dir, "Bound_Poses_{}".format(self._lipid)) - if "Binding Site Pose RMSD" in self.dataset.columns: - pose_rmsd_per_residue = self.dataset["Binding Site Pose RMSD"].values - else: - pose_rmsd_per_residue = np.zeros(self._nresi_per_protein) - if binding_site_id is not None: - selected_bs_id = np.atleast_1d(binding_site_id) - else: - selected_bs_id = np.arange(len(self._node_list), dtype=int) - - # store bound lipid poses - selected_bs_map = {bs_id: self._node_list[bs_id] for bs_id in selected_bs_id} - pose_traj, pose_info = collect_bound_poses(selected_bs_map, self._contact_residues_low, self._trajfile_list, - self._topfile_list, self._lipid, self._protein_ref, self._lipid_ref, - stride=self._stride, nprot=self._nprot) - protein_atom_indices = [[atom.index for atom in residue.atoms] - for residue in self._protein_ref.top.residues] - lipid_atom_indices = [self._protein_ref.n_atoms + atom_idx - for atom_idx in np.arange(self._lipid_ref.n_atoms)] - atom_weights = {atom_idx: 1 for atom_idx in np.arange(self._lipid_ref.n_atoms)} - if 
score_weights is not None: - translate = {atom_idx: score_weights[self._lipid_ref.top.atom(atom_idx).name] - for atom_idx in np.arange(self._lipid_ref.n_atoms) - if self._lipid_ref.top.atom(atom_idx).name in score_weights.keys()} - atom_weights.update(translate) - - if n_top_poses > 0: - # multiprocessing wrapped under p_tqdm - rmsd_set = _spawn_pmap(partial(analyze_pose_wrapper, protein_atom_indices=protein_atom_indices, - lipid_atom_indices=lipid_atom_indices, n_top_poses=n_top_poses, - pose_dir=pose_dir, atom_weights=atom_weights, kde_bw=kde_bw, - pca_component=pca_component, pose_format=pose_format, n_clusters=n_clusters, - eps=eps, min_samples=min_samples, metric=metric, - trajfile_list=self._trajfile_list), - selected_bs_id, [pose_traj[bs_id] for bs_id in selected_bs_id], - [self._node_list[bs_id] for bs_id in selected_bs_id], - [pose_info[bs_id] for bs_id in selected_bs_id], num_cpus=num_cpus, desc="ANALYZE BOUND POSES") - RMSD_set = {} - for bs_id, rmsd in zip(selected_bs_id, rmsd_set): - RMSD_set["Binding Site {}".format(bs_id)] = rmsd - pose_rmsd_per_residue[self._node_list[bs_id]] = np.mean(RMSD_set["Binding Site {}".format(bs_id)]) - # update dataset - self.dataset["Binding Site Pose RMSD"] = pose_rmsd_per_residue - pose_rmsd_data = pd.DataFrame( - dict([(bs_label, pd.Series(rmsd_set)) for bs_label, rmsd_set in RMSD_set.items()])) - # plot RMSD - if plot_rmsd and n_top_poses > 0: - plot_binding_site_data(pose_rmsd_data, os.path.join(pose_dir, f"Pose_RMSD_violinplot.{fig_format}"), - title="{}".format(self._lipid), ylabel="RMSD (nm)", fig_close=fig_close) - return pose_traj, pose_rmsd_data + score_weights=None, kde_bw=0.15, pca_component=0.90, plot_rmsd=True, save_dir=None, + eps=None, min_samples=None, metric="euclidean", + fig_close=False, fig_format="pdf", num_cpus=None): + r"""Analyze bound poses for binding sites. 
+ + (Docstring unchanged — omitted here for brevity) + """ + # Ensure required upstream steps are done + self._check_calculation("Binding Site ID", self.compute_binding_nodes, print_data=False) + + # Where to save pose outputs + pose_dir = check_dir(save_dir, "Bound_Poses_{}".format(self._lipid)) if save_dir is not None \ + else check_dir(self._save_dir, "Bound_Poses_{}".format(self._lipid)) + + # Initialize/extend per-residue RMSD column + if "Binding Site Pose RMSD" in self.dataset.columns: + pose_rmsd_per_residue = self.dataset["Binding Site Pose RMSD"].values + else: + pose_rmsd_per_residue = np.zeros(self._nresi_per_protein) + + # Which binding sites to process + if binding_site_id is not None: + selected_bs_id = np.atleast_1d(binding_site_id) + else: + selected_bs_id = np.arange(len(self._node_list), dtype=int) + + # Map of selected binding sites -> node lists + selected_bs_map = {bs_id: self._node_list[bs_id] for bs_id in selected_bs_id} + + # --- Heartbeat: announce pose collection -------------------------------- + print(f"[PyLipID] Gathering bound poses for analysis…") + print(f"[PyLipID] Binding sites to analyse: {len(selected_bs_id)}") + + # Collect bound poses and metadata + pose_traj, pose_info = collect_bound_poses( + selected_bs_map, + self._contact_residues_low, + self._trajfile_list, + self._topfile_list, + self._lipid, + self._protein_ref, + self._lipid_ref, + stride=self._stride, + nprot=self._nprot + ) + + # Quick per-site pose counts (helps reassure users on large jobs) + for bs_id in selected_bs_id: + try: + n_poses = len(pose_traj[bs_id]) + except Exception: + n_poses = 0 + print(f"[PyLipID] - Binding Site {bs_id}: {n_poses} poses") + + # Build atom index lists used by the analyzers + protein_atom_indices = [[atom.index for atom in residue.atoms] + for residue in self._protein_ref.top.residues] + lipid_atom_indices = [self._protein_ref.n_atoms + atom_idx + for atom_idx in np.arange(self._lipid_ref.n_atoms)] + + # Atom weights (default 1.0 
each; allow user override by name) + atom_weights = {atom_idx: 1 for atom_idx in np.arange(self._lipid_ref.n_atoms)} + if score_weights is not None: + translate = {atom_idx: score_weights[self._lipid_ref.top.atom(atom_idx).name] + for atom_idx in np.arange(self._lipid_ref.n_atoms) + if self._lipid_Ref.top.atom(atom_idx).name in score_weights.keys()} + atom_weights.update(translate) + + RMSD_set = {} + + # If n_top_poses == 0, we skip the heavy scoring/clustering entirely + if n_top_poses > 0: + # --- Heartbeat: announce the heavy parallel step -------------------- + # Number of workers actually used + import os + n_workers = num_cpus if (isinstance(num_cpus, int) and num_cpus > 0) else (os.cpu_count() or 1) + n_workers = min(n_workers, max(1, len(selected_bs_id))) + print(f"[PyLipID] Starting multiprocessing for bound pose scoring/clustering… " + f"(sites={len(selected_bs_id)}, workers={n_workers})") + print(f"[PyLipID] Tip: reduce num_cpus or set n_top_poses=0 / n_clusters=0 to speed up.") + + # Prepare the task closure + task = partial(analyze_pose_wrapper, + protein_atom_indices=protein_atom_indices, + lipid_atom_indices=lipid_atom_indices, + n_top_poses=n_top_poses, + pose_dir=pose_dir, + atom_weights=atom_weights, + kde_bw=kde_bw, + pca_component=pca_component, + pose_format=pose_format, + n_clusters=n_clusters, + eps=eps, + min_samples=min_samples, + metric=metric, + trajfile_list=self._trajfile_list) + + # Run in parallel with a visible tqdm progress bar + print("[PyLipID] Calculating binding-site koff values… this may take some time.") + rmsd_set = _spawn_pmap(task, + selected_bs_id, + [pose_traj[bs_id] for bs_id in selected_bs_id], + [self._node_list[bs_id] for bs_id in selected_bs_id], + [pose_info[bs_id] for bs_id in selected_bs_id], + num_cpus=num_cpus, + desc="ANALYZE BOUND POSES") + + # Collect RMSD output per site and update per-residue RMSD column + for bs_id, rmsd in zip(selected_bs_id, rmsd_set): + RMSD_set[f"Binding Site {bs_id}"] = rmsd + 
pose_rmsd_per_residue[self._node_list[bs_id]] = np.mean(RMSD_set[f"Binding Site {bs_id}"]) + + # Update dataset + self.dataset["Binding Site Pose RMSD"] = pose_rmsd_per_residue + pose_rmsd_data = pd.DataFrame(dict([(bs_label, pd.Series(rmsd_set)) + for bs_label, rmsd_set in RMSD_set.items()])) + + # Plot RMSD violin (lazy import to avoid GUI in workers) + if plot_rmsd and n_top_poses > 0 and not pose_rmsd_data.empty: + from ..plot import plot_binding_site_data # lazy import + plot_binding_site_data( + pose_rmsd_data, + os.path.join(pose_dir, f"Pose_RMSD_violinplot.{fig_format}"), + title=f"{self._lipid}", + ylabel="RMSD (nm)", + fig_close=fig_close + ) + + return pose_traj, pose_rmsd_data def compute_surface_area(self, binding_site_id=None, radii=None, plot_data=True, save_dir=None, fig_close=False, fig_format="pdf", num_cpus=None): From 5d6de35eecf8d5ac91743f083680b5942e5f66fc Mon Sep 17 00:00:00 2001 From: Phill Stansfeld Date: Thu, 12 Mar 2026 21:17:05 +0000 Subject: [PATCH 21/24] Update api.py --- pylipid/api/api.py | 249 ++++++++++++++++++++++----------------------- 1 file changed, 124 insertions(+), 125 deletions(-) diff --git a/pylipid/api/api.py b/pylipid/api/api.py index 3342fa3..ba33b44 100644 --- a/pylipid/api/api.py +++ b/pylipid/api/api.py @@ -1201,132 +1201,131 @@ def analyze_bound_poses(self, binding_site_id=None, n_top_poses=3, n_clusters="a score_weights=None, kde_bw=0.15, pca_component=0.90, plot_rmsd=True, save_dir=None, eps=None, min_samples=None, metric="euclidean", fig_close=False, fig_format="pdf", num_cpus=None): - r"""Analyze bound poses for binding sites. 
- - (Docstring unchanged — omitted here for brevity) - """ - # Ensure required upstream steps are done - self._check_calculation("Binding Site ID", self.compute_binding_nodes, print_data=False) - - # Where to save pose outputs - pose_dir = check_dir(save_dir, "Bound_Poses_{}".format(self._lipid)) if save_dir is not None \ - else check_dir(self._save_dir, "Bound_Poses_{}".format(self._lipid)) - - # Initialize/extend per-residue RMSD column - if "Binding Site Pose RMSD" in self.dataset.columns: - pose_rmsd_per_residue = self.dataset["Binding Site Pose RMSD"].values - else: - pose_rmsd_per_residue = np.zeros(self._nresi_per_protein) - - # Which binding sites to process - if binding_site_id is not None: - selected_bs_id = np.atleast_1d(binding_site_id) - else: - selected_bs_id = np.arange(len(self._node_list), dtype=int) - - # Map of selected binding sites -> node lists - selected_bs_map = {bs_id: self._node_list[bs_id] for bs_id in selected_bs_id} - - # --- Heartbeat: announce pose collection -------------------------------- - print(f"[PyLipID] Gathering bound poses for analysis…") - print(f"[PyLipID] Binding sites to analyse: {len(selected_bs_id)}") - - # Collect bound poses and metadata - pose_traj, pose_info = collect_bound_poses( - selected_bs_map, - self._contact_residues_low, - self._trajfile_list, - self._topfile_list, - self._lipid, - self._protein_ref, - self._lipid_ref, - stride=self._stride, - nprot=self._nprot - ) - - # Quick per-site pose counts (helps reassure users on large jobs) - for bs_id in selected_bs_id: - try: - n_poses = len(pose_traj[bs_id]) - except Exception: - n_poses = 0 - print(f"[PyLipID] - Binding Site {bs_id}: {n_poses} poses") - - # Build atom index lists used by the analyzers - protein_atom_indices = [[atom.index for atom in residue.atoms] - for residue in self._protein_ref.top.residues] - lipid_atom_indices = [self._protein_ref.n_atoms + atom_idx - for atom_idx in np.arange(self._lipid_ref.n_atoms)] - - # Atom weights (default 1.0 
each; allow user override by name) - atom_weights = {atom_idx: 1 for atom_idx in np.arange(self._lipid_ref.n_atoms)} - if score_weights is not None: - translate = {atom_idx: score_weights[self._lipid_ref.top.atom(atom_idx).name] - for atom_idx in np.arange(self._lipid_ref.n_atoms) - if self._lipid_Ref.top.atom(atom_idx).name in score_weights.keys()} - atom_weights.update(translate) - - RMSD_set = {} - - # If n_top_poses == 0, we skip the heavy scoring/clustering entirely - if n_top_poses > 0: - # --- Heartbeat: announce the heavy parallel step -------------------- - # Number of workers actually used - import os - n_workers = num_cpus if (isinstance(num_cpus, int) and num_cpus > 0) else (os.cpu_count() or 1) - n_workers = min(n_workers, max(1, len(selected_bs_id))) - print(f"[PyLipID] Starting multiprocessing for bound pose scoring/clustering… " - f"(sites={len(selected_bs_id)}, workers={n_workers})") - print(f"[PyLipID] Tip: reduce num_cpus or set n_top_poses=0 / n_clusters=0 to speed up.") - - # Prepare the task closure - task = partial(analyze_pose_wrapper, - protein_atom_indices=protein_atom_indices, - lipid_atom_indices=lipid_atom_indices, - n_top_poses=n_top_poses, - pose_dir=pose_dir, - atom_weights=atom_weights, - kde_bw=kde_bw, - pca_component=pca_component, - pose_format=pose_format, - n_clusters=n_clusters, - eps=eps, - min_samples=min_samples, - metric=metric, - trajfile_list=self._trajfile_list) - - # Run in parallel with a visible tqdm progress bar - print("[PyLipID] Calculating binding-site koff values… this may take some time.") - rmsd_set = _spawn_pmap(task, - selected_bs_id, - [pose_traj[bs_id] for bs_id in selected_bs_id], - [self._node_list[bs_id] for bs_id in selected_bs_id], - [pose_info[bs_id] for bs_id in selected_bs_id], - num_cpus=num_cpus, - desc="ANALYZE BOUND POSES") - - # Collect RMSD output per site and update per-residue RMSD column - for bs_id, rmsd in zip(selected_bs_id, rmsd_set): - RMSD_set[f"Binding Site {bs_id}"] = rmsd - 
pose_rmsd_per_residue[self._node_list[bs_id]] = np.mean(RMSD_set[f"Binding Site {bs_id}"]) - - # Update dataset - self.dataset["Binding Site Pose RMSD"] = pose_rmsd_per_residue - pose_rmsd_data = pd.DataFrame(dict([(bs_label, pd.Series(rmsd_set)) - for bs_label, rmsd_set in RMSD_set.items()])) - - # Plot RMSD violin (lazy import to avoid GUI in workers) - if plot_rmsd and n_top_poses > 0 and not pose_rmsd_data.empty: - from ..plot import plot_binding_site_data # lazy import - plot_binding_site_data( - pose_rmsd_data, - os.path.join(pose_dir, f"Pose_RMSD_violinplot.{fig_format}"), - title=f"{self._lipid}", - ylabel="RMSD (nm)", - fig_close=fig_close + r"""Analyze bound poses for binding sites. + + (Docstring unchanged — omitted here for brevity) + """ + # Ensure required upstream steps are done + self._check_calculation("Binding Site ID", self.compute_binding_nodes, print_data=False) + + # Where to save pose outputs + pose_dir = check_dir(save_dir, "Bound_Poses_{}".format(self._lipid)) if save_dir is not None \ + else check_dir(self._save_dir, "Bound_Poses_{}".format(self._lipid)) + + # Initialize/extend per-residue RMSD column + if "Binding Site Pose RMSD" in self.dataset.columns: + pose_rmsd_per_residue = self.dataset["Binding Site Pose RMSD"].values + else: + pose_rmsd_per_residue = np.zeros(self._nresi_per_protein) + + # Which binding sites to process + if binding_site_id is not None: + selected_bs_id = np.atleast_1d(binding_site_id) + else: + selected_bs_id = np.arange(len(self._node_list), dtype=int) + + # Map of selected binding sites -> node lists + selected_bs_map = {bs_id: self._node_list[bs_id] for bs_id in selected_bs_id} + + # --- Heartbeat: announce pose collection -------------------------------- + print(f"[PyLipID] Gathering bound poses for analysis…") + print(f"[PyLipID] Binding sites to analyse: {len(selected_bs_id)}") + + # Collect bound poses and metadata + pose_traj, pose_info = collect_bound_poses( + selected_bs_map, + 
self._contact_residues_low, + self._trajfile_list, + self._topfile_list, + self._lipid, + self._protein_ref, + self._lipid_ref, + stride=self._stride, + nprot=self._nprot ) - - return pose_traj, pose_rmsd_data + + # Quick per-site pose counts (helps reassure users on large jobs) + for bs_id in selected_bs_id: + try: + n_poses = len(pose_traj[bs_id]) + except Exception: + n_poses = 0 + print(f"[PyLipID] - Binding Site {bs_id}: {n_poses} poses") + + # Build atom index lists used by the analyzers + protein_atom_indices = [[atom.index for atom in residue.atoms] + for residue in self._protein_ref.top.residues] + lipid_atom_indices = [self._protein_ref.n_atoms + atom_idx + for atom_idx in np.arange(self._lipid_ref.n_atoms)] + + # Atom weights (default 1.0 each; allow user override by name) + atom_weights = {atom_idx: 1 for atom_idx in np.arange(self._lipid_ref.n_atoms)} + if score_weights is not None: + translate = {atom_idx: score_weights[self._lipid_ref.top.atom(atom_idx).name] + for atom_idx in np.arange(self._lipid_ref.n_atoms) + if self._lipid_Ref.top.atom(atom_idx).name in score_weights.keys()} + atom_weights.update(translate) + + RMSD_set = {} + + # If n_top_poses == 0, we skip the heavy scoring/clustering entirely + if n_top_poses > 0: + # --- Heartbeat: announce the heavy parallel step -------------------- + # Number of workers actually used + import os + n_workers = num_cpus if (isinstance(num_cpus, int) and num_cpus > 0) else (os.cpu_count() or 1) + n_workers = min(n_workers, max(1, len(selected_bs_id))) + print(f"[PyLipID] Starting multiprocessing for bound pose scoring/clustering… " + f"(sites={len(selected_bs_id)}, workers={n_workers})") + print(f"[PyLipID] Tip: reduce num_cpus or set n_top_poses=0 / n_clusters=0 to speed up.") + + # Prepare the task closure + task = partial(analyze_pose_wrapper, + protein_atom_indices=protein_atom_indices, + lipid_atom_indices=lipid_atom_indices, + n_top_poses=n_top_poses, + pose_dir=pose_dir, + atom_weights=atom_weights, 
+ kde_bw=kde_bw, + pca_component=pca_component, + pose_format=pose_format, + n_clusters=n_clusters, + eps=eps, + min_samples=min_samples, + metric=metric, + trajfile_list=self._trajfile_list) + + # Run in parallel with a visible tqdm progress bar + rmsd_set = _spawn_pmap(task, + selected_bs_id, + [pose_traj[bs_id] for bs_id in selected_bs_id], + [self._node_list[bs_id] for bs_id in selected_bs_id], + [pose_info[bs_id] for bs_id in selected_bs_id], + num_cpus=num_cpus, + desc="ANALYZE BOUND POSES") + + # Collect RMSD output per site and update per-residue RMSD column + for bs_id, rmsd in zip(selected_bs_id, rmsd_set): + RMSD_set[f"Binding Site {bs_id}"] = rmsd + pose_rmsd_per_residue[self._node_list[bs_id]] = np.mean(RMSD_set[f"Binding Site {bs_id}"]) + + # Update dataset + self.dataset["Binding Site Pose RMSD"] = pose_rmsd_per_residue + pose_rmsd_data = pd.DataFrame(dict([(bs_label, pd.Series(rmsd_set)) + for bs_label, rmsd_set in RMSD_set.items()])) + + # Plot RMSD violin (lazy import to avoid GUI in workers) + if plot_rmsd and n_top_poses > 0 and not pose_rmsd_data.empty: + from ..plot import plot_binding_site_data # lazy import + plot_binding_site_data( + pose_rmsd_data, + os.path.join(pose_dir, f"Pose_RMSD_violinplot.{fig_format}"), + title=f"{self._lipid}", + ylabel="RMSD (nm)", + fig_close=fig_close + ) + + return pose_traj, pose_rmsd_data def compute_surface_area(self, binding_site_id=None, radii=None, plot_data=True, save_dir=None, fig_close=False, fig_format="pdf", num_cpus=None): From c4ee8d833ff90502e8c3de856ccf7f347f2a7b66 Mon Sep 17 00:00:00 2001 From: Phill Stansfeld Date: Thu, 12 Mar 2026 21:48:39 +0000 Subject: [PATCH 22/24] Update directory.py --- pylipid/util/directory.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pylipid/util/directory.py b/pylipid/util/directory.py index 114dce2..0b2f9df 100644 --- a/pylipid/util/directory.py +++ b/pylipid/util/directory.py @@ -21,17 +21,19 @@ __all__ = ["check_dir"] -def 
check_dir(root, name=None): +def check_dir(root, name=None, print_info=True): """ - Create a directory under 'root'. If 'name' is provided, the directory - will be root/name. Returns the full path. + Create (if needed) and return a directory under 'root'. + If 'name' is provided, the directory will be 'root/name'. + Set print_info=False to suppress the creation message. """ import os path = os.path.join(root, name) if name is not None else root if not os.path.isdir(path): - print(f"Creating new directory: {path}") + if print_info: + print(f"Creating new directory: {path}") os.makedirs(path, exist_ok=True) return path From 7f10a3681241c3cb1f5324332b5cbe9b05b99495 Mon Sep 17 00:00:00 2001 From: Phill Stansfeld Date: Fri, 13 Mar 2026 07:04:00 +0000 Subject: [PATCH 23/24] Update api.py --- pylipid/api/api.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pylipid/api/api.py b/pylipid/api/api.py index ba33b44..1c06b08 100644 --- a/pylipid/api/api.py +++ b/pylipid/api/api.py @@ -1277,7 +1277,7 @@ def analyze_bound_poses(self, binding_site_id=None, n_top_poses=3, n_clusters="a n_workers = min(n_workers, max(1, len(selected_bs_id))) print(f"[PyLipID] Starting multiprocessing for bound pose scoring/clustering… " f"(sites={len(selected_bs_id)}, workers={n_workers})") - print(f"[PyLipID] Tip: reduce num_cpus or set n_top_poses=0 / n_clusters=0 to speed up.") + #print(f"[PyLipID] Reducing num_cpus or set n_top_poses=0 / n_clusters=0 to speed up.") # Prepare the task closure task = partial(analyze_pose_wrapper, @@ -1296,6 +1296,7 @@ def analyze_bound_poses(self, binding_site_id=None, n_top_poses=3, n_clusters="a trajfile_list=self._trajfile_list) # Run in parallel with a visible tqdm progress bar + print("[PyLipID] Calculating binding-site koff values… this may take some time.") rmsd_set = _spawn_pmap(task, selected_bs_id, [pose_traj[bs_id] for bs_id in selected_bs_id], @@ -1303,7 +1304,7 @@ def analyze_bound_poses(self, binding_site_id=None, 
n_top_poses=3, n_clusters="a [pose_info[bs_id] for bs_id in selected_bs_id], num_cpus=num_cpus, desc="ANALYZE BOUND POSES") - + print("[PyLipID] Finished calculating binding-site koff values.") # Collect RMSD output per site and update per-residue RMSD column for bs_id, rmsd in zip(selected_bs_id, rmsd_set): RMSD_set[f"Binding Site {bs_id}"] = rmsd From 92ad411a823326270e0bc1b7b6afac98be42bd7c Mon Sep 17 00:00:00 2001 From: Phill Stansfeld Date: Fri, 13 Mar 2026 08:08:04 +0000 Subject: [PATCH 24/24] updates to calcs --- pylipid/api/api.py | 6 ++-- pylipid/func/binding_site.py | 69 ++++++++++++++++++++++++++---------- 2 files changed, 53 insertions(+), 22 deletions(-) diff --git a/pylipid/api/api.py b/pylipid/api/api.py index 1c06b08..693c55b 100644 --- a/pylipid/api/api.py +++ b/pylipid/api/api.py @@ -1263,7 +1263,7 @@ def analyze_bound_poses(self, binding_site_id=None, n_top_poses=3, n_clusters="a if score_weights is not None: translate = {atom_idx: score_weights[self._lipid_ref.top.atom(atom_idx).name] for atom_idx in np.arange(self._lipid_ref.n_atoms) - if self._lipid_Ref.top.atom(atom_idx).name in score_weights.keys()} + if self._lipid_ref.top.atom(atom_idx).name in score_weights.keys()} atom_weights.update(translate) RMSD_set = {} @@ -1296,7 +1296,7 @@ def analyze_bound_poses(self, binding_site_id=None, n_top_poses=3, n_clusters="a trajfile_list=self._trajfile_list) # Run in parallel with a visible tqdm progress bar - print("[PyLipID] Calculating binding-site koff values… this may take some time.") + print("[PyLipID] Scoring and clustering bound poses… this may take some time.") rmsd_set = _spawn_pmap(task, selected_bs_id, [pose_traj[bs_id] for bs_id in selected_bs_id], @@ -1304,7 +1304,7 @@ def analyze_bound_poses(self, binding_site_id=None, n_top_poses=3, n_clusters="a [pose_info[bs_id] for bs_id in selected_bs_id], num_cpus=num_cpus, desc="ANALYZE BOUND POSES") - print("[PyLipID] Finished calculating binding-site koff values.") + print("[PyLipID] Finished 
clustering.") # Collect RMSD output per site and update per-residue RMSD column for bs_id, rmsd in zip(selected_bs_id, rmsd_set): RMSD_set[f"Binding Site {bs_id}"] = rmsd diff --git a/pylipid/func/binding_site.py b/pylipid/func/binding_site.py index aa8398f..e08d34c 100644 --- a/pylipid/func/binding_site.py +++ b/pylipid/func/binding_site.py @@ -201,16 +201,28 @@ def collect_bound_poses(binding_site_map, contact_residue_index, trajfile_list, contact_BS = [np.unique(np.concatenate( [contact_residue_index[node][list_to_take][frame_idx] for node in nodes])) for frame_idx in range(traj.n_frames)] - for frame_id in range(len(contact_BS)): - if len(contact_BS[frame_id]) > 0: - for lipid_id in contact_BS[frame_id]: - lipid_residue_index = lipid_residue_index_set[int(lipid_id)] - lipid_atom_indices = np.sort( - [atom.index for atom in traj.top.residue(lipid_residue_index).atoms]) - pose_pool[bs_id].append( - [np.copy(traj.xyz[frame_id, np.hstack([protein_indices, lipid_atom_indices])]), - np.copy(traj.unitcell_angles[frame_id]), np.copy(traj.unitcell_lengths[frame_id])]) - pose_info[bs_id].append((traj_idx, protein_idx, lipid_residue_index, frame_id*traj.timestep)) + # Group (frame_id, lipid_res_idx) pairs by lipid molecule so we can + # batch-extract all frames for each lipid with a single numpy slice + # instead of copying coordinates one frame at a time. 
+ lip_frames = defaultdict(list) # {lipid_res_idx: [frame_ids]} + for frame_id, lipids_at_frame in enumerate(contact_BS): + for lipid_id in lipids_at_frame: + lip_res_idx = lipid_residue_index_set[int(lipid_id)] + lip_frames[lip_res_idx].append(frame_id) + for lip_res_idx, frame_ids in lip_frames.items(): + lip_atoms = np.sort( + [atom.index for atom in traj.top.residue(lip_res_idx).atoms]) + combined = np.hstack([protein_indices, lip_atoms]) + frame_arr = np.array(frame_ids) + # Single batch slice for all frames of this lipid + batch_xyz = traj.xyz[frame_arr][:, combined] + batch_angles = traj.unitcell_angles[frame_arr] + batch_lengths = traj.unitcell_lengths[frame_arr] + for i, frame_id in enumerate(frame_ids): + pose_pool[bs_id].append( + [batch_xyz[i], batch_angles[i], batch_lengths[i]]) + pose_info[bs_id].append( + (traj_idx, protein_idx, lip_res_idx, frame_id * traj.timestep)) pose_traj = {} for bs_id in pose_pool.keys(): pose_traj[bs_id] = md.Trajectory([frame[0] for frame in pose_pool[bs_id]], joined_top, @@ -250,12 +262,30 @@ def vectorize_poses(bound_poses, binding_nodes, protein_atom_indices, lipid_atom The distance matrix of the bound poses, in the shape of [n_lipid_atoms, n_poses, n_binding_site_residues] """ - # calculate distance to binding site residues for each lipid atom - dist_per_atom = np.array( - [np.array([md.compute_distances(bound_poses, - list(product([lipid_atom_indices[idx]], protein_atom_indices[node])), - periodic=True).min(axis=1) for node in binding_nodes]).T - for idx in np.arange(len(lipid_atom_indices))]) # shape: [n_lipid_atoms, n_poses, n_BS_residues] + n_lip = len(lipid_atom_indices) + n_nodes = len(binding_nodes) + + # Build one flat pair list covering every (lipid_atom × node_residue_atom) combination. + # block_starts[i] marks where the i-th (lipid_atom, node) block begins so that + # np.minimum.reduceat can collapse each block to its per-residue minimum in one pass. 
+ # This replaces n_lip * n_nodes separate md.compute_distances calls with a single call. + all_pairs = [] + block_starts = [] # length = n_lip * n_nodes + + for lip_atom in lipid_atom_indices: + for node in binding_nodes: + block_starts.append(len(all_pairs)) + for prot_atom in protein_atom_indices[node]: + all_pairs.append((lip_atom, prot_atom)) + + # One call → shape [n_frames, n_total_pairs] + all_dists = md.compute_distances(bound_poses, all_pairs, periodic=True) + + # Per-block minimum → shape [n_frames, n_lip * n_nodes] + min_dists = np.minimum.reduceat(all_dists, block_starts, axis=1) + + # Reshape to [n_frames, n_lip, n_nodes] then transpose → [n_lip, n_frames, n_nodes] + dist_per_atom = min_dists.reshape(bound_poses.n_frames, n_lip, n_nodes).transpose(1, 0, 2) return dist_per_atom @@ -521,9 +551,10 @@ def analyze_pose_wrapper(bs_id, bound_poses, binding_nodes, pose_info, pose_dir= _write_pose_info([pose_info[int(pose_idx)] for pose_idx in selected_pose_indices], f"{pose_dir_clustered}/pose_info.txt", trajfile_list) ## calculate RMSD ## - dist_mean = np.mean(lipid_dist_per_pose, axis=0) - pose_rmsds = [rmsd(lipid_dist_per_pose[pose_id], dist_mean) - for pose_id in np.arange(len(lipid_dist_per_pose))] + # lipid_dist_per_pose is already computed above; vectorise over all poses at once + # instead of looping frame by frame. + dist_mean = lipid_dist_per_pose.mean(axis=0) + pose_rmsds = np.sqrt(((lipid_dist_per_pose - dist_mean) ** 2).mean(axis=1)).tolist() return pose_rmsds