diff --git a/.all-contributorsrc b/.all-contributorsrc new file mode 100644 index 0000000..585a78c --- /dev/null +++ b/.all-contributorsrc @@ -0,0 +1,47 @@ +{ + "files": [ + "README.md" + ], + "imageSize": 100, + "commit": false, + "commitType": "docs", + "commitConvention": "angular", + "contributors": [ + { + "login": "danielawitten", + "name": "danielawitten", + "avatar_url": "https://avatars.githubusercontent.com/u/12654191?v=4", + "profile": "https://github.com/danielawitten", + "contributions": [ + "code", + "content" + ] + }, + { + "login": "trevorhastie", + "name": "trevorhastie", + "avatar_url": "https://avatars.githubusercontent.com/u/13293253?v=4", + "profile": "https://web.stanford.edu/~hastie/", + "contributions": [ + "code", + "content" + ] + }, + { + "login": "tibshirani", + "name": "tibshirani", + "avatar_url": "https://avatars.githubusercontent.com/u/2848609?v=4", + "profile": "https://github.com/tibshirani", + "contributions": [ + "code", + "content" + ] + } + ], + "contributorsPerLine": 7, + "skipCi": true, + "repoType": "github", + "repoHost": "https://github.com", + "projectName": "ISLP", + "projectOwner": "intro-stat-learning" +} diff --git a/.github/workflows/build_docs.yml b/.github/workflows/build_docs.yml new file mode 100644 index 0000000..9260015 --- /dev/null +++ b/.github/workflows/build_docs.yml @@ -0,0 +1,85 @@ +# This builds and deploys ISLP docs + +name: Build docs + +# Controls when the workflow will run +on: + workflow_dispatch: + inputs: null + +# A workflow run is made up of one or more jobs that can run +# sequentially or in parallel + +jobs: # This workflow contains a single + # job called "build" + + build: + # The type of runner that the job will run on + runs-on: ubuntu-latest + + # Steps represent a sequence of tasks that will be executed as part of the job + steps: + # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it + - uses: actions/checkout@v4 + - uses: actions/setup-python@v4 + with: + 
python-version: '3.12' + cache: 'pip' + # Install + - name: Install dependencies + run: | + sudo apt-get install r-base + pip install -r docs/requirements.txt + pip install . + + # Checkout labs + - name: Checkout version of labs + run: | + git submodule update --init --force docs/ISLP_labs + cd docs + mkdir -p source/labs + cp ISLP_labs/Ch*nb source/labs + python fix_and_clear_notebooks.py source/labs/Ch*nb --rm_md + python make_notebooks.py --inplace --requirements=ISLP_labs/requirements.txt source/labs/Ch06-varselect-lab.ipynb + rm source/labs/Ch*md + + - name: Make docs + run: | + cd docs + make html + + # Store the output + - name: Upload docs + uses: actions/upload-artifact@v4 + with: + name: ISLP_docs + path: docs/build/html + retention-days: 5 + + deploy: + runs-on: ubuntu-latest + needs: build + + # Grant GITHUB_TOKEN the permissions required to make a Pages deployment + permissions: + pages: write # to deploy to Pages + id-token: write # to verify the deployment originates from an appropriate source + + environment: + name: github-pages + url: ${{steps.deployment.outputs.page_url}} + + steps: + - uses: actions/download-artifact@master + with: + name: ISLP_docs + path: . + - uses: actions/configure-pages@v4 + with: + node-version: 20.x + - uses: actions/upload-pages-artifact@v3 + with: + node-version: 20.x + path: . 
+ - id: deployment + uses: actions/deploy-pages@main \ No newline at end of file diff --git a/.github/workflows/build_notebook.yml b/.github/workflows/build_notebook.yml new file mode 100644 index 0000000..dbf97e8 --- /dev/null +++ b/.github/workflows/build_notebook.yml @@ -0,0 +1,105 @@ +# This is a basic workflow to help you get started with Actions + +name: Build a notebook + +# Controls when the workflow will run +on: + workflow_dispatch: + inputs: + LABS: + description: 'Labs version' + required: true + default: 'v2' + type: string + ID: + description: 'Which lab to build' + required: true + default: '03' + type: string + +# A workflow run is made up of one or more jobs that can run sequentially or in parallel +jobs: + # This workflow contains a single job called "build" + build-linux: + # The type of runner that the job will run on + runs-on: ubuntu-latest + + # Steps represent a sequence of tasks that will be executed as part of the job + steps: + # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: '3.10' + cache: 'pip' + + # Install + - name: Install dependencies + run: | + pip install . + pip install jupyterlab + + # Runs a set of commands using the runners shell + - name: Build notebook + env: + LABS: ${{ inputs.LABS }} + ID: ${{ inputs.ID }} + run: | + git clone https://github.com/intro-stat-learning/ISLP_labs.git + cd ISLP_labs + git checkout $LABS + cp Ch*$ID*lab.ipynb .. 
+ jupyter nbconvert --execute --inplace ../Ch*$ID*lab.ipynb + jupyter nbconvert --to html ../Ch*$ID*lab.ipynb + + # Store the output + - name: Upload labs + env: + ID: ${{ inputs.ID }} + uses: actions/upload-artifact@v3 + with: + name: ISLP_labs + path: Ch* + retention-days: 1 + + build-mac: + # The type of runner that the job will run on + runs-on: macos-latest + + # Steps represent a sequence of tasks that will be executed as part of the job + steps: + # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: '3.10' + cache: 'pip' + + # Install + - name: Install dependencies + run: | + pip install . + pip install jupyterlab + + # Runs a set of commands using the runners shell + - name: Build notebook + env: + LABS: ${{ inputs.LABS }} + ID: ${{ inputs.ID }} + run: | + git clone https://github.com/intro-stat-learning/ISLP_labs.git + cd ISLP_labs + git checkout $LABS + cp Ch*$ID*lab.ipynb .. 
+ jupyter nbconvert --execute --inplace ../Ch*$ID*lab.ipynb + jupyter nbconvert --to html ../Ch*$ID*lab.ipynb + + # Store the output + - name: Upload labs + env: + ID: ${{ inputs.ID }} + uses: actions/upload-artifact@v3 + with: + name: ISLP_labs + path: Ch* + retention-days: 1 \ No newline at end of file diff --git a/.github/workflows/build_notebook_errors.yml b/.github/workflows/build_notebook_errors.yml new file mode 100644 index 0000000..d5fabee --- /dev/null +++ b/.github/workflows/build_notebook_errors.yml @@ -0,0 +1,104 @@ +# This is a basic workflow to help you get started with Actions + +name: Build a notebook (allow errors, capture result) + +# Controls when the workflow will run +on: + workflow_dispatch: + inputs: + LABS: + description: 'Labs version' + required: true + default: 'v2' + type: string + ID: + description: 'Which lab to build' + required: true + default: '02' + type: string + +# A workflow run is made up of one or more jobs that can run sequentially or in parallel +jobs: + + build-linux: + # The type of runner that the job will run on + runs-on: ubuntu-latest + + # Steps represent a sequence of tasks that will be executed as part of the job + steps: + # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: '3.10' + cache: 'pip' + + # Install + - name: Install dependencies + run: | + pip install . + + # Runs a set of commands using the runners shell + - name: Build notebook, allowing errors + env: + LABS: ${{ inputs.LABS }} + ID: ${{ inputs.ID }} + run: | + git clone https://github.com/intro-stat-learning/ISLP_labs.git + cd ISLP_labs + git checkout $LABS + cp Ch*$ID*lab.ipynb .. 
+ jupyter nbconvert --execute --inplace ../Ch*$ID*lab.ipynb --allow-errors + jupyter nbconvert --to html ../Ch*$ID*lab.ipynb + + # Store the output + - name: Upload labs + env: + ID: ${{ inputs.ID }} + uses: actions/upload-artifact@v3 + with: + name: ISLP_labs + path: Ch* + retention-days: 1 + + build-mac: + # The type of runner that the job will run on + runs-on: macos-latest + + # Steps represent a sequence of tasks that will be executed as part of the job + steps: + # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: '3.10' + cache: 'pip' + + # Install + - name: Install dependencies + run: | + pip install . + + # Runs a set of commands using the runners shell + - name: Build notebook, allowing errors + env: + LABS: ${{ inputs.LABS }} + ID: ${{ inputs.ID }} + run: | + git clone https://github.com/intro-stat-learning/ISLP_labs.git + cd ISLP_labs + git checkout $LABS + cp Ch*$ID*lab.ipynb .. 
+ jupyter nbconvert --execute --inplace ../Ch*$ID*lab.ipynb --allow-errors + jupyter nbconvert --to html ../Ch*$ID*lab.ipynb + + # Store the output + - name: Upload labs + env: + ID: ${{ inputs.ID }} + uses: actions/upload-artifact@v3 + with: + name: ISLP_labs + path: Ch* + retention-days: 1 + diff --git a/.github/workflows/build_save_labs.yml b/.github/workflows/build_save_labs.yml new file mode 100644 index 0000000..57ebf78 --- /dev/null +++ b/.github/workflows/build_save_labs.yml @@ -0,0 +1,104 @@ +# This is a basic workflow to help you get started with Actions + +name: Build + save notebooks (not 10,13) + +# Controls when the workflow will run +on: + workflow_dispatch: + inputs: + LABS: + description: 'Labs version' + required: true + default: 'v2' + type: string + +# A workflow run is made up of one or more jobs that can run sequentially or in parallel +jobs: + # This workflow contains a single job called "build" + build: + # The type of runner that the job will run on + runs-on: ubuntu-latest + + # Steps represent a sequence of tasks that will be executed as part of the job + steps: + # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: '3.10' + cache: 'pip' + + # Install + - name: Install dependencies + run: | + pip install . 
+ + # Runs a set of commands using the runners shell + - name: Build Ch02 notebook (allow errors) + env: + LABS: ${{ inputs.LABS }} + run: | + git clone https://github.com/intro-stat-learning/ISLP_labs.git + cd ISLP_labs + git checkout $LABS + rm Ch10* + rm Ch13* + jupyter nbconvert --execute --inplace --allow-errors Ch02*lab.ipynb + + - name: Build Ch03 notebook + run: | + cd ISLP_labs + jupyter nbconvert --execute --inplace Ch03*lab.ipynb + + - name: Build Ch04 notebook + run: | + cd ISLP_labs + jupyter nbconvert --execute --inplace Ch04*lab.ipynb + + - name: Build Ch05 notebook + run: | + cd ISLP_labs + jupyter nbconvert --execute --inplace Ch05*lab.ipynb + + - name: Build Ch06 notebook + run: | + cd ISLP_labs + jupyter nbconvert --execute --inplace Ch06*lab.ipynb + + - name: Build Ch07 notebook + run: | + cd ISLP_labs + jupyter nbconvert --execute --inplace Ch07*lab.ipynb + + - name: Build Ch08 notebook + run: | + cd ISLP_labs + jupyter nbconvert --execute --inplace Ch08*lab.ipynb + + - name: Build Ch09 notebook + run: | + cd ISLP_labs + jupyter nbconvert --execute --inplace Ch09*lab.ipynb + + - name: Build Ch11 notebook + run: | + cd ISLP_labs + jupyter nbconvert --execute --inplace Ch11*lab.ipynb + + - name: Build Ch12 notebook + run: | + cd ISLP_labs + jupyter nbconvert --execute --inplace Ch12*lab.ipynb + + - name: Build HTML + run: | + cd ISLP_labs + jupyter nbconvert --to html Ch*ipynb + + # Store the output + - name: Upload labs + uses: actions/upload-artifact@v3 + with: + name: ISLP_labs + path: Ch* + retention-days: 1 \ No newline at end of file diff --git a/.github/workflows/build_test.yml b/.github/workflows/build_test.yml new file mode 100644 index 0000000..767a62e --- /dev/null +++ b/.github/workflows/build_test.yml @@ -0,0 +1,102 @@ +name: Build and test + +on: [push] + +jobs: + build-linux: + runs-on: ubuntu-latest + strategy: + max-parallel: 5 + + steps: + - uses: actions/checkout@v3 + - name: Set up Python 3.12 + uses: actions/setup-python@v4 + 
with: + python-version: '3.12' + - name: Add conda to system path + run: | + # $CONDA is an environment variable pointing to the root of the miniconda directory + echo $CONDA/bin >> $GITHUB_PATH + - name: Install dependencies + run: | + pip install . + - name: Lint with flake8 + run: | + pip install flake8 + # stop the build if there are Python syntax errors or undefined names + flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide + flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + - name: Test with pytest + timeout-minutes: 12 + run: | + pip install torchvision torchinfo + pip install pytest + pytest + + build-windows: + runs-on: windows-latest + strategy: + max-parallel: 5 + + steps: + - uses: actions/checkout@v3 + - name: Set up Python 3.12 + uses: actions/setup-python@v4 + with: + python-version: '3.12' + - name: Add conda to system path + run: | + # $CONDA is an environment variable pointing to the root of the miniconda directory + echo $CONDA/bin >> $GITHUB_PATH + - name: Install dependencies + run: | + pip install . + - name: Lint with flake8 + run: | + pip install flake8 + # stop the build if there are Python syntax errors or undefined names + flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide + flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + - name: Test with pytest + timeout-minutes: 12 + run: | + pip install torchvision torchinfo + pip install pytest + pytest + + build-mac: + runs-on: macos-latest + strategy: + max-parallel: 5 + + steps: + - uses: actions/checkout@v3 + - name: Set up Python 3.12 + uses: actions/setup-python@v4 + with: + python-version: '3.12' + - name: Add conda to system path + run: | + # $CONDA is an environment variable pointing to the root of the miniconda directory + echo $CONDA/bin >> $GITHUB_PATH + - name: Install dependencies + run: | + pip install . + - name: Lint with flake8 + run: | + pip install flake8 + # stop the build if there are Python syntax errors or undefined names + flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide + flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + - name: Test with pytest + timeout-minutes: 12 + run: | + pip install torchvision torchinfo + pip install pytest + pytest --ignore tests/deeplearning/test_hitters.py --ignore tests/deeplearning/test_mnist.py + + diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..891a60a --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "docs/ISLP_labs"] + path = docs/ISLP_labs + url = https://github.com/intro-stat-learning/ISLP_labs diff --git a/.readthedocs.yaml b/.readthedocs.yaml index aacca4b..44bfa25 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -9,7 +9,15 @@ version: 2 build: os: ubuntu-22.04 tools: - python: "3.9" + python: "3.11" + apt_packages: + - r-base + jobs: + pre_build: + - python docs/fix_and_clear_notebooks.py + +submodules: + include: all # Build documentation in the docs/ directory with Sphinx sphinx: @@ -22,8 +30,7 @@ sphinx: # Optionally declare the Python requirements required to build your docs python: install: - - requirements: 
requirements.txt + - requirements: docs/ISLP_labs/requirements.txt - requirements: docs/requirements.txt - - requirements: torch_requirements.txt - method: pip path: . diff --git a/ISLP/__init__.py b/ISLP/__init__.py index ae230d3..6cd1ee1 100644 --- a/ISLP/__init__.py +++ b/ISLP/__init__.py @@ -6,28 +6,74 @@ """ from os.path import join as pjoin +from importlib.resources import (as_file, + files) import pandas as pd, numpy as np -from pkg_resources import resource_filename +from sklearn.metrics import confusion_matrix as _confusion_matrix +from sklearn.metrics._classification import unique_labels # data originally saved via: [sm.datasets.get_rdataset(n, 'ISLR').data.to_csv('../ISLP/data/%s.csv' % n, index=False) for n in ['Carseats', 'College', 'Credit', 'Default', 'Hitters', 'Auto', 'OJ', 'Portfolio', 'Smarket', 'Wage', 'Weekly', 'Caravan']] +def _make_categorical(dataset): + unordered = _unordered.setdefault(dataset, []) + ordered = _ordered.setdefault(dataset, []) + with as_file(files('ISLP').joinpath('data', '%s.csv' % dataset)) as filename: + df = pd.read_csv(filename) + for col in unordered: + df[col] = pd.Categorical(df[col]) + for col in ordered: + df[col] = pd.Categorical(df[col], ordered=True) + if dataset in _index: + df = df.set_index(_index[dataset]) + return df + +_unordered = {'Hitters':['League', 'Division', 'NewLeague'], + 'Caravan':['Purchase'], + 'Carseats':['ShelveLoc', 'Urban', 'US'], + 'College':['Private'], + 'Publication':['mech'], + 'BrainCancer':['sex', 'diagnosis', 'loc', 'stereo'], + 'Wage':['maritl', 'race', 'region', 'jobclass', 'health', 'health_ins'], + 'Default':['default', 'student'], + 'Credit':['Gender', 'Student', 'Married', 'Ethnicity'], + 'OJ':['Purchase', 'Store7'], + 'Smarket':['Direction'], + 'Weekly':['Direction'] + } +_ordered = {'Wage':['education'], + } +_index = {'Auto':'name'} + +_datasets = sorted(list(_unordered.keys()) + + list(_ordered.keys()) + + ['NCI60', + 'Khan', + 'Bikeshare', + 'NYSE']) + def 
load_data(dataset): + if dataset == 'NCI60': - features = resource_filename('ISLP', pjoin('data', 'NCI60data.npy')) - X = np.load(features) - labels = resource_filename('ISLP', pjoin('data', 'NCI60labs.csv')) - Y = pd.read_csv(labels) + with as_file(files('ISLP').joinpath('data', 'NCI60data.npy')) as features: + X = np.load(features) + with as_file(files('ISLP').joinpath('data', 'NCI60labs.csv')) as labels: + Y = pd.read_csv(labels) return {'data':X, 'labels':Y} elif dataset == 'Khan': - xtest = pd.read_csv(resource_filename('ISLP', pjoin('data', 'Khan_xtest.csv'))) + with as_file(files('ISLP').joinpath('data', 'Khan_xtest.csv')) as xtest: + xtest = pd.read_csv(xtest) xtest = xtest.rename(columns=dict([('V%d' % d, 'G%04d' % d) for d in range(1, len(xtest.columns)+0)])) - ytest = pd.read_csv(resource_filename('ISLP', pjoin('data', 'Khan_ytest.csv'))) + with as_file(files('ISLP').joinpath('data', 'Khan_ytest.csv')) as ytest: + ytest = pd.read_csv(ytest) ytest = ytest.rename(columns={'x':'Y'}) ytest = ytest['Y'] - xtrain = pd.read_csv(resource_filename('ISLP', pjoin('data', 'Khan_xtrain.csv'))) - xtrain = xtrain.rename(columns=dict([('V%d' % d, 'G%04d' % d) for d in range(1, len(xtest.columns)+0)])) - ytrain = pd.read_csv(resource_filename('ISLP', pjoin('data', 'Khan_ytrain.csv'))) + with as_file(files('ISLP').joinpath('data', 'Khan_xtrain.csv')) as xtrain: + xtrain = pd.read_csv(xtrain) + xtrain = xtrain.rename(columns=dict([('V%d' % d, 'G%04d' % d) for d in range(1, len(xtest.columns)+0)])) + + with as_file(files('ISLP').joinpath('data', 'Khan_ytrain.csv')) as ytrain: + ytrain = pd.read_csv(ytrain) ytrain = ytrain.rename(columns={'x':'Y'}) ytrain = ytrain['Y'] @@ -35,35 +81,10 @@ def load_data(dataset): 'xtrain':xtrain, 'ytest':ytest, 'ytrain':ytrain} - elif dataset == 'Hitters': - filename = resource_filename('ISLP', pjoin('data', '%s.csv' % dataset)) - df = pd.read_csv(filename) - for col in ['League', 'Division', 'NewLeague']: - df[col] = pd.Categorical(df[col]) 
- return df - elif dataset == 'Carseats': - filename = resource_filename('ISLP', pjoin('data', '%s.csv' % dataset)) - df = pd.read_csv(filename) - for col in ['ShelveLoc', 'Urban', 'US']: - df[col] = pd.Categorical(df[col]) - return df - elif dataset == 'NYSE': - filename = resource_filename('ISLP', pjoin('data', '%s.csv' % dataset)) - df = pd.read_csv(filename).set_index('date') - return df - elif dataset == 'Publication': - df = pd.read_csv(resource_filename('ISLP', pjoin('data', 'Publication.csv'))) - for col in ['mech']: - df[col] = pd.Categorical(df[col]) - return df - elif dataset == 'BrainCancer': - df = pd.read_csv(resource_filename('ISLP', pjoin('data', 'BrainCancer.csv'))) - for col in ['sex', 'diagnosis', 'loc', 'stereo']: - df[col] = pd.Categorical(df[col]) - return df + elif dataset == 'Bikeshare': - filename = resource_filename('ISLP', pjoin('data', '%s.csv' % dataset)) - df = pd.read_csv(filename) + with as_file(files('ISLP').joinpath('data', '%s.csv' % dataset)) as filename: + df = pd.read_csv(filename) df['weathersit'] = pd.Categorical(df['weathersit'], ordered=False) # setting order to avoid alphabetical df['mnth'] = pd.Categorical(df['mnth'], @@ -78,26 +99,60 @@ def load_data(dataset): ordered=False, categories=range(24)) return df - elif dataset == 'Wage': - df = pd.read_csv(resource_filename('ISLP', pjoin('data', 'Wage.csv'))) - df['education'] = pd.Categorical(df['education'], ordered=True) - return df + elif dataset == 'NYSE': + with as_file(files('ISLP').joinpath('data', '%s.csv' % dataset)) as filename: + df = pd.read_csv(filename) + # setting order to avoid alphabetical + df['day_of_week'] = pd.Categorical(df['day_of_week'], + ordered=False, + categories=['mon', + 'tues', + 'wed', + 'thur', + 'fri']) + return df.set_index('date') else: - filename = resource_filename('ISLP', pjoin('data', '%s.csv' % dataset)) - return pd.read_csv(filename) + return _make_categorical(dataset) +load_data.__doc__ = f""" +Load dataset from ISLP package. 
-from sklearn.metrics import confusion_matrix as _confusion_matrix +Choices are: {_datasets} + +Parameters +---------- + +dataset: str + +Returns +------- + +data: array-like or dict + Either a `pd.DataFrame` representing the dataset or a dictionary + containing different parts of the dataset. + +""" def confusion_table(predicted_labels, - true_labels): + true_labels, + labels=None): """ Return a data frame version of confusion matrix with rows given by predicted label and columns the truth. + + Parameters + ---------- + + predicted_labels: array-like + These will form rows of confusion matrix. + + true_labels: array-like + These will form columns of confusion matrix. """ - labels = sorted(np.unique(list(true_labels) + - list(predicted_labels))) + if labels is None: + labels = unique_labels(true_labels, + predicted_labels) C = _confusion_matrix(true_labels, predicted_labels, labels=labels) @@ -109,3 +164,4 @@ def confusion_table(predicted_labels, from . import _version __version__ = _version.get_versions()['version'] + diff --git a/ISLP/_version.py b/ISLP/_version.py index 9b01ea2..c2d7406 100644 --- a/ISLP/_version.py +++ b/ISLP/_version.py @@ -5,8 +5,9 @@ # directories (produced by setup.py build) will contain a much shorter file # that just contains the computed version number. -# This file is released into the public domain. Generated by -# versioneer-0.21 (https://github.com/python-versioneer/python-versioneer) +# This file is released into the public domain. +# Generated by versioneer-0.29 +# https://github.com/python-versioneer/python-versioneer """Git implementation of _version.py.""" @@ -15,10 +16,11 @@ import re import subprocess import sys -from typing import Callable, Dict +from typing import Any, Callable, Dict, List, Optional, Tuple +import functools -def get_keywords(): +def get_keywords() -> Dict[str, str]: """Get the keywords needed to look up the version information.""" # these strings will be replaced by git during git-archive. 
# setup.py/versioneer.py will grep for the variable names, so they must @@ -34,8 +36,15 @@ def get_keywords(): class VersioneerConfig: """Container for Versioneer configuration parameters.""" + VCS: str + style: str + tag_prefix: str + parentdir_prefix: str + versionfile_source: str + verbose: bool -def get_config(): + +def get_config() -> VersioneerConfig: """Create, populate and return the VersioneerConfig() object.""" # these strings are filled in when 'setup.py versioneer' creates # _version.py @@ -57,9 +66,9 @@ class NotThisMethod(Exception): HANDLERS: Dict[str, Dict[str, Callable]] = {} -def register_vcs_handler(vcs, method): # decorator +def register_vcs_handler(vcs: str, method: str) -> Callable: # decorator """Create decorator to mark a method as the handler of a VCS.""" - def decorate(f): + def decorate(f: Callable) -> Callable: """Store f in HANDLERS[vcs][method].""" if vcs not in HANDLERS: HANDLERS[vcs] = {} @@ -68,11 +77,25 @@ def decorate(f): return decorate -def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, - env=None): +def run_command( + commands: List[str], + args: List[str], + cwd: Optional[str] = None, + verbose: bool = False, + hide_stderr: bool = False, + env: Optional[Dict[str, str]] = None, +) -> Tuple[Optional[str], Optional[int]]: """Call the given command(s).""" assert isinstance(commands, list) process = None + + popen_kwargs: Dict[str, Any] = {} + if sys.platform == "win32": + # This hides the console window if pythonw.exe is used + startupinfo = subprocess.STARTUPINFO() + startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW + popen_kwargs["startupinfo"] = startupinfo + for command in commands: try: dispcmd = str([command] + args) @@ -80,10 +103,9 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, process = subprocess.Popen([command] + args, cwd=cwd, env=env, stdout=subprocess.PIPE, stderr=(subprocess.PIPE if hide_stderr - else None)) + else None), **popen_kwargs) break - except 
OSError: - e = sys.exc_info()[1] + except OSError as e: if e.errno == errno.ENOENT: continue if verbose: @@ -103,7 +125,11 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, return stdout, process.returncode -def versions_from_parentdir(parentdir_prefix, root, verbose): +def versions_from_parentdir( + parentdir_prefix: str, + root: str, + verbose: bool, +) -> Dict[str, Any]: """Try to determine the version from the parent directory name. Source tarballs conventionally unpack into a directory that includes both @@ -128,13 +154,13 @@ def versions_from_parentdir(parentdir_prefix, root, verbose): @register_vcs_handler("git", "get_keywords") -def git_get_keywords(versionfile_abs): +def git_get_keywords(versionfile_abs: str) -> Dict[str, str]: """Extract version information from the given file.""" # the code embedded in _version.py can just fetch the value of these # keywords. When used from setup.py, we don't want to import _version.py, # so we do it with a regexp instead. This function is not used from # _version.py. - keywords = {} + keywords: Dict[str, str] = {} try: with open(versionfile_abs, "r") as fobj: for line in fobj: @@ -156,7 +182,11 @@ def git_get_keywords(versionfile_abs): @register_vcs_handler("git", "keywords") -def git_versions_from_keywords(keywords, tag_prefix, verbose): +def git_versions_from_keywords( + keywords: Dict[str, str], + tag_prefix: str, + verbose: bool, +) -> Dict[str, Any]: """Get version information from git keywords.""" if "refnames" not in keywords: raise NotThisMethod("Short version file found") @@ -220,7 +250,12 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): @register_vcs_handler("git", "pieces_from_vcs") -def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): +def git_pieces_from_vcs( + tag_prefix: str, + root: str, + verbose: bool, + runner: Callable = run_command +) -> Dict[str, Any]: """Get version from 'git describe' in the root of the source tree. 
This only gets called if the git-archive 'subst' keywords were *not* @@ -228,13 +263,18 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): version string, meaning we're inside a checked out source tree. """ GITS = ["git"] - TAG_PREFIX_REGEX = "*" if sys.platform == "win32": GITS = ["git.cmd", "git.exe"] - TAG_PREFIX_REGEX = r"\*" + + # GIT_DIR can interfere with correct operation of Versioneer. + # It may be intended to be passed to the Versioneer-versioned project, + # but that should not change where we get our version from. + env = os.environ.copy() + env.pop("GIT_DIR", None) + runner = functools.partial(runner, env=env) _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, - hide_stderr=True) + hide_stderr=not verbose) if rc != 0: if verbose: print("Directory %s not under git control" % root) @@ -242,11 +282,10 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = runner(GITS, ["describe", "--tags", "--dirty", - "--always", "--long", - "--match", - "%s%s" % (tag_prefix, TAG_PREFIX_REGEX)], - cwd=root) + describe_out, rc = runner(GITS, [ + "describe", "--tags", "--dirty", "--always", "--long", + "--match", f"{tag_prefix}[[:digit:]]*" + ], cwd=root) # --long was added in git-1.5.5 if describe_out is None: raise NotThisMethod("'git describe' failed") @@ -256,7 +295,7 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): raise NotThisMethod("'git rev-parse' failed") full_out = full_out.strip() - pieces = {} + pieces: Dict[str, Any] = {} pieces["long"] = full_out pieces["short"] = full_out[:7] # maybe improved later pieces["error"] = None @@ -335,8 +374,8 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): else: # HEX: no tags pieces["closest-tag"] = None - count_out, rc = runner(GITS, ["rev-list", "HEAD", "--count"], 
cwd=root) - pieces["distance"] = int(count_out) # total number of commits + out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root) + pieces["distance"] = len(out.split()) # total number of commits # commit date: see ISO-8601 comment in git_versions_from_keywords() date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() @@ -348,14 +387,14 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): return pieces -def plus_or_dot(pieces): +def plus_or_dot(pieces: Dict[str, Any]) -> str: """Return a + if we don't already have one, else return a .""" if "+" in pieces.get("closest-tag", ""): return "." return "+" -def render_pep440(pieces): +def render_pep440(pieces: Dict[str, Any]) -> str: """Build up version string, with post-release "local version identifier". Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you @@ -380,7 +419,7 @@ def render_pep440(pieces): return rendered -def render_pep440_branch(pieces): +def render_pep440_branch(pieces: Dict[str, Any]) -> str: """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . The ".dev0" means not master branch. Note that .dev0 sorts backwards @@ -410,7 +449,7 @@ def render_pep440_branch(pieces): return rendered -def pep440_split_post(ver): +def pep440_split_post(ver: str) -> Tuple[str, Optional[int]]: """Split pep440 version string at the post-release segment. Returns the release segments before the post-release and the @@ -420,7 +459,7 @@ def pep440_split_post(ver): return vc[0], int(vc[1] or 0) if len(vc) == 2 else None -def render_pep440_pre(pieces): +def render_pep440_pre(pieces: Dict[str, Any]) -> str: """TAG[.postN.devDISTANCE] -- No -dirty. 
Exceptions: @@ -432,7 +471,7 @@ def render_pep440_pre(pieces): tag_version, post_version = pep440_split_post(pieces["closest-tag"]) rendered = tag_version if post_version is not None: - rendered += ".post%d.dev%d" % (post_version+1, pieces["distance"]) + rendered += ".post%d.dev%d" % (post_version + 1, pieces["distance"]) else: rendered += ".post0.dev%d" % (pieces["distance"]) else: @@ -444,7 +483,7 @@ def render_pep440_pre(pieces): return rendered -def render_pep440_post(pieces): +def render_pep440_post(pieces: Dict[str, Any]) -> str: """TAG[.postDISTANCE[.dev0]+gHEX] . The ".dev0" means dirty. Note that .dev0 sorts backwards @@ -471,7 +510,7 @@ def render_pep440_post(pieces): return rendered -def render_pep440_post_branch(pieces): +def render_pep440_post_branch(pieces: Dict[str, Any]) -> str: """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . The ".dev0" means not master branch. @@ -500,7 +539,7 @@ def render_pep440_post_branch(pieces): return rendered -def render_pep440_old(pieces): +def render_pep440_old(pieces: Dict[str, Any]) -> str: """TAG[.postDISTANCE[.dev0]] . The ".dev0" means dirty. @@ -522,7 +561,7 @@ def render_pep440_old(pieces): return rendered -def render_git_describe(pieces): +def render_git_describe(pieces: Dict[str, Any]) -> str: """TAG[-DISTANCE-gHEX][-dirty]. Like 'git describe --tags --dirty --always'. @@ -542,7 +581,7 @@ def render_git_describe(pieces): return rendered -def render_git_describe_long(pieces): +def render_git_describe_long(pieces: Dict[str, Any]) -> str: """TAG-DISTANCE-gHEX[-dirty]. Like 'git describe --tags --dirty --always -long'. 
@@ -562,7 +601,7 @@ def render_git_describe_long(pieces): return rendered -def render(pieces, style): +def render(pieces: Dict[str, Any], style: str) -> Dict[str, Any]: """Render the given version pieces into the requested style.""" if pieces["error"]: return {"version": "unknown", @@ -598,7 +637,7 @@ def render(pieces, style): "date": pieces.get("date")} -def get_versions(): +def get_versions() -> Dict[str, Any]: """Get version information or return default if unable to do so.""" # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have # __file__, we can work backwards from there to the root. Some diff --git a/ISLP/bart/bart.py b/ISLP/bart/bart.py index 2c33aba..3c933ca 100644 --- a/ISLP/bart/bart.py +++ b/ISLP/bart/bart.py @@ -101,11 +101,11 @@ def predict(self, check_is_fitted(self) nsample = len(self.trees_sample_) - output = np.zeros(X.shape[0], np.float) + output = np.zeros(X.shape[0], float) for trees in self.trees_sample_: for tree in trees: - tree_fit = np.array([tree.predict_out_of_sample(x) for x in X]) + tree_fit = np.array([tree.predict_out_of_sample(x) for x in np.asarray(X)]) output += tree_fit output = output / nsample return self._inverse(output) @@ -118,11 +118,11 @@ def staged_predict(self, trees_sample_ = self.trees_sample_[start_idx:] nsample = len(trees_sample_) - output = np.zeros((nsample, X.shape[0]), np.float) + output = np.zeros((nsample, X.shape[0]), float) for nstep, trees in enumerate(trees_sample_): for tree in trees: - tree_fit = np.array([tree.predict_out_of_sample(x) for x in X]) + tree_fit = np.array([tree.predict_out_of_sample(x) for x in np.asarray(X)]) output[nstep] += tree_fit output = np.cumsum(output, 0) / (np.arange(nsample) + 1)[:,None] @@ -141,7 +141,7 @@ def fit(self, if self.n_jobs <= 0: n_jobs = 1 - random_idx = random_state.randint(0,2**32-1,size=(n_jobs,)) + random_idx = random_state.randint(0,2**30-1,size=(n_jobs,)) # 2**31-1 should be OK for int32 parallel = Parallel(n_jobs=len(random_idx)) diff 
--git a/ISLP/bart/likelihood.py b/ISLP/bart/likelihood.py index 28f341d..cfa3ce6 100644 --- a/ISLP/bart/likelihood.py +++ b/ISLP/bart/likelihood.py @@ -82,7 +82,7 @@ def marginal_loglikelihood(response, if not incremental: if responsesq_sum is None: responsesq_sum = (response**2).sum() - response_moments = (n, response_sum, responseseq_sum) + response_moments = (n, response_sum, responsesq_sum) logL -= n * 0.5 * np.log(sigmasq) logL -= 0.5 * responsesq_sum / sigmasq diff --git a/ISLP/bart/tree.py b/ISLP/bart/tree.py index 8726929..49b4789 100644 --- a/ISLP/bart/tree.py +++ b/ISLP/bart/tree.py @@ -96,7 +96,7 @@ def predict_output(self): current_node = self.get_node(node_index) output[current_node.idx_data_points] = current_node.value - return output.astype(np.float) + return output.astype(float) def predict_out_of_sample(self, X): """ diff --git a/ISLP/info.py b/ISLP/info.py deleted file mode 100644 index 3a1fecd..0000000 --- a/ISLP/info.py +++ /dev/null @@ -1,78 +0,0 @@ -""" This file contains defines parameters for regreg that we use to fill -settings in setup.py, the regreg top-level docstring, and for building the docs. -In setup.py in particular, we exec this file, so it cannot import regreg -""" - -# regreg version information. An empty _version_extra corresponds to a -# full release. 
'.dev' as a _version_extra string means this is a development -# version -_version_major = 0 -_version_minor = 2 -_version_micro = 0 -_version_extra = '' - -# Format expected by setup.py and doc/source/conf.py: string of form "X.Y.Z" -__version__ = "%s.%s.%s%s" % (_version_major, - _version_minor, - _version_micro, - _version_extra) - -CLASSIFIERS = ["Development Status :: 3 - Alpha", - "Environment :: Console", - "Intended Audience :: Science/Research", - "License :: OSI Approved :: BSD License", - "Operating System :: OS Independent", - "Programming Language :: Python", - "Topic :: Scientific/Engineering"] - -description = 'Testing a fixed value of lambda' - -# Note: this long_description is actually a copy/paste from the top-level -# README.txt, so that it shows up nicely on PyPI. So please remember to edit -# it only in one place and sync it correctly. -long_description = \ -""" -============ -Fixed lambda -============ - -This mini-package contains a module to perform -a fixed lambda test for the LASSO. 
-""" - -# versions -NUMPY_MIN_VERSION='1.7.1' -SCIPY_MIN_VERSION = '0.9' -PANDAS_MIN_VERSION = "0.20" -SKLEARN_MIN_VERSION = '1.0' -STATSMODELS_MIN_VERSION = '0.13' -MATPLOTLIB_MIN_VERSION = '3.3.3' - -NAME = 'ISLP' -MAINTAINER = "Jonathan Taylor" -MAINTAINER_EMAIL = "" -DESCRIPTION = description -LONG_DESCRIPTION = long_description -URL = "http://github.org/jonathan.taylor/ISLP" -DOWNLOAD_URL = "" -LICENSE = "BSD license" -CLASSIFIERS = CLASSIFIERS -AUTHOR = "ISLP authors" -AUTHOR_EMAIL = "" -PLATFORMS = "OS Independent" -MAJOR = _version_major -MINOR = _version_minor -MICRO = _version_micro -ISRELEASE = _version_extra == '' -VERSION = __version__ -STATUS = 'alpha' -PROVIDES = [] -REQUIRES = ["numpy (>=%s)" % NUMPY_MIN_VERSION, - "scipy (>=%s)" % SCIPY_MIN_VERSION, - "statsmodels (>=%s)" % STATSMODELS_MIN_VERSION, - "pandas (>=%s)" % PANDAS_MIN_VERSION, - "sklearn (>=%s)" % SKLEARN_MIN_VERSION, - "lifelines", - "joblib", - "pygam" - ] diff --git a/ISLP/models/__init__.py b/ISLP/models/__init__.py index bf9cd55..cff02f8 100644 --- a/ISLP/models/__init__.py +++ b/ISLP/models/__init__.py @@ -4,14 +4,15 @@ """ import numpy as np, pandas as pd +from io import StringIO from .model_spec import (ModelSpec, Column, - Variable, + Feature, poly, ns, bs, - derived_variable, + derived_feature, pca, contrast, build_columns) @@ -24,13 +25,14 @@ sklearn_selection_path) def summarize(results, - conf_int=False): + conf_int=False, + level=None): """ Take a fit statsmodels and summarize it by returning the usual coefficient estimates, their standard errors, the usual test statistics and P-values as well as - (optionally) 95% confidence intervals. + (optionally) confidence intervals. Based on: @@ -45,8 +47,12 @@ def summarize(results, Include 95% confidence intervals? 
""" - tab = results.summary().tables[1] - results_table = pd.read_html(tab.as_html(), + if level is not None: + conf_int = True + if level is None: + level = 0.95 + tab = results.summary(alpha=1-level).tables[1] + results_table = pd.read_html(StringIO(tab.as_html()), index_col=0, header=0)[0] if not conf_int: @@ -57,12 +63,4 @@ def summarize(results, return results_table[results_table.columns[:-2]] return results_table -# def poly(X, degree): -# """ -# Create columns of design matrix -# for orthogonal polynomial for a given series X -# """ - -# result = Poly(degree=degree).fit_transform(X) - diff --git a/ISLP/models/columns.py b/ISLP/models/columns.py index c15ace2..7ea6adb 100644 --- a/ISLP/models/columns.py +++ b/ISLP/models/columns.py @@ -9,7 +9,6 @@ from sklearn.utils.validation import check_is_fitted from sklearn.exceptions import NotFittedError - class Column(NamedTuple): """ @@ -52,7 +51,7 @@ def get_columns(self, X, fit=False): Column names """ - cols = _get_column(self.idx, X, ndarray=False) + cols = _get_column(self.idx, X) if fit: self.fit_encoder(X) @@ -88,7 +87,7 @@ def fit_encoder(self, X): ------- None """ - cols = _get_column(self.idx, X, ndarray=False) + cols = _get_column(self.idx, X) if self.encoder is not None: try: check_is_fitted(self.encoder) @@ -102,41 +101,30 @@ def fit_encoder(self, X): def _get_column(idx, X, - twodim=False, - loc=True, - ndarray=True): + loc=True): """ - Extract column `idx` from `X`, - optionally making it two-dimensional - as many sklearn encoders assume - two-dimensional input + Extract column `idx` from `X` + as a two-dimensional ndarray or a pd.DataFrame """ if isinstance(X, np.ndarray): - col = X[:, idx] + col = X[:, [idx]] elif hasattr(X, 'loc'): if loc: - col = X.loc[:, idx] + col = X.loc[:, [idx]] else: # use iloc instead - col = X.iloc[:, idx] + col = X.iloc[:, [idx]] else: raise ValueError('expecting an ndarray or a ' + '"loc/iloc" methods, got %s' % str(X)) - if ndarray: - if twodim and np.asarray(col).ndim 
== 1: - return np.asarray(col).reshape((-1, 1)) - return np.asarray(col) - else: - return col + + return col def _get_column_info(X, columns, is_categorical, is_ordinal, - default_encoders={ - 'ordinal': OrdinalEncoder(), - 'categorical': OneHotEncoder() - } + categorical_encoders={} ): @@ -158,13 +146,19 @@ def _get_column_info(X, name = str(col) if is_categorical[i]: if is_ordinal[i]: - Xcol = _get_column(col, X, twodim=True) - encoder = clone(default_encoders['ordinal']) + Xcol = _get_column(col, X) + if col not in categorical_encoders: + encoder = clone(categorical_encoders['ordinal']) + else: + encoder = categorical_encoders[col] encoder.fit(Xcol) columns = ['{0}'.format(col)] else: - Xcol = _get_column(col, X, twodim=True, ndarray=True) - encoder = clone(default_encoders['categorical']) + Xcol = _get_column(col, X) + if col not in categorical_encoders: + encoder = clone(categorical_encoders['categorical']) + else: + encoder = categorical_encoders[col] cols = encoder.fit_transform(Xcol) if hasattr(encoder, 'columns_'): columns_ = encoder.columns_ @@ -179,7 +173,7 @@ def _get_column_info(X, tuple(columns), encoder) else: - Xcol = _get_column(col, X, twodim=True) + Xcol = _get_column(col, X) column_info[col] = Column(col, name, columns=(name,)) @@ -189,7 +183,6 @@ def _get_column_info(X, # https://github.com/scikit-learn/scikit-learn/blob/2beed55847ee70d363bdbfe14ee4401438fba057/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py # max_bins is ignored - def _check_categories(categorical_features, X): """Check and validate categorical features in X diff --git a/ISLP/models/generic_selector.py b/ISLP/models/generic_selector.py index b0261e9..7c9329e 100644 --- a/ISLP/models/generic_selector.py +++ b/ISLP/models/generic_selector.py @@ -28,7 +28,10 @@ import scipy as sp from sklearn.metrics import get_scorer -from sklearn.base import (clone, MetaEstimatorMixin) +from sklearn.base import (clone, + MetaEstimatorMixin, + is_classifier, + is_regressor) from 
sklearn.model_selection import cross_val_score from joblib import Parallel, delayed @@ -149,13 +152,13 @@ def __init__(self, self.scoring = scoring if scoring is None: - if self.est_._estimator_type == 'classifier': + if is_classifier(self.est_): scoring = 'accuracy' - elif self.est_._estimator_type == 'regressor': + elif is_regressor(self.est_): scoring = 'r2' else: - raise AttributeError('Estimator must ' - 'be a Classifier or Regressor.') + scoring = None + if isinstance(scoring, str): self.scorer = get_scorer(scoring) else: @@ -486,6 +489,9 @@ def _calc_score(estimator, pre_dispatch='2*n_jobs', **fit_params): + if scorer is None: + scorer = lambda estimator, X, y: estimator.score(X, y) + X_state = build_submodel(X, state) if cv: diff --git a/ISLP/models/model_spec.py b/ISLP/models/model_spec.py index c5be3f9..d970bb7 100644 --- a/ISLP/models/model_spec.py +++ b/ISLP/models/model_spec.py @@ -35,7 +35,7 @@ DOCACHE = False -class Variable(NamedTuple): +class Feature(NamedTuple): """ An element in a model matrix that will build @@ -49,29 +49,64 @@ class Variable(NamedTuple): pure_columns: bool=False override_encoder_colnames: bool=False + #### contrast specific code class Contrast(TransformerMixin, BaseEstimator): - """ - Contrast encoding for categorical variables. - """ def __init__(self, method='drop', drop_level=None): + """ + Contrast encoding for categorical variables. + + Parameters + ---------- + method : ['drop', 'sum', None, callable] + If 'drop', then a column of the one-hot + encoding will be dropped. If 'sum', then the sum of + coefficients is constrained to sum to 1. + If `None`, the full one-hot encoding is returned. + Finally, if callable, then it should take the number of + levels of the category as a single argument and return + an appropriate contrast of the full one-hot encoding. + + drop_level : str (optional) + If not None, this level of the category + will be dropped if `method=='drop'`. 
+ + """ self.method = method self.drop_level = drop_level - def fit(self, X): + def fit(self, X, y=None): + + """ + Construct contrast of categorical variable + for use in building a design matrix. + + Parameters + ---------- + X : array-like + X on which model matrix will be evaluated. + If a :py:class:`pd.DataFrame` or :py:class:`pd.Series`, variables that are of + categorical dtype will be treated as categorical. + + Returns + ------- + F : array-like + Columns of design matrix implied by the + categorical variable. + + """ Xa = np.asarray(X).reshape((-1,1)) self.encoder_ = OneHotEncoder(drop=None, - sparse=False).fit(Xa) + sparse_output=False).fit(Xa) cats = self.encoder_.categories_[0] column_names = [str(n) for n in cats] - if isinstance(X, pd.DataFrame): # expecting a column, we take .iloc[:,0] X = X.iloc[:,0] @@ -98,7 +133,7 @@ def fit(self, X): if self.method == 'drop': self.columns_ = [column_names[j] for j in colmap] self.contrast_matrix_ = np.identity(len(cats)) - keep = np.ones(len(cats), np.bool) + keep = np.ones(len(cats), bool) keep[drop_idx] = 0 self.contrast_matrix_ = self.contrast_matrix_[:,keep] self.contrast_matrix_ = self.contrast_matrix_[:,colmap] @@ -119,6 +154,7 @@ def fit(self, X): raise ValueError('method must be one of ["drop", "sum", None] or a callable' + 'that returns a contrast matrix and column names given the number' + ' of levels') + return self def transform(self, X): @@ -136,22 +172,23 @@ def transform(self, X): class ModelSpec(TransformerMixin, BaseEstimator): - ''' - - Parameters + '''Parameters ---------- terms : sequence (optional) + Sequence of sets whose elements are columns of *X* when fit. For :py:class:`pd.DataFrame` these can be column names. intercept : bool (optional) + Include a column for intercept? categorical_features : array-like of {bool, int} of shape (n_features) or shape (n_categorical_features,), default=None. + Indicates the categorical features. 
Will be ignored if *X* is a :py:class:`pd.DataFrame` or :py:class:`pd.Series`. @@ -160,25 +197,31 @@ class ModelSpec(TransformerMixin, BaseEstimator): - integer array-like : integer indices indicating categorical features. - default_encoders : dict - Dictionary whose keys are elements of *terms* and values - are transforms to be applied to the associate columns in the model matrix - by running the *fit_transform* method when *fit* is called and overwriting - these values in the dictionary. + categorical_encoders : dict + + Dictionary whose keys are elements of *terms* that represent + **categorical variables**. Its values are transforms to be + applied to the associate columns in the model matrix by + running the *fit_transform* method when *fit* is called and + overwriting these values in the dictionary. + ''' def __init__(self, terms=[], intercept=True, categorical_features=None, - default_encoders={'categorical': Contrast(method='drop'), - 'ordinal': OrdinalEncoder()} + categorical_encoders={} ): self.intercept = intercept self.terms = terms self.categorical_features = categorical_features - self.default_encoders = default_encoders + + self.categorical_encoders = categorical_encoders + self.categorical_encoders_ = {'ordinal': OrdinalEncoder(), + 'categorical': Contrast(method='drop')} + self.categorical_encoders_.update(**categorical_encoders) def fit(self, X, y=None): @@ -203,7 +246,7 @@ def fit(self, X, y=None): X) self.columns_ = X.columns if self.is_categorical_ is None: - self.is_categorical_ = np.zeros(X.shape[1], np.bool) + self.is_categorical_ = np.zeros(X.shape[1], bool) self.is_ordinal_ = pd.Series(self.is_ordinal_, index=self.columns_) self.is_categorical_ = pd.Series(self.is_categorical_, @@ -214,32 +257,33 @@ def fit(self, X, y=None): self.known_categories_) = _check_categories(categorical_features, X) if self.is_categorical_ is None: - self.is_categorical_ = np.zeros(X.shape[1], np.bool) + self.is_categorical_ = np.zeros(X.shape[1], bool) 
self.is_ordinal_ = np.zeros(self.is_categorical_.shape, - np.bool) + bool) self.columns_ = np.arange(X.shape[1]) - self.variables_ = {} + self.features_ = {} self.encoders_ = {} self.column_info_ = _get_column_info(X, self.columns_, - self.is_categorical_, - self.is_ordinal_, - default_encoders=self.default_encoders) - # include each column as a Variable + np.asarray(self.is_categorical_), + np.asarray(self.is_ordinal_), + categorical_encoders=self.categorical_encoders_) + + # include each column as a Feature # so that their columns are built if needed for col_ in self.columns_: - self.variables_[col_] = Variable((col_,), str(col_), None, pure_columns=True) + self.features_[col_] = Feature((col_,), str(col_), None, pure_columns=True) - # find possible interactions and other variables + # find possible interactions and other features tmp_cache = {} for term in self.terms: - if isinstance(term, Variable): - self.variables_[term] = term + if isinstance(term, Feature): + self.features_[term] = term build_columns(self.column_info_, X, term, @@ -247,18 +291,18 @@ def fit(self, X, y=None): col_cache=tmp_cache, fit=True) # these encoders won't have been fit yet for var in term.variables: - if var not in self.variables_ and isinstance(var, Variable): - self.variables_[var] = var + if var not in self.features_ and isinstance(var, Feature): + self.features_[var] = var elif term not in self.column_info_: - # a tuple of variables represents an interaction + # a tuple of features represents an interaction if type(term) == type((1,)): names = [] column_map = {} column_names = {} idx = 0 for var in term: - if var in self.variables_: - var = self.variables_[var] + if var in self.features_: + var = self.features_[var] cols, cur_names = build_columns(self.column_info_, X, var, @@ -270,17 +314,17 @@ def fit(self, X, y=None): idx += cols.shape[1] names.append(var.name) encoder_ = Interaction(names, column_map, column_names) - self.variables_[term] = Variable(term, ':'.join(n for n in 
names), encoder_) + self.features_[term] = Feature(term, ':'.join(n for n in names), encoder_) elif isinstance(term, Column): - self.variables_[term] = Variable((term,), term.name, None, pure_columns=True) + self.features_[term] = Feature((term,), term.name, None, pure_columns=True) else: - raise ValueError('each element in a term should be a Variable, Column or identify a column') + raise ValueError('each element in a term should be a Feature, Column or identify a column') # build the mapping of terms to columns and column names self.column_names_ = {} self.column_map_ = {} - self.terms_ = [self.variables_[t] for t in self.terms] + self.terms_ = [self.features_[t] for t in self.terms] idx = 0 if self.intercept: @@ -310,64 +354,48 @@ def transform(self, X, y=None): Ignored. This parameter exists only for compatibility with :py:class:`sklearn.pipeline.Pipeline`. """ - return self.build_submodel(X, self.terms_) + check_is_fitted(self) + return build_model(self.column_info_, + X, + self.terms_, + intercept=self.intercept, + encoders=self.encoders_) # ModelSpec specific methods - def build_submodel(self, X, terms): + @property + def names(self, help='Name for each term in model specification.'): + names = [] + if self.intercept: + names = ['intercept'] + return names + [t.name for t in self.terms_] + + def build_submodel(self, + X, + terms): """ - Construct design matrix on a - sequence of terms and X after - fitting. + Build design on X after fitting. Parameters ---------- X : array-like - X on which model matrix will be evaluated. + X on which columns are evaluated. + + terms : [Feature] + Sequence of features Returns ------- - df : np.ndarray or pd.DataFrame - Design matrix. 
+ D : array-like + Design matrix created with `terms` """ - check_is_fitted(self) - - dfs = [] - - col_cache = {} # avoid recomputing the same columns - - if self.intercept: - df = pd.DataFrame({'intercept':np.ones(X.shape[0])}) - if isinstance(X, (pd.Series, pd.DataFrame)): - df.index = X.index - dfs.append(df) - - for term_ in terms: - term_df = build_columns(self.column_info_, - X, - term_, - col_cache=col_cache, - encoders=self.encoders_, - fit=False)[0] - dfs.append(term_df) - - if len(dfs): - if isinstance(X, (pd.Series, pd.DataFrame)): - df = pd.concat(dfs, axis=1) - df.index = X.index - return df - else: - return np.column_stack(dfs) - else: # return a 0 design - zero = np.zeros(X.shape[0]) - if isinstance(X, (pd.Series, pd.DataFrame)): - df = pd.DataFrame({'zero': zero}) - df.index = X.index - return df - else: - return zero + return build_model(self.column_info_, + X, + terms, + intercept=self.intercept, + encoders=self.encoders_) def build_sequence(self, X, @@ -375,6 +403,21 @@ def build_sequence(self, """ Build implied sequence of submodels based on successively including more terms. + + Parameters + ---------- + X : array-like + X on which columns are evaluated. + + anova_type: str + One of "sequential" or "drop". + + Returns + ------- + + models : generator + Generator for sequence of models for ANOVA. + """ check_is_fitted(self) @@ -427,8 +470,11 @@ def fit_encoder(encoders, var, X): Parameters ---------- - var : Variable - Variable whose encoder will be fit. + encoders : dict + Dictionary of encoders for each feature. + + var : Feature + Feature whose encoder will be fit. X : array-like X on which encoder will be fit. @@ -440,7 +486,7 @@ def fit_encoder(encoders, var, X): def build_columns(column_info, X, var, encoders={}, col_cache={}, fit=False): """ - Build columns for a Variable from X. + Build columns for a Feature from X. 
Parameters ---------- @@ -452,10 +498,13 @@ def build_columns(column_info, X, var, encoders={}, col_cache={}, fit=False): X : array-like X on which columns are evaluated. - var : Variable - Variable whose columns will be built, typically a key in `column_info`. + var : Feature + Feature whose columns will be built, typically a key in `column_info`. - col_cache: + encoders : dict + Dict that stores encoder of each Feature. + + col_cache: dict Dict where columns will be stored -- if `var.name` in `col_cache` then just returns those columns. @@ -480,7 +529,7 @@ def build_columns(column_info, X, var, encoders={}, col_cache={}, fit=False): cols, name = col_cache[joblib_hash([var, X])] else: cols, names = var.get_columns(X, fit=fit) - elif isinstance(var, Variable): + elif isinstance(var, Feature): cols = [] names = [] for v in var.variables: @@ -495,16 +544,18 @@ def build_columns(column_info, X, var, encoders={}, col_cache={}, fit=False): cols = np.column_stack(cols) if len(names) != cols.shape[1]: names = ['{0}[{1}]'.format(var.name, j) for j in range(cols.shape[1])] - if var.encoder: + df_cols = pd.DataFrame(np.asarray(cols), + columns=names) try: check_is_fitted(var.encoder) if fit and var not in encoders: raise ValueError('encoder has already been fit previously') except NotFittedError as e: if fit: - fit_encoder(var, pd.DataFrame(np.asarray(cols), - columns=names)) + fit_encoder(encoders, + var, + df_cols) # known issue with Pipeline # https://github.com/scikit-learn/scikit-learn/issues/18648 elif isinstance(var.encoder, Pipeline): @@ -514,9 +565,9 @@ def build_columns(column_info, X, var, encoders={}, col_cache={}, fit=False): except Exception as e: # was not the NotFitted raise ValueError(e) if var.use_transform: - cols = var.encoder.transform(cols) + cols = var.encoder.transform(df_cols) else: - cols = var.encoder.predict(cols) + cols = var.encoder.predict(df_cols) if hasattr(var.encoder, 'columns_') and not var.override_encoder_colnames: names = 
var.encoder.columns_ else: @@ -527,7 +578,7 @@ def build_columns(column_info, X, var, encoders={}, col_cache={}, fit=False): else: - raise ValueError('expecting either a column or a Variable') + raise ValueError('expecting either a column or a Feature') val = pd.DataFrame(np.asarray(cols), columns=names) if isinstance(X, (pd.DataFrame, pd.Series)): @@ -537,16 +588,88 @@ def build_columns(column_info, X, var, encoders={}, col_cache={}, fit=False): col_cache[joblib_hash([var.name, X])] = (val, names) return val, names +def build_model(column_info, + X, + terms, + intercept=True, + encoders={}): + + """ + Construct design matrix on a + sequence of terms and X after + fitting. + + Parameters + ---------- + column_info: dict + Dictionary with values specifying sets of columns to + be concatenated into a design matrix. + + X : array-like + X on which columns are evaluated. + + terms : [Feature] + Sequence of features + + encoders : dict + Dict that stores encoder of each Feature. -def derived_variable(variables, encoder=None, name=None, use_transform=True): + Returns + ------- + df : np.ndarray or pd.DataFrame + Design matrix. 
""" - Create a Variable, optionally + + dfs = [] + + col_cache = {} # avoid recomputing the same columns + + if intercept: + df = pd.DataFrame({'intercept':np.ones(X.shape[0])}) + if isinstance(X, (pd.Series, pd.DataFrame)): + df.index = X.index + dfs.append(df) + + for term_ in terms: + term_df = build_columns(column_info, + X, + term_, + col_cache=col_cache, + encoders=encoders, + fit=False)[0] + dfs.append(term_df) + + if len(dfs): + if isinstance(X, (pd.Series, pd.DataFrame)): + df = pd.concat(dfs, axis='columns') + df.index = X.index + else: + return np.column_stack(dfs).astype(float) + else: # return a 0 design + zero = np.zeros(X.shape[0]) + if isinstance(X, (pd.Series, pd.DataFrame)): + df = pd.DataFrame({'zero': zero}) + df.index = X.index + else: + return zero + + # if we reach here, we will be returning a DataFrame + # make sure all columns are floats + + for i, col in enumerate(df.columns): + if df.iloc[:,i].dtype == bool: + df[col] = df.iloc[:,i].astype(float) + return df + +def derived_feature(variables, encoder=None, name=None, use_transform=True): + """ + Create a Feature, optionally applying an encoder to the stacked columns. Parameters ---------- - variables : [column identifier, Column, Variable] + variables : [column identifier, Column, Feature] Variables to apply transform to. Could be column identifiers or variables: all columns will be stacked before encoding. 
@@ -560,12 +683,12 @@ def derived_variable(variables, encoder=None, name=None, use_transform=True): Returns ------- - var : Variable + var : Feature """ if name is None: name = str(encoder) - var = Variable(tuple([v for v in variables]), + var = Feature(tuple([v for v in variables]), name, encoder, use_transform=use_transform, @@ -590,7 +713,7 @@ def contrast(col, Returns ------- - var : Variable + var : Feature """ @@ -606,7 +729,7 @@ def contrast(col, is_categorical=True, encoder=encoder) -def ordinal(col, *args, **kwargs): +def ordinal(col, name=None, *args, **kwargs): """ Create ordinal encoding of categorical feature. @@ -618,7 +741,7 @@ def ordinal(col, *args, **kwargs): Returns ------- - var : Variable + var : Feature """ @@ -637,7 +760,7 @@ def ordinal(col, *args, **kwargs): name = f'{shortname}({name})' - return derived_variable([col], + return derived_feature([col], name=name, encoder=encoder) @@ -648,7 +771,7 @@ def poly(col, name=None): """ - Create a polynomial Variable + Create a polynomial Feature for a given column. Additional `args` and `kwargs` @@ -676,7 +799,7 @@ def poly(col, Returns ------- - var : Variable + var : Feature """ shortname, klass = 'poly', Poly encoder = klass(degree=degree, @@ -701,13 +824,13 @@ def poly(col, name = f'{shortname}({name})' - return derived_variable([col], + return derived_feature([col], name=name, encoder=encoder) def ns(col, intercept=False, name=None, **spline_args): """ - Create a natural spline Variable + Create a natural spline Feature for a given column. 
Additional *spline_args* @@ -727,7 +850,7 @@ def ns(col, intercept=False, name=None, **spline_args): Returns ------- - var : Variable + var : Feature """ shortname, klass = 'ns', NaturalSpline @@ -744,13 +867,13 @@ def ns(col, intercept=False, name=None, **spline_args): name = f'{shortname}({name})' encoder = klass(intercept=intercept, **spline_args) - return derived_variable([col], + return derived_feature([col], name=name, encoder=encoder) def bs(col, intercept=False, name=None, **spline_args): """ - Create a B-spline Variable + Create a B-spline Feature for a given column. Additional args and *spline_args* @@ -771,7 +894,7 @@ def bs(col, intercept=False, name=None, **spline_args): Returns ------- - var : Variable + var : Feature """ shortname, klass = 'bs', BSpline @@ -788,7 +911,7 @@ def bs(col, intercept=False, name=None, **spline_args): name = f'{shortname}({name})' encoder = klass(intercept=intercept, **spline_args) - return derived_variable([col], + return derived_feature([col], name=name, encoder=encoder) @@ -803,13 +926,13 @@ def pca(variables, name, scale=False, **pca_args): Parameters ---------- - variables : [column identifier, Column or Variable] + variables : [column identifier, Column or Feature] Sequence whose columns will be encoded by PCA. Returns ------- - var : Variable + var : Feature """ shortname, klass = 'pca', PCA @@ -824,52 +947,10 @@ def pca(variables, name, scale=False, **pca_args): if _args: name = ', '.join([name, _args]) - return derived_variable(variables, + return derived_feature(variables, name=f'{shortname}({name})', encoder=encoder) -# def clusterer(variables, name, transform, scale=False): -# """ -# Create PCA encoding of features -# from a sequence of variables. - -# Additional `args` and `kwargs` -# are passed to `PCA`. - -# Parameters -# ---------- - -# variables : [column identifier, Column or Variable] -# Sequence whose columns will be encoded by PCA. 
- -# name: str -# name for the Variable - -# transform: Transformer -# A transform with a `predict` method. - -# Returns -# ------- - -# var : Variable - -# """ - -# if scale: -# scaler = StandardScaler(with_mean=True, -# with_std=True) -# encoder = make_pipeline(scaler, transform) -# else: -# encoder = transform - -# intermed = Variable((derived_variable(*variables, -# name='cluster_intermed', -# encoder=encoder, -# use_transform=False),), -# name=f'Cat({encoder}({name}))', -# encoder=Contrast(method='drop')) - -# return intermed def _argstring(*args, **kwargs): _args = ', '.join([str(a) for a in args]) diff --git a/ISLP/models/sklearn_wrap.py b/ISLP/models/sklearn_wrap.py index 123130b..121da75 100644 --- a/ISLP/models/sklearn_wrap.py +++ b/ISLP/models/sklearn_wrap.py @@ -49,7 +49,17 @@ def __init__(self, self.model_type = model_type self.model_spec = model_spec self.model_args = model_args - + + def __sklearn_tags__(self): + tags = super().__sklearn_tags__() + if self.model_type == sm.OLS: + tags.estimator_type = 'regressor' + elif (issubclass(self.model_type, sm.GLM) and + 'family' in self.model_args and + isinstance(self.model_args.get('family', None), sm.families.Binomial)): + tags.estimator_type = 'classifier' + return tags + def fit(self, X, y): """ Fit a statsmodel model @@ -171,6 +181,9 @@ def __init__(self, self.cv = cv self.scoring = scoring + def __sklearn_tags__(self): + tags = super().__sklearn_tags__() + return tags def fit(self, X, y): """ diff --git a/ISLP/models/strategy.py b/ISLP/models/strategy.py index 028ac94..f237db3 100644 --- a/ISLP/models/strategy.py +++ b/ISLP/models/strategy.py @@ -74,9 +74,9 @@ def __init__(self, Minumum number of terms to select max_terms: int (default: 0) Maximum number of terms to select - lower_terms: [Variable] + lower_terms: [Feature] Subset of terms to keep: smallest model. - upper_terms: [Variable] + upper_terms: [Feature] Largest possible model. 
validator: callable Callable taking a single argument: state, @@ -216,9 +216,9 @@ class Stepwise(MinMaxCandidates): Minumum number of terms to select max_terms: int (default: 1) Maximum number of terms to select - lower_terms: [Variable] + lower_terms: [Feature] Subset of terms to keep: smallest model. - upper_terms: [Variable] + upper_terms: [Feature] Largest possible model. constraints: {array-like} (optional), shape [n_terms, n_terms] Boolean matrix decribing a dag with [i,j] nonzero implying that j is @@ -342,9 +342,9 @@ def first_peak(model_spec, Minumum number of terms to select max_terms: int (default: 1) Maximum number of terms to select - lower_terms: [Variable] + lower_terms: [Feature] Subset of terms to keep: smallest model. - upper_terms: [Variable] + upper_terms: [Feature] Largest possible model. initial_terms: column identifiers, default=[] Subset of terms to be used to initialize when direction @@ -441,9 +441,9 @@ def fixed_steps(model_spec, max_terms: int (default: None) Maximum number of terms to select. If None defaults to number of terms in *model_spec*. - lower_terms: [Variable] + lower_terms: [Feature] Subset of terms to keep: smallest model. - upper_terms: [Variable] + upper_terms: [Feature] Largest possible model. initial_terms: column identifiers, default=[] Subset of terms to be used to initialize. @@ -506,9 +506,9 @@ def min_max(model_spec, Minumum number of terms to select max_terms: int (default: 1) Maximum number of terms to select - lower_terms: [Variable] + lower_terms: [Feature] Subset of terms to keep: smallest model. - upper_terms: [Variable] + upper_terms: [Feature] Largest possible model. 
validator: callable Callable taking a single argument: state, diff --git a/ISLP/survival.py b/ISLP/survival.py index b11967b..c352942 100644 --- a/ISLP/survival.py +++ b/ISLP/survival.py @@ -14,7 +14,7 @@ def sim_time(linpred, cum_hazard, - rng): + rng=None): """ Simulate a survival time for a cumulative hazard function $H$ with cumulative hazard @@ -39,6 +39,9 @@ def sim_time(linpred, Used to generate survival times. """ + if rng is None: + rng = np.random.default_rng() + U = rng.uniform() B = - np.log(U) / np.exp(linpred) lower, upper = 1, 2 diff --git a/ISLP/svm.py b/ISLP/svm.py index bedf288..8afcd5a 100644 --- a/ISLP/svm.py +++ b/ISLP/svm.py @@ -28,6 +28,12 @@ def plot(X, ''' Graphical representation of fitted support vector classifier. + There are two types of support vectors: + + - Points violating the margin but correctly classified. These are marked with a black '+'. + + - Misclassified points. These are marked with a red 'x'. + Parameters ---------- @@ -89,7 +95,7 @@ def plot(X, # draw the points - ax.scatter(X0, X1, c=Y, cmap=scatter_cmap) + ax.scatter(X0, X1, c=Y, cmap=scatter_cmap, s=200) # add the contour @@ -113,8 +119,27 @@ def plot(X, cmap=decision_cmap, alpha=alpha) - # add the support vectors + decision_val = svm.decision_function(X_pred) - ax.scatter(X[svm.support_,features[0]], - X[svm.support_,features[1]], marker='+', c='k', s=200) + # add the support vectors + if svm.classes_.shape[0] == 2: # 2-class problem + + ax.contourf(xval, + yval, + decision_val.reshape(yval.shape), + levels=[-1,1], + cmap=decision_cmap, + alpha=alpha) + + D = svm.decision_function(X[svm.support_]) + Y_ = (2 * (Y[svm.support_] == svm.classes_[1]) - 1) + violate_margin = (Y_ * D) > 0 + ax.scatter(X[svm.support_,features[0]][violate_margin], + X[svm.support_,features[1]][violate_margin], marker='+', c='k', s=50) + misclassified = ~violate_margin + ax.scatter(X[svm.support_,features[0]][misclassified], + X[svm.support_,features[1]][misclassified], marker='x', c='r', 
s=50) + else: + ax.scatter(X[svm.support_,features[0]], + X[svm.support_,features[1]], marker='+', c='k', s=50) diff --git a/ISLP/torch/imdb.py b/ISLP/torch/imdb.py index 617489d..3dfacfe 100644 --- a/ISLP/torch/imdb.py +++ b/ISLP/torch/imdb.py @@ -12,7 +12,6 @@ import torch from torch.utils.data import TensorDataset from scipy.sparse import load_npz -from pkg_resources import resource_filename from pickle import load as load_pickle import urllib diff --git a/ISLP/torch/lightning.py b/ISLP/torch/lightning.py index 82c45db..d7056ec 100644 --- a/ISLP/torch/lightning.py +++ b/ISLP/torch/lightning.py @@ -7,14 +7,14 @@ DataLoader, Dataset) from torch import tensor, Generator, concat -from torchvision import transforms + from torch.utils.data import TensorDataset from torchmetrics import Accuracy from pytorch_lightning import (LightningModule, LightningDataModule) -from pytorch_lightning.utilities.distributed import rank_zero_only +from pytorch_lightning.utilities import rank_zero_only from pytorch_lightning.callbacks import Callback class SimpleDataModule(LightningDataModule): @@ -132,14 +132,15 @@ def __init__(self, model, loss, optimizer=None, - metrics={}, + metrics=None, on_epoch=True, pre_process_y_for_metrics=lambda y: y): super(SimpleModule, self).__init__() self.model = model - self.loss = loss or nn.MSELoss() + self.loss = loss + optimizer = optimizer or RMSprop(model.parameters()) self._optimizer = optimizer self.metrics = metrics @@ -160,8 +161,10 @@ def training_step(self, batch, batch_idx): y_ = self.pre_process_y_for_metrics(y) for _metric in self.metrics.keys(): + pl_metric = self.metrics[_metric] self.log(f"train_{_metric}", - self.metrics[_metric](preds, y_), + pl_metric(preds.to(pl_metric.device), + y_.to(pl_metric.device)), on_epoch=self.on_epoch) return loss @@ -181,22 +184,36 @@ def configure_optimizers(self): @staticmethod def regression(model, + metrics=None, + device='cpu', **kwargs): - loss = nn.MSELoss() + + if metrics is None: + metrics = {} + 
+ loss = nn.MSELoss().to(device) + if device is not None: + for key, metric in metrics.items(): + metrics[key] = metric.to(device) return SimpleModule(model, loss, + metrics=metrics, **kwargs) @staticmethod def binary_classification(model, - metrics={}, - device=None, + metrics=None, + device='cpu', **kwargs): + + if metrics is None: + metrics = {} + loss = nn.BCEWithLogitsLoss() if 'accuracy' not in metrics: - metrics['accuracy'] = Accuracy() + metrics['accuracy'] = Accuracy('binary') if device is not None: - for key, metric in metrics: + for key, metric in metrics.items(): metrics[key] = metric.to(device) return SimpleModule(model, loss, @@ -206,14 +223,20 @@ def binary_classification(model, @staticmethod def classification(model, - metrics={}, - device=None, + num_classes, + metrics=None, + device='cpu', **kwargs): - loss = nn.CrossEntropyLoss() + + if metrics is None: + metrics = {} + + loss = nn.CrossEntropyLoss().to(device) if 'accuracy' not in metrics: - metrics['accuracy'] = Accuracy() + metrics['accuracy'] = Accuracy('multiclass', + num_classes=num_classes) if device is not None: - for key, metric in metrics: + for key, metric in metrics.items(): metrics[key] = metric.to(device) return SimpleModule(model, loss, @@ -233,7 +256,7 @@ def on_validation_batch_start(self, pl_module, batch, batch_idx, - dataloader_idx): + dataloader_idx=0): x, y = batch self.val_preds.append(pl_module.forward(x)) self.val_targets.append(y) @@ -252,8 +275,10 @@ def on_validation_epoch_end(self, on_epoch=pl_module.on_epoch) for _metric in pl_module.metrics.keys(): + pl_metric = pl_module.metrics[_metric] pl_module.log(f"valid_{_metric}", - pl_module.metrics[_metric](preds, targets_), + pl_metric(preds.to(pl_metric.device), + targets_.to(pl_metric.device)), on_epoch=pl_module.on_epoch) def on_test_epoch_start(self, @@ -267,7 +292,7 @@ def on_test_batch_start(self, pl_module, batch, batch_idx, - dataloader_idx): + dataloader_idx=0): x, y = batch 
self.test_preds.append(pl_module.forward(x)) self.test_targets.append(y) @@ -286,7 +311,9 @@ def on_test_epoch_end(self, on_epoch=pl_module.on_epoch) for _metric in pl_module.metrics.keys(): + pl_metric = pl_module.metrics[_metric] pl_module.log(f"test_{_metric}", - pl_module.metrics[_metric](preds, targets_), + pl_metric(preds.to(pl_metric.device), + targets_.to(pl_metric.device)), on_epoch=pl_module.on_epoch) diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..dd8ced0 --- /dev/null +++ b/LICENSE @@ -0,0 +1,27 @@ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + (1) Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + (2) Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + + (3)The name of the author may not be used to + endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, +INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING +IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
\ No newline at end of file diff --git a/README.md b/README.md index eb283fa..546ddba 100644 --- a/README.md +++ b/README.md @@ -1,43 +1,99 @@ # ISLP + +[![All Contributors](https://img.shields.io/badge/all_contributors-3-orange.svg?style=flat-square)](#contributors-) + This package collects data sets and various helper functions for ISLP. ## Install instructions -### Mac OS X +### Mac OS X / Linux + +We generally recommend creating a [conda](https://anaconda.org) environment to isolate any code +from other dependencies. The `ISLP` package does not have unusual dependencies, but this is still +good practice. To create a conda environment in a Mac OS X or Linux environment run: ```{python} -pip install ISLP +conda create --name islp +``` + +To run python code in this environment, you must activate it: + +```{python} +conda activate islp ``` ### Windows -See the [https://packaging.python.org/en/latest/tutorials/installing-packages/#ensure-you-can-run-pip-from-the-command-line](python-packaging-instructions) for a simple way to run `pip` within -Jupyter. +On windows, create a `Python` environment called `islp` in the Anaconda app. This can be done by selecting `Environments` on the left hand side of the app's screen. After creating the environment, open a terminal within that environment by clicking on the "Play" button. -Alternatively, within a python shell, the following commands should install `ISLP`: + +## Installing `ISLP` + +Having completed the steps above, we use `pip` to install the `ISLP` package: ```{python} -import os, sys -cmd = f'{sys.executable} -m pip install ISLP' -os.system(cmd) +pip install ISLP ``` ### Torch requirements The `ISLP` labs use `torch` and various related packages for the lab on deep learning. The requirements -can be found [here](requirements.txt). 
Alternatively, you can install them directly using `pip` +are included in the requirements for `ISLP` with the exception of those needed +for the labs which are included in the [requirements for the labs](https://github.com/intro-stat-learning/ISLP_labs/blob/main/requirements.txt). + +## Jupyter + +### Mac OS X + +If JupyterLab is not already installed, run the following after having activated your `islp` environment: ```{python} -reqs = 'https://raw.githubusercontent.com/jonathan-taylor/ISLP/master/requirements.txt' -cmd = f'{sys.executable} -m pip install -r {reqs}' -os.system(cmd) +pip install jupyterlab ``` +### Windows + +Either use the same `pip` command above or install JupyterLab from the `Home` tab. Ensure that the environment +is your `islp` environment. This information appears near the top left in the Anaconda `Home` page. + + ## Documentation -See the [read the docs](https://islp.readthedocs.io/en/latest/models.html) +See the [docs](https://intro-stat-learning.github.io/ISLP/labs.html) for the latest documentation. + +## Authors + +- Jonathan Taylor +- Trevor Hastie +- Gareth James +- Robert Tibshirani +- Daniela Witten + + + + +## Contributors ✨ + +Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/docs/en/emoji-key)): + + + + + + + + + + + + +
danielawitten
danielawitten

💻 🖋
trevorhastie
trevorhastie

💻 🖋
tibshirani
tibshirani

💻 🖋
+ + + +This project follows the [all-contributors](https://github.com/all-contributors/all-contributors) specification. Contributions of any kind welcome! \ No newline at end of file diff --git a/docs/ISLP_labs b/docs/ISLP_labs new file mode 160000 index 0000000..5d793a3 --- /dev/null +++ b/docs/ISLP_labs @@ -0,0 +1 @@ +Subproject commit 5d793a33a8d5025181439b8d0f193c37c69ee20a diff --git a/docs/README.rst b/docs/README.rst new file mode 100644 index 0000000..41df584 --- /dev/null +++ b/docs/README.rst @@ -0,0 +1,15 @@ +Deep learning +============= + +This lab should be run as a notebook and saved + +Ridge regression +================ + +There is a snippet that should be inserted to remove the many warnings raised. + +Frozen reqs +=========== + +The versions of the labs are referred to in `source/installation.myst`, `source/labs.rst`. Version built +on `readthedocs` is references in `fix_and_run_notebooks.py` diff --git a/docs/fix_and_clear_notebooks.py b/docs/fix_and_clear_notebooks.py new file mode 100644 index 0000000..50eebe2 --- /dev/null +++ b/docs/fix_and_clear_notebooks.py @@ -0,0 +1,127 @@ + +from dataclasses import dataclass +from copy import copy + +import shlex +import subprocess +import os +import sys +import json +import nbformat +from argparse import ArgumentParser + +def get_version(): + import __main__ + dirname = os.path.split(__main__.__file__)[0] + sys.path.append(os.path.join(dirname, 'source')) + from conf import docs_version + sys.path = sys.path[:-1] + return docs_version + + +@dataclass +class Lab(object): + + labfile: str + version: str = 'v2' + rm_md: bool = True + + def __post_init__(self): + self.labfile = os.path.abspath(self.labfile) + + def fix_header(self): + labname = os.path.split(self.labfile)[1] + base = os.path.splitext(self.labfile)[0] + args = shlex.split(f'jupytext --set-formats ipynb,md:myst {self.labfile}') + subprocess.run(args) + + # successful run of jupytext + myst = open(f'{base}.md').read().strip() + split_myst = 
myst.split('\n') + new_myst = [] + + colab_code = f''' + +Open In Colab + + +[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/intro-stat-learning/ISLP_labs/{self.version}?labpath={labname}) + +''' + + chapter_buffer = 200 # should use a regex... + for l in split_myst[:chapter_buffer]: # assumes Chapter appears in first 200 linesmyst.split('\n') + if l.strip()[:9] != '# Chapter': # exclude the line with "# Chapter" + if 'Lab:' in l: + l = l.replace('Lab:', '') + '\n' + colab_code + new_myst.append(l) + + myst = '\n'.join(new_myst + split_myst[chapter_buffer:]) + + open(f'{base}.md', 'w').write(myst) + + args = shlex.split(f'jupytext --sync {base}.ipynb') + subprocess.run(args) + + args = shlex.split(f'jupytext --set-formats Rmd,ipynb {base}.ipynb') + subprocess.run(args) + + args = shlex.split(f'jupytext --sync {base}.ipynb') + subprocess.run(args) + + if self.rm_md: + subprocess.run(['rm', f'{base}.md']) + +def fix_Ch06(Ch06_nbfile): + + nb = nbformat.read(open(Ch06_nbfile), 4) + + md_cell = copy(nb.cells[0]) + md_cell['id'] = md_cell['id'] + '_duplicate' + + src = ''' + +```{attention} +Using `skl.ElasticNet` to fit ridge regression +throws up many warnings. We have suppressed them below by a call to `warnings.simplefilter()`. 
+``` + +''' + + md_cell['source'] = [l +'\n' for l in src.split('\n')] + + for i, cell in enumerate(nb.cells): + if cell['cell_type'] == 'code': + code_cell = copy(cell) + code_cell['id'] = code_cell['id'] + '_duplicate' + code_cell['source'] = ['import warnings\n', 'warnings.simplefilter("ignore")\n'] + break + + nb.cells.insert(i, md_cell) + nb.cells.insert(i+1, code_cell) + + nbformat.write(nb, open(Ch06_nbfile, 'w')) + subprocess.run(shlex.split(f'jupytext --sync {Ch06_nbfile}')) + +if __name__ == "__main__": + + docs_version = get_version() + + parser = ArgumentParser() + parser.add_argument('labs', + metavar='N', + type=str, + nargs='+') + parser.add_argument('--rm_md', + dest='rm_md', + action='store_true', + default=False) + + args = parser.parse_args() + + for labfile in args.labs: + l = Lab(labfile=labfile, version=docs_version['labs']) + l.fix_header() + if '06' in labfile: + fix_Ch06(labfile) + diff --git a/docs/jupyterbook/datasets/Auto.ipynb b/docs/jupyterbook/datasets/Auto.ipynb index f84fbfc..b88ea02 100644 --- a/docs/jupyterbook/datasets/Auto.ipynb +++ b/docs/jupyterbook/datasets/Auto.ipynb @@ -88,9 +88,9 @@ "formats": "source/datasets///ipynb,jupyterbook/datasets///md:myst,jupyterbook/datasets///ipynb" }, "kernelspec": { - "display_name": "islp_test", + "display_name": "python3", "language": "python", - "name": "islp_test" + "name": "python3" } }, "nbformat": 4, diff --git a/docs/jupyterbook/datasets/Auto.md b/docs/jupyterbook/datasets/Auto.md index fe851ed..627d70b 100644 --- a/docs/jupyterbook/datasets/Auto.md +++ b/docs/jupyterbook/datasets/Auto.md @@ -5,11 +5,11 @@ jupytext: extension: .md format_name: myst format_version: 0.13 - jupytext_version: 1.14.1 + jupytext_version: 1.14.5 kernelspec: - display_name: islp_test + display_name: python3 language: python - name: islp_test + name: python3 --- # Auto Data diff --git a/docs/jupyterbook/datasets/Bikeshare.ipynb b/docs/jupyterbook/datasets/Bikeshare.ipynb index b0edebc..ddb1053 100644 --- 
a/docs/jupyterbook/datasets/Bikeshare.ipynb +++ b/docs/jupyterbook/datasets/Bikeshare.ipynb @@ -102,9 +102,9 @@ "main_language": "python" }, "kernelspec": { - "display_name": "islp_test", + "display_name": "python3", "language": "python", - "name": "islp_test" + "name": "python3" } }, "nbformat": 4, diff --git a/docs/jupyterbook/datasets/Bikeshare.md b/docs/jupyterbook/datasets/Bikeshare.md index 90e1f7f..380bc1b 100644 --- a/docs/jupyterbook/datasets/Bikeshare.md +++ b/docs/jupyterbook/datasets/Bikeshare.md @@ -7,11 +7,11 @@ jupytext: extension: .md format_name: myst format_version: 0.13 - jupytext_version: 1.14.1 + jupytext_version: 1.14.5 kernelspec: - display_name: islp_test + display_name: python3 language: python - name: islp_test + name: python3 --- # Bike sharing data diff --git a/docs/jupyterbook/datasets/Boston.ipynb b/docs/jupyterbook/datasets/Boston.ipynb index 1b5dce0..569f5b4 100644 --- a/docs/jupyterbook/datasets/Boston.ipynb +++ b/docs/jupyterbook/datasets/Boston.ipynb @@ -95,9 +95,9 @@ "main_language": "python" }, "kernelspec": { - "display_name": "islp_test", + "display_name": "python3", "language": "python", - "name": "islp_test" + "name": "python3" } }, "nbformat": 4, diff --git a/docs/jupyterbook/datasets/Boston.md b/docs/jupyterbook/datasets/Boston.md index 60b6f5e..1146a86 100644 --- a/docs/jupyterbook/datasets/Boston.md +++ b/docs/jupyterbook/datasets/Boston.md @@ -7,11 +7,11 @@ jupytext: extension: .md format_name: myst format_version: 0.13 - jupytext_version: 1.14.1 + jupytext_version: 1.14.5 kernelspec: - display_name: islp_test + display_name: python3 language: python - name: islp_test + name: python3 --- # Boston Data diff --git a/docs/jupyterbook/datasets/BrainCancer.ipynb b/docs/jupyterbook/datasets/BrainCancer.ipynb index fd8e84e..cb75946 100644 --- a/docs/jupyterbook/datasets/BrainCancer.ipynb +++ b/docs/jupyterbook/datasets/BrainCancer.ipynb @@ -95,9 +95,9 @@ "main_language": "python" }, "kernelspec": { - "display_name": 
"islp_test", + "display_name": "python3", "language": "python", - "name": "islp_test" + "name": "python3" } }, "nbformat": 4, diff --git a/docs/jupyterbook/datasets/BrainCancer.md b/docs/jupyterbook/datasets/BrainCancer.md index 3e1a2be..7307a69 100644 --- a/docs/jupyterbook/datasets/BrainCancer.md +++ b/docs/jupyterbook/datasets/BrainCancer.md @@ -7,11 +7,11 @@ jupytext: extension: .md format_name: myst format_version: 0.13 - jupytext_version: 1.14.1 + jupytext_version: 1.14.5 kernelspec: - display_name: islp_test + display_name: python3 language: python - name: islp_test + name: python3 --- # Brain Cancer Data diff --git a/docs/jupyterbook/datasets/Caravan.ipynb b/docs/jupyterbook/datasets/Caravan.ipynb index ad1af58..f093422 100644 --- a/docs/jupyterbook/datasets/Caravan.ipynb +++ b/docs/jupyterbook/datasets/Caravan.ipynb @@ -63,9 +63,9 @@ "main_language": "python" }, "kernelspec": { - "display_name": "islp_test", + "display_name": "python3", "language": "python", - "name": "islp_test" + "name": "python3" } }, "nbformat": 4, diff --git a/docs/jupyterbook/datasets/Caravan.md b/docs/jupyterbook/datasets/Caravan.md index a42ddb1..24f8335 100644 --- a/docs/jupyterbook/datasets/Caravan.md +++ b/docs/jupyterbook/datasets/Caravan.md @@ -7,11 +7,11 @@ jupytext: extension: .md format_name: myst format_version: 0.13 - jupytext_version: 1.14.1 + jupytext_version: 1.14.5 kernelspec: - display_name: islp_test + display_name: python3 language: python - name: islp_test + name: python3 --- # Caravan diff --git a/docs/jupyterbook/datasets/Carseats.ipynb b/docs/jupyterbook/datasets/Carseats.ipynb index 911e767..dfd36d4 100644 --- a/docs/jupyterbook/datasets/Carseats.ipynb +++ b/docs/jupyterbook/datasets/Carseats.ipynb @@ -83,9 +83,9 @@ "main_language": "python" }, "kernelspec": { - "display_name": "islp_test", + "display_name": "python3", "language": "python", - "name": "islp_test" + "name": "python3" } }, "nbformat": 4, diff --git a/docs/jupyterbook/datasets/Carseats.md 
b/docs/jupyterbook/datasets/Carseats.md index 3c74d37..76f56e4 100644 --- a/docs/jupyterbook/datasets/Carseats.md +++ b/docs/jupyterbook/datasets/Carseats.md @@ -7,11 +7,11 @@ jupytext: extension: .md format_name: myst format_version: 0.13 - jupytext_version: 1.14.1 + jupytext_version: 1.14.5 kernelspec: - display_name: islp_test + display_name: python3 language: python - name: islp_test + name: python3 --- # Sales of Child Car Seats diff --git a/docs/jupyterbook/datasets/College.ipynb b/docs/jupyterbook/datasets/College.ipynb index ef2f53d..af1027d 100644 --- a/docs/jupyterbook/datasets/College.ipynb +++ b/docs/jupyterbook/datasets/College.ipynb @@ -104,9 +104,9 @@ "main_language": "python" }, "kernelspec": { - "display_name": "islp_test", + "display_name": "python3", "language": "python", - "name": "islp_test" + "name": "python3" } }, "nbformat": 4, diff --git a/docs/jupyterbook/datasets/College.md b/docs/jupyterbook/datasets/College.md index 5e2e422..95b0bb3 100644 --- a/docs/jupyterbook/datasets/College.md +++ b/docs/jupyterbook/datasets/College.md @@ -7,11 +7,11 @@ jupytext: extension: .md format_name: myst format_version: 0.13 - jupytext_version: 1.14.1 + jupytext_version: 1.14.5 kernelspec: - display_name: islp_test + display_name: python3 language: python - name: islp_test + name: python3 --- # U.S. 
News and World Report's College Data diff --git a/docs/jupyterbook/datasets/Credit.ipynb b/docs/jupyterbook/datasets/Credit.ipynb index c4c79b5..f5e51a9 100644 --- a/docs/jupyterbook/datasets/Credit.ipynb +++ b/docs/jupyterbook/datasets/Credit.ipynb @@ -89,9 +89,9 @@ "main_language": "python" }, "kernelspec": { - "display_name": "islp_test", + "display_name": "python3", "language": "python", - "name": "islp_test" + "name": "python3" } }, "nbformat": 4, diff --git a/docs/jupyterbook/datasets/Credit.md b/docs/jupyterbook/datasets/Credit.md index 36d2502..51de59d 100644 --- a/docs/jupyterbook/datasets/Credit.md +++ b/docs/jupyterbook/datasets/Credit.md @@ -7,11 +7,11 @@ jupytext: extension: .md format_name: myst format_version: 0.13 - jupytext_version: 1.14.1 + jupytext_version: 1.14.5 kernelspec: - display_name: islp_test + display_name: python3 language: python - name: islp_test + name: python3 --- # Credit Card Balance Data diff --git a/docs/jupyterbook/datasets/Default.ipynb b/docs/jupyterbook/datasets/Default.ipynb index 4799474..64357ef 100644 --- a/docs/jupyterbook/datasets/Default.ipynb +++ b/docs/jupyterbook/datasets/Default.ipynb @@ -83,9 +83,9 @@ "main_language": "python" }, "kernelspec": { - "display_name": "islp_test", + "display_name": "python3", "language": "python", - "name": "islp_test" + "name": "python3" } }, "nbformat": 4, diff --git a/docs/jupyterbook/datasets/Default.md b/docs/jupyterbook/datasets/Default.md index f1c9acc..5aeaed2 100644 --- a/docs/jupyterbook/datasets/Default.md +++ b/docs/jupyterbook/datasets/Default.md @@ -7,11 +7,11 @@ jupytext: extension: .md format_name: myst format_version: 0.13 - jupytext_version: 1.14.1 + jupytext_version: 1.14.5 kernelspec: - display_name: islp_test + display_name: python3 language: python - name: islp_test + name: python3 --- # Credit Card Default Data diff --git a/docs/jupyterbook/datasets/Fund.ipynb b/docs/jupyterbook/datasets/Fund.ipynb index 905528d..fce1859 100644 --- 
a/docs/jupyterbook/datasets/Fund.ipynb +++ b/docs/jupyterbook/datasets/Fund.ipynb @@ -51,9 +51,9 @@ "main_language": "python" }, "kernelspec": { - "display_name": "islp_test", + "display_name": "python3", "language": "python", - "name": "islp_test" + "name": "python3" } }, "nbformat": 4, diff --git a/docs/jupyterbook/datasets/Fund.md b/docs/jupyterbook/datasets/Fund.md index 4e53d4f..89009c2 100644 --- a/docs/jupyterbook/datasets/Fund.md +++ b/docs/jupyterbook/datasets/Fund.md @@ -7,11 +7,11 @@ jupytext: extension: .md format_name: myst format_version: 0.13 - jupytext_version: 1.14.1 + jupytext_version: 1.14.5 kernelspec: - display_name: islp_test + display_name: python3 language: python - name: islp_test + name: python3 --- # Fund Manager Data diff --git a/docs/jupyterbook/datasets/Hitters.ipynb b/docs/jupyterbook/datasets/Hitters.ipynb index 295f50b..6f261cd 100644 --- a/docs/jupyterbook/datasets/Hitters.ipynb +++ b/docs/jupyterbook/datasets/Hitters.ipynb @@ -110,9 +110,9 @@ "main_language": "python" }, "kernelspec": { - "display_name": "islp_test", + "display_name": "python3", "language": "python", - "name": "islp_test" + "name": "python3" } }, "nbformat": 4, diff --git a/docs/jupyterbook/datasets/Hitters.md b/docs/jupyterbook/datasets/Hitters.md index 7f8d6b7..2fdecf0 100644 --- a/docs/jupyterbook/datasets/Hitters.md +++ b/docs/jupyterbook/datasets/Hitters.md @@ -7,11 +7,11 @@ jupytext: extension: .md format_name: myst format_version: 0.13 - jupytext_version: 1.14.1 + jupytext_version: 1.14.5 kernelspec: - display_name: islp_test + display_name: python3 language: python - name: islp_test + name: python3 --- # Baseball Data diff --git a/docs/jupyterbook/datasets/Khan.ipynb b/docs/jupyterbook/datasets/Khan.ipynb index a1f89a4..f12a5ca 100644 --- a/docs/jupyterbook/datasets/Khan.ipynb +++ b/docs/jupyterbook/datasets/Khan.ipynb @@ -81,9 +81,9 @@ "main_language": "python" }, "kernelspec": { - "display_name": "islp_test", + "display_name": "python3", "language": 
"python", - "name": "islp_test" + "name": "python3" } }, "nbformat": 4, diff --git a/docs/jupyterbook/datasets/Khan.md b/docs/jupyterbook/datasets/Khan.md index f943e99..6f0c303 100644 --- a/docs/jupyterbook/datasets/Khan.md +++ b/docs/jupyterbook/datasets/Khan.md @@ -7,11 +7,11 @@ jupytext: extension: .md format_name: myst format_version: 0.13 - jupytext_version: 1.14.1 + jupytext_version: 1.14.5 kernelspec: - display_name: islp_test + display_name: python3 language: python - name: islp_test + name: python3 --- # Khan Gene Data diff --git a/docs/jupyterbook/datasets/NCI60.ipynb b/docs/jupyterbook/datasets/NCI60.ipynb index d8e2aec..bbb576f 100644 --- a/docs/jupyterbook/datasets/NCI60.ipynb +++ b/docs/jupyterbook/datasets/NCI60.ipynb @@ -62,9 +62,9 @@ "main_language": "python" }, "kernelspec": { - "display_name": "islp_test", + "display_name": "python3", "language": "python", - "name": "islp_test" + "name": "python3" } }, "nbformat": 4, diff --git a/docs/jupyterbook/datasets/NCI60.md b/docs/jupyterbook/datasets/NCI60.md index 4cc96c6..621445e 100644 --- a/docs/jupyterbook/datasets/NCI60.md +++ b/docs/jupyterbook/datasets/NCI60.md @@ -7,11 +7,11 @@ jupytext: extension: .md format_name: myst format_version: 0.13 - jupytext_version: 1.14.1 + jupytext_version: 1.14.5 kernelspec: - display_name: islp_test + display_name: python3 language: python - name: islp_test + name: python3 --- # NCI 60 Data diff --git a/docs/jupyterbook/datasets/NYSE.ipynb b/docs/jupyterbook/datasets/NYSE.ipynb index d884201..5f9dbd5 100644 --- a/docs/jupyterbook/datasets/NYSE.ipynb +++ b/docs/jupyterbook/datasets/NYSE.ipynb @@ -79,9 +79,9 @@ "main_language": "python" }, "kernelspec": { - "display_name": "islp_test", + "display_name": "python3", "language": "python", - "name": "islp_test" + "name": "python3" } }, "nbformat": 4, diff --git a/docs/jupyterbook/datasets/NYSE.md b/docs/jupyterbook/datasets/NYSE.md index a84a9d4..bdb9581 100644 --- a/docs/jupyterbook/datasets/NYSE.md +++ 
b/docs/jupyterbook/datasets/NYSE.md @@ -7,11 +7,11 @@ jupytext: extension: .md format_name: myst format_version: 0.13 - jupytext_version: 1.14.1 + jupytext_version: 1.14.5 kernelspec: - display_name: islp_test + display_name: python3 language: python - name: islp_test + name: python3 --- # New York Stock Exchange Data diff --git a/docs/jupyterbook/datasets/OJ.ipynb b/docs/jupyterbook/datasets/OJ.ipynb index 30046cb..e18a4de 100644 --- a/docs/jupyterbook/datasets/OJ.ipynb +++ b/docs/jupyterbook/datasets/OJ.ipynb @@ -107,9 +107,9 @@ "main_language": "python" }, "kernelspec": { - "display_name": "islp_test", + "display_name": "python3", "language": "python", - "name": "islp_test" + "name": "python3" } }, "nbformat": 4, diff --git a/docs/jupyterbook/datasets/OJ.md b/docs/jupyterbook/datasets/OJ.md index 8681ea9..94fd7c6 100644 --- a/docs/jupyterbook/datasets/OJ.md +++ b/docs/jupyterbook/datasets/OJ.md @@ -7,11 +7,11 @@ jupytext: extension: .md format_name: myst format_version: 0.13 - jupytext_version: 1.14.1 + jupytext_version: 1.14.5 kernelspec: - display_name: islp_test + display_name: python3 language: python - name: islp_test + name: python3 --- # Orange Juice Data diff --git a/docs/jupyterbook/datasets/Portfolio.ipynb b/docs/jupyterbook/datasets/Portfolio.ipynb index 0596162..6d6a60d 100644 --- a/docs/jupyterbook/datasets/Portfolio.ipynb +++ b/docs/jupyterbook/datasets/Portfolio.ipynb @@ -68,9 +68,9 @@ "main_language": "python" }, "kernelspec": { - "display_name": "islp_test", + "display_name": "python3", "language": "python", - "name": "islp_test" + "name": "python3" } }, "nbformat": 4, diff --git a/docs/jupyterbook/datasets/Portfolio.md b/docs/jupyterbook/datasets/Portfolio.md index e130b81..3a79d35 100644 --- a/docs/jupyterbook/datasets/Portfolio.md +++ b/docs/jupyterbook/datasets/Portfolio.md @@ -7,11 +7,11 @@ jupytext: extension: .md format_name: myst format_version: 0.13 - jupytext_version: 1.14.1 + jupytext_version: 1.14.5 kernelspec: - display_name: 
islp_test + display_name: python3 language: python - name: islp_test + name: python3 --- # Portfolio Data diff --git a/docs/jupyterbook/datasets/Publication.ipynb b/docs/jupyterbook/datasets/Publication.ipynb index c97b201..a4a6dfa 100644 --- a/docs/jupyterbook/datasets/Publication.ipynb +++ b/docs/jupyterbook/datasets/Publication.ipynb @@ -91,9 +91,9 @@ "main_language": "python" }, "kernelspec": { - "display_name": "islp_test", + "display_name": "python3", "language": "python", - "name": "islp_test" + "name": "python3" } }, "nbformat": 4, diff --git a/docs/jupyterbook/datasets/Publication.md b/docs/jupyterbook/datasets/Publication.md index 94c18bd..78261af 100644 --- a/docs/jupyterbook/datasets/Publication.md +++ b/docs/jupyterbook/datasets/Publication.md @@ -7,11 +7,11 @@ jupytext: extension: .md format_name: myst format_version: 0.13 - jupytext_version: 1.14.1 + jupytext_version: 1.14.5 kernelspec: - display_name: islp_test + display_name: python3 language: python - name: islp_test + name: python3 --- # Time-to-Publication Data diff --git a/docs/jupyterbook/datasets/Smarket.ipynb b/docs/jupyterbook/datasets/Smarket.ipynb index 35a1918..cced2a9 100644 --- a/docs/jupyterbook/datasets/Smarket.ipynb +++ b/docs/jupyterbook/datasets/Smarket.ipynb @@ -87,9 +87,9 @@ "main_language": "python" }, "kernelspec": { - "display_name": "islp_test", + "display_name": "python3", "language": "python", - "name": "islp_test" + "name": "python3" } }, "nbformat": 4, diff --git a/docs/jupyterbook/datasets/Smarket.md b/docs/jupyterbook/datasets/Smarket.md index a42c94e..2c0e120 100644 --- a/docs/jupyterbook/datasets/Smarket.md +++ b/docs/jupyterbook/datasets/Smarket.md @@ -7,11 +7,11 @@ jupytext: extension: .md format_name: myst format_version: 0.13 - jupytext_version: 1.14.1 + jupytext_version: 1.14.5 kernelspec: - display_name: islp_test + display_name: python3 language: python - name: islp_test + name: python3 --- # S&P Stock Market Data diff --git 
a/docs/jupyterbook/datasets/USArrests.ipynb b/docs/jupyterbook/datasets/USArrests.ipynb index 4a6a1c0..1107424 100644 --- a/docs/jupyterbook/datasets/USArrests.ipynb +++ b/docs/jupyterbook/datasets/USArrests.ipynb @@ -202,9 +202,9 @@ "main_language": "python" }, "kernelspec": { - "display_name": "islp_test", + "display_name": "python3", "language": "python", - "name": "islp_test" + "name": "python3" }, "language_info": { "codemirror_mode": { diff --git a/docs/jupyterbook/datasets/USArrests.md b/docs/jupyterbook/datasets/USArrests.md index 7cbede1..ee3c962 100644 --- a/docs/jupyterbook/datasets/USArrests.md +++ b/docs/jupyterbook/datasets/USArrests.md @@ -7,11 +7,11 @@ jupytext: extension: .md format_name: myst format_version: 0.13 - jupytext_version: 1.14.1 + jupytext_version: 1.14.5 kernelspec: - display_name: islp_test + display_name: python3 language: python - name: islp_test + name: python3 --- # Violent Crime Rates by US State diff --git a/docs/jupyterbook/datasets/Wage.ipynb b/docs/jupyterbook/datasets/Wage.ipynb index ad8f9b0..b95d853 100644 --- a/docs/jupyterbook/datasets/Wage.ipynb +++ b/docs/jupyterbook/datasets/Wage.ipynb @@ -99,9 +99,9 @@ "main_language": "python" }, "kernelspec": { - "display_name": "islp_test", + "display_name": "python3", "language": "python", - "name": "islp_test" + "name": "python3" } }, "nbformat": 4, diff --git a/docs/jupyterbook/datasets/Wage.md b/docs/jupyterbook/datasets/Wage.md index eeeb3c4..fd22e30 100644 --- a/docs/jupyterbook/datasets/Wage.md +++ b/docs/jupyterbook/datasets/Wage.md @@ -7,11 +7,11 @@ jupytext: extension: .md format_name: myst format_version: 0.13 - jupytext_version: 1.14.1 + jupytext_version: 1.14.5 kernelspec: - display_name: islp_test + display_name: python3 language: python - name: islp_test + name: python3 --- # Mid-Atlantic Wage Data diff --git a/docs/jupyterbook/datasets/Weekly.ipynb b/docs/jupyterbook/datasets/Weekly.ipynb index cf08b80..69f26d6 100644 --- a/docs/jupyterbook/datasets/Weekly.ipynb 
+++ b/docs/jupyterbook/datasets/Weekly.ipynb @@ -95,9 +95,9 @@ "main_language": "python" }, "kernelspec": { - "display_name": "islp_test", + "display_name": "python3", "language": "python", - "name": "islp_test" + "name": "python3" } }, "nbformat": 4, diff --git a/docs/jupyterbook/datasets/Weekly.md b/docs/jupyterbook/datasets/Weekly.md index c0639ea..c239c5e 100644 --- a/docs/jupyterbook/datasets/Weekly.md +++ b/docs/jupyterbook/datasets/Weekly.md @@ -7,11 +7,11 @@ jupytext: extension: .md format_name: myst format_version: 0.13 - jupytext_version: 1.14.1 + jupytext_version: 1.14.5 kernelspec: - display_name: islp_test + display_name: python3 language: python - name: islp_test + name: python3 --- # Weekly S&P Stock Market Data diff --git a/docs/jupyterbook/helpers/cluster.ipynb b/docs/jupyterbook/helpers/cluster.ipynb index bf237a3..31798a0 100644 --- a/docs/jupyterbook/helpers/cluster.ipynb +++ b/docs/jupyterbook/helpers/cluster.ipynb @@ -8,15 +8,27 @@ "# Clustering\n", "\n", "This module has a single function, used to help visualize a dendrogram from a\n", - "hierarchical clustering." + "hierarchical clustering. The function is based on this example from [sklearn.cluster](https://scikit-learn.org/stable/auto_examples/cluster/plot_agglomerative_dendrogram.html)." 
] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "d5df152d", "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "ModuleNotFoundError", + "evalue": "No module named 'sklearn'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[1], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mnumpy\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mnp\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msklearn\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcluster\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m AgglomerativeClustering\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mscipy\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcluster\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mhierarchy\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m dendrogram\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mISLP\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcluster\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m compute_linkage\n", + "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'sklearn'" + ] + } + ], "source": [ "import numpy as np\n", "from sklearn.cluster import AgglomerativeClustering\n", @@ -34,7 +46,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "0135c1fb", "metadata": {}, "outputs": [], @@ -101,9 +113,21 @@ "main_language": "python" }, "kernelspec": { - "display_name": "islp_test", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "islp_test" + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": 
"ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.10" } }, "nbformat": 4, diff --git a/docs/jupyterbook/helpers/cluster.md b/docs/jupyterbook/helpers/cluster.md index ab31348..b951b18 100644 --- a/docs/jupyterbook/helpers/cluster.md +++ b/docs/jupyterbook/helpers/cluster.md @@ -7,19 +7,19 @@ jupytext: extension: .md format_name: myst format_version: 0.13 - jupytext_version: 1.14.1 + jupytext_version: 1.14.5 kernelspec: - display_name: islp_test + display_name: Python 3 (ipykernel) language: python - name: islp_test + name: python3 --- # Clustering This module has a single function, used to help visualize a dendrogram from a -hierarchical clustering. +hierarchical clustering. The function is based on this example from [sklearn.cluster](https://scikit-learn.org/stable/auto_examples/cluster/plot_agglomerative_dendrogram.html). -```{code-cell} +```{code-cell} ipython3 import numpy as np from sklearn.cluster import AgglomerativeClustering from scipy.cluster.hierarchy import dendrogram @@ -28,7 +28,7 @@ from ISLP.cluster import compute_linkage ## Make a toy dataset -```{code-cell} +```{code-cell} ipython3 rng = np.random.default_rng(1) X = rng.normal(size=(30, 5)) X[:10] += 1 @@ -36,19 +36,19 @@ X[:10] += 1 ## Cluster it -```{code-cell} +```{code-cell} ipython3 clust = AgglomerativeClustering(distance_threshold=0, n_clusters=None, linkage='complete') ``` -```{code-cell} +```{code-cell} ipython3 clust.fit(X) ``` ## Plot the dendrogram -```{code-cell} +```{code-cell} ipython3 linkage = compute_linkage(clust) dendrogram(linkage); ``` diff --git a/docs/jupyterbook/helpers/pygam.ipynb b/docs/jupyterbook/helpers/pygam.ipynb index 01a1e55..aab61d1 100644 --- a/docs/jupyterbook/helpers/pygam.ipynb +++ b/docs/jupyterbook/helpers/pygam.ipynb @@ -207,9 +207,9 @@ "main_language": "python" }, "kernelspec": { - "display_name": "islp_test", + 
"display_name": "python3", "language": "python", - "name": "islp_test" + "name": "python3" } }, "nbformat": 4, diff --git a/docs/jupyterbook/helpers/pygam.md b/docs/jupyterbook/helpers/pygam.md index c91084c..56adc84 100644 --- a/docs/jupyterbook/helpers/pygam.md +++ b/docs/jupyterbook/helpers/pygam.md @@ -7,11 +7,11 @@ jupytext: extension: .md format_name: myst format_version: 0.13 - jupytext_version: 1.14.1 + jupytext_version: 1.14.5 kernelspec: - display_name: islp_test + display_name: python3 language: python - name: islp_test + name: python3 --- # Generalized Additive Models diff --git a/docs/jupyterbook/helpers/survival.ipynb b/docs/jupyterbook/helpers/survival.ipynb index e6b9e3a..7cb30a3 100644 --- a/docs/jupyterbook/helpers/survival.ipynb +++ b/docs/jupyterbook/helpers/survival.ipynb @@ -108,9 +108,9 @@ "main_language": "python" }, "kernelspec": { - "display_name": "islp_test", + "display_name": "python3", "language": "python", - "name": "islp_test" + "name": "python3" } }, "nbformat": 4, diff --git a/docs/jupyterbook/helpers/survival.md b/docs/jupyterbook/helpers/survival.md index 715f8bd..58b129d 100644 --- a/docs/jupyterbook/helpers/survival.md +++ b/docs/jupyterbook/helpers/survival.md @@ -7,11 +7,11 @@ jupytext: extension: .md format_name: myst format_version: 0.13 - jupytext_version: 1.14.1 + jupytext_version: 1.14.5 kernelspec: - display_name: islp_test + display_name: python3 language: python - name: islp_test + name: python3 --- # Survival Analysis diff --git a/docs/jupyterbook/helpers/svm.ipynb b/docs/jupyterbook/helpers/svm.ipynb index dac6c39..593d840 100644 --- a/docs/jupyterbook/helpers/svm.ipynb +++ b/docs/jupyterbook/helpers/svm.ipynb @@ -103,9 +103,9 @@ "main_language": "python" }, "kernelspec": { - "display_name": "islp_test", + "display_name": "python3", "language": "python", - "name": "islp_test" + "name": "python3" } }, "nbformat": 4, diff --git a/docs/jupyterbook/helpers/svm.md b/docs/jupyterbook/helpers/svm.md index 007eb7a..3025490 
100644 --- a/docs/jupyterbook/helpers/svm.md +++ b/docs/jupyterbook/helpers/svm.md @@ -7,11 +7,11 @@ jupytext: extension: .md format_name: myst format_version: 0.13 - jupytext_version: 1.14.1 + jupytext_version: 1.14.5 kernelspec: - display_name: islp_test + display_name: python3 language: python - name: islp_test + name: python3 --- # Support Vector Machines diff --git a/docs/jupyterbook/imdb.ipynb b/docs/jupyterbook/imdb.ipynb index d490921..ae0d7dd 100644 --- a/docs/jupyterbook/imdb.ipynb +++ b/docs/jupyterbook/imdb.ipynb @@ -5,71 +5,109 @@ "id": "50f2b809", "metadata": {}, "source": [ - "# Creating a clean IMDB dataset\n", + "# Creating IMDB dataset from `keras` version\n", + "\n", + "This script details how the `IMDB` data in `ISLP` was constructed.\n", "\n", "Running this example requires `keras`. Use `pip install keras` to install if necessary." ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "d920bb2e", "metadata": {}, "outputs": [], "source": [ - "import pickle" + "import pickle\n", + "import numpy as np\n", + "from scipy.sparse import coo_matrix, save_npz\n", + "import torch\n", + "from keras.datasets import imdb\n", + "from tensorflow.keras.preprocessing.sequence import pad_sequences" ] }, { - "cell_type": "code", - "execution_count": null, - "id": "e507f1fb", + "cell_type": "markdown", + "id": "eaf27f0c-0cb0-4ad5-8775-d138e3f20933", "metadata": {}, - "outputs": [], "source": [ - "import numpy as np\n", - "from scipy.sparse import coo_matrix, save_npz\n", - "import torch" + "We first load the data using `keras`, limiting focus to the 10000 most commmon words." 
] }, { "cell_type": "code", - "execution_count": null, - "id": "b94d3f35", + "execution_count": 2, + "id": "29f0e01e", "metadata": {}, "outputs": [], "source": [ - "from keras.datasets import imdb\n", - "from tensorflow.keras.preprocessing.sequence import pad_sequences" + "# the 3 is for three terms: \n", + "num_words = 10000+3\n", + "((S_train, L_train), \n", + " (S_test, L_test)) = imdb.load_data(num_words=num_words)" + ] + }, + { + "cell_type": "markdown", + "id": "9020ab27-cc62-4b86-85ba-80a94ff692de", + "metadata": {}, + "source": [ + "The object `S_train` is effectively a list in which each document has been encoded into a sequence of\n", + "values from 0 to 10002." ] }, { "cell_type": "code", - "execution_count": null, - "id": "29f0e01e", + "execution_count": 3, + "id": "e27564c4-320f-42b6-9f2e-2a2afdebefcf", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "[1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# the 3 is for three terms: \n", - "num_words = 10000+3\n", - "((S_train, Y_train), \n", - " (S_test, Y_test)) = imdb.load_data(num_words=num_words)" + "S_train[0][:10]" + ] + }, + { + "cell_type": "markdown", + "id": "15f039fe-faed-4884-a725-1c51d6c8d4d4", + "metadata": {}, + "source": [ + "We'll use `np.float32` as that is the common precision used in `torch`." ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "6cc3c3cb", "metadata": {}, "outputs": [], "source": [ - "Y_train = Y_train.astype(np.float32)\n", - "Y_test = Y_test.astype(np.float32)" + "L_train = L_train.astype(np.float32)\n", + "L_test = L_test.astype(np.float32)" + ] + }, + { + "cell_type": "markdown", + "id": "005679bc-4337-4757-831e-f9a6ea50f6aa", + "metadata": {}, + "source": [ + "We will use a one-hot encoding that captures whether or not a given word appears in a given review." 
] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "7b6d1098", "metadata": {}, "outputs": [], @@ -88,18 +126,30 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "afcdc8b2", "metadata": {}, "outputs": [], "source": [ - "X_train, L_train = one_hot(S_train, num_words), Y_train\n", + "X_train = one_hot(S_train, num_words)\n", "X_test = one_hot(S_test, num_words)" ] }, + { + "cell_type": "markdown", + "id": "a67e299d-8774-4758-8953-77afdce775ab", + "metadata": {}, + "source": [ + "## Store as sparse tensors\n", + "\n", + "We see later in the lab that the dense representation is faster. Nevertheless,\n", + "let's store the one-hot representation as sparse `torch` tensors \n", + "as well as sparse `scipy` matrices." + ] + }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "b19366ea", "metadata": {}, "outputs": [], @@ -115,7 +165,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "b45ae6d1", "metadata": {}, "outputs": [], @@ -126,7 +176,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "a47d6eb6", "metadata": {}, "outputs": [], @@ -137,7 +187,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "id": "d1b37b37", "metadata": {}, "outputs": [], @@ -151,12 +201,12 @@ "id": "1119823a", "metadata": {}, "source": [ - "save the sparse matrices" + "### Save as sparse `scipy` matrices" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "id": "6cb6bfdf", "metadata": {}, "outputs": [], @@ -167,12 +217,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "id": "eac1c2ae", "metadata": {}, "outputs": [], "source": [ - "np.save('IMDB_Y_test.npy', Y_test)\n", + "np.save('IMDB_Y_test.npy', L_test)\n", "np.save('IMDB_Y_train.npy', L_train)" ] }, @@ -181,12 +231,14 @@ "id": "25c128e3", "metadata": {}, "source": [ - "save and 
pickle the word index" + "## Save and pickle the word index\n", + "\n", + "We'll also want to store a lookup table to convert representations such as `S_train[0]` into words" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "id": "8458bf67", "metadata": {}, "outputs": [], @@ -199,9 +251,46 @@ "lookup[4] = \"\"" ] }, + { + "cell_type": "markdown", + "id": "5e62ebff-2575-4d35-b46c-51c6f7598efc", + "metadata": {}, + "source": [ + "Let's look at our first training document:" + ] + }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, + "id": "2aaefdf8-0a49-4bdb-8b40-55665283c8a8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\" this film was just brilliant casting location scenery story direction everyone's really suited part they played and you\"" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "' '.join([lookup[i] for i in S_train[0][:20]])" + ] + }, + { + "cell_type": "markdown", + "id": "0e985a73-bfd9-42bd-a523-3dc6e223d602", + "metadata": {}, + "source": [ + "We save this lookup table so it can be loaded later " + ] + }, + { + "cell_type": "code", + "execution_count": 15, "id": "d95252de", "metadata": {}, "outputs": [], @@ -214,12 +303,15 @@ "id": "b3d900b9", "metadata": {}, "source": [ - "create the padded representations" + "## Padded representations\n", + "\n", + "For some of the recurrent models, we'll need sequences of common lengths, padded if necessary.\n", + "Here, we pad up to a maximum length of 500, filling the remaining entries with 0." ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "id": "637b3c5e", "metadata": {}, "outputs": [], @@ -230,9 +322,17 @@ " S_test]]" ] }, + { + "cell_type": "markdown", + "id": "a6218300-b355-44cc-b7fb-4bff81211aa6", + "metadata": {}, + "source": [ + "Finally, we save these for later use in the deep learning lab." 
+ ] + }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "id": "bac69f88", "metadata": {}, "outputs": [], @@ -249,9 +349,21 @@ "main_language": "python" }, "kernelspec": { - "display_name": "islp_test", + "display_name": "python3", "language": "python", - "name": "islp_test" + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" } }, "nbformat": 4, diff --git a/docs/jupyterbook/imdb.md b/docs/jupyterbook/imdb.md index 313952f..0b87bae 100644 --- a/docs/jupyterbook/imdb.md +++ b/docs/jupyterbook/imdb.md @@ -7,45 +7,54 @@ jupytext: extension: .md format_name: myst format_version: 0.13 - jupytext_version: 1.14.1 + jupytext_version: 1.14.5 kernelspec: - display_name: islp_test + display_name: python3 language: python - name: islp_test + name: python3 --- -# Creating a clean IMDB dataset +# Creating IMDB dataset from `keras` version + +This script details how the `IMDB` data in `ISLP` was constructed. Running this example requires `keras`. Use `pip install keras` to install if necessary. -```{code-cell} +```{code-cell} ipython3 import pickle -``` - -```{code-cell} import numpy as np from scipy.sparse import coo_matrix, save_npz import torch -``` - -```{code-cell} from keras.datasets import imdb from tensorflow.keras.preprocessing.sequence import pad_sequences ``` -```{code-cell} +We first load the data using `keras`, limiting focus to the 10000 most commmon words. 
+ +```{code-cell} ipython3 # the 3 is for three terms: num_words = 10000+3 -((S_train, Y_train), - (S_test, Y_test)) = imdb.load_data(num_words=num_words) +((S_train, L_train), + (S_test, L_test)) = imdb.load_data(num_words=num_words) ``` -```{code-cell} -Y_train = Y_train.astype(np.float32) -Y_test = Y_test.astype(np.float32) +The object `S_train` is effectively a list in which each document has been encoded into a sequence of +values from 0 to 10002. + +```{code-cell} ipython3 +S_train[0][:10] +``` + +We'll use `np.float32` as that is the common precision used in `torch`. + +```{code-cell} ipython3 +L_train = L_train.astype(np.float32) +L_test = L_test.astype(np.float32) ``` -```{code-cell} +We will use a one-hot encoding that captures whether or not a given word appears in a given review. + +```{code-cell} ipython3 def one_hot(sequences, ncol): idx, vals = [], [] for i, s in enumerate(sequences): @@ -58,12 +67,18 @@ def one_hot(sequences, ncol): return tens.coalesce() ``` -```{code-cell} -X_train, L_train = one_hot(S_train, num_words), Y_train +```{code-cell} ipython3 +X_train = one_hot(S_train, num_words) X_test = one_hot(S_test, num_words) ``` -```{code-cell} +## Store as sparse tensors + +We see later in the lab that the dense representation is faster. Nevertheless, +let's store the one-hot representation as sparse `torch` tensors +as well as sparse `scipy` matrices. 
+ +```{code-cell} ipython3 def convert_sparse_tensor(X): idx = np.asarray(X.indices()) vals = np.asarray(X.values()) @@ -73,36 +88,38 @@ def convert_sparse_tensor(X): shape=X.shape).tocsr() ``` -```{code-cell} +```{code-cell} ipython3 X_train_s = convert_sparse_tensor(X_train) X_test_s = convert_sparse_tensor(X_test) ``` -```{code-cell} +```{code-cell} ipython3 X_train_d = torch.tensor(X_train_s.todense()) X_test_d = torch.tensor(X_test_s.todense()) ``` -```{code-cell} +```{code-cell} ipython3 torch.save(X_train_d, 'IMDB_X_train.tensor') torch.save(X_test_d, 'IMDB_X_test.tensor') ``` -save the sparse matrices +### Save as sparse `scipy` matrices -```{code-cell} +```{code-cell} ipython3 save_npz('IMDB_X_test.npz', X_test_s) save_npz('IMDB_X_train.npz', X_train_s) ``` -```{code-cell} -np.save('IMDB_Y_test.npy', Y_test) +```{code-cell} ipython3 +np.save('IMDB_Y_test.npy', L_test) np.save('IMDB_Y_train.npy', L_train) ``` -save and pickle the word index +## Save and pickle the word index -```{code-cell} +We'll also want to store a lookup table to convert representations such as `S_train[0]` into words + +```{code-cell} ipython3 word_index = imdb.get_word_index() lookup = {(i+3):w for w, i in word_index.items()} lookup[0] = "" @@ -111,20 +128,33 @@ lookup[2] = "" lookup[4] = "" ``` -```{code-cell} +Let's look at our first training document: + +```{code-cell} ipython3 +' '.join([lookup[i] for i in S_train[0][:20]]) +``` + +We save this lookup table so it can be loaded later + +```{code-cell} ipython3 pickle.dump(lookup, open('IMDB_word_index.pkl', 'bw')) ``` -create the padded representations +## Padded representations -```{code-cell} +For some of the recurrent models, we'll need sequences of common lengths, padded if necessary. +Here, we pad up to a maximum length of 500, filling the remaining entries with 0. 
+ +```{code-cell} ipython3 (S_train, S_test) = [torch.tensor(pad_sequences(S, maxlen=500, value=0)) for S in [S_train, S_test]] ``` -```{code-cell} +Finally, we save these for later use in the deep learning lab. + +```{code-cell} ipython3 torch.save(S_train, 'IMDB_S_train.tensor') torch.save(S_test, 'IMDB_S_test.tensor') ``` diff --git a/docs/jupyterbook/models/anova.ipynb b/docs/jupyterbook/models/anova.ipynb new file mode 100644 index 0000000..41e8bcb --- /dev/null +++ b/docs/jupyterbook/models/anova.ipynb @@ -0,0 +1,648 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "ee33d364", + "metadata": {}, + "source": [ + "# ANOVA using `ModelSpec`\n", + "\n", + "\n", + "In this lab we illustrate how to run create specific ANOVA analyses\n", + "using `ModelSpec`." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "4c70fbaa", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "from statsmodels.api import OLS\n", + "from statsmodels.stats.anova import anova_lm\n", + "\n", + "from ISLP import load_data\n", + "from ISLP.models import (ModelSpec,\n", + " derived_feature,\n", + " summarize)" + ] + }, + { + "cell_type": "markdown", + "id": "333a49cf", + "metadata": {}, + "source": [ + "We will carry out two simple ANOVA analyses of the `Hitters` data.\n", + "We wish to predict a baseball player’s `Salary` on the\n", + "basis of various statistics associated with performance in the\n", + "previous year." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "8a708215", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "59" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Hitters = load_data('Hitters')\n", + "np.isnan(Hitters['Salary']).sum()" + ] + }, + { + "cell_type": "markdown", + "id": "dad5e991", + "metadata": {}, + "source": [ + " \n", + " We see that `Salary` is missing for 59 players. 
The\n", + "`dropna()` method of data frames removes all of the rows that have missing\n", + "values in any variable (by default --- see `Hitters.dropna?`)." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "ac7086a5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['AtBat', 'Hits', 'HmRun', 'Runs', 'RBI', 'Walks', 'Years', 'CAtBat',\n", + " 'CHits', 'CHmRun', 'CRuns', 'CRBI', 'CWalks', 'League', 'Division',\n", + " 'PutOuts', 'Assists', 'Errors', 'Salary', 'NewLeague'],\n", + " dtype='object')" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Hitters = Hitters.dropna()\n", + "Hitters.columns" + ] + }, + { + "cell_type": "markdown", + "id": "1a0a3521-be74-40df-a404-3895d80a11dc", + "metadata": {}, + "source": [ + "## Grouping variables\n", + "\n", + "A look at the [description](https://islp.readthedocs.io/en/latest/datasets/Hitters.html) of the data shows\n", + "that there are both career and 1986 offensive stats, as well as some defensive stats.\n", + "\n", + "Let's group the offensive into recent and career offensive stats, as well as a group of defensive variables." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "a215e43b-7bc8-4bdd-91cf-40d717cd7978", + "metadata": {}, + "outputs": [], + "source": [ + "confounders = derived_feature(['Division', 'League', 'NewLeague'],\n", + " name='confounders')\n", + "offense_career = derived_feature(['CAtBat', 'CHits', 'CHmRun', 'CRuns', 'CRBI', 'CWalks'],\n", + " name='offense_career')\n", + "offense_1986 = derived_feature(['AtBat', 'Hits', 'HmRun', 'Runs', 'RBI', 'Walks'],\n", + " name='offense_1986')\n", + "defense_1986 = derived_feature(['PutOuts', 'Assists', 'Errors'],\n", + " name='defense_1986')" + ] + }, + { + "cell_type": "markdown", + "id": "aa15fd0c-1e8a-431e-8425-c61da8439976", + "metadata": {}, + "source": [ + "We'll first do a sequential ANOVA where terms are added sequentially" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "40cd6c28", + "metadata": {}, + "outputs": [], + "source": [ + "design = ModelSpec([confounders, offense_career, defense_1986, offense_1986]).fit(Hitters)\n", + "Y = np.array(Hitters['Salary'])\n", + "X = design.transform(Hitters)" + ] + }, + { + "cell_type": "markdown", + "id": "074120b1", + "metadata": {}, + "source": [ + "Along with a score we need to specify the search strategy. This is done through the object\n", + "`Stepwise()` in the `ISLP.models` package. The method `Stepwise.first_peak()`\n", + "runs forward stepwise until any further additions to the model do not result\n", + "in an improvement in the evaluation score. Similarly, the method `Stepwise.fixed_steps()`\n", + "runs a fixed number of steps of stepwise search." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "e65f5607", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
coefstd errtP>|t|
intercept148.218773.5952.0140.045
Division[W]-116.040440.188-2.8870.004
League[N]63.750379.0060.8070.421
NewLeague[N]-24.398978.843-0.3090.757
CAtBat-0.18870.120-1.5720.117
CHits0.16360.6650.2460.806
CHmRun-0.15171.612-0.0940.925
CRuns1.47160.7471.9710.050
CRBI0.80210.6911.1610.247
CWalks-0.81240.327-2.4810.014
PutOuts0.28270.0773.6610.000
Assists0.37550.2201.7050.089
Errors-3.29404.377-0.7530.452
AtBat-1.95090.624-3.1250.002
Hits7.43952.3633.1480.002
HmRun4.34496.1900.7020.483
Runs-2.33122.971-0.7850.433
RBI-1.06702.595-0.4110.681
Walks6.21961.8253.4090.001
\n", + "
" + ], + "text/plain": [ + " coef std err t P>|t|\n", + "intercept 148.2187 73.595 2.014 0.045\n", + "Division[W] -116.0404 40.188 -2.887 0.004\n", + "League[N] 63.7503 79.006 0.807 0.421\n", + "NewLeague[N] -24.3989 78.843 -0.309 0.757\n", + "CAtBat -0.1887 0.120 -1.572 0.117\n", + "CHits 0.1636 0.665 0.246 0.806\n", + "CHmRun -0.1517 1.612 -0.094 0.925\n", + "CRuns 1.4716 0.747 1.971 0.050\n", + "CRBI 0.8021 0.691 1.161 0.247\n", + "CWalks -0.8124 0.327 -2.481 0.014\n", + "PutOuts 0.2827 0.077 3.661 0.000\n", + "Assists 0.3755 0.220 1.705 0.089\n", + "Errors -3.2940 4.377 -0.753 0.452\n", + "AtBat -1.9509 0.624 -3.125 0.002\n", + "Hits 7.4395 2.363 3.148 0.002\n", + "HmRun 4.3449 6.190 0.702 0.483\n", + "Runs -2.3312 2.971 -0.785 0.433\n", + "RBI -1.0670 2.595 -0.411 0.681\n", + "Walks 6.2196 1.825 3.409 0.001" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "M = OLS(Y, X).fit()\n", + "summarize(M)" + ] + }, + { + "cell_type": "markdown", + "id": "29d9b55f", + "metadata": {}, + "source": [ + "We'll first produce the sequential, or Type I ANOVA results. This builds up a model sequentially and compares\n", + "two successive models." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "cfbe5b92", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
df_residssrdf_diffss_diffFPr(>F)
intercept262.05.331911e+070.0NaNNaNNaN
confounders259.05.131263e+073.02.006478e+066.7411472.144265e-04
offense_career253.03.059130e+076.02.072134e+0734.8086561.470455e-30
defense_1986250.02.730614e+073.03.285156e+0611.0371117.880207e-07
offense_1986244.02.420857e+076.03.097572e+065.2034444.648586e-05
\n", + "
" + ], + "text/plain": [ + " df_resid ssr df_diff ss_diff F \\\n", + "intercept 262.0 5.331911e+07 0.0 NaN NaN \n", + "confounders 259.0 5.131263e+07 3.0 2.006478e+06 6.741147 \n", + "offense_career 253.0 3.059130e+07 6.0 2.072134e+07 34.808656 \n", + "defense_1986 250.0 2.730614e+07 3.0 3.285156e+06 11.037111 \n", + "offense_1986 244.0 2.420857e+07 6.0 3.097572e+06 5.203444 \n", + "\n", + " Pr(>F) \n", + "intercept NaN \n", + "confounders 2.144265e-04 \n", + "offense_career 1.470455e-30 \n", + "defense_1986 7.880207e-07 \n", + "offense_1986 4.648586e-05 " + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = anova_lm(*[OLS(Y, D).fit() for D in design.build_sequence(Hitters, anova_type='sequential')])\n", + "df.index = design.names\n", + "df" + ] + }, + { + "cell_type": "markdown", + "id": "7092f666", + "metadata": {}, + "source": [ + "We can similarly compute the Type II ANOVA results which drops each term and compares to the full model." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "e2d43844", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
df_residssrdf_diffss_diffFPr(>F)
intercept244.02.420857e+071.04.024254e+054.0560764.511037e-02
confounders244.02.420857e+073.09.661738e+053.2460462.261572e-02
offense_career244.02.420857e+076.01.051025e+0717.6555965.701196e-17
defense_1986244.02.420857e+073.01.467933e+064.9318032.415732e-03
offense_1986244.02.420857e+076.03.097572e+065.2034444.648586e-05
\n", + "
" + ], + "text/plain": [ + " df_resid ssr df_diff ss_diff F \\\n", + "intercept 244.0 2.420857e+07 1.0 4.024254e+05 4.056076 \n", + "confounders 244.0 2.420857e+07 3.0 9.661738e+05 3.246046 \n", + "offense_career 244.0 2.420857e+07 6.0 1.051025e+07 17.655596 \n", + "defense_1986 244.0 2.420857e+07 3.0 1.467933e+06 4.931803 \n", + "offense_1986 244.0 2.420857e+07 6.0 3.097572e+06 5.203444 \n", + "\n", + " Pr(>F) \n", + "intercept 4.511037e-02 \n", + "confounders 2.261572e-02 \n", + "offense_career 5.701196e-17 \n", + "defense_1986 2.415732e-03 \n", + "offense_1986 4.648586e-05 " + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "D_full = design.transform(Hitters)\n", + "OLS_full = OLS(Y, D_full).fit()\n", + "dfs = []\n", + "for d in design.build_sequence(Hitters, anova_type='drop'):\n", + " dfs.append(anova_lm(OLS(Y,d).fit(), OLS_full).iloc[1:])\n", + "df = pd.concat(dfs)\n", + "df.index = design.names\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "362709ae-9558-4c4c-8f5e-f8388caf631d", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "jupytext": { + "formats": "source/models///ipynb,jupyterbook/models///md:myst,jupyterbook/models///ipynb" + }, + "kernelspec": { + "display_name": "python3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/jupyterbook/models/anova.md b/docs/jupyterbook/models/anova.md new file mode 100644 index 0000000..574f9eb --- /dev/null +++ b/docs/jupyterbook/models/anova.md @@ -0,0 +1,115 @@ +--- +jupytext: + formats: source/models///ipynb,jupyterbook/models///md:myst,jupyterbook/models///ipynb + 
text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.14.5 +kernelspec: + display_name: python3 + language: python + name: python3 +--- + +# ANOVA using `ModelSpec` + + +In this lab we illustrate how to run create specific ANOVA analyses +using `ModelSpec`. + +```{code-cell} ipython3 +import numpy as np +import pandas as pd + +from statsmodels.api import OLS +from statsmodels.stats.anova import anova_lm + +from ISLP import load_data +from ISLP.models import (ModelSpec, + derived_feature, + summarize) +``` + +We will carry out two simple ANOVA analyses of the `Hitters` data. +We wish to predict a baseball player’s `Salary` on the +basis of various statistics associated with performance in the +previous year. + +```{code-cell} ipython3 +Hitters = load_data('Hitters') +np.isnan(Hitters['Salary']).sum() +``` + + + We see that `Salary` is missing for 59 players. The +`dropna()` method of data frames removes all of the rows that have missing +values in any variable (by default --- see `Hitters.dropna?`). + +```{code-cell} ipython3 +Hitters = Hitters.dropna() +Hitters.columns +``` + +## Grouping variables + +A look at the [description](https://islp.readthedocs.io/en/latest/datasets/Hitters.html) of the data shows +that there are both career and 1986 offensive stats, as well as some defensive stats. + +Let's group the offensive into recent and career offensive stats, as well as a group of defensive variables. 
+ +```{code-cell} ipython3 +confounders = derived_feature(['Division', 'League', 'NewLeague'], + name='confounders') +offense_career = derived_feature(['CAtBat', 'CHits', 'CHmRun', 'CRuns', 'CRBI', 'CWalks'], + name='offense_career') +offense_1986 = derived_feature(['AtBat', 'Hits', 'HmRun', 'Runs', 'RBI', 'Walks'], + name='offense_1986') +defense_1986 = derived_feature(['PutOuts', 'Assists', 'Errors'], + name='defense_1986') +``` + +We'll first do a sequential ANOVA where terms are added sequentially + +```{code-cell} ipython3 +design = ModelSpec([confounders, offense_career, defense_1986, offense_1986]).fit(Hitters) +Y = np.array(Hitters['Salary']) +X = design.transform(Hitters) +``` + +Along with a score we need to specify the search strategy. This is done through the object +`Stepwise()` in the `ISLP.models` package. The method `Stepwise.first_peak()` +runs forward stepwise until any further additions to the model do not result +in an improvement in the evaluation score. Similarly, the method `Stepwise.fixed_steps()` +runs a fixed number of steps of stepwise search. + +```{code-cell} ipython3 +M = OLS(Y, X).fit() +summarize(M) +``` + +We'll first produce the sequential, or Type I ANOVA results. This builds up a model sequentially and compares +two successive models. + +```{code-cell} ipython3 +df = anova_lm(*[OLS(Y, D).fit() for D in design.build_sequence(Hitters, anova_type='sequential')]) +df.index = design.names +df +``` + +We can similarly compute the Type II ANOVA results which drops each term and compares to the full model. 
+ +```{code-cell} ipython3 +D_full = design.transform(Hitters) +OLS_full = OLS(Y, D_full).fit() +dfs = [] +for d in design.build_sequence(Hitters, anova_type='drop'): + dfs.append(anova_lm(OLS(Y,d).fit(), OLS_full).iloc[1:]) +df = pd.concat(dfs) +df.index = design.names +df +``` + +```{code-cell} ipython3 + +``` diff --git a/docs/jupyterbook/models/derived.ipynb b/docs/jupyterbook/models/derived.ipynb deleted file mode 100644 index 92fc096..0000000 --- a/docs/jupyterbook/models/derived.ipynb +++ /dev/null @@ -1,2125 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "38217f02", - "metadata": {}, - "source": [ - "# Building design matrices with `ModelSpec`\n", - "\n", - "Force rebuild" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "3107d1f9", - "metadata": {}, - "outputs": [], - "source": [ - "x=4\n", - "import numpy as np, pandas as pd\n", - "%load_ext rpy2.ipython\n", - "\n", - "from ISLP import load_data\n", - "from ISLP.models import ModelSpec\n", - "\n", - "import statsmodels.api as sm" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "cdc46a4e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['Sales', 'CompPrice', 'Income', 'Advertising', 'Population', 'Price',\n", - " 'ShelveLoc', 'Age', 'Education', 'Urban', 'US'],\n", - " dtype='object')" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Carseats = load_data('Carseats')\n", - "%R -i Carseats\n", - "Carseats.columns" - ] - }, - { - "cell_type": "markdown", - "id": "e0a2a83a", - "metadata": {}, - "source": [ - "## Let's break up income into groups" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "68b40caf", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 M\n", - "1 L\n", - "2 L\n", - "3 H\n", - "4 M\n", - " ..\n", - "395 H\n", - "396 L\n", - "397 L\n", - "398 M\n", - "399 L\n", - "Name: OIncome, Length: 400, dtype: 
category\n", - "Categories (3, object): ['L' < 'M' < 'H']" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Carseats['OIncome'] = pd.cut(Carseats['Income'], \n", - " [0,50,90,200], \n", - " labels=['L','M','H'])\n", - "Carseats['OIncome']" - ] - }, - { - "cell_type": "markdown", - "id": "35558d88", - "metadata": {}, - "source": [ - "Let's also create an unordered version" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "e5e81a95", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 M\n", - "1 L\n", - "2 L\n", - "3 H\n", - "4 M\n", - " ..\n", - "395 H\n", - "396 L\n", - "397 L\n", - "398 M\n", - "399 L\n", - "Name: UIncome, Length: 400, dtype: category\n", - "Categories (3, object): ['L', 'M', 'H']" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Carseats['UIncome'] = pd.cut(Carseats['Income'], \n", - " [0,50,90,200], \n", - " labels=['L','M','H'],\n", - " ordered=False)\n", - "Carseats['UIncome']" - ] - }, - { - "cell_type": "markdown", - "id": "4bbf9e13", - "metadata": {}, - "source": [ - "## A simple model" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "1ad729b3", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['intercept', 'Price', 'Income'], dtype='object')" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design = ModelSpec(['Price', 'Income'])\n", - "X = design.fit_transform(Carseats)\n", - "X.columns" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "d05e9ec8", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "intercept 12.661546\n", - "Price -0.052213\n", - "Income 0.012829\n", - "dtype: float64" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Y = Carseats['Sales']\n", - "M = 
sm.OLS(Y, X).fit()\n", - "M.params" - ] - }, - { - "cell_type": "markdown", - "id": "b4e9ee33", - "metadata": {}, - "source": [ - "## Basic procedure\n", - "\n", - "The design matrix is built by cobbling together a set of columns and possibly transforming them.\n", - "A `pd.DataFrame` is essentially a list of columns. One of the first tasks done in `ModelSpec.fit`\n", - "is to inspect a dataframe for column info. The column `ShelveLoc` is categorical:" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "64ac65d3", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 Bad\n", - "1 Good\n", - "2 Medium\n", - "3 Medium\n", - "4 Bad\n", - " ... \n", - "395 Good\n", - "396 Medium\n", - "397 Medium\n", - "398 Bad\n", - "399 Good\n", - "Name: ShelveLoc, Length: 400, dtype: category\n", - "Categories (3, object): ['Bad', 'Good', 'Medium']" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Carseats['ShelveLoc']" - ] - }, - { - "cell_type": "markdown", - "id": "620f0e01", - "metadata": {}, - "source": [ - "This is recognized by `ModelSpec` in the form of `Column` objects which are just named tuples with two methods\n", - "`get_columns` and `fit_encoder`." - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "77b898e0", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Column(idx='ShelveLoc', name='ShelveLoc', is_categorical=True, is_ordinal=False, columns=('ShelveLoc[Good]', 'ShelveLoc[Medium]'), encoder=Contrast())" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design.column_info_['ShelveLoc']" - ] - }, - { - "cell_type": "markdown", - "id": "4580a6bf", - "metadata": {}, - "source": [ - "It recognized ordinal columns as well." 
- ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "c2dab855", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Column(idx='OIncome', name='OIncome', is_categorical=True, is_ordinal=True, columns=('OIncome',), encoder=OrdinalEncoder())" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design.column_info_['OIncome']" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "5e7963d6", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(array([ 73, 48, 35, 100]), ('Income',))" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "income = design.column_info_['Income']\n", - "cols, names = income.get_columns(Carseats)\n", - "(cols[:4], names)" - ] - }, - { - "cell_type": "markdown", - "id": "6b689966", - "metadata": {}, - "source": [ - "## Encoding a column\n", - "\n", - "In building a design matrix we must extract columns from our dataframe (or `np.ndarray`). Categorical\n", - "variables usually are encoded by several columns, typically one less than the number of categories.\n", - "This task is handled by the `encoder` of the `Column`. The encoder must satisfy the `sklearn` transform\n", - "model, i.e. `fit` on some array and `transform` on future arrays. The `fit_encoder` method of `Column` fits\n", - "its encoder the first time data is passed to it." 
- ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "ff3b96b6", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(array([[0., 0.],\n", - " [1., 0.],\n", - " [0., 1.],\n", - " [0., 1.]]),\n", - " ['ShelveLoc[Good]', 'ShelveLoc[Medium]'])" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "shelve = design.column_info_['ShelveLoc']\n", - "cols, names = shelve.get_columns(Carseats)\n", - "(cols[:4], names)" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "7e87da20", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[2.],\n", - " [1.],\n", - " [1.],\n", - " [0.]])" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "oincome = design.column_info_['OIncome']\n", - "oincome.get_columns(Carseats)[0][:4]" - ] - }, - { - "cell_type": "markdown", - "id": "4f2030ac", - "metadata": {}, - "source": [ - "## The terms\n", - "\n", - "The design matrix consists of several sets of columns. This is managed by the `ModelSpec` through\n", - "the `terms` argument which should be a sequence. The elements of `terms` are often\n", - "going to be strings (or tuples of strings for interactions, see below) but are converted to a\n", - "`Variable` object and stored in the `terms_` of the fitted `ModelSpec`. A `Variable` is just a named tuple." 
- ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "27fc4fb3", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['Price', 'Income']" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design.terms" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "16316981", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[Variable(variables=('Price',), name='Price', encoder=None, use_transform=True, pure_columns=True, override_encoder_colnames=False),\n", - " Variable(variables=('Income',), name='Income', encoder=None, use_transform=True, pure_columns=True, override_encoder_colnames=False)]" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design.terms_" - ] - }, - { - "cell_type": "markdown", - "id": "ef3f2bd0", - "metadata": {}, - "source": [ - "While each `Column` can itself extract data, they are all promoted to `Variable` to be of a uniform type. A\n", - "`Variable` can also create columns through the `build_columns` method of `ModelSpec`" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "dd9c7fa6", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "( Price\n", - " 0 120\n", - " 1 83\n", - " 2 80\n", - " 3 97\n", - " 4 128\n", - " .. ...\n", - " 395 128\n", - " 396 120\n", - " 397 159\n", - " 398 95\n", - " 399 120\n", - " \n", - " [400 rows x 1 columns],\n", - " ['Price'])" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "price = design.terms_[0]\n", - "design.build_columns(Carseats, price)" - ] - }, - { - "cell_type": "markdown", - "id": "5fc4cc45", - "metadata": {}, - "source": [ - "Note that `Variable` objects have a tuple of `variables` as well as an `encoder` attribute. 
The\n", - "tuple of `variables` first creates a concatenated dataframe from all corresponding variables and then\n", - "is run through `encoder.transform`. The `encoder.fit` method of each `Variable` is run once during \n", - "the call to `ModelSpec.fit`." - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "49d7fb46", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "( Price Income UIncome[L] UIncome[M]\n", - " 0 120.0 73.0 0.0 1.0\n", - " 1 83.0 48.0 1.0 0.0\n", - " 2 80.0 35.0 1.0 0.0\n", - " 3 97.0 100.0 0.0 0.0\n", - " 4 128.0 64.0 0.0 1.0\n", - " .. ... ... ... ...\n", - " 395 128.0 108.0 0.0 0.0\n", - " 396 120.0 23.0 1.0 0.0\n", - " 397 159.0 26.0 1.0 0.0\n", - " 398 95.0 79.0 0.0 1.0\n", - " 399 120.0 37.0 1.0 0.0\n", - " \n", - " [400 rows x 4 columns],\n", - " ['Price', 'Income', 'UIncome[L]', 'UIncome[M]'])" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from ISLP.models.model_spec import Variable\n", - "\n", - "new_var = Variable(('Price', 'Income', 'UIncome'), name='mynewvar', encoder=None)\n", - "design.build_columns(Carseats, new_var)" - ] - }, - { - "cell_type": "markdown", - "id": "bdfc0fe9", - "metadata": {}, - "source": [ - "Let's now transform these columns with an encoder. Within `ModelSpec` we will first build the\n", - "arrays above and then call `pca.fit` and finally `pca.transform` within `design.build_columns`." 
- ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "cf6f3f4c", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but PCA was fitted with feature names\n", - " warnings.warn(\n" - ] - }, - { - "data": { - "text/plain": [ - "( mynewvar[0] mynewvar[1]\n", - " 0 -3.608693 -4.853177\n", - " 1 15.081506 35.708630\n", - " 2 27.422871 40.774250\n", - " 3 -33.973209 13.470489\n", - " 4 6.567316 -11.290100\n", - " .. ... ...\n", - " 395 -36.846346 -18.415783\n", - " 396 45.741500 3.245602\n", - " 397 49.097533 -35.725355\n", - " 398 -13.577772 18.845139\n", - " 399 31.927566 0.978436\n", - " \n", - " [400 rows x 2 columns],\n", - " ['mynewvar[0]', 'mynewvar[1]'])" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from sklearn.decomposition import PCA\n", - "pca = PCA(n_components=2)\n", - "pca.fit(design.build_columns(Carseats, new_var)[0]) # this is done within `ModelSpec.fit`\n", - "pca_var = Variable(('Price', 'Income', 'UIncome'), name='mynewvar', encoder=pca)\n", - "design.build_columns(Carseats, pca_var)" - ] - }, - { - "cell_type": "markdown", - "id": "1552d19a", - "metadata": {}, - "source": [ - "The elements of the `variables` attribute may be column identifiers ( `\"Price\"`), `Column` instances (`price`)\n", - "or `Variable` instances (`pca_var`)." 
- ] - }, - { - "cell_type": "code", - "execution_count": 18, - "id": "12d955dd", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but PCA was fitted with feature names\n", - " warnings.warn(\n" - ] - }, - { - "data": { - "text/plain": [ - "( Price Price mynewvar[0] mynewvar[1]\n", - " 0 120.0 120.0 -3.608693 -4.853177\n", - " 1 83.0 83.0 15.081506 35.708630\n", - " 2 80.0 80.0 27.422871 40.774250\n", - " 3 97.0 97.0 -33.973209 13.470489\n", - " 4 128.0 128.0 6.567316 -11.290100\n", - " .. ... ... ... ...\n", - " 395 128.0 128.0 -36.846346 -18.415783\n", - " 396 120.0 120.0 45.741500 3.245602\n", - " 397 159.0 159.0 49.097533 -35.725355\n", - " 398 95.0 95.0 -13.577772 18.845139\n", - " 399 120.0 120.0 31.927566 0.978436\n", - " \n", - " [400 rows x 4 columns],\n", - " ['Price', 'Price', 'mynewvar[0]', 'mynewvar[1]'])" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "fancy_var = Variable(('Price', price, pca_var), name='fancy', encoder=None)\n", - "design.build_columns(Carseats, fancy_var)" - ] - }, - { - "cell_type": "markdown", - "id": "f5ea292d", - "metadata": {}, - "source": [ - "We can of course run PCA again on these features (if we wanted)." 
- ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "ae2af29b", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but PCA was fitted with feature names\n", - " warnings.warn(\n", - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but PCA was fitted with feature names\n", - " warnings.warn(\n", - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but PCA was fitted with feature names\n", - " warnings.warn(\n" - ] - }, - { - "data": { - "text/plain": [ - "( fancy_pca[0] fancy_pca[1]\n", - " 0 -6.951792 4.859283\n", - " 1 55.170148 -24.694875\n", - " 2 59.418556 -38.033572\n", - " 3 34.722389 28.922184\n", - " 4 -21.419184 -3.120673\n", - " .. ... 
...\n", - " 395 -18.257348 40.760122\n", - " 396 -10.546709 -45.021658\n", - " 397 -77.706359 -37.174379\n", - " 398 36.668694 7.730851\n", - " 399 -9.540535 -31.059122\n", - " \n", - " [400 rows x 2 columns],\n", - " ['fancy_pca[0]', 'fancy_pca[1]'])" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pca2 = PCA(n_components=2)\n", - "pca2.fit(design.build_columns(Carseats, fancy_var)[0]) # this is done within `ModelSpec.fit`\n", - "pca2_var = Variable(('Price', price, pca_var), name='fancy_pca', encoder=pca2)\n", - "design.build_columns(Carseats, pca2_var)" - ] - }, - { - "cell_type": "markdown", - "id": "57305dbe", - "metadata": {}, - "source": [ - "## Building the design matrix\n", - "\n", - "With these notions in mind, the final design is essentially then" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "89656ec4", - "metadata": {}, - "outputs": [], - "source": [ - "X_hand = np.column_stack([design.build_columns(Carseats, v)[0] for v in design.terms_])[:4]" - ] - }, - { - "cell_type": "markdown", - "id": "f6cb8167", - "metadata": {}, - "source": [ - "An intercept column is added if `design.intercept` is `True` and if the original argument to `transform` is\n", - "a dataframe the index is adjusted accordingly." - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": "547cb625", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design.intercept" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "id": "ff5b41d5", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
interceptPriceIncome
01.012073
11.08348
21.08035
31.097100
\n", - "
" - ], - "text/plain": [ - " intercept Price Income\n", - "0 1.0 120 73\n", - "1 1.0 83 48\n", - "2 1.0 80 35\n", - "3 1.0 97 100" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design.transform(Carseats)[:4]" - ] - }, - { - "cell_type": "markdown", - "id": "932759cf", - "metadata": {}, - "source": [ - "## Predicting\n", - "\n", - "Constructing the design matrix at any values is carried out by the `transform` method." - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "id": "e2190b00", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([12.65257604, 12.25873428])" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "new_data = pd.DataFrame({'Price':[10,20], 'Income':[40, 50]})\n", - "new_X = design.transform(new_data)\n", - "M.get_prediction(new_X).predicted_mean" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "id": "6545c5da", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " 0 1 \n", - "12.65258 12.25873 \n" - ] - } - ], - "source": [ - "%%R -i new_data,Carseats\n", - "predict(lm(Sales ~ Price + Income, data=Carseats), new_data)" - ] - }, - { - "cell_type": "markdown", - "id": "cd088b51", - "metadata": {}, - "source": [ - "### Difference between using `pd.DataFrame` and `np.ndarray`\n", - "\n", - "If the `terms` only refer to a few columns of the data frame, the `transform` method only needs a dataframe with those columns.\n", - "\n", - "If we had used an `np.ndarray`, the column identifiers would be integers identifying specific columns so,\n", - "in order to work correctly, `transform` would need another `np.ndarray` where the columns have the same meaning." 
- ] - }, - { - "cell_type": "code", - "execution_count": 25, - "id": "8f37ae20", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[1.0, 120, 73],\n", - " [1.0, 83, 48],\n", - " [1.0, 80, 35],\n", - " [1.0, 97, 100]], dtype=object)" - ] - }, - "execution_count": 25, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Carseats_np = np.asarray(Carseats[['Price', 'ShelveLoc', 'US', 'Income']])\n", - "design_np = ModelSpec([0,3]).fit(Carseats_np)\n", - "design_np.transform(Carseats_np)[:4]" - ] - }, - { - "cell_type": "markdown", - "id": "184aefc2", - "metadata": {}, - "source": [ - "The following will fail for hopefully obvious reasons" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "id": "e4134980", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "shapes (2,2) and (3,) not aligned: 2 (dim 1) != 3 (dim 0)\n" - ] - } - ], - "source": [ - "try:\n", - " new_D = np.zeros((2,2))\n", - " new_D[:,0] = [10,20]\n", - " new_D[:,1] = [40,50]\n", - " M.get_prediction(new_D).predicted_mean\n", - "except ValueError as e:\n", - " print(e)" - ] - }, - { - "cell_type": "markdown", - "id": "53808f3b", - "metadata": {}, - "source": [ - "Ultimately, `M` expects 3 columns for new predictions because it was fit\n", - "with a matrix having 3 columns (the first representing an intercept).\n", - "\n", - "We might be tempted to try as with the `pd.DataFrame` and produce\n", - "an `np.ndarray` with only the necessary variables." 
- ] - }, - { - "cell_type": "code", - "execution_count": 27, - "id": "62059c57", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "index 3 is out of bounds for axis 1 with size 2\n" - ] - } - ], - "source": [ - "try:\n", - " new_X = np.zeros((2,2))\n", - " new_X[:,0] = [10,20]\n", - " new_X[:,1] = [40,50]\n", - " new_D = design_np.transform(new_X)\n", - " M.get_prediction(new_D).predicted_mean\n", - "except IndexError as e:\n", - " print(e)" - ] - }, - { - "cell_type": "markdown", - "id": "ded12f69", - "metadata": {}, - "source": [ - "This fails because `design_np` is looking for column `3` from its `terms`:" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "id": "fbb509d1", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[Variable(variables=(0,), name='0', encoder=None, use_transform=True, pure_columns=True, override_encoder_colnames=False),\n", - " Variable(variables=(3,), name='3', encoder=None, use_transform=True, pure_columns=True, override_encoder_colnames=False)]" - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design_np.terms_" - ] - }, - { - "cell_type": "markdown", - "id": "f01391e4", - "metadata": {}, - "source": [ - "However, if we have an `np.ndarray` in which the first column indeed represents `Price` and the fourth indeed\n", - "represents `Income` then we can arrive at the correct answer by supplying such the array to `design_np.transform`:" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "id": "10df55ae", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([12.65257604, 12.25873428])" - ] - }, - "execution_count": 29, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "new_X = np.zeros((2,4))\n", - "new_X[:,0] = [10,20]\n", - "new_X[:,3] = [40,50]\n", - "new_D = design_np.transform(new_X)\n", - "M.get_prediction(new_D).predicted_mean" - ] 
- }, - { - "cell_type": "markdown", - "id": "b43099fb", - "metadata": {}, - "source": [ - "Given this subtlety about needing to supply arrays with identical column structure to `transform` when\n", - "using `np.ndarray` we presume that using a `pd.DataFrame` will be the more popular use case." - ] - }, - { - "cell_type": "markdown", - "id": "50bce64d", - "metadata": {}, - "source": [ - "## A model with some categorical variables\n", - "\n", - "Categorical variables become `Column` instances with encoders." - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "id": "2eb2ff16", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Column(idx='UIncome', name='UIncome', is_categorical=True, is_ordinal=False, columns=('UIncome[L]', 'UIncome[M]'), encoder=Contrast())" - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design = ModelSpec(['Population', 'Price', 'UIncome', 'ShelveLoc']).fit(Carseats)\n", - "design.column_info_['UIncome']" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "id": "6686dff8", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['intercept', 'Population', 'Price', 'UIncome[L]', 'UIncome[M]',\n", - " 'ShelveLoc[Good]', 'ShelveLoc[Medium]'],\n", - " dtype='object')" - ] - }, - "execution_count": 31, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "X = design.fit_transform(Carseats)\n", - "X.columns" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "id": "0e0eafd7", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "intercept 11.876012\n", - "Population 0.001163\n", - "Price -0.055725\n", - "UIncome[L] -1.042297\n", - "UIncome[M] -0.119123\n", - "ShelveLoc[Good] 4.999623\n", - "ShelveLoc[Medium] 1.964278\n", - "dtype: float64" - ] - }, - "execution_count": 32, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "sm.OLS(Y, X).fit().params" - 
] - }, - { - "cell_type": "code", - "execution_count": 33, - "id": "43cce209", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " (Intercept) Population Price UIncomeM UIncomeH \n", - " 10.83371503 0.00116301 -0.05572469 0.92317388 1.04229679 \n", - " ShelveLocGood ShelveLocMedium \n", - " 4.99962319 1.96427771 \n" - ] - } - ], - "source": [ - "%%R\n", - "lm(Sales ~ Population + Price + UIncome + ShelveLoc, data=Carseats)$coef" - ] - }, - { - "cell_type": "markdown", - "id": "99bf408e", - "metadata": {}, - "source": [ - "## Getting the encoding you want\n", - "\n", - "By default the level dropped by `ModelSpec` will be the first of the `categories_` values from \n", - "`sklearn.preprocessing.OneHotEncoder()`. We might wish to change this. It seems\n", - "as if the correct way to do this would be something like `Variable(('UIncome',), 'mynewencoding', new_encoder)`\n", - "where `new_encoder` would somehow drop the column we want dropped. \n", - "\n", - "However, when using the convenient identifier `UIncome` in the `variables` argument, this maps to the `Column` associated to `UIncome` within `design.column_info_`:" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "id": "11c19ebf", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Column(idx='UIncome', name='UIncome', is_categorical=True, is_ordinal=False, columns=('UIncome[L]', 'UIncome[M]'), encoder=Contrast())" - ] - }, - "execution_count": 34, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design.column_info_['UIncome']" - ] - }, - { - "cell_type": "markdown", - "id": "4b48e5d2", - "metadata": {}, - "source": [ - "This column already has an encoder and `Column` instances are immutable as named tuples. Further, there are times when \n", - "we may want to encode `UIncome` differently within the same model. 
In the model below the main effect of `UIncome` is encoded with two columns while in the interaction `UIncome` (see below) has three columns. This is a design of interest\n", - "and we need a way to allow different encodings of the same column of `Carseats`" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "id": "81f641ba", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Call:\n", - "lm(formula = Sales ~ UIncome:ShelveLoc + UIncome, data = Carseats)\n", - "\n", - "Coefficients:\n", - " (Intercept) UIncomeM UIncomeH \n", - " 5.1317 0.1151 1.1561 \n", - " UIncomeL:ShelveLocGood UIncomeM:ShelveLocGood UIncomeH:ShelveLocGood \n", - " 4.5121 5.5752 3.7381 \n", - "UIncomeL:ShelveLocMedium UIncomeM:ShelveLocMedium UIncomeH:ShelveLocMedium \n", - " 1.2473 2.4782 1.5141 \n", - "\n" - ] - } - ], - "source": [ - "%%R\n", - "lm(Sales ~ UIncome:ShelveLoc + UIncome, data=Carseats)" - ] - }, - { - "cell_type": "markdown", - "id": "79f7eb4d", - "metadata": {}, - "source": [ - " We can create a new \n", - "`Column` with the encoder we want. For categorical variables, there is a convenience function to do so." - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "id": "2afb3b5d", - "metadata": {}, - "outputs": [], - "source": [ - "from ISLP.models.model_spec import contrast\n", - "pref_encoding = contrast('UIncome', 'drop', 'L')" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "id": "c44692ab", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "( UIncome[M] UIncome[H]\n", - " 0 1.0 0.0\n", - " 1 0.0 0.0\n", - " 2 0.0 0.0\n", - " 3 0.0 1.0\n", - " 4 1.0 0.0\n", - " .. ... 
...\n", - " 395 0.0 1.0\n", - " 396 0.0 0.0\n", - " 397 0.0 0.0\n", - " 398 1.0 0.0\n", - " 399 0.0 0.0\n", - " \n", - " [400 rows x 2 columns],\n", - " ['UIncome[M]', 'UIncome[H]'])" - ] - }, - "execution_count": 37, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design.build_columns(Carseats, pref_encoding)" - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "id": "c0bfb2a5", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['intercept', 'Population', 'Price', 'UIncome[M]', 'UIncome[H]',\n", - " 'ShelveLoc[Good]', 'ShelveLoc[Medium]'],\n", - " dtype='object')" - ] - }, - "execution_count": 38, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design = ModelSpec(['Population', 'Price', pref_encoding, 'ShelveLoc']).fit(Carseats)\n", - "X = design.fit_transform(Carseats)\n", - "X.columns" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "id": "d263056c", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "intercept 10.833715\n", - "Population 0.001163\n", - "Price -0.055725\n", - "UIncome[M] 0.923174\n", - "UIncome[H] 1.042297\n", - "ShelveLoc[Good] 4.999623\n", - "ShelveLoc[Medium] 1.964278\n", - "dtype: float64" - ] - }, - "execution_count": 39, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "sm.OLS(Y, X).fit().params" - ] - }, - { - "cell_type": "code", - "execution_count": 40, - "id": "edf0dc68", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " (Intercept) Population Price UIncomeM UIncomeH \n", - " 10.83371503 0.00116301 -0.05572469 0.92317388 1.04229679 \n", - " ShelveLocGood ShelveLocMedium \n", - " 4.99962319 1.96427771 \n" - ] - } - ], - "source": [ - "%%R\n", - "lm(Sales ~ Population + Price + UIncome + ShelveLoc, data=Carseats)$coef" - ] - }, - { - "cell_type": "markdown", - "id": "82071a54", - "metadata": {}, - "source": [ - "## Interactions\n", - 
"\n", - "We've referred to interactions above. These are specified (by convenience) as tuples in the `terms` argument\n", - "to `ModelSpec`." - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "id": "cd18a4a4", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "intercept 7.866634\n", - "UIncome[L]:ShelveLoc[Good] 4.512054\n", - "UIncome[L]:ShelveLoc[Medium] 1.247275\n", - "UIncome[M]:ShelveLoc[Good] 5.575170\n", - "UIncome[M]:ShelveLoc[Medium] 2.478163\n", - "UIncome[L] -2.734895\n", - "UIncome[M] -2.619745\n", - "dtype: float64" - ] - }, - "execution_count": 41, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design = ModelSpec([('UIncome', 'ShelveLoc'), 'UIncome'])\n", - "X = design.fit_transform(Carseats)\n", - "sm.OLS(Y, X).fit().params" - ] - }, - { - "cell_type": "markdown", - "id": "229fa32d", - "metadata": {}, - "source": [ - "The tuples in `terms` are converted to `Variable` in the formalized `terms_` attribute by creating a `Variable` with\n", - "`variables` set to the tuple and the encoder an `Interaction` encoder which (unsurprisingly) creates the interaction columns from the concatenated data frames of `UIncome` and `ShelveLoc`." 
- ] - }, - { - "cell_type": "code", - "execution_count": 42, - "id": "b8c52dbb", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Variable(variables=('UIncome', 'ShelveLoc'), name='UIncome:ShelveLoc', encoder=Interaction(column_names={'ShelveLoc': ['ShelveLoc[Good]', 'ShelveLoc[Medium]'],\n", - " 'UIncome': ['UIncome[L]', 'UIncome[M]']},\n", - " columns={'ShelveLoc': range(2, 4), 'UIncome': range(0, 2)},\n", - " variables=['UIncome', 'ShelveLoc']), use_transform=True, pure_columns=False, override_encoder_colnames=False)" - ] - }, - "execution_count": 42, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design.terms_[0]" - ] - }, - { - "cell_type": "markdown", - "id": "e7f93464", - "metadata": {}, - "source": [ - "Comparing this to the previous `R` model." - ] - }, - { - "cell_type": "code", - "execution_count": 43, - "id": "4094c01f", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Call:\n", - "lm(formula = Sales ~ UIncome:ShelveLoc + UIncome, data = Carseats)\n", - "\n", - "Coefficients:\n", - " (Intercept) UIncomeM UIncomeH \n", - " 5.1317 0.1151 1.1561 \n", - " UIncomeL:ShelveLocGood UIncomeM:ShelveLocGood UIncomeH:ShelveLocGood \n", - " 4.5121 5.5752 3.7381 \n", - "UIncomeL:ShelveLocMedium UIncomeM:ShelveLocMedium UIncomeH:ShelveLocMedium \n", - " 1.2473 2.4782 1.5141 \n", - "\n" - ] - } - ], - "source": [ - "%%R\n", - "lm(Sales ~ UIncome:ShelveLoc + UIncome, data=Carseats)" - ] - }, - { - "cell_type": "markdown", - "id": "d448c9ca", - "metadata": {}, - "source": [ - "We note a few important things:\n", - "\n", - "1. `R` has reorganized the columns of the design from the formula: although we wrote `UIncome:ShelveLoc` first these\n", - "columns have been built later. **`ModelSpec` builds columns in the order determined by `terms`!**\n", - "\n", - "2. As noted above, `R` has encoded `UIncome` differently in the main effect and in the interaction. 
For `ModelSpec`, the reference to `UIncome` always refers to the column in `design.column_info_` and will always build only the columns for `L` and `M`. **`ModelSpec` does no inspection of terms to decide how to encode categorical variables.**\n", - "\n", - "A few notes:\n", - "\n", - "- **Why not try to inspect the terms?** For any nontrivial formula in `R` with several categorical variables and interactions, predicting what columns will be produced from a given formula is not simple. **`ModelSpec` errs on the side of being explicit.**\n", - "\n", - "- **Is it impossible to build the design as `R` has?** No. An advanced user who *knows* they want the columns built as `R` has can do so (fairly) easily." - ] - }, - { - "cell_type": "code", - "execution_count": 44, - "id": "634e05c6", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "( UIncome[H] UIncome[L] UIncome[M]\n", - " 0 0.0 0.0 1.0\n", - " 1 0.0 1.0 0.0\n", - " 2 0.0 1.0 0.0\n", - " 3 1.0 0.0 0.0\n", - " 4 0.0 0.0 1.0\n", - " .. ... ... 
...\n", - " 395 1.0 0.0 0.0\n", - " 396 0.0 1.0 0.0\n", - " 397 0.0 1.0 0.0\n", - " 398 0.0 0.0 1.0\n", - " 399 0.0 1.0 0.0\n", - " \n", - " [400 rows x 3 columns],\n", - " ['UIncome[H]', 'UIncome[L]', 'UIncome[M]'])" - ] - }, - "execution_count": 44, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "full_encoding = contrast('UIncome', None)\n", - "design.build_columns(Carseats, full_encoding)" - ] - }, - { - "cell_type": "code", - "execution_count": 45, - "id": "4c09c93f", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "intercept 5.131739\n", - "UIncome[M] 0.115150\n", - "UIncome[H] 1.156118\n", - "UIncome[H]:ShelveLoc[Good] 3.738052\n", - "UIncome[H]:ShelveLoc[Medium] 1.514104\n", - "UIncome[L]:ShelveLoc[Good] 4.512054\n", - "UIncome[L]:ShelveLoc[Medium] 1.247275\n", - "UIncome[M]:ShelveLoc[Good] 5.575170\n", - "UIncome[M]:ShelveLoc[Medium] 2.478163\n", - "dtype: float64" - ] - }, - "execution_count": 45, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design = ModelSpec([pref_encoding, (full_encoding, 'ShelveLoc')])\n", - "X = design.fit_transform(Carseats)\n", - "sm.OLS(Y, X).fit().params" - ] - }, - { - "cell_type": "markdown", - "id": "48c1989f", - "metadata": {}, - "source": [ - "## Special encodings\n", - "\n", - "For flexible models, we may want to consider transformations of features, i.e. polynomial\n", - "or spline transformations. Given transforms that follow the `fit/transform` paradigm\n", - "we can of course achieve this with a `Column` and an `encoder`. 
The `ISLP.transforms`\n", - "package includes a `Poly` transform" - ] - }, - { - "cell_type": "code", - "execution_count": 46, - "id": "85a28d87", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Variable(variables=('Income',), name='poly(Income, 3)', encoder=Poly(degree=3), use_transform=True, pure_columns=False, override_encoder_colnames=True)" - ] - }, - "execution_count": 46, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from ISLP.models.model_spec import poly\n", - "poly('Income', 3)" - ] - }, - { - "cell_type": "code", - "execution_count": 47, - "id": "e17c8a9d", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "intercept 5.440077\n", - "poly(Income, 3)[0] 10.036373\n", - "poly(Income, 3)[1] -2.799156\n", - "poly(Income, 3)[2] 2.399601\n", - "ShelveLoc[Good] 4.808133\n", - "ShelveLoc[Medium] 1.889533\n", - "dtype: float64" - ] - }, - "execution_count": 47, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design = ModelSpec([poly('Income', 3), 'ShelveLoc'])\n", - "X = design.fit_transform(Carseats)\n", - "sm.OLS(Y, X).fit().params" - ] - }, - { - "cell_type": "markdown", - "id": "944f56d6", - "metadata": {}, - "source": [ - "Compare:" - ] - }, - { - "cell_type": "code", - "execution_count": 48, - "id": "1889caca", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " (Intercept) poly(Income, 3)1 poly(Income, 3)2 poly(Income, 3)3 \n", - " 5.440077 10.036373 -2.799156 2.399601 \n", - " ShelveLocGood ShelveLocMedium \n", - " 4.808133 1.889533 \n" - ] - } - ], - "source": [ - "%%R\n", - "lm(Sales ~ poly(Income, 3) + ShelveLoc, data=Carseats)$coef" - ] - }, - { - "cell_type": "markdown", - "id": "bd4dca31", - "metadata": {}, - "source": [ - "## Splines\n", - "\n", - "Support for natural and B-splines is also included" - ] - }, - { - "cell_type": "code", - "execution_count": 49, - "id": "70fae990", - "metadata": {}, - 
"outputs": [], - "source": [ - "from ISLP.models.model_spec import ns, bs, pca" - ] - }, - { - "cell_type": "markdown", - "id": "2d812694", - "metadata": {}, - "source": [ - "## Custom encoding\n", - "\n", - "Instead of PCA we might run some clustering on some features and then uses the clusters to\n", - "create new features. This can be done with `derived_variable`. Indeed, `pca`, `ns` and `bs` are all examples\n", - "of this." - ] - }, - { - "cell_type": "code", - "execution_count": 50, - "id": "8e5d2305", - "metadata": {}, - "outputs": [], - "source": [ - "from ISLP.models.model_spec import derived_variable, Contrast" - ] - }, - { - "cell_type": "code", - "execution_count": 51, - "id": "8a40c663", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([1, 1, 2, 1, 2, 1, 0, 1, 0, 0, 0, 1, 2, 2, 0, 1, 2, 1, 0, 0, 0, 2,\n", - " 2, 2, 1, 2, 1, 0, 0, 1, 0, 1, 2, 1, 2, 0, 0, 2, 2, 2, 0, 2, 0, 2,\n", - " 0, 2, 0, 0, 2, 0, 1, 0, 2, 0, 0, 0, 0, 0, 1, 0, 1, 2, 2, 0, 1, 2,\n", - " 0, 1, 1, 2, 1, 1, 2, 0, 0, 1, 1, 0, 2, 0, 1, 0, 0, 2, 2, 0, 1, 2,\n", - " 2, 2, 2, 2, 0, 2, 0, 2, 2, 0, 1, 2, 0, 0, 2, 0, 0, 1, 2, 0, 1, 0,\n", - " 0, 1, 0, 2, 0, 2, 0, 2, 0, 0, 1, 1, 0, 0, 0, 0, 0, 2, 0, 0, 1, 0,\n", - " 0, 0, 2, 1, 0, 2, 1, 1, 1, 2, 0, 0, 2, 0, 2, 1, 0, 0, 0, 1, 2, 2,\n", - " 1, 0, 2, 2, 0, 2, 2, 2, 2, 0, 0, 2, 1, 0, 0, 1, 1, 1, 0, 0, 2, 0,\n", - " 1, 0, 0, 2, 1, 0, 2, 1, 2, 1, 0, 2, 2, 1, 1, 2, 2, 0, 1, 1, 2, 2,\n", - " 1, 0, 0, 0, 2, 0, 0, 2, 0, 0, 2, 2, 2, 1, 1, 0, 0, 1, 2, 2, 1, 1,\n", - " 1, 2, 0, 2, 2, 2, 2, 0, 1, 0, 0, 0, 0, 1, 1, 2, 1, 2, 2, 0, 0, 0,\n", - " 2, 2, 2, 2, 1, 0, 0, 0, 1, 0, 0, 2, 1, 0, 2, 1, 2, 1, 1, 2, 1, 2,\n", - " 2, 2, 1, 1, 0, 2, 2, 2, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 2,\n", - " 1, 2, 2, 1, 1, 0, 1, 0, 0, 1, 2, 1, 2, 1, 0, 0, 1, 1, 1, 1, 2, 0,\n", - " 1, 0, 1, 1, 0, 1, 2, 2, 2, 2, 1, 1, 1, 2, 2, 1, 2, 0, 2, 1, 0, 1,\n", - " 2, 1, 1, 2, 1, 1, 2, 2, 2, 2, 2, 0, 1, 2, 0, 2, 0, 2, 1, 1, 1, 1,\n", - " 1, 1, 2, 0, 0, 0, 0, 1, 0, 
2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 0, 2, 2,\n", - " 2, 2, 1, 2, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 2, 0, 0, 1, 0, 1, 1,\n", - " 2, 2, 0, 2], dtype=int32)" - ] - }, - "execution_count": 51, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from sklearn.cluster import KMeans\n", - "from sklearn.pipeline import make_pipeline\n", - "from sklearn.preprocessing import StandardScaler\n", - "cluster = make_pipeline(StandardScaler(), KMeans(n_clusters=3, random_state=0))\n", - "group = Variable(('Income', 'Price', 'Advertising', 'Population'), 'group', None)\n", - "X = design.build_submodel(Carseats, [group]).drop('intercept', axis=1)\n", - "cluster.fit(X.values)\n", - "cluster.predict(X.values)" - ] - }, - { - "cell_type": "markdown", - "id": "9bc38836", - "metadata": {}, - "source": [ - "For clustering, we often want to use the `predict` method rather than the `transform` method. If the ultimate\n", - "features all use `transform` then the do not even need to use these two calls to `make_pipeline`." - ] - }, - { - "cell_type": "code", - "execution_count": 52, - "id": "8ceab9b6", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but StandardScaler was fitted with feature names\n", - " warnings.warn(\n", - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but StandardScaler was fitted with feature names\n", - " warnings.warn(\n", - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but StandardScaler was fitted with feature names\n", - " warnings.warn(\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
interceptmyclus
01.01
11.01
21.02
31.01
41.02
.........
3951.01
3961.02
3971.02
3981.00
3991.02
\n", - "

400 rows × 2 columns

\n", - "
" - ], - "text/plain": [ - " intercept myclus\n", - "0 1.0 1\n", - "1 1.0 1\n", - "2 1.0 2\n", - "3 1.0 1\n", - "4 1.0 2\n", - ".. ... ...\n", - "395 1.0 1\n", - "396 1.0 2\n", - "397 1.0 2\n", - "398 1.0 0\n", - "399 1.0 2\n", - "\n", - "[400 rows x 2 columns]" - ] - }, - "execution_count": 52, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "cluster2 = make_pipeline(StandardScaler(), KMeans(n_clusters=3, random_state=0))\n", - "cluster_var = derived_variable(['Income', 'Price', 'Advertising', 'Population'], \n", - " name='myclus', \n", - " encoder=cluster2,\n", - " use_transform=False)\n", - "design = ModelSpec([cluster_var]).fit(Carseats)\n", - "design.transform(Carseats)" - ] - }, - { - "cell_type": "markdown", - "id": "1f9b2630", - "metadata": {}, - "source": [ - "Somewhat clunkily, we can make this a categorical variable by creating a `Variable` with a\n", - "categorical encoder." - ] - }, - { - "cell_type": "code", - "execution_count": 53, - "id": "ffde00a5", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Variable(variables=(Variable(variables=('Income', 'Price', 'Advertising', 'Population'), name='myclus', encoder=Pipeline(steps=[('standardscaler', StandardScaler()),\n", - " ('kmeans', KMeans(n_clusters=3, random_state=0))]), use_transform=False, pure_columns=False, override_encoder_colnames=True),), name='mynewcat', encoder=Contrast(), use_transform=True, pure_columns=False, override_encoder_colnames=False)" - ] - }, - "execution_count": 53, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "cluster2 = make_pipeline(StandardScaler(), KMeans(n_clusters=3, random_state=0))\n", - "cluster_var = derived_variable(['Income', 'Price', 'Advertising', 'Population'], \n", - " name='myclus', \n", - " encoder=cluster2,\n", - " use_transform=False)\n", - "cat_cluster = Variable((cluster_var,), name='mynewcat', encoder=Contrast(method='drop'))\n", - "cat_cluster" - ] - }, - { - "cell_type": 
"code", - "execution_count": 54, - "id": "5afeab7c", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but StandardScaler was fitted with feature names\n", - " warnings.warn(\n", - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but StandardScaler was fitted with feature names\n", - " warnings.warn(\n", - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but StandardScaler was fitted with feature names\n", - " warnings.warn(\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
intercept12
01.01.00.0
11.01.00.0
21.00.01.0
31.01.00.0
41.00.01.0
............
3951.01.00.0
3961.00.01.0
3971.00.01.0
3981.00.00.0
3991.00.01.0
\n", - "

400 rows × 3 columns

\n", - "
" - ], - "text/plain": [ - " intercept 1 2\n", - "0 1.0 1.0 0.0\n", - "1 1.0 1.0 0.0\n", - "2 1.0 0.0 1.0\n", - "3 1.0 1.0 0.0\n", - "4 1.0 0.0 1.0\n", - ".. ... ... ...\n", - "395 1.0 1.0 0.0\n", - "396 1.0 0.0 1.0\n", - "397 1.0 0.0 1.0\n", - "398 1.0 0.0 0.0\n", - "399 1.0 0.0 1.0\n", - "\n", - "[400 rows x 3 columns]" - ] - }, - "execution_count": 54, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design = ModelSpec([cat_cluster]).fit(Carseats)\n", - "\n", - "design.transform(Carseats)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e24d5637-80fb-49bf-ac10-8ff68cb8bd8f", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "jupytext": { - "formats": "source/models///ipynb,jupyterbook/models///md:myst,jupyterbook/models///ipynb" - }, - "kernelspec": { - "display_name": "islp_test", - "language": "python", - "name": "islp_test" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.13" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/docs/jupyterbook/models/derived.md b/docs/jupyterbook/models/derived.md deleted file mode 100644 index 1d0f23b..0000000 --- a/docs/jupyterbook/models/derived.md +++ /dev/null @@ -1,487 +0,0 @@ ---- -jupytext: - formats: source/models///ipynb,jupyterbook/models///md:myst,jupyterbook/models///ipynb - text_representation: - extension: .md - format_name: myst - format_version: 0.13 - jupytext_version: 1.14.1 -kernelspec: - display_name: islp_test - language: python - name: islp_test ---- - -# Building design matrices with `ModelSpec` - -Force rebuild - -```{code-cell} ipython3 -x=4 -import numpy as np, pandas as pd -%load_ext rpy2.ipython - -from ISLP import load_data -from ISLP.models import ModelSpec - -import statsmodels.api as sm -``` - 
-```{code-cell} ipython3 -Carseats = load_data('Carseats') -%R -i Carseats -Carseats.columns -``` - -## Let's break up income into groups - -```{code-cell} ipython3 -Carseats['OIncome'] = pd.cut(Carseats['Income'], - [0,50,90,200], - labels=['L','M','H']) -Carseats['OIncome'] -``` - -Let's also create an unordered version - -```{code-cell} ipython3 -Carseats['UIncome'] = pd.cut(Carseats['Income'], - [0,50,90,200], - labels=['L','M','H'], - ordered=False) -Carseats['UIncome'] -``` - -## A simple model - -```{code-cell} ipython3 -design = ModelSpec(['Price', 'Income']) -X = design.fit_transform(Carseats) -X.columns -``` - -```{code-cell} ipython3 -Y = Carseats['Sales'] -M = sm.OLS(Y, X).fit() -M.params -``` - -## Basic procedure - -The design matrix is built by cobbling together a set of columns and possibly transforming them. -A `pd.DataFrame` is essentially a list of columns. One of the first tasks done in `ModelSpec.fit` -is to inspect a dataframe for column info. The column `ShelveLoc` is categorical: - -```{code-cell} ipython3 -Carseats['ShelveLoc'] -``` - -This is recognized by `ModelSpec` in the form of `Column` objects which are just named tuples with two methods -`get_columns` and `fit_encoder`. - -```{code-cell} ipython3 -design.column_info_['ShelveLoc'] -``` - -It recognized ordinal columns as well. - -```{code-cell} ipython3 -design.column_info_['OIncome'] -``` - -```{code-cell} ipython3 -income = design.column_info_['Income'] -cols, names = income.get_columns(Carseats) -(cols[:4], names) -``` - -## Encoding a column - -In building a design matrix we must extract columns from our dataframe (or `np.ndarray`). Categorical -variables usually are encoded by several columns, typically one less than the number of categories. -This task is handled by the `encoder` of the `Column`. The encoder must satisfy the `sklearn` transform -model, i.e. `fit` on some array and `transform` on future arrays. 
The `fit_encoder` method of `Column` fits -its encoder the first time data is passed to it. - -```{code-cell} ipython3 -shelve = design.column_info_['ShelveLoc'] -cols, names = shelve.get_columns(Carseats) -(cols[:4], names) -``` - -```{code-cell} ipython3 -oincome = design.column_info_['OIncome'] -oincome.get_columns(Carseats)[0][:4] -``` - -## The terms - -The design matrix consists of several sets of columns. This is managed by the `ModelSpec` through -the `terms` argument which should be a sequence. The elements of `terms` are often -going to be strings (or tuples of strings for interactions, see below) but are converted to a -`Variable` object and stored in the `terms_` of the fitted `ModelSpec`. A `Variable` is just a named tuple. - -```{code-cell} ipython3 -design.terms -``` - -```{code-cell} ipython3 -design.terms_ -``` - -While each `Column` can itself extract data, they are all promoted to `Variable` to be of a uniform type. A -`Variable` can also create columns through the `build_columns` method of `ModelSpec` - -```{code-cell} ipython3 -price = design.terms_[0] -design.build_columns(Carseats, price) -``` - -Note that `Variable` objects have a tuple of `variables` as well as an `encoder` attribute. The -tuple of `variables` first creates a concatenated dataframe from all corresponding variables and then -is run through `encoder.transform`. The `encoder.fit` method of each `Variable` is run once during -the call to `ModelSpec.fit`. - -```{code-cell} ipython3 -from ISLP.models.model_spec import Variable - -new_var = Variable(('Price', 'Income', 'UIncome'), name='mynewvar', encoder=None) -design.build_columns(Carseats, new_var) -``` - -Let's now transform these columns with an encoder. Within `ModelSpec` we will first build the -arrays above and then call `pca.fit` and finally `pca.transform` within `design.build_columns`. 
- -```{code-cell} ipython3 -from sklearn.decomposition import PCA -pca = PCA(n_components=2) -pca.fit(design.build_columns(Carseats, new_var)[0]) # this is done within `ModelSpec.fit` -pca_var = Variable(('Price', 'Income', 'UIncome'), name='mynewvar', encoder=pca) -design.build_columns(Carseats, pca_var) -``` - -The elements of the `variables` attribute may be column identifiers ( `"Price"`), `Column` instances (`price`) -or `Variable` instances (`pca_var`). - -```{code-cell} ipython3 -fancy_var = Variable(('Price', price, pca_var), name='fancy', encoder=None) -design.build_columns(Carseats, fancy_var) -``` - -We can of course run PCA again on these features (if we wanted). - -```{code-cell} ipython3 -pca2 = PCA(n_components=2) -pca2.fit(design.build_columns(Carseats, fancy_var)[0]) # this is done within `ModelSpec.fit` -pca2_var = Variable(('Price', price, pca_var), name='fancy_pca', encoder=pca2) -design.build_columns(Carseats, pca2_var) -``` - -## Building the design matrix - -With these notions in mind, the final design is essentially then - -```{code-cell} ipython3 -X_hand = np.column_stack([design.build_columns(Carseats, v)[0] for v in design.terms_])[:4] -``` - -An intercept column is added if `design.intercept` is `True` and if the original argument to `transform` is -a dataframe the index is adjusted accordingly. - -```{code-cell} ipython3 -design.intercept -``` - -```{code-cell} ipython3 -design.transform(Carseats)[:4] -``` - -## Predicting - -Constructing the design matrix at any values is carried out by the `transform` method. 
- -```{code-cell} ipython3 -new_data = pd.DataFrame({'Price':[10,20], 'Income':[40, 50]}) -new_X = design.transform(new_data) -M.get_prediction(new_X).predicted_mean -``` - -```{code-cell} ipython3 -%%R -i new_data,Carseats -predict(lm(Sales ~ Price + Income, data=Carseats), new_data) -``` - -### Difference between using `pd.DataFrame` and `np.ndarray` - -If the `terms` only refer to a few columns of the data frame, the `transform` method only needs a dataframe with those columns. - -If we had used an `np.ndarray`, the column identifiers would be integers identifying specific columns so, -in order to work correctly, `transform` would need another `np.ndarray` where the columns have the same meaning. - -```{code-cell} ipython3 -Carseats_np = np.asarray(Carseats[['Price', 'ShelveLoc', 'US', 'Income']]) -design_np = ModelSpec([0,3]).fit(Carseats_np) -design_np.transform(Carseats_np)[:4] -``` - -The following will fail for hopefully obvious reasons - -```{code-cell} ipython3 -try: - new_D = np.zeros((2,2)) - new_D[:,0] = [10,20] - new_D[:,1] = [40,50] - M.get_prediction(new_D).predicted_mean -except ValueError as e: - print(e) -``` - -Ultimately, `M` expects 3 columns for new predictions because it was fit -with a matrix having 3 columns (the first representing an intercept). - -We might be tempted to try as with the `pd.DataFrame` and produce -an `np.ndarray` with only the necessary variables. 
- -```{code-cell} ipython3 -try: - new_X = np.zeros((2,2)) - new_X[:,0] = [10,20] - new_X[:,1] = [40,50] - new_D = design_np.transform(new_X) - M.get_prediction(new_D).predicted_mean -except IndexError as e: - print(e) -``` - -This fails because `design_np` is looking for column `3` from its `terms`: - -```{code-cell} ipython3 -design_np.terms_ -``` - -However, if we have an `np.ndarray` in which the first column indeed represents `Price` and the fourth indeed -represents `Income` then we can arrive at the correct answer by supplying such the array to `design_np.transform`: - -```{code-cell} ipython3 -new_X = np.zeros((2,4)) -new_X[:,0] = [10,20] -new_X[:,3] = [40,50] -new_D = design_np.transform(new_X) -M.get_prediction(new_D).predicted_mean -``` - -Given this subtlety about needing to supply arrays with identical column structure to `transform` when -using `np.ndarray` we presume that using a `pd.DataFrame` will be the more popular use case. - -+++ - -## A model with some categorical variables - -Categorical variables become `Column` instances with encoders. - -```{code-cell} ipython3 -design = ModelSpec(['Population', 'Price', 'UIncome', 'ShelveLoc']).fit(Carseats) -design.column_info_['UIncome'] -``` - -```{code-cell} ipython3 -X = design.fit_transform(Carseats) -X.columns -``` - -```{code-cell} ipython3 -sm.OLS(Y, X).fit().params -``` - -```{code-cell} ipython3 -%%R -lm(Sales ~ Population + Price + UIncome + ShelveLoc, data=Carseats)$coef -``` - -## Getting the encoding you want - -By default the level dropped by `ModelSpec` will be the first of the `categories_` values from -`sklearn.preprocessing.OneHotEncoder()`. We might wish to change this. It seems -as if the correct way to do this would be something like `Variable(('UIncome',), 'mynewencoding', new_encoder)` -where `new_encoder` would somehow drop the column we want dropped. 
- -However, when using the convenient identifier `UIncome` in the `variables` argument, this maps to the `Column` associated to `UIncome` within `design.column_info_`: - -```{code-cell} ipython3 -design.column_info_['UIncome'] -``` - -This column already has an encoder and `Column` instances are immutable as named tuples. Further, there are times when -we may want to encode `UIncome` differently within the same model. In the model below the main effect of `UIncome` is encoded with two columns while in the interaction `UIncome` (see below) has three columns. This is a design of interest -and we need a way to allow different encodings of the same column of `Carseats` - -```{code-cell} ipython3 -%%R -lm(Sales ~ UIncome:ShelveLoc + UIncome, data=Carseats) -``` - - We can create a new -`Column` with the encoder we want. For categorical variables, there is a convenience function to do so. - -```{code-cell} ipython3 -from ISLP.models.model_spec import contrast -pref_encoding = contrast('UIncome', 'drop', 'L') -``` - -```{code-cell} ipython3 -design.build_columns(Carseats, pref_encoding) -``` - -```{code-cell} ipython3 -design = ModelSpec(['Population', 'Price', pref_encoding, 'ShelveLoc']).fit(Carseats) -X = design.fit_transform(Carseats) -X.columns -``` - -```{code-cell} ipython3 -sm.OLS(Y, X).fit().params -``` - -```{code-cell} ipython3 -%%R -lm(Sales ~ Population + Price + UIncome + ShelveLoc, data=Carseats)$coef -``` - -## Interactions - -We've referred to interactions above. These are specified (by convenience) as tuples in the `terms` argument -to `ModelSpec`. 
- -```{code-cell} ipython3 -design = ModelSpec([('UIncome', 'ShelveLoc'), 'UIncome']) -X = design.fit_transform(Carseats) -sm.OLS(Y, X).fit().params -``` - -The tuples in `terms` are converted to `Variable` in the formalized `terms_` attribute by creating a `Variable` with -`variables` set to the tuple and the encoder an `Interaction` encoder which (unsurprisingly) creates the interaction columns from the concatenated data frames of `UIncome` and `ShelveLoc`. - -```{code-cell} ipython3 -design.terms_[0] -``` - -Comparing this to the previous `R` model. - -```{code-cell} ipython3 -%%R -lm(Sales ~ UIncome:ShelveLoc + UIncome, data=Carseats) -``` - -We note a few important things: - -1. `R` has reorganized the columns of the design from the formula: although we wrote `UIncome:ShelveLoc` first these -columns have been built later. **`ModelSpec` builds columns in the order determined by `terms`!** - -2. As noted above, `R` has encoded `UIncome` differently in the main effect and in the interaction. For `ModelSpec`, the reference to `UIncome` always refers to the column in `design.column_info_` and will always build only the columns for `L` and `M`. **`ModelSpec` does no inspection of terms to decide how to encode categorical variables.** - -A few notes: - -- **Why not try to inspect the terms?** For any nontrivial formula in `R` with several categorical variables and interactions, predicting what columns will be produced from a given formula is not simple. **`ModelSpec` errs on the side of being explicit.** - -- **Is it impossible to build the design as `R` has?** No. An advanced user who *knows* they want the columns built as `R` has can do so (fairly) easily. 
- -```{code-cell} ipython3 -full_encoding = contrast('UIncome', None) -design.build_columns(Carseats, full_encoding) -``` - -```{code-cell} ipython3 -design = ModelSpec([pref_encoding, (full_encoding, 'ShelveLoc')]) -X = design.fit_transform(Carseats) -sm.OLS(Y, X).fit().params -``` - -## Special encodings - -For flexible models, we may want to consider transformations of features, i.e. polynomial -or spline transformations. Given transforms that follow the `fit/transform` paradigm -we can of course achieve this with a `Column` and an `encoder`. The `ISLP.transforms` -package includes a `Poly` transform - -```{code-cell} ipython3 -from ISLP.models.model_spec import poly -poly('Income', 3) -``` - -```{code-cell} ipython3 -design = ModelSpec([poly('Income', 3), 'ShelveLoc']) -X = design.fit_transform(Carseats) -sm.OLS(Y, X).fit().params -``` - -Compare: - -```{code-cell} ipython3 -%%R -lm(Sales ~ poly(Income, 3) + ShelveLoc, data=Carseats)$coef -``` - -## Splines - -Support for natural and B-splines is also included - -```{code-cell} ipython3 -from ISLP.models.model_spec import ns, bs, pca -``` - -## Custom encoding - -Instead of PCA we might run some clustering on some features and then uses the clusters to -create new features. This can be done with `derived_variable`. Indeed, `pca`, `ns` and `bs` are all examples -of this. - -```{code-cell} ipython3 -from ISLP.models.model_spec import derived_variable, Contrast -``` - -```{code-cell} ipython3 -from sklearn.cluster import KMeans -from sklearn.pipeline import make_pipeline -from sklearn.preprocessing import StandardScaler -cluster = make_pipeline(StandardScaler(), KMeans(n_clusters=3, random_state=0)) -group = Variable(('Income', 'Price', 'Advertising', 'Population'), 'group', None) -X = design.build_submodel(Carseats, [group]).drop('intercept', axis=1) -cluster.fit(X.values) -cluster.predict(X.values) -``` - -For clustering, we often want to use the `predict` method rather than the `transform` method. 
If the ultimate -features all use `transform` then the do not even need to use these two calls to `make_pipeline`. - -```{code-cell} ipython3 -cluster2 = make_pipeline(StandardScaler(), KMeans(n_clusters=3, random_state=0)) -cluster_var = derived_variable(['Income', 'Price', 'Advertising', 'Population'], - name='myclus', - encoder=cluster2, - use_transform=False) -design = ModelSpec([cluster_var]).fit(Carseats) -design.transform(Carseats) -``` - -Somewhat clunkily, we can make this a categorical variable by creating a `Variable` with a -categorical encoder. - -```{code-cell} ipython3 -cluster2 = make_pipeline(StandardScaler(), KMeans(n_clusters=3, random_state=0)) -cluster_var = derived_variable(['Income', 'Price', 'Advertising', 'Population'], - name='myclus', - encoder=cluster2, - use_transform=False) -cat_cluster = Variable((cluster_var,), name='mynewcat', encoder=Contrast(method='drop')) -cat_cluster -``` - -```{code-cell} ipython3 -design = ModelSpec([cat_cluster]).fit(Carseats) - -design.transform(Carseats) -``` - -```{code-cell} ipython3 - -``` diff --git a/docs/jupyterbook/models/selection.ipynb b/docs/jupyterbook/models/selection.ipynb index b41cf6a..fd66d95 100644 --- a/docs/jupyterbook/models/selection.ipynb +++ b/docs/jupyterbook/models/selection.ipynb @@ -2,2723 +2,259 @@ "cells": [ { "cell_type": "markdown", - "id": "72bae06a", + "id": "247387ec-1477-42e6-9e69-cad1cacb5721", "metadata": {}, "source": [ - "# Model selection using `ModelSpec`" + "# Model selection using `ModelSpec`\n", + "\n", + "\n", + "In this lab we illustrate how to run forward stepwise model selection\n", + "using the model specification capability of `ModelSpec`." 
] }, { "cell_type": "code", "execution_count": 1, - "id": "ae6bd850", + "id": "4720bb2a-6bec-4e91-a57e-9689aa4f0532", "metadata": {}, "outputs": [], "source": [ - "import numpy as np, pandas as pd\n", - "%load_ext rpy2.ipython\n", - "\n", + "import numpy as np\n", + "import pandas as pd\n", + "from statsmodels.api import OLS\n", "from ISLP import load_data\n", - "from ISLP.models import ModelSpec\n", - "\n", - "import statsmodels.api as sm" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "5ac10e72", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['Sales', 'CompPrice', 'Income', 'Advertising', 'Population', 'Price',\n", - " 'ShelveLoc', 'Age', 'Education', 'Urban', 'US'],\n", - " dtype='object')" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Carseats = load_data('Carseats')\n", - "%R -i Carseats\n", - "Carseats.columns" - ] - }, - { - "cell_type": "markdown", - "id": "80a586d9", - "metadata": {}, - "source": [ - "## Let's break up income into groups" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "850356ba", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 M\n", - "1 L\n", - "2 L\n", - "3 H\n", - "4 M\n", - " ..\n", - "395 H\n", - "396 L\n", - "397 L\n", - "398 M\n", - "399 L\n", - "Name: OIncome, Length: 400, dtype: category\n", - "Categories (3, object): ['L' < 'M' < 'H']" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Carseats['OIncome'] = pd.cut(Carseats['Income'], \n", - " [0,50,90,200], \n", - " labels=['L','M','H'])\n", - "Carseats['OIncome']" - ] - }, - { - "cell_type": "markdown", - "id": "e24def3a", - "metadata": {}, - "source": [ - "Let's also create an unordered version" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "edf83080", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 M\n", - "1 L\n", 
- "2 L\n", - "3 H\n", - "4 M\n", - " ..\n", - "395 H\n", - "396 L\n", - "397 L\n", - "398 M\n", - "399 L\n", - "Name: UIncome, Length: 400, dtype: category\n", - "Categories (3, object): ['L', 'M', 'H']" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Carseats['UIncome'] = pd.cut(Carseats['Income'], \n", - " [0,50,90,200], \n", - " labels=['L','M','H'],\n", - " ordered=False)\n", - "Carseats['UIncome']" - ] - }, - { - "cell_type": "markdown", - "id": "aa22bb9c", - "metadata": {}, - "source": [ - "## A simple model" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "38d92522", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['intercept', 'Price', 'Income'], dtype='object')" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design = ModelSpec(['Price', 'Income'])\n", - "X = design.fit_transform(Carseats)\n", - "X.columns" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "cfc2056f", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "intercept 12.661546\n", - "Price -0.052213\n", - "Income 0.012829\n", - "dtype: float64" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Y = Carseats['Sales']\n", - "M = sm.OLS(Y, X).fit()\n", - "M.params" - ] - }, - { - "cell_type": "markdown", - "id": "4674c345", - "metadata": {}, - "source": [ - "## Basic procedure\n", - "\n", - "The design matrix is built by cobbling together a set of columns and possibly transforming them.\n", - "A `pd.DataFrame` is essentially a list of columns. One of the first tasks done in `ModelSpec.fit`\n", - "is to inspect a dataframe for column info. 
The column `ShelveLoc` is categorical:" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "5688f0ad", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 Bad\n", - "1 Good\n", - "2 Medium\n", - "3 Medium\n", - "4 Bad\n", - " ... \n", - "395 Good\n", - "396 Medium\n", - "397 Medium\n", - "398 Bad\n", - "399 Good\n", - "Name: ShelveLoc, Length: 400, dtype: category\n", - "Categories (3, object): ['Bad', 'Good', 'Medium']" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Carseats['ShelveLoc']" + "from ISLP.models import (ModelSpec,\n", + " Stepwise,\n", + " sklearn_selected)" ] }, { "cell_type": "markdown", - "id": "4ae28ffa", + "id": "1c224240-ce8b-47f3-a85a-052c43038b26", "metadata": {}, "source": [ - "This is recognized by `ModelSpec` in the form of `Column` objects which are just named tuples with two methods\n", - "`get_columns` and `fit_encoder`." + "### Forward Selection\n", + " \n", + "We will apply the forward-selection approach to the `Hitters` \n", + "data. We wish to predict a baseball player’s `Salary` on the\n", + "basis of various statistics associated with performance in the\n", + "previous year." ] }, { "cell_type": "code", - "execution_count": 8, - "id": "5f8926fd", + "execution_count": 2, + "id": "2adc66cc", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "Column(idx='ShelveLoc', name='ShelveLoc', is_categorical=True, is_ordinal=False, columns=('ShelveLoc[Good]', 'ShelveLoc[Medium]'), encoder=Contrast())" + "59" ] }, - "execution_count": 8, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "design.column_info_['ShelveLoc']" + "Hitters = load_data('Hitters')\n", + "np.isnan(Hitters['Salary']).sum()" ] }, { "cell_type": "markdown", - "id": "966f53a5", - "metadata": {}, - "source": [ - "It recognized ordinal columns as well." 
- ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "a137fa1e", + "id": "40c9a484", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Column(idx='OIncome', name='OIncome', is_categorical=True, is_ordinal=True, columns=('OIncome',), encoder=OrdinalEncoder())" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], "source": [ - "design.column_info_['OIncome']" + " \n", + " We see that `Salary` is missing for 59 players. The\n", + "`dropna()` method of data frames removes all of the rows that have missing\n", + "values in any variable (by default --- see `Hitters.dropna?`)." ] }, { "cell_type": "code", - "execution_count": 10, - "id": "3390dcb0", + "execution_count": 3, + "id": "1869fdab", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "(array([ 73, 48, 35, 100]), ('Income',))" + "(263, 20)" ] }, - "execution_count": 10, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "income = design.column_info_['Income']\n", - "cols, names = income.get_columns(Carseats)\n", - "(cols[:4], names)" + "Hitters = Hitters.dropna()\n", + "Hitters.shape" ] }, { "cell_type": "markdown", - "id": "b6667415", - "metadata": {}, - "source": [ - "## Encoding a column\n", - "\n", - "In building a design matrix we must extract columns from our dataframe (or `np.ndarray`). Categorical\n", - "variables usually are encoded by several columns, typically one less than the number of categories.\n", - "This task is handled by the `encoder` of the `Column`. The encoder must satisfy the `sklearn` transform\n", - "model, i.e. `fit` on some array and `transform` on future arrays. The `fit_encoder` method of `Column` fits\n", - "its encoder the first time data is passed to it." 
- ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "a1b42dbd", + "id": "0a1fe9e6", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(array([[0., 0.],\n", - " [1., 0.],\n", - " [0., 1.],\n", - " [0., 1.]]),\n", - " ['ShelveLoc[Good]', 'ShelveLoc[Medium]'])" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], "source": [ - "shelve = design.column_info_['ShelveLoc']\n", - "cols, names = shelve.get_columns(Carseats)\n", - "(cols[:4], names)" + "We first choose the best model using forward selection based on AIC. This score\n", + "is not built in as a metric to `sklearn`. We therefore define a function to compute it ourselves, and use\n", + "it as a scorer. By default, `sklearn` tries to maximize a score, hence\n", + " our scoring function computes the negative AIC statistic." ] }, { "cell_type": "code", - "execution_count": 12, - "id": "31367988", + "execution_count": 4, + "id": "76bd8110", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[2.],\n", - " [1.],\n", - " [1.],\n", - " [0.]])" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "oincome = design.column_info_['OIncome']\n", - "oincome.get_columns(Carseats)[0][:4]" + "def negAIC(estimator, X, Y):\n", + " \"Negative AIC\"\n", + " n, p = X.shape\n", + " Yhat = estimator.predict(X)\n", + " MSE = np.mean((Y - Yhat)**2)\n", + " return n + n * np.log(MSE) + 2 * (p + 1)\n", + " " ] }, { "cell_type": "markdown", - "id": "751c1487", - "metadata": {}, - "source": [ - "## The terms\n", - "\n", - "The design matrix consists of several sets of columns. This is managed by the `ModelSpec` through\n", - "the `terms` argument which should be a sequence. 
The elements of `terms` are often\n", - "going to be strings (or tuples of strings for interactions, see below) but are converted to a\n", - "`Variable` object and stored in the `terms_` of the fitted `ModelSpec`. A `Variable` is just a named tuple." - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "6e2b6155", + "id": "14ba6f49", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['Price', 'Income']" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], "source": [ - "design.terms" + "We need to estimate the residual variance $\\sigma^2$, which is the first argument in our scoring function above.\n", + "We will fit the biggest model, using all the variables, and estimate $\\sigma^2$ based on its MSE." ] }, { "cell_type": "code", - "execution_count": 14, - "id": "d3e669da", + "execution_count": 5, + "id": "94e10f35", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[Variable(variables=('Price',), name='Price', encoder=None, use_transform=True, pure_columns=True, override_encoder_colnames=False),\n", - " Variable(variables=('Income',), name='Income', encoder=None, use_transform=True, pure_columns=True, override_encoder_colnames=False)]" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "design.terms_" + "design = ModelSpec(Hitters.columns.drop('Salary')).fit(Hitters)\n", + "Y = np.array(Hitters['Salary'])\n", + "X = design.transform(Hitters)" ] }, { "cell_type": "markdown", - "id": "fb0a45c9", + "id": "afdda5f2", "metadata": {}, "source": [ - "While each `Column` can itself extract data, they are all promoted to `Variable` to be of a uniform type. A\n", - "`Variable` can also create columns through the `build_columns` method of `ModelSpec`" + "Along with a score we need to specify the search strategy. This is done through the object\n", + "`Stepwise()` in the `ISLP.models` package. 
The method `Stepwise.first_peak()`\n", + "runs forward stepwise until any further additions to the model do not result\n", + "in an improvement in the evaluation score. Similarly, the method `Stepwise.fixed_steps()`\n", + "runs a fixed number of steps of stepwise search." ] }, { "cell_type": "code", - "execution_count": 15, - "id": "554c67cb", + "execution_count": 6, + "id": "048c8500", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "( Price\n", - " 0 120\n", - " 1 83\n", - " 2 80\n", - " 3 97\n", - " 4 128\n", - " .. ...\n", - " 395 128\n", - " 396 120\n", - " 397 159\n", - " 398 95\n", - " 399 120\n", - " \n", - " [400 rows x 1 columns],\n", - " ['Price'])" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "price = design.terms_[0]\n", - "design.build_columns(Carseats, price)" + "strategy = Stepwise.first_peak(design,\n", + " direction='forward',\n", + " max_terms=len(design.terms))" ] }, { "cell_type": "markdown", - "id": "06956a6f", + "id": "e0c0af0e", "metadata": {}, "source": [ - "Note that `Variable` objects have a tuple of `variables` as well as an `encoder` attribute. The\n", - "tuple of `variables` first creates a concatenated dataframe from all corresponding variables and then\n", - "is run through `encoder.transform`. The `encoder.fit` method of each `Variable` is run once during \n", - "the call to `ModelSpec.fit`." + " \n", + "We now fit a linear regression model with `Salary` as outcome using forward\n", + "selection. To do so, we use the function `sklearn_selected()` from the `ISLP.models` package. This takes\n", + "a model from `statsmodels` along with a search strategy and selects a model with its\n", + "`fit` method. Without specifying a `scoring` argument, the score defaults to MSE, and so all 19 variables will be\n", + "selected." 
] }, { "cell_type": "code", - "execution_count": 16, - "id": "dd434884", + "execution_count": 7, + "id": "26f09fe9", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "( Price Income UIncome[L] UIncome[M]\n", - " 0 120.0 73.0 0.0 1.0\n", - " 1 83.0 48.0 1.0 0.0\n", - " 2 80.0 35.0 1.0 0.0\n", - " 3 97.0 100.0 0.0 0.0\n", - " 4 128.0 64.0 0.0 1.0\n", - " .. ... ... ... ...\n", - " 395 128.0 108.0 0.0 0.0\n", - " 396 120.0 23.0 1.0 0.0\n", - " 397 159.0 26.0 1.0 0.0\n", - " 398 95.0 79.0 0.0 1.0\n", - " 399 120.0 37.0 1.0 0.0\n", - " \n", - " [400 rows x 4 columns],\n", - " ['Price', 'Income', 'UIncome[L]', 'UIncome[M]'])" + "('Assists',\n", + " 'AtBat',\n", + " 'CAtBat',\n", + " 'CHits',\n", + " 'CHmRun',\n", + " 'CRBI',\n", + " 'CRuns',\n", + " 'CWalks',\n", + " 'Division',\n", + " 'Errors',\n", + " 'Hits',\n", + " 'HmRun',\n", + " 'League',\n", + " 'NewLeague',\n", + " 'PutOuts',\n", + " 'RBI',\n", + " 'Runs',\n", + " 'Walks',\n", + " 'Years')" ] }, - "execution_count": 16, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "from ISLP.models.model_spec import Variable\n", - "\n", - "new_var = Variable(('Price', 'Income', 'UIncome'), name='mynewvar', encoder=None)\n", - "design.build_columns(Carseats, new_var)" + "hitters_MSE = sklearn_selected(OLS,\n", + " strategy)\n", + "hitters_MSE.fit(Hitters, Y)\n", + "hitters_MSE.selected_state_" ] }, { "cell_type": "markdown", - "id": "5cdb088c", + "id": "4acf4792", "metadata": {}, "source": [ - "Let's now transform these columns with an encoder. Within `ModelSpec` we will first build the\n", - "arrays above and then call `pca.fit` and finally `pca.transform` within `design.build_columns`." + " Using `neg_Cp` results in a smaller model, as expected, with just 4variables selected." 
] }, { "cell_type": "code", - "execution_count": 17, - "id": "519a642e", + "execution_count": 8, + "id": "a825f4d8", "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but PCA was fitted with feature names\n", - " warnings.warn(\n" - ] - }, { "data": { "text/plain": [ - "( mynewvar[0] mynewvar[1]\n", - " 0 -3.608693 -4.853177\n", - " 1 15.081506 35.708630\n", - " 2 27.422871 40.774250\n", - " 3 -33.973209 13.470489\n", - " 4 6.567316 -11.290100\n", - " .. ... ...\n", - " 395 -36.846346 -18.415783\n", - " 396 45.741500 3.245602\n", - " 397 49.097533 -35.725355\n", - " 398 -13.577772 18.845139\n", - " 399 31.927566 0.978436\n", - " \n", - " [400 rows x 2 columns],\n", - " ['mynewvar[0]', 'mynewvar[1]'])" + "('Assists', 'Errors', 'League', 'NewLeague')" ] }, - "execution_count": 17, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "from sklearn.decomposition import PCA\n", - "pca = PCA(n_components=2)\n", - "pca.fit(design.build_columns(Carseats, new_var)[0]) # this is done within `ModelSpec.fit`\n", - "pca_var = Variable(('Price', 'Income', 'UIncome'), name='mynewvar', encoder=pca)\n", - "design.build_columns(Carseats, pca_var)" - ] - }, - { - "cell_type": "markdown", - "id": "403921a2", - "metadata": {}, - "source": [ - "The elements of the `variables` attribute may be column identifiers ( `\"Price\"`), `Column` instances (`price`)\n", - "or `Variable` instances (`pca_var`)." 
+ "hitters_Cp = sklearn_selected(OLS,\n", + " strategy,\n", + " scoring=negAIC)\n", + "hitters_Cp.fit(Hitters, Y)\n", + "hitters_Cp.selected_state_" ] - }, - { - "cell_type": "code", - "execution_count": 18, - "id": "b422cde1", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but PCA was fitted with feature names\n", - " warnings.warn(\n" - ] - }, - { - "data": { - "text/plain": [ - "( Price Price mynewvar[0] mynewvar[1]\n", - " 0 120.0 120.0 -3.608693 -4.853177\n", - " 1 83.0 83.0 15.081506 35.708630\n", - " 2 80.0 80.0 27.422871 40.774250\n", - " 3 97.0 97.0 -33.973209 13.470489\n", - " 4 128.0 128.0 6.567316 -11.290100\n", - " .. ... ... ... ...\n", - " 395 128.0 128.0 -36.846346 -18.415783\n", - " 396 120.0 120.0 45.741500 3.245602\n", - " 397 159.0 159.0 49.097533 -35.725355\n", - " 398 95.0 95.0 -13.577772 18.845139\n", - " 399 120.0 120.0 31.927566 0.978436\n", - " \n", - " [400 rows x 4 columns],\n", - " ['Price', 'Price', 'mynewvar[0]', 'mynewvar[1]'])" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "fancy_var = Variable(('Price', price, pca_var), name='fancy', encoder=None)\n", - "design.build_columns(Carseats, fancy_var)" - ] - }, - { - "cell_type": "markdown", - "id": "53e38f57", - "metadata": {}, - "source": [ - "We can of course run PCA again on these features (if we wanted)." 
- ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "6347acb6", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but PCA was fitted with feature names\n", - " warnings.warn(\n", - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but PCA was fitted with feature names\n", - " warnings.warn(\n", - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but PCA was fitted with feature names\n", - " warnings.warn(\n" - ] - }, - { - "data": { - "text/plain": [ - "( fancy_pca[0] fancy_pca[1]\n", - " 0 -6.951792 4.859283\n", - " 1 55.170148 -24.694875\n", - " 2 59.418556 -38.033572\n", - " 3 34.722389 28.922184\n", - " 4 -21.419184 -3.120673\n", - " .. ... 
...\n", - " 395 -18.257348 40.760122\n", - " 396 -10.546709 -45.021658\n", - " 397 -77.706359 -37.174379\n", - " 398 36.668694 7.730851\n", - " 399 -9.540535 -31.059122\n", - " \n", - " [400 rows x 2 columns],\n", - " ['fancy_pca[0]', 'fancy_pca[1]'])" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pca2 = PCA(n_components=2)\n", - "pca2.fit(design.build_columns(Carseats, fancy_var)[0]) # this is done within `ModelSpec.fit`\n", - "pca2_var = Variable(('Price', price, pca_var), name='fancy_pca', encoder=pca2)\n", - "design.build_columns(Carseats, pca2_var)" - ] - }, - { - "cell_type": "markdown", - "id": "08b5ddb0", - "metadata": {}, - "source": [ - "## Building the design matrix\n", - "\n", - "With these notions in mind, the final design is essentially then" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "a8eb3e33", - "metadata": {}, - "outputs": [], - "source": [ - "X_hand = np.column_stack([design.build_columns(Carseats, v)[0] for v in design.terms_])[:4]" - ] - }, - { - "cell_type": "markdown", - "id": "97912337", - "metadata": {}, - "source": [ - "An intercept column is added if `design.intercept` is `True` and if the original argument to `transform` is\n", - "a dataframe the index is adjusted accordingly." - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": "72b5e629", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design.intercept" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "id": "8a457e3e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
interceptPriceIncome
01.012073
11.08348
21.08035
31.097100
\n", - "
" - ], - "text/plain": [ - " intercept Price Income\n", - "0 1.0 120 73\n", - "1 1.0 83 48\n", - "2 1.0 80 35\n", - "3 1.0 97 100" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design.transform(Carseats)[:4]" - ] - }, - { - "cell_type": "markdown", - "id": "8624ab8c", - "metadata": {}, - "source": [ - "## Predicting\n", - "\n", - "Constructing the design matrix at any values is carried out by the `transform` method." - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "id": "6052765e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([12.65257604, 12.25873428])" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "new_data = pd.DataFrame({'Price':[10,20], 'Income':[40, 50]})\n", - "new_X = design.transform(new_data)\n", - "M.get_prediction(new_X).predicted_mean" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "id": "9158de59", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " 0 1 \n", - "12.65258 12.25873 \n" - ] - } - ], - "source": [ - "%%R -i new_data,Carseats\n", - "predict(lm(Sales ~ Price + Income, data=Carseats), new_data)" - ] - }, - { - "cell_type": "markdown", - "id": "9608bed3", - "metadata": {}, - "source": [ - "### Difference between using `pd.DataFrame` and `np.ndarray`\n", - "\n", - "If the `terms` only refer to a few columns of the data frame, the `transform` method only needs a dataframe with those columns.\n", - "\n", - "If we had used an `np.ndarray`, the column identifiers would be integers identifying specific columns so,\n", - "in order to work correctly, `transform` would need another `np.ndarray` where the columns have the same meaning." 
- ] - }, - { - "cell_type": "code", - "execution_count": 25, - "id": "f0b8120f", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[1.0, 120, 73],\n", - " [1.0, 83, 48],\n", - " [1.0, 80, 35],\n", - " [1.0, 97, 100]], dtype=object)" - ] - }, - "execution_count": 25, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Carseats_np = np.asarray(Carseats[['Price', 'ShelveLoc', 'US', 'Income']])\n", - "design_np = ModelSpec([0,3]).fit(Carseats_np)\n", - "design_np.transform(Carseats_np)[:4]" - ] - }, - { - "cell_type": "markdown", - "id": "270a02a6", - "metadata": {}, - "source": [ - "The following will fail for hopefully obvious reasons" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "id": "4ffbce7e", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "shapes (2,2) and (3,) not aligned: 2 (dim 1) != 3 (dim 0)\n" - ] - } - ], - "source": [ - "try:\n", - " new_D = np.zeros((2,2))\n", - " new_D[:,0] = [10,20]\n", - " new_D[:,1] = [40,50]\n", - " M.get_prediction(new_D).predicted_mean\n", - "except ValueError as e:\n", - " print(e)" - ] - }, - { - "cell_type": "markdown", - "id": "bc5ff62b", - "metadata": {}, - "source": [ - "Ultimately, `M` expects 3 columns for new predictions because it was fit\n", - "with a matrix having 3 columns (the first representing an intercept).\n", - "\n", - "We might be tempted to try as with the `pd.DataFrame` and produce\n", - "an `np.ndarray` with only the necessary variables." 
- ] - }, - { - "cell_type": "code", - "execution_count": 27, - "id": "34dae1e9", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "index 3 is out of bounds for axis 1 with size 2\n" - ] - } - ], - "source": [ - "try:\n", - " new_X = np.zeros((2,2))\n", - " new_X[:,0] = [10,20]\n", - " new_X[:,1] = [40,50]\n", - " new_D = design_np.transform(new_X)\n", - " M.get_prediction(new_D).predicted_mean\n", - "except IndexError as e:\n", - " print(e)" - ] - }, - { - "cell_type": "markdown", - "id": "7e9da262", - "metadata": {}, - "source": [ - "This fails because `design_np` is looking for column `3` from its `terms`:" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "id": "938b9430", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[Variable(variables=(0,), name='0', encoder=None, use_transform=True, pure_columns=True, override_encoder_colnames=False),\n", - " Variable(variables=(3,), name='3', encoder=None, use_transform=True, pure_columns=True, override_encoder_colnames=False)]" - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design_np.terms_" - ] - }, - { - "cell_type": "markdown", - "id": "083e9529", - "metadata": {}, - "source": [ - "However, if we have an `np.ndarray` in which the first column indeed represents `Price` and the fourth indeed\n", - "represents `Income` then we can arrive at the correct answer by supplying such the array to `design_np.transform`:" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "id": "d413a9fe", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([12.65257604, 12.25873428])" - ] - }, - "execution_count": 29, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "new_X = np.zeros((2,4))\n", - "new_X[:,0] = [10,20]\n", - "new_X[:,3] = [40,50]\n", - "new_D = design_np.transform(new_X)\n", - "M.get_prediction(new_D).predicted_mean" - ] 
- }, - { - "cell_type": "markdown", - "id": "0f4b508b", - "metadata": {}, - "source": [ - "Given this subtlety about needing to supply arrays with identical column structure to `transform` when\n", - "using `np.ndarray` we presume that using a `pd.DataFrame` will be the more popular use case." - ] - }, - { - "cell_type": "markdown", - "id": "8bcbd973", - "metadata": {}, - "source": [ - "## A model with some categorical variables\n", - "\n", - "Categorical variables become `Column` instances with encoders." - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "id": "cf13f72e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Column(idx='UIncome', name='UIncome', is_categorical=True, is_ordinal=False, columns=('UIncome[L]', 'UIncome[M]'), encoder=Contrast())" - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design = ModelSpec(['Population', 'Price', 'UIncome', 'ShelveLoc']).fit(Carseats)\n", - "design.column_info_['UIncome']" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "id": "c1fa0a90", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['intercept', 'Population', 'Price', 'UIncome[L]', 'UIncome[M]',\n", - " 'ShelveLoc[Good]', 'ShelveLoc[Medium]'],\n", - " dtype='object')" - ] - }, - "execution_count": 31, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "X = design.fit_transform(Carseats)\n", - "X.columns" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "id": "b28aa313", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "intercept 11.876012\n", - "Population 0.001163\n", - "Price -0.055725\n", - "UIncome[L] -1.042297\n", - "UIncome[M] -0.119123\n", - "ShelveLoc[Good] 4.999623\n", - "ShelveLoc[Medium] 1.964278\n", - "dtype: float64" - ] - }, - "execution_count": 32, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "sm.OLS(Y, X).fit().params" - 
] - }, - { - "cell_type": "code", - "execution_count": 33, - "id": "aa764acc", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " (Intercept) Population Price UIncomeM UIncomeH \n", - " 10.83371503 0.00116301 -0.05572469 0.92317388 1.04229679 \n", - " ShelveLocGood ShelveLocMedium \n", - " 4.99962319 1.96427771 \n" - ] - } - ], - "source": [ - "%%R\n", - "lm(Sales ~ Population + Price + UIncome + ShelveLoc, data=Carseats)$coef" - ] - }, - { - "cell_type": "markdown", - "id": "31876a29", - "metadata": {}, - "source": [ - "## Getting the encoding you want\n", - "\n", - "By default the level dropped by `ModelSpec` will be the first of the `categories_` values from \n", - "`sklearn.preprocessing.OneHotEncoder()`. We might wish to change this. It seems\n", - "as if the correct way to do this would be something like `Variable(('UIncome',), 'mynewencoding', new_encoder)`\n", - "where `new_encoder` would somehow drop the column we want dropped. \n", - "\n", - "However, when using the convenient identifier `UIncome` in the `variables` argument, this maps to the `Column` associated to `UIncome` within `design.column_info_`:" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "id": "bac2643c", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Column(idx='UIncome', name='UIncome', is_categorical=True, is_ordinal=False, columns=('UIncome[L]', 'UIncome[M]'), encoder=Contrast())" - ] - }, - "execution_count": 34, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design.column_info_['UIncome']" - ] - }, - { - "cell_type": "markdown", - "id": "1485735d", - "metadata": {}, - "source": [ - "This column already has an encoder and `Column` instances are immutable as named tuples. Further, there are times when \n", - "we may want to encode `UIncome` differently within the same model. 
In the model below the main effect of `UIncome` is encoded with two columns while in the interaction `UIncome` (see below) has three columns. This is a design of interest\n", - "and we need a way to allow different encodings of the same column of `Carseats`" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "id": "3987c5d6", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Call:\n", - "lm(formula = Sales ~ UIncome:ShelveLoc + UIncome, data = Carseats)\n", - "\n", - "Coefficients:\n", - " (Intercept) UIncomeM UIncomeH \n", - " 5.1317 0.1151 1.1561 \n", - " UIncomeL:ShelveLocGood UIncomeM:ShelveLocGood UIncomeH:ShelveLocGood \n", - " 4.5121 5.5752 3.7381 \n", - "UIncomeL:ShelveLocMedium UIncomeM:ShelveLocMedium UIncomeH:ShelveLocMedium \n", - " 1.2473 2.4782 1.5141 \n", - "\n" - ] - } - ], - "source": [ - "%%R\n", - "lm(Sales ~ UIncome:ShelveLoc + UIncome, data=Carseats)" - ] - }, - { - "cell_type": "markdown", - "id": "7a6631c9", - "metadata": {}, - "source": [ - " We can create a new \n", - "`Column` with the encoder we want. For categorical variables, there is a convenience function to do so." - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "id": "83a9b94e", - "metadata": {}, - "outputs": [], - "source": [ - "from ISLP.models.model_spec import contrast\n", - "pref_encoding = contrast('UIncome', 'drop', 'L')" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "id": "f0ffabea", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "( UIncome[M] UIncome[H]\n", - " 0 1.0 0.0\n", - " 1 0.0 0.0\n", - " 2 0.0 0.0\n", - " 3 0.0 1.0\n", - " 4 1.0 0.0\n", - " .. ... 
...\n", - " 395 0.0 1.0\n", - " 396 0.0 0.0\n", - " 397 0.0 0.0\n", - " 398 1.0 0.0\n", - " 399 0.0 0.0\n", - " \n", - " [400 rows x 2 columns],\n", - " ['UIncome[M]', 'UIncome[H]'])" - ] - }, - "execution_count": 37, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design.build_columns(Carseats, pref_encoding)" - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "id": "4a5fdc64", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['intercept', 'Population', 'Price', 'UIncome[M]', 'UIncome[H]',\n", - " 'ShelveLoc[Good]', 'ShelveLoc[Medium]'],\n", - " dtype='object')" - ] - }, - "execution_count": 38, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design = ModelSpec(['Population', 'Price', pref_encoding, 'ShelveLoc']).fit(Carseats)\n", - "X = design.fit_transform(Carseats)\n", - "X.columns" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "id": "ae7e3bd2", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "intercept 10.833715\n", - "Population 0.001163\n", - "Price -0.055725\n", - "UIncome[M] 0.923174\n", - "UIncome[H] 1.042297\n", - "ShelveLoc[Good] 4.999623\n", - "ShelveLoc[Medium] 1.964278\n", - "dtype: float64" - ] - }, - "execution_count": 39, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "sm.OLS(Y, X).fit().params" - ] - }, - { - "cell_type": "code", - "execution_count": 40, - "id": "c12ac3df", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " (Intercept) Population Price UIncomeM UIncomeH \n", - " 10.83371503 0.00116301 -0.05572469 0.92317388 1.04229679 \n", - " ShelveLocGood ShelveLocMedium \n", - " 4.99962319 1.96427771 \n" - ] - } - ], - "source": [ - "%%R\n", - "lm(Sales ~ Population + Price + UIncome + ShelveLoc, data=Carseats)$coef" - ] - }, - { - "cell_type": "markdown", - "id": "53bf8aef", - "metadata": {}, - "source": [ - "## Interactions\n", - 
"\n", - "We've referred to interactions above. These are specified (by convenience) as tuples in the `terms` argument\n", - "to `ModelSpec`." - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "id": "47723bce", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "intercept 7.866634\n", - "UIncome[L]:ShelveLoc[Good] 4.512054\n", - "UIncome[L]:ShelveLoc[Medium] 1.247275\n", - "UIncome[M]:ShelveLoc[Good] 5.575170\n", - "UIncome[M]:ShelveLoc[Medium] 2.478163\n", - "UIncome[L] -2.734895\n", - "UIncome[M] -2.619745\n", - "dtype: float64" - ] - }, - "execution_count": 41, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design = ModelSpec([('UIncome', 'ShelveLoc'), 'UIncome'])\n", - "X = design.fit_transform(Carseats)\n", - "sm.OLS(Y, X).fit().params" - ] - }, - { - "cell_type": "markdown", - "id": "86060622", - "metadata": {}, - "source": [ - "The tuples in `terms` are converted to `Variable` in the formalized `terms_` attribute by creating a `Variable` with\n", - "`variables` set to the tuple and the encoder an `Interaction` encoder which (unsurprisingly) creates the interaction columns from the concatenated data frames of `UIncome` and `ShelveLoc`." 
- ] - }, - { - "cell_type": "code", - "execution_count": 42, - "id": "d7a2ab9b", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Variable(variables=('UIncome', 'ShelveLoc'), name='UIncome:ShelveLoc', encoder=Interaction(column_names={'ShelveLoc': ['ShelveLoc[Good]', 'ShelveLoc[Medium]'],\n", - " 'UIncome': ['UIncome[L]', 'UIncome[M]']},\n", - " columns={'ShelveLoc': range(2, 4), 'UIncome': range(0, 2)},\n", - " variables=['UIncome', 'ShelveLoc']), use_transform=True, pure_columns=False, override_encoder_colnames=False)" - ] - }, - "execution_count": 42, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design.terms_[0]" - ] - }, - { - "cell_type": "markdown", - "id": "2a5e7f6b", - "metadata": {}, - "source": [ - "Comparing this to the previous `R` model." - ] - }, - { - "cell_type": "code", - "execution_count": 43, - "id": "bbb02036", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Call:\n", - "lm(formula = Sales ~ UIncome:ShelveLoc + UIncome, data = Carseats)\n", - "\n", - "Coefficients:\n", - " (Intercept) UIncomeM UIncomeH \n", - " 5.1317 0.1151 1.1561 \n", - " UIncomeL:ShelveLocGood UIncomeM:ShelveLocGood UIncomeH:ShelveLocGood \n", - " 4.5121 5.5752 3.7381 \n", - "UIncomeL:ShelveLocMedium UIncomeM:ShelveLocMedium UIncomeH:ShelveLocMedium \n", - " 1.2473 2.4782 1.5141 \n", - "\n" - ] - } - ], - "source": [ - "%%R\n", - "lm(Sales ~ UIncome:ShelveLoc + UIncome, data=Carseats)" - ] - }, - { - "cell_type": "markdown", - "id": "89106a85", - "metadata": {}, - "source": [ - "We note a few important things:\n", - "\n", - "1. `R` has reorganized the columns of the design from the formula: although we wrote `UIncome:ShelveLoc` first these\n", - "columns have been built later. **`ModelSpec` builds columns in the order determined by `terms`!**\n", - "\n", - "2. As noted above, `R` has encoded `UIncome` differently in the main effect and in the interaction. 
For `ModelSpec`, the reference to `UIncome` always refers to the column in `design.column_info_` and will always build only the columns for `L` and `M`. **`ModelSpec` does no inspection of terms to decide how to encode categorical variables.**\n", - "\n", - "A few notes:\n", - "\n", - "- **Why not try to inspect the terms?** For any nontrivial formula in `R` with several categorical variables and interactions, predicting what columns will be produced from a given formula is not simple. **`ModelSpec` errs on the side of being explicit.**\n", - "\n", - "- **Is it impossible to build the design as `R` has?** No. An advanced user who *knows* they want the columns built as `R` has can do so (fairly) easily." - ] - }, - { - "cell_type": "code", - "execution_count": 44, - "id": "151f3fee", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "( UIncome[H] UIncome[L] UIncome[M]\n", - " 0 0.0 0.0 1.0\n", - " 1 0.0 1.0 0.0\n", - " 2 0.0 1.0 0.0\n", - " 3 1.0 0.0 0.0\n", - " 4 0.0 0.0 1.0\n", - " .. ... ... 
...\n", - " 395 1.0 0.0 0.0\n", - " 396 0.0 1.0 0.0\n", - " 397 0.0 1.0 0.0\n", - " 398 0.0 0.0 1.0\n", - " 399 0.0 1.0 0.0\n", - " \n", - " [400 rows x 3 columns],\n", - " ['UIncome[H]', 'UIncome[L]', 'UIncome[M]'])" - ] - }, - "execution_count": 44, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "full_encoding = contrast('UIncome', None)\n", - "design.build_columns(Carseats, full_encoding)" - ] - }, - { - "cell_type": "code", - "execution_count": 45, - "id": "945ce7bc", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "intercept 5.131739\n", - "UIncome[M] 0.115150\n", - "UIncome[H] 1.156118\n", - "UIncome[H]:ShelveLoc[Good] 3.738052\n", - "UIncome[H]:ShelveLoc[Medium] 1.514104\n", - "UIncome[L]:ShelveLoc[Good] 4.512054\n", - "UIncome[L]:ShelveLoc[Medium] 1.247275\n", - "UIncome[M]:ShelveLoc[Good] 5.575170\n", - "UIncome[M]:ShelveLoc[Medium] 2.478163\n", - "dtype: float64" - ] - }, - "execution_count": 45, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design = ModelSpec([pref_encoding, (full_encoding, 'ShelveLoc')])\n", - "X = design.fit_transform(Carseats)\n", - "sm.OLS(Y, X).fit().params" - ] - }, - { - "cell_type": "markdown", - "id": "450b94dd", - "metadata": {}, - "source": [ - "## Special encodings\n", - "\n", - "For flexible models, we may want to consider transformations of features, i.e. polynomial\n", - "or spline transformations. Given transforms that follow the `fit/transform` paradigm\n", - "we can of course achieve this with a `Column` and an `encoder`. 
The `ISLP.transforms`\n", - "package includes a `Poly` transform" - ] - }, - { - "cell_type": "code", - "execution_count": 46, - "id": "18d5c1c8", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Variable(variables=('Income',), name='poly(Income, 3, )', encoder=Poly(degree=3), use_transform=True, pure_columns=False, override_encoder_colnames=True)" - ] - }, - "execution_count": 46, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from ISLP.models.model_spec import poly\n", - "poly('Income', 3)" - ] - }, - { - "cell_type": "code", - "execution_count": 47, - "id": "46c7d911", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "intercept 5.440077\n", - "poly(Income, 3, )[0] 10.036373\n", - "poly(Income, 3, )[1] -2.799156\n", - "poly(Income, 3, )[2] 2.399601\n", - "ShelveLoc[Good] 4.808133\n", - "ShelveLoc[Medium] 1.889533\n", - "dtype: float64" - ] - }, - "execution_count": 47, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design = ModelSpec([poly('Income', 3), 'ShelveLoc'])\n", - "X = design.fit_transform(Carseats)\n", - "sm.OLS(Y, X).fit().params" - ] - }, - { - "cell_type": "markdown", - "id": "99bf13a1", - "metadata": {}, - "source": [ - "Compare:" - ] - }, - { - "cell_type": "code", - "execution_count": 48, - "id": "7606facd", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " (Intercept) poly(Income, 3)1 poly(Income, 3)2 poly(Income, 3)3 \n", - " 5.440077 10.036373 -2.799156 2.399601 \n", - " ShelveLocGood ShelveLocMedium \n", - " 4.808133 1.889533 \n" - ] - } - ], - "source": [ - "%%R\n", - "lm(Sales ~ poly(Income, 3) + ShelveLoc, data=Carseats)$coef" - ] - }, - { - "cell_type": "markdown", - "id": "a4931031", - "metadata": {}, - "source": [ - "## Splines\n", - "\n", - "Support for natural and B-splines is also included" - ] - }, - { - "cell_type": "code", - "execution_count": 49, - "id": "1c1bf5f3", - "metadata": 
{}, - "outputs": [ - { - "data": { - "text/plain": [ - "intercept 4.240421\n", - "ns(Income, , df=5)[0] 1.468196\n", - "ns(Income, , df=5)[1] 1.499471\n", - "ns(Income, , df=5)[2] 1.152070\n", - "ns(Income, , df=5)[3] 2.418398\n", - "ns(Income, , df=5)[4] 1.804460\n", - "ShelveLoc[Good] 4.810449\n", - "ShelveLoc[Medium] 1.881095\n", - "dtype: float64" - ] - }, - "execution_count": 49, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from ISLP.models.model_spec import ns, bs, pca\n", - "design = ModelSpec([ns('Income', df=5), 'ShelveLoc'])\n", - "X = design.fit_transform(Carseats)\n", - "sm.OLS(Y, X).fit().params" - ] - }, - { - "cell_type": "code", - "execution_count": 50, - "id": "8c24254b", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " (Intercept) ns(Income, df = 5)1 ns(Income, df = 5)2 ns(Income, df = 5)3 \n", - " 4.240421 1.468196 1.499471 1.152070 \n", - "ns(Income, df = 5)4 ns(Income, df = 5)5 ShelveLocGood ShelveLocMedium \n", - " 2.418398 1.804460 4.810449 1.881095 \n" - ] - } - ], - "source": [ - "%%R\n", - "library(splines)\n", - "lm(Sales ~ ns(Income, df=5) + ShelveLoc, data=Carseats)$coef" - ] - }, - { - "cell_type": "code", - "execution_count": 51, - "id": "f9d6c4a7", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "intercept 3.495085\n", - "bs(Income, , df=7, degree=2)[0] 1.813118\n", - "bs(Income, , df=7, degree=2)[1] 0.961852\n", - "bs(Income, , df=7, degree=2)[2] 2.471545\n", - "bs(Income, , df=7, degree=2)[3] 2.158891\n", - "bs(Income, , df=7, degree=2)[4] 2.091625\n", - "bs(Income, , df=7, degree=2)[5] 2.600669\n", - "bs(Income, , df=7, degree=2)[6] 2.843108\n", - "ShelveLoc[Good] 4.804919\n", - "ShelveLoc[Medium] 1.880337\n", - "dtype: float64" - ] - }, - "execution_count": 51, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design = ModelSpec([bs('Income', df=7, degree=2), 'ShelveLoc'])\n", - "X = 
design.fit_transform(Carseats)\n", - "sm.OLS(Y, X).fit().params" - ] - }, - { - "cell_type": "code", - "execution_count": 52, - "id": "0bf1726a", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " (Intercept) bs(Income, df = 7, degree = 2)1 \n", - " 3.4950851 1.8131176 \n", - "bs(Income, df = 7, degree = 2)2 bs(Income, df = 7, degree = 2)3 \n", - " 0.9618523 2.4715450 \n", - "bs(Income, df = 7, degree = 2)4 bs(Income, df = 7, degree = 2)5 \n", - " 2.1588908 2.0916252 \n", - "bs(Income, df = 7, degree = 2)6 bs(Income, df = 7, degree = 2)7 \n", - " 2.6006694 2.8431084 \n", - " ShelveLocGood ShelveLocMedium \n", - " 4.8049190 1.8803375 \n" - ] - } - ], - "source": [ - "%%R\n", - "lm(Sales ~ bs(Income, df=7, degree=2) + ShelveLoc, data=Carseats)$coef" - ] - }, - { - "cell_type": "markdown", - "id": "914df4cf", - "metadata": {}, - "source": [ - "## PCA" - ] - }, - { - "cell_type": "code", - "execution_count": 53, - "id": "cc22e780", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but PCA was fitted with feature names\n", - " warnings.warn(\n", - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but PCA was fitted with feature names\n", - " warnings.warn(\n", - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but PCA was fitted with feature names\n", - " warnings.warn(\n" - ] - }, - { - "data": { - "text/plain": [ - "intercept 5.419405\n", - "pca(myvars, , n_components=2)[0] -0.001131\n", - "pca(myvars, , n_components=2)[1] -0.024217\n", - "ShelveLoc[Good] 4.816253\n", - "ShelveLoc[Medium] 1.924139\n", - "dtype: float64" - ] - }, - 
"execution_count": 53, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design = ModelSpec([pca(['Income', \n", - " 'Price', \n", - " 'Advertising', \n", - " 'Population'], \n", - " n_components=2, \n", - " name='myvars'), 'ShelveLoc'])\n", - "X = design.fit_transform(Carseats)\n", - "sm.OLS(Y, X).fit().params" - ] - }, - { - "cell_type": "code", - "execution_count": 54, - "id": "de571e61", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Call:\n", - "lm(formula = Sales ~ prcomp(cbind(Income, Price, Advertising, \n", - " Population))$x[, 1:2] + ShelveLoc, data = Carseats)\n", - "\n", - "Coefficients:\n", - " (Intercept) \n", - " 5.419405 \n", - "prcomp(cbind(Income, Price, Advertising, Population))$x[, 1:2]PC1 \n", - " 0.001131 \n", - "prcomp(cbind(Income, Price, Advertising, Population))$x[, 1:2]PC2 \n", - " -0.024217 \n", - " ShelveLocGood \n", - " 4.816253 \n", - " ShelveLocMedium \n", - " 1.924139 \n", - "\n" - ] - } - ], - "source": [ - "%%R\n", - "lm(Sales ~ prcomp(cbind(Income, Price, Advertising, Population))$x[,1:2] + ShelveLoc, data=Carseats)" - ] - }, - { - "cell_type": "markdown", - "id": "0a103b5a", - "metadata": {}, - "source": [ - "It is of course common to scale before running PCA." 
- ] - }, - { - "cell_type": "code", - "execution_count": 55, - "id": "95ca42f5", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but StandardScaler was fitted with feature names\n", - " warnings.warn(\n", - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but StandardScaler was fitted with feature names\n", - " warnings.warn(\n", - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but StandardScaler was fitted with feature names\n", - " warnings.warn(\n" - ] - }, - { - "data": { - "text/plain": [ - "intercept 5.352159\n", - "pca(myvars, , n_components=2)[0] 0.446383\n", - "pca(myvars, , n_components=2)[1] -1.219788\n", - "ShelveLoc[Good] 4.922780\n", - "ShelveLoc[Medium] 2.005617\n", - "dtype: float64" - ] - }, - "execution_count": 55, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design = ModelSpec([pca(['Income', \n", - " 'Price', \n", - " 'Advertising', \n", - " 'Population'], \n", - " n_components=2, \n", - " name='myvars',\n", - " scale=True), 'ShelveLoc'])\n", - "X = design.fit_transform(Carseats)\n", - "sm.OLS(Y, X).fit().params" - ] - }, - { - "cell_type": "code", - "execution_count": 56, - "id": "0dc22e35", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Call:\n", - "lm(formula = Sales ~ prcomp(cbind(Income, Price, Advertising, \n", - " Population), scale = TRUE)$x[, 1:2] + ShelveLoc, data = Carseats)\n", - "\n", - "Coefficients:\n", - " (Intercept) \n", - " 5.3522 \n", - "prcomp(cbind(Income, Price, Advertising, Population), scale = TRUE)$x[, 1:2]PC1 \n", - " 0.4469 \n", - 
"prcomp(cbind(Income, Price, Advertising, Population), scale = TRUE)$x[, 1:2]PC2 \n", - " -1.2213 \n", - " ShelveLocGood \n", - " 4.9228 \n", - " ShelveLocMedium \n", - " 2.0056 \n", - "\n" - ] - } - ], - "source": [ - "%%R\n", - "lm(Sales ~ prcomp(cbind(Income, Price, Advertising, Population), scale=TRUE)$x[,1:2] + ShelveLoc, data=Carseats)" - ] - }, - { - "cell_type": "markdown", - "id": "70347ee9", - "metadata": {}, - "source": [ - "There will be some small differences in the coefficients due to `sklearn` use of `np.std(ddof=0)` instead\n", - "of `np.std(ddof=1)`." - ] - }, - { - "cell_type": "code", - "execution_count": 57, - "id": "aa0c2f2e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([ 0.44694166, -1.22131519])" - ] - }, - "execution_count": 57, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "np.array(sm.OLS(Y, X).fit().params)[1:3] * np.sqrt(X.shape[0] / (X.shape[0]-1))" - ] - }, - { - "cell_type": "markdown", - "id": "ab05c497", - "metadata": {}, - "source": [ - "## Model selection\n", - "\n", - "Another task requiring different design matrices is model selection. Manipulating\n", - "the `terms` attribute of a `ModelSpec` (or more precisely its more uniform version `terms_`)\n", - "can clearly allow for both exhaustive and stepwise model selection." 
- ] - }, - { - "cell_type": "code", - "execution_count": 58, - "id": "9505c178", - "metadata": {}, - "outputs": [], - "source": [ - "from ISLP.models.strategy import (Stepwise, \n", - " min_max)\n", - "from ISLP.models.generic_selector import FeatureSelector" - ] - }, - { - "cell_type": "markdown", - "id": "020c2532", - "metadata": {}, - "source": [ - "### Best subsets" - ] - }, - { - "cell_type": "code", - "execution_count": 59, - "id": "f9aba6db", - "metadata": {}, - "outputs": [], - "source": [ - "design = ModelSpec(['Price', \n", - " 'UIncome', \n", - " 'Advertising', \n", - " 'US', \n", - " 'Income',\n", - " 'ShelveLoc',\n", - " 'Education',\n", - " 'Urban']).fit(Carseats)\n", - "strategy = min_max(design,\n", - " min_terms=0,\n", - " max_terms=3)" - ] - }, - { - "cell_type": "code", - "execution_count": 60, - "id": "91144a3d", - "metadata": {}, - "outputs": [], - "source": [ - "from sklearn.linear_model import LinearRegression\n", - "selector = FeatureSelector(LinearRegression(fit_intercept=False),\n", - " strategy,\n", - " scoring='neg_mean_squared_error')" - ] - }, - { - "cell_type": "code", - "execution_count": 61, - "id": "ae3cb2eb", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 61, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "selector.fit(Carseats, Y)" - ] - }, - { - "cell_type": "code", - "execution_count": 62, - "id": "e63b2744", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "('Price', 'Advertising', 'ShelveLoc')" - ] - }, - "execution_count": 62, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "selector.selected_state_" - ] - }, - { - "cell_type": "code", - "execution_count": 63, - "id": "0a774b48", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "dict_keys([(), ('Price',), ('UIncome',), ('Advertising',), ('US',), ('Income',), ('ShelveLoc',), ('Education',), ('Urban',), ('Price', 'UIncome'), 
('Price', 'Advertising'), ('Price', 'US'), ('Price', 'Income'), ('Price', 'ShelveLoc'), ('Price', 'Education'), ('Price', 'Urban'), ('UIncome', 'Advertising'), ('UIncome', 'US'), ('UIncome', 'Income'), ('UIncome', 'ShelveLoc'), ('UIncome', 'Education'), ('UIncome', 'Urban'), ('Advertising', 'US'), ('Advertising', 'Income'), ('Advertising', 'ShelveLoc'), ('Advertising', 'Education'), ('Advertising', 'Urban'), ('US', 'Income'), ('US', 'ShelveLoc'), ('US', 'Education'), ('US', 'Urban'), ('Income', 'ShelveLoc'), ('Income', 'Education'), ('Income', 'Urban'), ('ShelveLoc', 'Education'), ('ShelveLoc', 'Urban'), ('Education', 'Urban'), ('Price', 'UIncome', 'Advertising'), ('Price', 'UIncome', 'US'), ('Price', 'UIncome', 'Income'), ('Price', 'UIncome', 'ShelveLoc'), ('Price', 'UIncome', 'Education'), ('Price', 'UIncome', 'Urban'), ('Price', 'Advertising', 'US'), ('Price', 'Advertising', 'Income'), ('Price', 'Advertising', 'ShelveLoc'), ('Price', 'Advertising', 'Education'), ('Price', 'Advertising', 'Urban'), ('Price', 'US', 'Income'), ('Price', 'US', 'ShelveLoc'), ('Price', 'US', 'Education'), ('Price', 'US', 'Urban'), ('Price', 'Income', 'ShelveLoc'), ('Price', 'Income', 'Education'), ('Price', 'Income', 'Urban'), ('Price', 'ShelveLoc', 'Education'), ('Price', 'ShelveLoc', 'Urban'), ('Price', 'Education', 'Urban'), ('UIncome', 'Advertising', 'US'), ('UIncome', 'Advertising', 'Income'), ('UIncome', 'Advertising', 'ShelveLoc'), ('UIncome', 'Advertising', 'Education'), ('UIncome', 'Advertising', 'Urban'), ('UIncome', 'US', 'Income'), ('UIncome', 'US', 'ShelveLoc'), ('UIncome', 'US', 'Education'), ('UIncome', 'US', 'Urban'), ('UIncome', 'Income', 'ShelveLoc'), ('UIncome', 'Income', 'Education'), ('UIncome', 'Income', 'Urban'), ('UIncome', 'ShelveLoc', 'Education'), ('UIncome', 'ShelveLoc', 'Urban'), ('UIncome', 'Education', 'Urban'), ('Advertising', 'US', 'Income'), ('Advertising', 'US', 'ShelveLoc'), ('Advertising', 'US', 'Education'), ('Advertising', 'US', 'Urban'), 
('Advertising', 'Income', 'ShelveLoc'), ('Advertising', 'Income', 'Education'), ('Advertising', 'Income', 'Urban'), ('Advertising', 'ShelveLoc', 'Education'), ('Advertising', 'ShelveLoc', 'Urban'), ('Advertising', 'Education', 'Urban'), ('US', 'Income', 'ShelveLoc'), ('US', 'Income', 'Education'), ('US', 'Income', 'Urban'), ('US', 'ShelveLoc', 'Education'), ('US', 'ShelveLoc', 'Urban'), ('US', 'Education', 'Urban'), ('Income', 'ShelveLoc', 'Education'), ('Income', 'ShelveLoc', 'Urban'), ('Income', 'Education', 'Urban'), ('ShelveLoc', 'Education', 'Urban')])" - ] - }, - "execution_count": 63, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "selector.results_.keys()" - ] - }, - { - "cell_type": "code", - "execution_count": 64, - "id": "0ca1f28c", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "('Price', 'Advertising', 'Income')" - ] - }, - "execution_count": 64, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "strategy = min_max(design,\n", - " min_terms=0,\n", - " max_terms=3,\n", - " lower_terms=['Price'],\n", - " upper_terms=['Price', 'Income', 'Advertising'])\n", - "selector = FeatureSelector(LinearRegression(fit_intercept=False),\n", - " strategy,\n", - " scoring='neg_mean_squared_error')\n", - "selector.fit(Carseats, Y)\n", - "selector.selected_state_" - ] - }, - { - "cell_type": "code", - "execution_count": 65, - "id": "5c6732fa", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "dict_keys([('Price',), ('Price', 'Advertising'), ('Price', 'Income'), ('Price', 'Advertising', 'Income')])" - ] - }, - "execution_count": 65, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "selector.results_.keys()" - ] - }, - { - "cell_type": "markdown", - "id": "7bb6fcc3", - "metadata": {}, - "source": [ - "### Stepwise selection" - ] - }, - { - "cell_type": "code", - "execution_count": 66, - "id": "9985d0fc", - "metadata": {}, - "outputs": [ - { - 
"data": { - "text/plain": [ - "('Advertising', 'Income', 'Price', 'ShelveLoc')" - ] - }, - "execution_count": 66, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "strategy = Stepwise.first_peak(design,\n", - " min_terms=0,\n", - " max_terms=6,\n", - " lower_terms=['Price'],\n", - " upper_terms=['Price', 'Income', 'Advertising', 'ShelveLoc', 'UIncome', 'US'\n", - " 'Education', 'Urban'])\n", - "selector = FeatureSelector(LinearRegression(fit_intercept=False),\n", - " strategy,\n", - " scoring='neg_mean_squared_error',\n", - " cv=3)\n", - "selector.fit(Carseats, Y)\n", - "selector.selected_state_" - ] - }, - { - "cell_type": "code", - "execution_count": 67, - "id": "d3cf3e9b", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "dict_keys([(), ('Price',), ('Price', 'UIncome'), ('Advertising', 'Price'), ('Income', 'Price'), ('Price', 'ShelveLoc'), ('Price', 'Urban'), ('Price', 'ShelveLoc', 'UIncome'), ('Advertising', 'Price', 'ShelveLoc'), ('Income', 'Price', 'ShelveLoc'), ('Price', 'ShelveLoc', 'Urban'), ('Advertising', 'Price', 'ShelveLoc', 'UIncome'), ('Advertising', 'Income', 'Price', 'ShelveLoc'), ('Advertising', 'Price', 'ShelveLoc', 'Urban'), ('Advertising', 'Income', 'Price', 'ShelveLoc', 'UIncome'), ('Advertising', 'Income', 'Price', 'ShelveLoc', 'Urban')])" - ] - }, - "execution_count": 67, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "selector.results_.keys()" - ] - }, - { - "cell_type": "code", - "execution_count": 68, - "id": "dd43ea7c", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{(): -8.055847677297269,\n", - " ('Price',): -6.514630258019962,\n", - " ('Price', 'UIncome'): -6.621654905418576,\n", - " ('Advertising', 'Price'): -5.825225309857156,\n", - " ('Income', 'Price'): -6.455432795910743,\n", - " ('Price', 'ShelveLoc'): -3.780183168075897,\n", - " ('Price', 'Urban'): -6.5430157266926114,\n", - " ('Price', 'ShelveLoc', 'UIncome'): 
-3.6938729706475004,\n", - " ('Advertising', 'Price', 'ShelveLoc'): -3.2067316025050645,\n", - " ('Income', 'Price', 'ShelveLoc'): -3.634698914456587,\n", - " ('Price', 'ShelveLoc', 'Urban'): -3.776148947585277,\n", - " ('Advertising', 'Price', 'ShelveLoc', 'UIncome'): -3.1240961493998642,\n", - " ('Advertising', 'Income', 'Price', 'ShelveLoc'): -3.0801704971796244,\n", - " ('Advertising', 'Price', 'ShelveLoc', 'Urban'): -3.207569489139369,\n", - " ('Advertising',\n", - " 'Income',\n", - " 'Price',\n", - " 'ShelveLoc',\n", - " 'UIncome'): -3.1048826894036115,\n", - " ('Advertising', 'Income', 'Price', 'ShelveLoc', 'Urban'): -3.0867130108677423}" - ] - }, - "execution_count": 68, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "selector.results_" - ] - }, - { - "cell_type": "code", - "execution_count": 69, - "id": "7c026f0a", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "('Advertising', 'Income', 'Price', 'ShelveLoc')" - ] - }, - "execution_count": 69, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "selector.selected_state_" - ] - }, - { - "cell_type": "markdown", - "id": "b4b89d04", - "metadata": {}, - "source": [ - "### Enforcing constraints\n", - "\n", - "In models with interactions, we may often want to impose constraints on interactions and main effects.\n", - "This can be achieved here by use of a `validator` that checks whether a given model is valid.\n", - "\n", - "Suppose we want to have the following constraint: `ShelveLoc` may not be in the model unless\n", - "`Price` is in the following model." 
- ] - }, - { - "cell_type": "code", - "execution_count": 70, - "id": "1c1e31d0", - "metadata": {}, - "outputs": [], - "source": [ - "design = ModelSpec(['Price', \n", - " 'Advertising', \n", - " 'Income',\n", - " 'ShelveLoc']).fit(Carseats)" - ] - }, - { - "cell_type": "markdown", - "id": "be929807", - "metadata": {}, - "source": [ - "The constraints are described with a boolean matrix with `(i,j)` as `j` is a child of `i`: so `j` should not\n", - "be in the model when `i` is not and enforced with a callable `validator` that evaluates each candidate state.\n", - "\n", - "Both `min_max_strategy` and `step_strategy` accept a `validator` argument." - ] - }, - { - "cell_type": "code", - "execution_count": 71, - "id": "c075b1b7", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "dict_keys([(), ('Price',), ('Advertising',), ('Income',), ('Price', 'Advertising'), ('Price', 'Income'), ('Price', 'ShelveLoc'), ('Advertising', 'Income'), ('Price', 'Advertising', 'Income'), ('Price', 'Advertising', 'ShelveLoc'), ('Price', 'Income', 'ShelveLoc'), ('Price', 'Advertising', 'Income', 'ShelveLoc')])" - ] - }, - "execution_count": 71, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from ISLP.models.strategy import validator_from_constraints\n", - "constraints = np.zeros((4, 4))\n", - "constraints[0,3] = 1\n", - "strategy = min_max(design,\n", - " min_terms=0,\n", - " max_terms=4,\n", - " validator=validator_from_constraints(design,\n", - " constraints))\n", - "selector = FeatureSelector(LinearRegression(fit_intercept=False),\n", - " strategy,\n", - " scoring='neg_mean_squared_error',\n", - " cv=3)\n", - "selector.fit(Carseats, Y)\n", - "selector.results_.keys()" - ] - }, - { - "cell_type": "code", - "execution_count": 72, - "id": "3472d47c", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "('Price', 'Advertising', 'Income', 'ShelveLoc')" - ] - }, - "execution_count": 72, - "metadata": {}, - "output_type": 
"execute_result" - } - ], - "source": [ - "selector.selected_state_" - ] - }, - { - "cell_type": "code", - "execution_count": 73, - "id": "5d2c82b9", - "metadata": {}, - "outputs": [], - "source": [ - "Hitters=load_data('Hitters')" - ] - }, - { - "cell_type": "code", - "execution_count": 74, - "id": "4b2ac2c2", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['AtBat', 'Hits', 'HmRun', 'Runs', 'RBI', 'Walks', 'Years', 'CAtBat',\n", - " 'CHits', 'CHmRun', 'CRuns', 'CRBI', 'CWalks', 'League', 'Division',\n", - " 'PutOuts', 'Assists', 'Errors', 'Salary', 'NewLeague'],\n", - " dtype='object')" - ] - }, - "execution_count": 74, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Hitters.columns" - ] - }, - { - "cell_type": "code", - "execution_count": 75, - "id": "bd2ad0dd", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "dict_keys([(), ('AtBat',), ('Hits',), ('HmRun',), ('Runs',), ('RBI',), ('Walks',), ('Years',), ('CAtBat',), ('CHits',), ('CHmRun',), ('CRuns',), ('CRBI',), ('CWalks',), ('League',), ('Division',), ('PutOuts',), ('Assists',), ('Errors',), ('NewLeague',), ('AtBat', 'CRBI'), ('CRBI', 'Hits'), ('CRBI', 'HmRun'), ('CRBI', 'Runs'), ('CRBI', 'RBI'), ('CRBI', 'Walks'), ('CRBI', 'Years'), ('CAtBat', 'CRBI'), ('CHits', 'CRBI'), ('CHmRun', 'CRBI'), ('CRBI', 'CRuns'), ('CRBI', 'CWalks'), ('CRBI', 'League'), ('CRBI', 'Division'), ('CRBI', 'PutOuts'), ('Assists', 'CRBI'), ('CRBI', 'Errors'), ('CRBI', 'NewLeague'), ('AtBat', 'CRBI', 'Hits'), ('CRBI', 'Hits', 'HmRun'), ('CRBI', 'Hits', 'Runs'), ('CRBI', 'Hits', 'RBI'), ('CRBI', 'Hits', 'Walks'), ('CRBI', 'Hits', 'Years'), ('CAtBat', 'CRBI', 'Hits'), ('CHits', 'CRBI', 'Hits'), ('CHmRun', 'CRBI', 'Hits'), ('CRBI', 'CRuns', 'Hits'), ('CRBI', 'CWalks', 'Hits'), ('CRBI', 'Hits', 'League'), ('CRBI', 'Division', 'Hits'), ('CRBI', 'Hits', 'PutOuts'), ('Assists', 'CRBI', 'Hits'), ('CRBI', 'Errors', 'Hits'), ('CRBI', 'Hits', 'NewLeague'), ('AtBat', 
'CRBI', 'Hits', 'PutOuts'), ('CRBI', 'Hits', 'HmRun', 'PutOuts'), ('CRBI', 'Hits', 'PutOuts', 'Runs'), ('CRBI', 'Hits', 'PutOuts', 'RBI'), ('CRBI', 'Hits', 'PutOuts', 'Walks'), ('CRBI', 'Hits', 'PutOuts', 'Years'), ('CAtBat', 'CRBI', 'Hits', 'PutOuts'), ('CHits', 'CRBI', 'Hits', 'PutOuts'), ('CHmRun', 'CRBI', 'Hits', 'PutOuts'), ('CRBI', 'CRuns', 'Hits', 'PutOuts'), ('CRBI', 'CWalks', 'Hits', 'PutOuts'), ('CRBI', 'Hits', 'League', 'PutOuts'), ('CRBI', 'Division', 'Hits', 'PutOuts'), ('Assists', 'CRBI', 'Hits', 'PutOuts'), ('CRBI', 'Errors', 'Hits', 'PutOuts'), ('CRBI', 'Hits', 'NewLeague', 'PutOuts'), ('AtBat', 'CRBI', 'Division', 'Hits', 'PutOuts'), ('CRBI', 'Division', 'Hits', 'HmRun', 'PutOuts'), ('CRBI', 'Division', 'Hits', 'PutOuts', 'Runs'), ('CRBI', 'Division', 'Hits', 'PutOuts', 'RBI'), ('CRBI', 'Division', 'Hits', 'PutOuts', 'Walks'), ('CRBI', 'Division', 'Hits', 'PutOuts', 'Years'), ('CAtBat', 'CRBI', 'Division', 'Hits', 'PutOuts'), ('CHits', 'CRBI', 'Division', 'Hits', 'PutOuts'), ('CHmRun', 'CRBI', 'Division', 'Hits', 'PutOuts'), ('CRBI', 'CRuns', 'Division', 'Hits', 'PutOuts'), ('CRBI', 'CWalks', 'Division', 'Hits', 'PutOuts'), ('CRBI', 'Division', 'Hits', 'League', 'PutOuts'), ('Assists', 'CRBI', 'Division', 'Hits', 'PutOuts'), ('CRBI', 'Division', 'Errors', 'Hits', 'PutOuts'), ('CRBI', 'Division', 'Hits', 'NewLeague', 'PutOuts'), ('AtBat', 'CRBI', 'Division', 'Hits', 'HmRun', 'PutOuts'), ('AtBat', 'CRBI', 'Division', 'Hits', 'PutOuts', 'Runs'), ('AtBat', 'CRBI', 'Division', 'Hits', 'PutOuts', 'RBI'), ('AtBat', 'CRBI', 'Division', 'Hits', 'PutOuts', 'Walks'), ('AtBat', 'CRBI', 'Division', 'Hits', 'PutOuts', 'Years'), ('AtBat', 'CAtBat', 'CRBI', 'Division', 'Hits', 'PutOuts'), ('AtBat', 'CHits', 'CRBI', 'Division', 'Hits', 'PutOuts'), ('AtBat', 'CHmRun', 'CRBI', 'Division', 'Hits', 'PutOuts'), ('AtBat', 'CRBI', 'CRuns', 'Division', 'Hits', 'PutOuts'), ('AtBat', 'CRBI', 'CWalks', 'Division', 'Hits', 'PutOuts'), ('AtBat', 'CRBI', 'Division', 'Hits', 
'League', 'PutOuts'), ('Assists', 'AtBat', 'CRBI', 'Division', 'Hits', 'PutOuts'), ('AtBat', 'CRBI', 'Division', 'Errors', 'Hits', 'PutOuts'), ('AtBat', 'CRBI', 'Division', 'Hits', 'NewLeague', 'PutOuts'), ('AtBat', 'CRBI', 'Division', 'Hits', 'HmRun', 'PutOuts', 'Walks'), ('AtBat', 'CRBI', 'Division', 'Hits', 'PutOuts', 'Runs', 'Walks'), ('AtBat', 'CRBI', 'Division', 'Hits', 'PutOuts', 'RBI', 'Walks'), ('AtBat', 'CRBI', 'Division', 'Hits', 'PutOuts', 'Walks', 'Years'), ('AtBat', 'CAtBat', 'CRBI', 'Division', 'Hits', 'PutOuts', 'Walks'), ('AtBat', 'CHits', 'CRBI', 'Division', 'Hits', 'PutOuts', 'Walks'), ('AtBat', 'CHmRun', 'CRBI', 'Division', 'Hits', 'PutOuts', 'Walks'), ('AtBat', 'CRBI', 'CRuns', 'Division', 'Hits', 'PutOuts', 'Walks'), ('AtBat', 'CRBI', 'CWalks', 'Division', 'Hits', 'PutOuts', 'Walks'), ('AtBat', 'CRBI', 'Division', 'Hits', 'League', 'PutOuts', 'Walks'), ('Assists', 'AtBat', 'CRBI', 'Division', 'Hits', 'PutOuts', 'Walks'), ('AtBat', 'CRBI', 'Division', 'Errors', 'Hits', 'PutOuts', 'Walks'), ('AtBat', 'CRBI', 'Division', 'Hits', 'NewLeague', 'PutOuts', 'Walks'), ('AtBat', 'CRBI', 'CWalks', 'Division', 'Hits', 'HmRun', 'PutOuts', 'Walks'), ('AtBat', 'CRBI', 'CWalks', 'Division', 'Hits', 'PutOuts', 'Runs', 'Walks'), ('AtBat', 'CRBI', 'CWalks', 'Division', 'Hits', 'PutOuts', 'RBI', 'Walks'), ('AtBat', 'CRBI', 'CWalks', 'Division', 'Hits', 'PutOuts', 'Walks', 'Years'), ('AtBat', 'CAtBat', 'CRBI', 'CWalks', 'Division', 'Hits', 'PutOuts', 'Walks'), ('AtBat', 'CHits', 'CRBI', 'CWalks', 'Division', 'Hits', 'PutOuts', 'Walks'), ('AtBat', 'CHmRun', 'CRBI', 'CWalks', 'Division', 'Hits', 'PutOuts', 'Walks'), ('AtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'PutOuts', 'Walks'), ('AtBat', 'CRBI', 'CWalks', 'Division', 'Hits', 'League', 'PutOuts', 'Walks'), ('Assists', 'AtBat', 'CRBI', 'CWalks', 'Division', 'Hits', 'PutOuts', 'Walks'), ('AtBat', 'CRBI', 'CWalks', 'Division', 'Errors', 'Hits', 'PutOuts', 'Walks'), ('AtBat', 'CRBI', 'CWalks', 'Division', 
'Hits', 'NewLeague', 'PutOuts', 'Walks'), ('AtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'HmRun', 'PutOuts', 'Walks'), ('AtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'PutOuts', 'Runs', 'Walks'), ('AtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'PutOuts', 'RBI', 'Walks'), ('AtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'PutOuts', 'Walks', 'Years'), ('AtBat', 'CAtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'PutOuts', 'Walks'), ('AtBat', 'CHits', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'PutOuts', 'Walks'), ('AtBat', 'CHmRun', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'PutOuts', 'Walks'), ('AtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'League', 'PutOuts', 'Walks'), ('Assists', 'AtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'PutOuts', 'Walks'), ('AtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Errors', 'Hits', 'PutOuts', 'Walks'), ('AtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'NewLeague', 'PutOuts', 'Walks'), ('AtBat', 'CAtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'HmRun', 'PutOuts', 'Walks'), ('AtBat', 'CAtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'PutOuts', 'Runs', 'Walks'), ('AtBat', 'CAtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'PutOuts', 'RBI', 'Walks'), ('AtBat', 'CAtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'PutOuts', 'Walks', 'Years'), ('AtBat', 'CAtBat', 'CHits', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'PutOuts', 'Walks'), ('AtBat', 'CAtBat', 'CHmRun', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'PutOuts', 'Walks'), ('AtBat', 'CAtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'League', 'PutOuts', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'PutOuts', 'Walks'), ('AtBat', 'CAtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Errors', 'Hits', 'PutOuts', 'Walks'), ('AtBat', 'CAtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'NewLeague', 'PutOuts', 'Walks'), ('Assists', 
'AtBat', 'CAtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'HmRun', 'PutOuts', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'PutOuts', 'Runs', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'PutOuts', 'RBI', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'PutOuts', 'Walks', 'Years'), ('Assists', 'AtBat', 'CAtBat', 'CHits', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'PutOuts', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CHmRun', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'PutOuts', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'League', 'PutOuts', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Errors', 'Hits', 'PutOuts', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'NewLeague', 'PutOuts', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'HmRun', 'League', 'PutOuts', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'League', 'PutOuts', 'Runs', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'League', 'PutOuts', 'RBI', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'League', 'PutOuts', 'Walks', 'Years'), ('Assists', 'AtBat', 'CAtBat', 'CHits', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'League', 'PutOuts', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CHmRun', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'League', 'PutOuts', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Errors', 'Hits', 'League', 'PutOuts', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'League', 'NewLeague', 'PutOuts', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'HmRun', 
'League', 'PutOuts', 'Runs', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'League', 'PutOuts', 'RBI', 'Runs', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'League', 'PutOuts', 'Runs', 'Walks', 'Years'), ('Assists', 'AtBat', 'CAtBat', 'CHits', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'League', 'PutOuts', 'Runs', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CHmRun', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'League', 'PutOuts', 'Runs', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Errors', 'Hits', 'League', 'PutOuts', 'Runs', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'League', 'NewLeague', 'PutOuts', 'Runs', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Errors', 'Hits', 'HmRun', 'League', 'PutOuts', 'Runs', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Errors', 'Hits', 'League', 'PutOuts', 'RBI', 'Runs', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Errors', 'Hits', 'League', 'PutOuts', 'Runs', 'Walks', 'Years'), ('Assists', 'AtBat', 'CAtBat', 'CHits', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Errors', 'Hits', 'League', 'PutOuts', 'Runs', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CHmRun', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Errors', 'Hits', 'League', 'PutOuts', 'Runs', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Errors', 'Hits', 'League', 'NewLeague', 'PutOuts', 'Runs', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Errors', 'Hits', 'HmRun', 'League', 'PutOuts', 'RBI', 'Runs', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Errors', 'Hits', 'HmRun', 'League', 'PutOuts', 'Runs', 'Walks', 'Years'), ('Assists', 'AtBat', 'CAtBat', 'CHits', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Errors', 
'Hits', 'HmRun', 'League', 'PutOuts', 'Runs', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CHmRun', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Errors', 'Hits', 'HmRun', 'League', 'PutOuts', 'Runs', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Errors', 'Hits', 'HmRun', 'League', 'NewLeague', 'PutOuts', 'Runs', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CHits', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Errors', 'Hits', 'HmRun', 'League', 'PutOuts', 'RBI', 'Runs', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CHits', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Errors', 'Hits', 'HmRun', 'League', 'PutOuts', 'Runs', 'Walks', 'Years'), ('Assists', 'AtBat', 'CAtBat', 'CHits', 'CHmRun', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Errors', 'Hits', 'HmRun', 'League', 'PutOuts', 'Runs', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CHits', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Errors', 'Hits', 'HmRun', 'League', 'NewLeague', 'PutOuts', 'Runs', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CHits', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Errors', 'Hits', 'HmRun', 'League', 'PutOuts', 'RBI', 'Runs', 'Walks', 'Years'), ('Assists', 'AtBat', 'CAtBat', 'CHits', 'CHmRun', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Errors', 'Hits', 'HmRun', 'League', 'PutOuts', 'RBI', 'Runs', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CHits', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Errors', 'Hits', 'HmRun', 'League', 'NewLeague', 'PutOuts', 'RBI', 'Runs', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CHits', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Errors', 'Hits', 'HmRun', 'League', 'NewLeague', 'PutOuts', 'RBI', 'Runs', 'Walks', 'Years'), ('Assists', 'AtBat', 'CAtBat', 'CHits', 'CHmRun', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Errors', 'Hits', 'HmRun', 'League', 'NewLeague', 'PutOuts', 'RBI', 'Runs', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CHits', 'CHmRun', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Errors', 'Hits', 'HmRun', 'League', 'NewLeague', 'PutOuts', 'RBI', 'Runs', 'Walks', 'Years')])" - ] - 
}, - "execution_count": 75, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Hitters = Hitters.dropna()\n", - "Y=Hitters['Salary']\n", - "X=Hitters.drop('Salary', axis=1)\n", - "design = ModelSpec(X.columns).fit(X)\n", - "strategy = Stepwise.first_peak(design,\n", - " direction='forward',\n", - " min_terms=0,\n", - " max_terms=19)\n", - "selector = FeatureSelector(LinearRegression(fit_intercept=False),\n", - " strategy,\n", - " scoring='neg_mean_squared_error', cv=None)\n", - "selector.fit(X, Y)\n", - "selector.results_.keys()" - ] - }, - { - "cell_type": "code", - "execution_count": 76, - "id": "31788748", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "19" - ] - }, - "execution_count": 76, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "len(selector.selected_state_)" - ] - }, - { - "cell_type": "code", - "execution_count": 77, - "id": "e97d80c3", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "19" - ] - }, - "execution_count": 77, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "len(X.columns)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a71f0332", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Start: AIC=3215.77\n", - "Salary ~ 1\n", - "\n", - " Df Sum of Sq RSS AIC\n", - "+ CRBI 1 17139434 36179679 3115.8\n", - "+ CRuns 1 16881162 36437951 3117.6\n", - "+ CHits 1 16065140 37253973 3123.5\n", - "+ CAtBat 1 14759710 38559403 3132.5\n", - "+ CHmRun 1 14692193 38626920 3133.0\n", - "+ CWalks 1 12792622 40526491 3145.6\n", - "+ RBI 1 10771083 42548030 3158.4\n", - "+ Walks 1 10504833 42814280 3160.1\n", - "+ Hits 1 10260491 43058621 3161.6\n", - "+ Runs 1 9399158 43919955 3166.8\n", - "+ Years 1 8559105 44760007 3171.7\n", - "+ AtBat 1 8309469 45009644 3173.2\n", - "+ HmRun 1 6273967 47045145 3184.8\n", - "+ PutOuts 1 4814100 48505013 3192.9\n", - "+ 
Division 1 1976102 51343011 3207.8\n", - " 53319113 3215.8\n", - "+ Assists 1 34497 53284615 3217.6\n", - "+ League 1 10876 53308237 3217.7\n", - "+ Errors 1 1555 53317558 3217.8\n", - "+ NewLeague 1 428 53318684 3217.8\n", - "\n", - "Step: AIC=3115.78\n", - "Salary ~ CRBI\n", - "\n", - " Df Sum of Sq RSS AIC\n", - "+ Hits 1 5533119 30646560 3074.1\n", - "+ Runs 1 5176532 31003147 3077.2\n", - "+ Walks 1 4199733 31979946 3085.3\n", - "+ AtBat 1 4064585 32115095 3086.4\n", - "+ RBI 1 3308272 32871407 3092.6\n", - "+ PutOuts 1 3267035 32912644 3092.9\n", - "+ Division 1 1733887 34445793 3104.9\n", - "+ Years 1 1667339 34512340 3105.4\n", - "+ HmRun 1 1271587 34908092 3108.4\n", - "+ CRuns 1 354561 35825119 3115.2\n", - "+ Assists 1 346020 35833659 3115.2\n", - " 36179679 3115.8\n", - "+ Errors 1 194403 35985276 3116.4\n", - "+ CAtBat 1 92261 36087418 3117.1\n", - "+ CHits 1 75469 36104210 3117.2\n", - "+ CWalks 1 51974 36127705 3117.4\n", - "+ NewLeague 1 17778 36161901 3117.7\n", - "+ League 1 11825 36167855 3117.7\n", - "+ CHmRun 1 515 36179165 3117.8\n", - "\n", - "Step: AIC=3074.13\n", - "Salary ~ CRBI + Hits\n", - "\n", - " Df Sum of Sq RSS AIC\n", - "+ PutOuts 1 1397263 29249297 3063.8\n", - "+ Division 1 1279275 29367285 3064.9\n", - "+ AtBat 1 821767 29824793 3069.0\n", - "+ Walks 1 781767 29864793 3069.3\n", - "+ Years 1 254910 30391650 3073.9\n", - " 30646560 3074.1\n", - "+ League 1 208880 30437680 3074.3\n", - "+ CRuns 1 132614 30513946 3075.0\n", - "+ NewLeague 1 118474 30528086 3075.1\n", - "+ Runs 1 114198 30532362 3075.1\n", - "+ Errors 1 99776 30546784 3075.3\n", - "+ CAtBat 1 83517 30563043 3075.4\n", - "+ Assists 1 44781 30601779 3075.7\n", - "+ CWalks 1 23668 30622892 3075.9\n", - "+ CHmRun 1 4790 30641769 3076.1\n", - "+ CHits 1 4358 30642202 3076.1\n", - "+ HmRun 1 2173 30644387 3076.1\n", - "+ RBI 1 1137 30645423 3076.1\n", - "\n", - "Step: AIC=3063.85\n", - "Salary ~ CRBI + Hits + PutOuts\n", - "\n", - " Df Sum of Sq RSS AIC\n", - "+ Division 
1 1278445 27970852 3054.1\n", - "+ AtBat 1 1009933 28239364 3056.6\n", - "+ Walks 1 539490 28709807 3061.0\n", - "+ CRuns 1 273649 28975648 3063.4\n", - " 29249297 3063.8\n", - "+ Years 1 136906 29112391 3064.6\n", - "+ League 1 122841 29126456 3064.8\n", - "+ Runs 1 117930 29131367 3064.8\n", - "+ Errors 1 97244 29152053 3065.0\n", - "+ NewLeague 1 57839 29191458 3065.3\n", - "+ CHits 1 35096 29214201 3065.5\n", - "+ RBI 1 33965 29215331 3065.6\n", - "+ HmRun 1 31227 29218070 3065.6\n", - "+ CWalks 1 28572 29220725 3065.6\n", - "+ CAtBat 1 20518 29228779 3065.7\n", - "+ Assists 1 1681 29247616 3065.8\n", - "+ CHmRun 1 1419 29247878 3065.8\n", - "\n", - "Step: AIC=3054.1\n", - "Salary ~ CRBI + Hits + PutOuts + Division\n", - "\n", - " Df Sum of Sq RSS AIC\n", - "+ AtBat 1 820952 27149899 3048.3\n", - "+ Walks 1 491584 27479268 3051.4\n", - " 27970852 3054.1\n", - "+ CRuns 1 193604 27777248 3054.3\n", - "+ Years 1 166845 27804007 3054.5\n", - "+ League 1 110628 27860224 3055.1\n", - "+ Errors 1 81385 27889467 3055.3\n", - "+ Runs 1 65921 27904931 3055.5\n", - "+ RBI 1 53719 27917133 3055.6\n", - "+ NewLeague 1 52275 27918577 3055.6\n", - "+ CHits 1 33863 27936989 3055.8\n", - "+ HmRun 1 26390 27944462 3055.8\n", - "+ CAtBat 1 18751 27952101 3055.9\n", - "+ CWalks 1 5723 27965129 3056.0\n", - "+ Assists 1 1036 27969816 3056.1\n", - "+ CHmRun 1 165 27970687 3056.1\n", - "\n", - "Step: AIC=3048.26\n", - "Salary ~ CRBI + Hits + PutOuts + Division + AtBat\n", - "\n", - " Df Sum of Sq RSS AIC\n", - "+ Walks 1 954996 26194904 3040.8\n", - "+ Years 1 253362 26896537 3047.8\n", - "+ Runs 1 208743 26941157 3048.2\n", - " 27149899 3048.3\n", - "+ CRuns 1 185825 26964075 3048.5\n", - "+ League 1 95986 27053913 3049.3\n", - "+ NewLeague 1 52693 27097206 3049.8\n", - "+ CHmRun 1 43173 27106726 3049.8\n", - "+ Assists 1 28898 27121001 3050.0\n", - "+ CAtBat 1 20989 27128910 3050.1\n", - "+ CWalks 1 15599 27134301 3050.1\n", - "+ Errors 1 6265 27143634 3050.2\n", - "+ CHits 1 5305 
27144594 3050.2\n", - "+ RBI 1 1236 27148663 3050.2\n", - "+ HmRun 1 11 27149888 3050.3\n", - "\n", - "Step: AIC=3040.85\n", - "Salary ~ CRBI + Hits + PutOuts + Division + AtBat + Walks\n", - "\n", - " Df Sum of Sq RSS AIC\n", - "+ CWalks 1 240687 25954217 3040.4\n", - " 26194904 3040.8\n", - "+ Years 1 184508 26010396 3041.0\n", - "+ CRuns 1 110695 26084209 3041.7\n", - "+ League 1 77974 26116930 3042.1\n", - "+ Assists 1 75782 26119122 3042.1\n", - "+ NewLeague 1 40909 26153995 3042.4\n", - "+ CHits 1 37304 26157599 3042.5\n", - "+ RBI 1 11728 26183176 3042.7\n", - "+ HmRun 1 4747 26190157 3042.8\n", - "+ Errors 1 2727 26192177 3042.8\n", - "+ CAtBat 1 2630 26192274 3042.8\n", - "+ CHmRun 1 943 26193961 3042.8\n", - "+ Runs 1 37 26194867 3042.8\n", - "\n", - "Step: AIC=3040.42\n", - "Salary ~ CRBI + Hits + PutOuts + Division + AtBat + Walks + CWalks\n", - "\n", - " Df Sum of Sq RSS AIC\n", - "+ CRuns 1 794983 25159234 3034.2\n", - "+ CHits 1 273728 25680489 3039.6\n", - " 25954217 3040.4\n", - "+ Assists 1 138506 25815711 3041.0\n", - "+ CAtBat 1 89289 25864929 3041.5\n", - "+ RBI 1 86941 25867276 3041.5\n", - "+ League 1 77159 25877058 3041.6\n", - "+ Years 1 70126 25884091 3041.7\n", - "+ NewLeague 1 37807 25916410 3042.0\n", - "+ HmRun 1 33601 25920616 3042.1\n", - "+ CHmRun 1 9034 25945183 3042.3\n", - "+ Errors 1 6928" - ] - } - ], - "source": [ - "%%R -i Hitters\n", - "step(lm(Salary ~ 1, data=Hitters), scope=list(upper=lm(Salary ~ ., data=Hitters)), direction='forward', trace=TRUE)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6117f650", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "536a8bc3", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "bddc13c5", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { @@ -2726,9 +262,9 @@ "formats": 
"source/models///ipynb,jupyterbook/models///md:myst,jupyterbook/models///ipynb" }, "kernelspec": { - "display_name": "islp_test", + "display_name": "python3", "language": "python", - "name": "islp_test" + "name": "python3" }, "language_info": { "codemirror_mode": { diff --git a/docs/jupyterbook/models/selection.md b/docs/jupyterbook/models/selection.md index c868c75..949ccc1 100644 --- a/docs/jupyterbook/models/selection.md +++ b/docs/jupyterbook/models/selection.md @@ -5,670 +5,107 @@ jupytext: extension: .md format_name: myst format_version: 0.13 - jupytext_version: 1.14.1 + jupytext_version: 1.14.5 kernelspec: - display_name: islp_test + display_name: python3 language: python - name: islp_test + name: python3 --- # Model selection using `ModelSpec` -```{code-cell} ipython3 -import numpy as np, pandas as pd -%load_ext rpy2.ipython - -from ISLP import load_data -from ISLP.models import ModelSpec - -import statsmodels.api as sm -``` - -```{code-cell} ipython3 -Carseats = load_data('Carseats') -%R -i Carseats -Carseats.columns -``` - -## Let's break up income into groups - -```{code-cell} ipython3 -Carseats['OIncome'] = pd.cut(Carseats['Income'], - [0,50,90,200], - labels=['L','M','H']) -Carseats['OIncome'] -``` - -Let's also create an unordered version - -```{code-cell} ipython3 -Carseats['UIncome'] = pd.cut(Carseats['Income'], - [0,50,90,200], - labels=['L','M','H'], - ordered=False) -Carseats['UIncome'] -``` - -## A simple model - -```{code-cell} ipython3 -design = ModelSpec(['Price', 'Income']) -X = design.fit_transform(Carseats) -X.columns -``` - -```{code-cell} ipython3 -Y = Carseats['Sales'] -M = sm.OLS(Y, X).fit() -M.params -``` - -## Basic procedure - -The design matrix is built by cobbling together a set of columns and possibly transforming them. -A `pd.DataFrame` is essentially a list of columns. One of the first tasks done in `ModelSpec.fit` -is to inspect a dataframe for column info. 
The column `ShelveLoc` is categorical: - -```{code-cell} ipython3 -Carseats['ShelveLoc'] -``` - -This is recognized by `ModelSpec` in the form of `Column` objects which are just named tuples with two methods -`get_columns` and `fit_encoder`. - -```{code-cell} ipython3 -design.column_info_['ShelveLoc'] -``` - -It recognized ordinal columns as well. - -```{code-cell} ipython3 -design.column_info_['OIncome'] -``` - -```{code-cell} ipython3 -income = design.column_info_['Income'] -cols, names = income.get_columns(Carseats) -(cols[:4], names) -``` - -## Encoding a column - -In building a design matrix we must extract columns from our dataframe (or `np.ndarray`). Categorical -variables usually are encoded by several columns, typically one less than the number of categories. -This task is handled by the `encoder` of the `Column`. The encoder must satisfy the `sklearn` transform -model, i.e. `fit` on some array and `transform` on future arrays. The `fit_encoder` method of `Column` fits -its encoder the first time data is passed to it. - -```{code-cell} ipython3 -shelve = design.column_info_['ShelveLoc'] -cols, names = shelve.get_columns(Carseats) -(cols[:4], names) -``` - -```{code-cell} ipython3 -oincome = design.column_info_['OIncome'] -oincome.get_columns(Carseats)[0][:4] -``` - -## The terms - -The design matrix consists of several sets of columns. This is managed by the `ModelSpec` through -the `terms` argument which should be a sequence. The elements of `terms` are often -going to be strings (or tuples of strings for interactions, see below) but are converted to a -`Variable` object and stored in the `terms_` of the fitted `ModelSpec`. A `Variable` is just a named tuple. - -```{code-cell} ipython3 -design.terms -``` - -```{code-cell} ipython3 -design.terms_ -``` - -While each `Column` can itself extract data, they are all promoted to `Variable` to be of a uniform type. 
A -`Variable` can also create columns through the `build_columns` method of `ModelSpec` - -```{code-cell} ipython3 -price = design.terms_[0] -design.build_columns(Carseats, price) -``` - -Note that `Variable` objects have a tuple of `variables` as well as an `encoder` attribute. The -tuple of `variables` first creates a concatenated dataframe from all corresponding variables and then -is run through `encoder.transform`. The `encoder.fit` method of each `Variable` is run once during -the call to `ModelSpec.fit`. - -```{code-cell} ipython3 -from ISLP.models.model_spec import Variable - -new_var = Variable(('Price', 'Income', 'UIncome'), name='mynewvar', encoder=None) -design.build_columns(Carseats, new_var) -``` - -Let's now transform these columns with an encoder. Within `ModelSpec` we will first build the -arrays above and then call `pca.fit` and finally `pca.transform` within `design.build_columns`. - -```{code-cell} ipython3 -from sklearn.decomposition import PCA -pca = PCA(n_components=2) -pca.fit(design.build_columns(Carseats, new_var)[0]) # this is done within `ModelSpec.fit` -pca_var = Variable(('Price', 'Income', 'UIncome'), name='mynewvar', encoder=pca) -design.build_columns(Carseats, pca_var) -``` - -The elements of the `variables` attribute may be column identifiers ( `"Price"`), `Column` instances (`price`) -or `Variable` instances (`pca_var`). - -```{code-cell} ipython3 -fancy_var = Variable(('Price', price, pca_var), name='fancy', encoder=None) -design.build_columns(Carseats, fancy_var) -``` - -We can of course run PCA again on these features (if we wanted). 
- -```{code-cell} ipython3 -pca2 = PCA(n_components=2) -pca2.fit(design.build_columns(Carseats, fancy_var)[0]) # this is done within `ModelSpec.fit` -pca2_var = Variable(('Price', price, pca_var), name='fancy_pca', encoder=pca2) -design.build_columns(Carseats, pca2_var) -``` - -## Building the design matrix - -With these notions in mind, the final design is essentially then - -```{code-cell} ipython3 -X_hand = np.column_stack([design.build_columns(Carseats, v)[0] for v in design.terms_])[:4] -``` - -An intercept column is added if `design.intercept` is `True` and if the original argument to `transform` is -a dataframe the index is adjusted accordingly. - -```{code-cell} ipython3 -design.intercept -``` - -```{code-cell} ipython3 -design.transform(Carseats)[:4] -``` - -## Predicting - -Constructing the design matrix at any values is carried out by the `transform` method. - -```{code-cell} ipython3 -new_data = pd.DataFrame({'Price':[10,20], 'Income':[40, 50]}) -new_X = design.transform(new_data) -M.get_prediction(new_X).predicted_mean -``` - -```{code-cell} ipython3 -%%R -i new_data,Carseats -predict(lm(Sales ~ Price + Income, data=Carseats), new_data) -``` - -### Difference between using `pd.DataFrame` and `np.ndarray` - -If the `terms` only refer to a few columns of the data frame, the `transform` method only needs a dataframe with those columns. - -If we had used an `np.ndarray`, the column identifiers would be integers identifying specific columns so, -in order to work correctly, `transform` would need another `np.ndarray` where the columns have the same meaning. 
- -```{code-cell} ipython3 -Carseats_np = np.asarray(Carseats[['Price', 'ShelveLoc', 'US', 'Income']]) -design_np = ModelSpec([0,3]).fit(Carseats_np) -design_np.transform(Carseats_np)[:4] -``` - -The following will fail for hopefully obvious reasons - -```{code-cell} ipython3 -try: - new_D = np.zeros((2,2)) - new_D[:,0] = [10,20] - new_D[:,1] = [40,50] - M.get_prediction(new_D).predicted_mean -except ValueError as e: - print(e) -``` - -Ultimately, `M` expects 3 columns for new predictions because it was fit -with a matrix having 3 columns (the first representing an intercept). - -We might be tempted to try as with the `pd.DataFrame` and produce -an `np.ndarray` with only the necessary variables. - -```{code-cell} ipython3 -try: - new_X = np.zeros((2,2)) - new_X[:,0] = [10,20] - new_X[:,1] = [40,50] - new_D = design_np.transform(new_X) - M.get_prediction(new_D).predicted_mean -except IndexError as e: - print(e) -``` - -This fails because `design_np` is looking for column `3` from its `terms`: - -```{code-cell} ipython3 -design_np.terms_ -``` - -However, if we have an `np.ndarray` in which the first column indeed represents `Price` and the fourth indeed -represents `Income` then we can arrive at the correct answer by supplying such the array to `design_np.transform`: - -```{code-cell} ipython3 -new_X = np.zeros((2,4)) -new_X[:,0] = [10,20] -new_X[:,3] = [40,50] -new_D = design_np.transform(new_X) -M.get_prediction(new_D).predicted_mean -``` - -Given this subtlety about needing to supply arrays with identical column structure to `transform` when -using `np.ndarray` we presume that using a `pd.DataFrame` will be the more popular use case. - -+++ - -## A model with some categorical variables - -Categorical variables become `Column` instances with encoders. 
- -```{code-cell} ipython3 -design = ModelSpec(['Population', 'Price', 'UIncome', 'ShelveLoc']).fit(Carseats) -design.column_info_['UIncome'] -``` - -```{code-cell} ipython3 -X = design.fit_transform(Carseats) -X.columns -``` - -```{code-cell} ipython3 -sm.OLS(Y, X).fit().params -``` - -```{code-cell} ipython3 -%%R -lm(Sales ~ Population + Price + UIncome + ShelveLoc, data=Carseats)$coef -``` - -## Getting the encoding you want - -By default the level dropped by `ModelSpec` will be the first of the `categories_` values from -`sklearn.preprocessing.OneHotEncoder()`. We might wish to change this. It seems -as if the correct way to do this would be something like `Variable(('UIncome',), 'mynewencoding', new_encoder)` -where `new_encoder` would somehow drop the column we want dropped. - -However, when using the convenient identifier `UIncome` in the `variables` argument, this maps to the `Column` associated to `UIncome` within `design.column_info_`: - -```{code-cell} ipython3 -design.column_info_['UIncome'] -``` - -This column already has an encoder and `Column` instances are immutable as named tuples. Further, there are times when -we may want to encode `UIncome` differently within the same model. In the model below the main effect of `UIncome` is encoded with two columns while in the interaction `UIncome` (see below) has three columns. This is a design of interest -and we need a way to allow different encodings of the same column of `Carseats` - -```{code-cell} ipython3 -%%R -lm(Sales ~ UIncome:ShelveLoc + UIncome, data=Carseats) -``` - - We can create a new -`Column` with the encoder we want. For categorical variables, there is a convenience function to do so. 
- -```{code-cell} ipython3 -from ISLP.models.model_spec import contrast -pref_encoding = contrast('UIncome', 'drop', 'L') -``` - -```{code-cell} ipython3 -design.build_columns(Carseats, pref_encoding) -``` - -```{code-cell} ipython3 -design = ModelSpec(['Population', 'Price', pref_encoding, 'ShelveLoc']).fit(Carseats) -X = design.fit_transform(Carseats) -X.columns -``` - -```{code-cell} ipython3 -sm.OLS(Y, X).fit().params -``` - -```{code-cell} ipython3 -%%R -lm(Sales ~ Population + Price + UIncome + ShelveLoc, data=Carseats)$coef -``` - -## Interactions - -We've referred to interactions above. These are specified (by convenience) as tuples in the `terms` argument -to `ModelSpec`. - -```{code-cell} ipython3 -design = ModelSpec([('UIncome', 'ShelveLoc'), 'UIncome']) -X = design.fit_transform(Carseats) -sm.OLS(Y, X).fit().params -``` - -The tuples in `terms` are converted to `Variable` in the formalized `terms_` attribute by creating a `Variable` with -`variables` set to the tuple and the encoder an `Interaction` encoder which (unsurprisingly) creates the interaction columns from the concatenated data frames of `UIncome` and `ShelveLoc`. - -```{code-cell} ipython3 -design.terms_[0] -``` - -Comparing this to the previous `R` model. - -```{code-cell} ipython3 -%%R -lm(Sales ~ UIncome:ShelveLoc + UIncome, data=Carseats) -``` - -We note a few important things: - -1. `R` has reorganized the columns of the design from the formula: although we wrote `UIncome:ShelveLoc` first these -columns have been built later. **`ModelSpec` builds columns in the order determined by `terms`!** - -2. As noted above, `R` has encoded `UIncome` differently in the main effect and in the interaction. For `ModelSpec`, the reference to `UIncome` always refers to the column in `design.column_info_` and will always build only the columns for `L` and `M`. 
**`ModelSpec` does no inspection of terms to decide how to encode categorical variables.** - -A few notes: - -- **Why not try to inspect the terms?** For any nontrivial formula in `R` with several categorical variables and interactions, predicting what columns will be produced from a given formula is not simple. **`ModelSpec` errs on the side of being explicit.** -- **Is it impossible to build the design as `R` has?** No. An advanced user who *knows* they want the columns built as `R` has can do so (fairly) easily. - -```{code-cell} ipython3 -full_encoding = contrast('UIncome', None) -design.build_columns(Carseats, full_encoding) -``` - -```{code-cell} ipython3 -design = ModelSpec([pref_encoding, (full_encoding, 'ShelveLoc')]) -X = design.fit_transform(Carseats) -sm.OLS(Y, X).fit().params -``` - -## Special encodings - -For flexible models, we may want to consider transformations of features, i.e. polynomial -or spline transformations. Given transforms that follow the `fit/transform` paradigm -we can of course achieve this with a `Column` and an `encoder`. 
The `ISLP.transforms` -package includes a `Poly` transform - -```{code-cell} ipython3 -from ISLP.models.model_spec import poly -poly('Income', 3) -``` - -```{code-cell} ipython3 -design = ModelSpec([poly('Income', 3), 'ShelveLoc']) -X = design.fit_transform(Carseats) -sm.OLS(Y, X).fit().params -``` - -Compare: - -```{code-cell} ipython3 -%%R -lm(Sales ~ poly(Income, 3) + ShelveLoc, data=Carseats)$coef -``` - -## Splines - -Support for natural and B-splines is also included - -```{code-cell} ipython3 -from ISLP.models.model_spec import ns, bs, pca -design = ModelSpec([ns('Income', df=5), 'ShelveLoc']) -X = design.fit_transform(Carseats) -sm.OLS(Y, X).fit().params -``` - -```{code-cell} ipython3 -%%R -library(splines) -lm(Sales ~ ns(Income, df=5) + ShelveLoc, data=Carseats)$coef -``` - -```{code-cell} ipython3 -design = ModelSpec([bs('Income', df=7, degree=2), 'ShelveLoc']) -X = design.fit_transform(Carseats) -sm.OLS(Y, X).fit().params -``` - -```{code-cell} ipython3 -%%R -lm(Sales ~ bs(Income, df=7, degree=2) + ShelveLoc, data=Carseats)$coef -``` - -## PCA - -```{code-cell} ipython3 -design = ModelSpec([pca(['Income', - 'Price', - 'Advertising', - 'Population'], - n_components=2, - name='myvars'), 'ShelveLoc']) -X = design.fit_transform(Carseats) -sm.OLS(Y, X).fit().params -``` +In this lab we illustrate how to run forward stepwise model selection +using the model specification capability of `ModelSpec`. ```{code-cell} ipython3 -%%R -lm(Sales ~ prcomp(cbind(Income, Price, Advertising, Population))$x[,1:2] + ShelveLoc, data=Carseats) -``` - -It is of course common to scale before running PCA. 
- -```{code-cell} ipython3 -design = ModelSpec([pca(['Income', - 'Price', - 'Advertising', - 'Population'], - n_components=2, - name='myvars', - scale=True), 'ShelveLoc']) -X = design.fit_transform(Carseats) -sm.OLS(Y, X).fit().params -``` - -```{code-cell} ipython3 -%%R -lm(Sales ~ prcomp(cbind(Income, Price, Advertising, Population), scale=TRUE)$x[,1:2] + ShelveLoc, data=Carseats) -``` - -There will be some small differences in the coefficients due to `sklearn` use of `np.std(ddof=0)` instead -of `np.std(ddof=1)`. - -```{code-cell} ipython3 -np.array(sm.OLS(Y, X).fit().params)[1:3] * np.sqrt(X.shape[0] / (X.shape[0]-1)) -``` - -## Model selection - -Another task requiring different design matrices is model selection. Manipulating -the `terms` attribute of a `ModelSpec` (or more precisely its more uniform version `terms_`) -can clearly allow for both exhaustive and stepwise model selection. - -```{code-cell} ipython3 -from ISLP.models.strategy import (Stepwise, - min_max) -from ISLP.models.generic_selector import FeatureSelector -``` - -### Best subsets - -```{code-cell} ipython3 -design = ModelSpec(['Price', - 'UIncome', - 'Advertising', - 'US', - 'Income', - 'ShelveLoc', - 'Education', - 'Urban']).fit(Carseats) -strategy = min_max(design, - min_terms=0, - max_terms=3) -``` - -```{code-cell} ipython3 -from sklearn.linear_model import LinearRegression -selector = FeatureSelector(LinearRegression(fit_intercept=False), - strategy, - scoring='neg_mean_squared_error') -``` - -```{code-cell} ipython3 -selector.fit(Carseats, Y) -``` - -```{code-cell} ipython3 -selector.selected_state_ -``` - -```{code-cell} ipython3 -selector.results_.keys() -``` - -```{code-cell} ipython3 -strategy = min_max(design, - min_terms=0, - max_terms=3, - lower_terms=['Price'], - upper_terms=['Price', 'Income', 'Advertising']) -selector = FeatureSelector(LinearRegression(fit_intercept=False), - strategy, - scoring='neg_mean_squared_error') -selector.fit(Carseats, Y) -selector.selected_state_ 
-``` - -```{code-cell} ipython3 -selector.results_.keys() +import numpy as np +import pandas as pd +from statsmodels.api import OLS +from ISLP import load_data +from ISLP.models import (ModelSpec, + Stepwise, + sklearn_selected) ``` -### Stepwise selection +### Forward Selection + +We will apply the forward-selection approach to the `Hitters` +data. We wish to predict a baseball player’s `Salary` on the +basis of various statistics associated with performance in the +previous year. ```{code-cell} ipython3 -strategy = Stepwise.first_peak(design, - min_terms=0, - max_terms=6, - lower_terms=['Price'], - upper_terms=['Price', 'Income', 'Advertising', 'ShelveLoc', 'UIncome', 'US' - 'Education', 'Urban']) -selector = FeatureSelector(LinearRegression(fit_intercept=False), - strategy, - scoring='neg_mean_squared_error', - cv=3) -selector.fit(Carseats, Y) -selector.selected_state_ +Hitters = load_data('Hitters') +np.isnan(Hitters['Salary']).sum() ``` -```{code-cell} ipython3 -selector.results_.keys() -``` - -```{code-cell} ipython3 -selector.results_ -``` + + We see that `Salary` is missing for 59 players. The +`dropna()` method of data frames removes all of the rows that have missing +values in any variable (by default --- see `Hitters.dropna?`). ```{code-cell} ipython3 -selector.selected_state_ +Hitters = Hitters.dropna() +Hitters.shape ``` -### Enforcing constraints - -In models with interactions, we may often want to impose constraints on interactions and main effects. -This can be achieved here by use of a `validator` that checks whether a given model is valid. - -Suppose we want to have the following constraint: `ShelveLoc` may not be in the model unless -`Price` is in the following model. +We first choose the best model using forward selection based on AIC. This score +is not built in as a metric to `sklearn`. We therefore define a function to compute it ourselves, and use +it as a scorer. 
By default, `sklearn` tries to maximize a score, hence + our scoring function computes the negative AIC statistic. ```{code-cell} ipython3 +def negAIC(estimator, X, Y): + "Negative AIC" + n, p = X.shape + Yhat = estimator.predict(X) + MSE = np.mean((Y - Yhat)**2) + return n + n * np.log(MSE) + 2 * (p + 1) + ``` We need to estimate the residual variance $\sigma^2$, which enters the AIC criterion through the MSE computed by our scoring function above. +We will fit the biggest model, using all the variables, and estimate $\sigma^2$ based on its MSE. ```{code-cell} ipython3 +design = ModelSpec(Hitters.columns.drop('Salary')).fit(Hitters) +Y = np.array(Hitters['Salary']) +X = design.transform(Hitters) ``` Along with a score we need to specify the search strategy. This is done through the object +`Stepwise()` in the `ISLP.models` package. The method `Stepwise.first_peak()` +runs forward stepwise until any further additions to the model do not result +in an improvement in the evaluation score. Similarly, the method `Stepwise.fixed_steps()` +runs a fixed number of steps of stepwise search. 
```{code-cell} ipython3 -strategy = Stepwise.first_peak(design, direction='forward', - min_terms=0, - max_terms=19) -selector = FeatureSelector(LinearRegression(fit_intercept=False), - strategy, - scoring='neg_mean_squared_error', cv=None) -selector.fit(X, Y) -selector.results_.keys() + max_terms=len(design.terms)) ``` -```{code-cell} ipython3 -len(selector.selected_state_) -``` + +We now fit a linear regression model with `Salary` as outcome using forward +selection. To do so, we use the function `sklearn_selected()` from the `ISLP.models` package. This takes +a model from `statsmodels` along with a search strategy and selects a model with its +`fit` method. Without specifying a `scoring` argument, the score defaults to MSE, and so all 19 variables will be +selected. ```{code-cell} ipython3 -len(X.columns) +hitters_MSE = sklearn_selected(OLS, + strategy) +hitters_MSE.fit(Hitters, Y) +hitters_MSE.selected_state_ ``` -```{code-cell} ipython3 -%%R -i Hitters -step(lm(Salary ~ 1, data=Hitters), scope=list(upper=lm(Salary ~ ., data=Hitters)), direction='forward', trace=TRUE) -``` + Using `negAIC` results in a smaller model, as expected, with just 4 variables selected. ```{code-cell} ipython3 - -``` - -```{code-cell} ipython3 - -``` - -```{code-cell} ipython3 - +hitters_Cp = sklearn_selected(OLS, + strategy, + scoring=negAIC) +hitters_Cp.fit(Hitters, Y) +hitters_Cp.selected_state_ ``` diff --git a/docs/jupyterbook/models/spec.ipynb b/docs/jupyterbook/models/spec.ipynb index b60e402..fce6b32 100644 --- a/docs/jupyterbook/models/spec.ipynb +++ b/docs/jupyterbook/models/spec.ipynb @@ -7,7 +7,14 @@ "source": [ "# Building design matrices with `ModelSpec`\n", "\n", - "Force rebuild" + "The `ISLP` package provides a facility to build design\n", + "matrices for regression and classification tasks. 
It provides similar functionality to the formula\n", + "notation of `R` though uses python objects rather than specification through the special formula syntax.\n", + "\n", + "Related tools include `patsy` and `ColumnTransformer` from `sklearn.compose`. \n", + "\n", + "Perhaps the most common use is to extract some columns from a `pd.DataFrame` and \n", + "produce a design matrix, optionally with an intercept." ] }, { @@ -17,12 +24,15 @@ "metadata": {}, "outputs": [], "source": [ - "x=4\n", - "import numpy as np, pandas as pd\n", - "%load_ext rpy2.ipython\n", + "import pandas as pd\n", + "import numpy as np\n", "\n", "from ISLP import load_data\n", - "from ISLP.models import ModelSpec\n", + "from ISLP.models import (ModelSpec,\n", + " summarize,\n", + " Column,\n", + " Feature,\n", + " build_columns)\n", "\n", "import statsmodels.api as sm" ] @@ -48,40 +58,42 @@ ], "source": [ "Carseats = load_data('Carseats')\n", - "%R -i Carseats\n", "Carseats.columns" ] }, { "cell_type": "markdown", - "id": "excellent-hamilton", + "id": "b7a2e6ab-491d-4a57-8184-a9fcccb2047b", "metadata": {}, "source": [ - "## Let's break up income into groups" + "We'll first build a design matrix that we can use to model `Sales`\n", + "in terms of the categorical variable `ShelveLoc` and `Price`.\n", + "\n", + "We see first that `ShelveLoc` is a categorical variable:" ] }, { "cell_type": "code", "execution_count": 3, - "id": "going-administrator", + "id": "7d3642a6-90c6-48ad-8d35-88231b4991f8", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "0 M\n", - "1 L\n", - "2 L\n", - "3 H\n", - "4 M\n", - " ..\n", - "395 H\n", - "396 L\n", - "397 L\n", - "398 M\n", - "399 L\n", - "Name: OIncome, Length: 400, dtype: category\n", - "Categories (3, object): ['L' < 'M' < 'H']" + "0 Bad\n", + "1 Good\n", + "2 Medium\n", + "3 Medium\n", + "4 Bad\n", + " ... 
\n", + "395 Good\n", + "396 Medium\n", + "397 Medium\n", + "398 Bad\n", + "399 Good\n", + "Name: ShelveLoc, Length: 400, dtype: category\n", + "Categories (3, object): ['Bad', 'Good', 'Medium']" ] }, "execution_count": 3, @@ -90,42 +102,142 @@ } ], "source": [ - "Carseats['OIncome'] = pd.cut(Carseats['Income'], \n", - " [0,50,90,200], \n", - " labels=['L','M','H'])\n", - "Carseats['OIncome']" + "Carseats['ShelveLoc']" ] }, { "cell_type": "markdown", - "id": "warming-mobile", + "id": "4afa201d-4b19-4d85-9e1b-1392a54d027b", "metadata": {}, "source": [ - "Let's also create an unordered version" + "This is recognized by `ModelSpec` and only 2 columns are added for the three levels. The\n", + "default behavior is to drop the first level of the categories. Later, \n", + "we will show other contrasts of the 3 columns can be produced. \n", + "\n", + "This simple example below illustrates how the first argument (its `terms`) is\n", + "used to construct a design matrix." ] }, { "cell_type": "code", "execution_count": 4, - "id": "varying-fourth", + "id": "fd5528fe-11da-4e10-8996-06085896c1a0", "metadata": {}, "outputs": [ { "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
interceptShelveLoc[Good]ShelveLoc[Medium]Price
01.00.00.0120
11.01.00.083
21.00.01.080
31.00.01.097
41.00.00.0128
51.00.00.072
61.00.01.0108
71.01.00.0120
81.00.01.0124
91.00.01.0124
\n", + "
" + ], "text/plain": [ - "0 M\n", - "1 L\n", - "2 L\n", - "3 H\n", - "4 M\n", - " ..\n", - "395 H\n", - "396 L\n", - "397 L\n", - "398 M\n", - "399 L\n", - "Name: UIncome, Length: 400, dtype: category\n", - "Categories (3, object): ['L', 'M', 'H']" + " intercept ShelveLoc[Good] ShelveLoc[Medium] Price\n", + "0 1.0 0.0 0.0 120\n", + "1 1.0 1.0 0.0 83\n", + "2 1.0 0.0 1.0 80\n", + "3 1.0 0.0 1.0 97\n", + "4 1.0 0.0 0.0 128\n", + "5 1.0 0.0 0.0 72\n", + "6 1.0 0.0 1.0 108\n", + "7 1.0 1.0 0.0 120\n", + "8 1.0 0.0 1.0 124\n", + "9 1.0 0.0 1.0 124" ] }, "execution_count": 4, @@ -134,31 +246,129 @@ } ], "source": [ - "Carseats['UIncome'] = pd.cut(Carseats['Income'], \n", - " [0,50,90,200], \n", - " labels=['L','M','H'],\n", - " ordered=False)\n", - "Carseats['UIncome']" + "MS = ModelSpec(['ShelveLoc', 'Price'])\n", + "X = MS.fit_transform(Carseats)\n", + "X.iloc[:10]" ] }, { "cell_type": "markdown", - "id": "utility-viking", + "id": "6948e1ef-3685-4840-a4f2-ef15a1bcfb69", "metadata": {}, "source": [ - "## A simple model" + "We note that a column has been added for the intercept by default. This can be changed using the\n", + "`intercept` argument." ] }, { "cell_type": "code", "execution_count": 5, - "id": "unlikely-begin", + "id": "682d4c81-eba9-467d-a176-911a0269a21d", "metadata": {}, "outputs": [ { "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ShelveLoc[Good]ShelveLoc[Medium]Price
00.00.0120
11.00.083
20.01.080
30.01.097
40.00.0128
50.00.072
60.01.0108
71.00.0120
80.01.0124
90.01.0124
\n", + "
" + ], "text/plain": [ - "Index(['intercept', 'Price', 'Income'], dtype='object')" + " ShelveLoc[Good] ShelveLoc[Medium] Price\n", + "0 0.0 0.0 120\n", + "1 1.0 0.0 83\n", + "2 0.0 1.0 80\n", + "3 0.0 1.0 97\n", + "4 0.0 0.0 128\n", + "5 0.0 0.0 72\n", + "6 0.0 1.0 108\n", + "7 1.0 0.0 120\n", + "8 0.0 1.0 124\n", + "9 0.0 1.0 124" ] }, "execution_count": 5, @@ -167,24 +377,143 @@ } ], "source": [ - "design = ModelSpec(['Price', 'Income'])\n", - "X = design.fit_transform(Carseats)\n", - "X.columns" + "MS_no1 = ModelSpec(['ShelveLoc', 'Price'], intercept=False)\n", + "MS_no1.fit_transform(Carseats)[:10]" + ] + }, + { + "cell_type": "markdown", + "id": "54d8fd20-d8f5-44d6-9965-83e745680798", + "metadata": {}, + "source": [ + "We see that `ShelveLoc` still only contributes\n", + "two columns to the design. The `ModelSpec` object does no introspection of its arguments to effectively include an intercept term\n", + "in the column space of the design matrix.\n", + "\n", + "To include this intercept via `ShelveLoc` we can use 3 columns to encode this categorical variable. Following the nomenclature of\n", + "`R`, we call this a `Contrast` of the categorical variable." ] }, { "cell_type": "code", "execution_count": 6, - "id": "driven-employee", + "id": "555734bb-2682-4721-a1cd-6fb207394b0e", "metadata": {}, "outputs": [ { "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ShelveLoc[Bad]ShelveLoc[Good]ShelveLoc[Medium]Price
01.00.00.0120
10.01.00.083
20.00.01.080
30.00.01.097
41.00.00.0128
51.00.00.072
60.00.01.0108
70.01.00.0120
80.00.01.0124
90.00.01.0124
\n", + "
" + ], "text/plain": [ - "intercept 12.661546\n", - "Price -0.052213\n", - "Income 0.012829\n", - "dtype: float64" + " ShelveLoc[Bad] ShelveLoc[Good] ShelveLoc[Medium] Price\n", + "0 1.0 0.0 0.0 120\n", + "1 0.0 1.0 0.0 83\n", + "2 0.0 0.0 1.0 80\n", + "3 0.0 0.0 1.0 97\n", + "4 1.0 0.0 0.0 128\n", + "5 1.0 0.0 0.0 72\n", + "6 0.0 0.0 1.0 108\n", + "7 0.0 1.0 0.0 120\n", + "8 0.0 0.0 1.0 124\n", + "9 0.0 0.0 1.0 124" ] }, "execution_count": 6, @@ -193,45 +522,32 @@ } ], "source": [ - "Y = Carseats['Sales']\n", - "M = sm.OLS(Y, X).fit()\n", - "M.params" + "from ISLP.models import contrast\n", + "shelve = contrast('ShelveLoc', None)\n", + "MS_contr = ModelSpec([shelve, 'Price'], intercept=False)\n", + "MS_contr.fit_transform(Carseats)[:10]" ] }, { "cell_type": "markdown", - "id": "secondary-winner", + "id": "66db03cf-489c-40b6-8fac-762d66cf9932", "metadata": {}, "source": [ - "## Basic procedure\n", - "\n", - "The design matrix is built by cobbling together a set of columns and possibly transforming them.\n", - "A `pd.DataFrame` is essentially a list of columns. One of the first tasks done in `ModelSpec.fit`\n", - "is to inspect a dataframe for column info. The column `ShelveLoc` is categorical:" + "This example above illustrates that columns need not be identified by name in `terms`. The basic\n", + "role of an item in the `terms` sequence is a description of how to extract a column\n", + "from a columnar data object, usually a `pd.DataFrame`." ] }, { "cell_type": "code", "execution_count": 7, - "id": "bored-making", + "id": "852ee40e-05d2-4785-ab7d-968fb087f3c0", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "0 Bad\n", - "1 Good\n", - "2 Medium\n", - "3 Medium\n", - "4 Bad\n", - " ... 
\n", - "395 Good\n", - "396 Medium\n", - "397 Medium\n", - "398 Bad\n", - "399 Good\n", - "Name: ShelveLoc, Length: 400, dtype: category\n", - "Categories (3, object): ['Bad', 'Good', 'Medium']" + "Column(idx='ShelveLoc', name='ShelveLoc', is_categorical=True, is_ordinal=False, columns=(), encoder=Contrast(method=None))" ] }, "execution_count": 7, @@ -240,28 +556,36 @@ } ], "source": [ - "Carseats['ShelveLoc']" + "shelve" ] }, { "cell_type": "markdown", - "id": "phantom-assurance", + "id": "b3be8808-1dbf-4154-882b-f61656a2ed4e", "metadata": {}, "source": [ - "This is recognized by `ModelSpec` in the form of `Column` objects which are just named tuples with two methods\n", - "`get_columns` and `fit_encoder`." + "The `Column` object can be used to directly extract relevant columns from a `pd.DataFrame`. If the `encoder` field is not\n", + "`None`, then the extracted columns will be passed through `encoder`.\n", + "The `get_columns` method produces these columns as well as names for the columns." ] }, { "cell_type": "code", "execution_count": 8, - "id": "blind-harvest", + "id": "0ebadfc0-0ea2-4abc-aac6-ef78be227ce1", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "Column(idx='ShelveLoc', name='ShelveLoc', is_categorical=True, is_ordinal=False, columns=('ShelveLoc[Good]', 'ShelveLoc[Medium]'), encoder=Contrast())" + "(array([[1., 0., 0.],\n", + " [0., 1., 0.],\n", + " [0., 0., 1.],\n", + " ...,\n", + " [0., 0., 1.],\n", + " [1., 0., 0.],\n", + " [0., 1., 0.]]),\n", + " ['ShelveLoc[Bad]', 'ShelveLoc[Good]', 'ShelveLoc[Medium]'])" ] }, "execution_count": 8, @@ -270,27 +594,89 @@ } ], "source": [ - "design.column_info_['ShelveLoc']" + "shelve.get_columns(Carseats)" ] }, { "cell_type": "markdown", - "id": "suspended-affairs", + "id": "269e6d18-4ae4-4a77-8498-90281ae7c803", "metadata": {}, "source": [ - "It recognized ordinal columns as well." + "Let's now fit a simple OLS model with this design." 
] }, { "cell_type": "code", "execution_count": 9, - "id": "military-locking", + "id": "411238d0-dd36-4878-a869-e8ce0ada099c", "metadata": {}, "outputs": [ { "data": { - "text/plain": [ - "Column(idx='OIncome', name='OIncome', is_categorical=True, is_ordinal=True, columns=('OIncome',), encoder=OrdinalEncoder())" + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
coefstd errtP>|t|
ShelveLoc[Bad]12.00180.50323.8390.0
ShelveLoc[Good]16.89760.52232.3860.0
ShelveLoc[Medium]13.86380.48728.4670.0
Price-0.05670.004-13.9670.0
\n", + "
" + ], + "text/plain": [ + " coef std err t P>|t|\n", + "ShelveLoc[Bad] 12.0018 0.503 23.839 0.0\n", + "ShelveLoc[Good] 16.8976 0.522 32.386 0.0\n", + "ShelveLoc[Medium] 13.8638 0.487 28.467 0.0\n", + "Price -0.0567 0.004 -13.967 0.0" ] }, "execution_count": 9, @@ -299,19 +685,166 @@ } ], "source": [ - "design.column_info_['OIncome']" + "X = MS_contr.transform(Carseats)\n", + "Y = Carseats['Sales']\n", + "M_ols = sm.OLS(Y, X).fit()\n", + "summarize(M_ols)" + ] + }, + { + "cell_type": "markdown", + "id": "40ddf68e-7d58-4e30-93a8-5b7fe840d37a", + "metadata": {}, + "source": [ + "## Interactions\n", + "\n", + "One of the common uses of formulae in `R` is to specify interactions between variables.\n", + "This is done in `ModelSpec` by including a tuple in the `terms` argument." ] }, { "cell_type": "code", "execution_count": 10, - "id": "italic-shakespeare", + "id": "3f5e314c-7a7f-4e8d-bb07-295beb42c728", "metadata": {}, "outputs": [ { "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
interceptShelveLoc[Bad]:PriceShelveLoc[Good]:PriceShelveLoc[Medium]:PricePrice
01.0120.00.00.0120
11.00.083.00.083
21.00.00.080.080
31.00.00.097.097
41.0128.00.00.0128
51.072.00.00.072
61.00.00.0108.0108
71.00.0120.00.0120
81.00.00.0124.0124
91.00.00.0124.0124
\n", + "
" + ], "text/plain": [ - "(array([ 73, 48, 35, 100]), ('Income',))" + " intercept ShelveLoc[Bad]:Price ShelveLoc[Good]:Price \\\n", + "0 1.0 120.0 0.0 \n", + "1 1.0 0.0 83.0 \n", + "2 1.0 0.0 0.0 \n", + "3 1.0 0.0 0.0 \n", + "4 1.0 128.0 0.0 \n", + "5 1.0 72.0 0.0 \n", + "6 1.0 0.0 0.0 \n", + "7 1.0 0.0 120.0 \n", + "8 1.0 0.0 0.0 \n", + "9 1.0 0.0 0.0 \n", + "\n", + " ShelveLoc[Medium]:Price Price \n", + "0 0.0 120 \n", + "1 0.0 83 \n", + "2 80.0 80 \n", + "3 97.0 97 \n", + "4 0.0 128 \n", + "5 0.0 72 \n", + "6 108.0 108 \n", + "7 0.0 120 \n", + "8 124.0 124 \n", + "9 124.0 124 " ] }, "execution_count": 10, @@ -320,65 +853,71 @@ } ], "source": [ - "income = design.column_info_['Income']\n", - "cols, names = income.get_columns(Carseats)\n", - "(cols[:4], names)" + "ModelSpec([(shelve, 'Price'), 'Price']).fit_transform(Carseats).iloc[:10]" + ] + }, + { + "cell_type": "markdown", + "id": "3f85fcb2-f0ef-4c1b-a89f-fcf083937274", + "metadata": {}, + "source": [ + "The above design matrix is clearly rank deficient, as `ModelSpec` has not inspected the formula\n", + "and attempted to produce a corresponding matrix that may or may not match a user's intent." ] }, { "cell_type": "markdown", - "id": "medieval-speed", + "id": "excellent-hamilton", "metadata": {}, "source": [ - "## Encoding a column\n", + "## Ordinal variables\n", "\n", - "In building a design matrix we must extract columns from our dataframe (or `np.ndarray`). Categorical\n", - "variables usually are encoded by several columns, typically one less than the number of categories.\n", - "This task is handled by the `encoder` of the `Column`. The encoder must satisfy the `sklearn` transform\n", - "model, i.e. `fit` on some array and `transform` on future arrays. The `fit_encoder` method of `Column` fits\n", - "its encoder the first time data is passed to it." 
+ "Ordinal variables are handled by a corresponding encoder)" ] }, { "cell_type": "code", "execution_count": 11, - "id": "public-basket", + "id": "going-administrator", + "metadata": {}, + "outputs": [], + "source": [ + "Carseats['OIncome'] = pd.cut(Carseats['Income'], \n", + " [0,50,90,200], \n", + " labels=['L','M','H'])\n", + "MS_order = ModelSpec(['OIncome']).fit(Carseats)" + ] + }, + { + "cell_type": "markdown", + "id": "5e1defb1-071b-4751-9358-b8d2f0b3412e", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(array([[0., 0.],\n", - " [1., 0.],\n", - " [0., 1.],\n", - " [0., 1.]]),\n", - " ['ShelveLoc[Good]', 'ShelveLoc[Medium]'])" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], "source": [ - "shelve = design.column_info_['ShelveLoc']\n", - "cols, names = shelve.get_columns(Carseats)\n", - "(cols[:4], names)" + "Part of the `fit` method of `ModelSpec` involves inspection of the columns of `Carseats`. \n", + "The results of that inspection can be found in the `column_info_` attribute:" ] }, { "cell_type": "code", "execution_count": 12, - "id": "improved-alloy", + "id": "050fb4ae-648d-429d-9cb2-8423ad9707d7", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "array([[2.],\n", - " [1.],\n", - " [1.],\n", - " [0.]])" + "{'Sales': Column(idx='Sales', name='Sales', is_categorical=False, is_ordinal=False, columns=('Sales',), encoder=None),\n", + " 'CompPrice': Column(idx='CompPrice', name='CompPrice', is_categorical=False, is_ordinal=False, columns=('CompPrice',), encoder=None),\n", + " 'Income': Column(idx='Income', name='Income', is_categorical=False, is_ordinal=False, columns=('Income',), encoder=None),\n", + " 'Advertising': Column(idx='Advertising', name='Advertising', is_categorical=False, is_ordinal=False, columns=('Advertising',), encoder=None),\n", + " 'Population': Column(idx='Population', name='Population', is_categorical=False, is_ordinal=False, columns=('Population',), 
encoder=None),\n", + " 'Price': Column(idx='Price', name='Price', is_categorical=False, is_ordinal=False, columns=('Price',), encoder=None),\n", + " 'ShelveLoc': Column(idx='ShelveLoc', name='ShelveLoc', is_categorical=True, is_ordinal=False, columns=('ShelveLoc[Good]', 'ShelveLoc[Medium]'), encoder=Contrast()),\n", + " 'Age': Column(idx='Age', name='Age', is_categorical=False, is_ordinal=False, columns=('Age',), encoder=None),\n", + " 'Education': Column(idx='Education', name='Education', is_categorical=False, is_ordinal=False, columns=('Education',), encoder=None),\n", + " 'Urban': Column(idx='Urban', name='Urban', is_categorical=True, is_ordinal=False, columns=('Urban[Yes]',), encoder=Contrast()),\n", + " 'US': Column(idx='US', name='US', is_categorical=True, is_ordinal=False, columns=('US[Yes]',), encoder=Contrast()),\n", + " 'OIncome': Column(idx='OIncome', name='OIncome', is_categorical=True, is_ordinal=True, columns=('OIncome',), encoder=OrdinalEncoder())}" ] }, "execution_count": 12, @@ -387,33 +926,32 @@ } ], "source": [ - "oincome = design.column_info_['OIncome']\n", - "oincome.get_columns(Carseats)[0][:4]" + "MS_order.column_info_" ] }, { "cell_type": "markdown", - "id": "frank-mathematics", + "id": "debf7e2e-0a9d-451b-866c-66c0df9f43e5", "metadata": {}, "source": [ - "## The terms\n", + "## Structure of a `ModelSpec`\n", "\n", - "The design matrix consists of several sets of columns. This is managed by the `ModelSpec` through\n", - "the `terms` argument which should be a sequence. The elements of `terms` are often\n", - "going to be strings (or tuples of strings for interactions, see below) but are converted to a\n", - "`Variable` object and stored in the `terms_` of the fitted `ModelSpec`. A `Variable` is just a named tuple." + "The first argument to `ModelSpec` is stored as the `terms` attribute. 
Under the hood,\n", + "this sequence is inspected to produce the `terms_` attribute which specify the objects\n", + "that will ultimately create the design matrix." ] }, { "cell_type": "code", "execution_count": 13, - "id": "together-north", + "id": "ea51e988-0857-4d49-9987-d7531b34a233", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "['Price', 'Income']" + "[Feature(variables=('ShelveLoc',), name='ShelveLoc', encoder=None, use_transform=True, pure_columns=True, override_encoder_colnames=False),\n", + " Feature(variables=('Price',), name='Price', encoder=None, use_transform=True, pure_columns=True, override_encoder_colnames=False)]" ] }, "execution_count": 13, @@ -422,64 +960,145 @@ } ], "source": [ - "design.terms" + "MS = ModelSpec(['ShelveLoc', 'Price'])\n", + "MS.fit(Carseats)\n", + "MS.terms_" + ] + }, + { + "cell_type": "markdown", + "id": "warming-mobile", + "metadata": {}, + "source": [ + "Each element of `terms_` should be a `Feature` which describes a set of columns to be extracted from\n", + "a columnar data form as well as possible a possible encoder." ] }, { "cell_type": "code", "execution_count": 14, - "id": "chinese-necessity", + "id": "59214a70-1e6b-41c4-9f44-a92d340723c9", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[Variable(variables=('Price',), name='Price', encoder=None, use_transform=True, pure_columns=True, override_encoder_colnames=False),\n", - " Variable(variables=('Income',), name='Income', encoder=None, use_transform=True, pure_columns=True, override_encoder_colnames=False)]" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "design.terms_" + "shelve_var = MS.terms_[0]" ] }, { "cell_type": "markdown", - "id": "simplified-chinese", + "id": "5fed3ea2-ff50-4e5d-819d-a948f121f9d3", "metadata": {}, "source": [ - "While each `Column` can itself extract data, they are all promoted to `Variable` to be of a uniform type. 
A\n", - "`Variable` can also create columns through the `build_columns` method of `ModelSpec`" + "We can find the columns associated to each term using the `build_columns` method of `ModelSpec`:" ] }, { "cell_type": "code", "execution_count": 15, - "id": "automotive-hobby", + "id": "5e25ef64-497d-4f42-9f20-3d4a320cda23", "metadata": {}, "outputs": [ { "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ShelveLoc[Good]ShelveLoc[Medium]
00.00.0
11.00.0
20.01.0
30.01.0
40.00.0
.........
3951.00.0
3960.01.0
3970.01.0
3980.00.0
3991.00.0
\n", + "

400 rows × 2 columns

\n", + "
" + ], "text/plain": [ - "( Price\n", - " 0 120\n", - " 1 83\n", - " 2 80\n", - " 3 97\n", - " 4 128\n", - " .. ...\n", - " 395 128\n", - " 396 120\n", - " 397 159\n", - " 398 95\n", - " 399 120\n", - " \n", - " [400 rows x 1 columns],\n", - " ['Price'])" + " ShelveLoc[Good] ShelveLoc[Medium]\n", + "0 0.0 0.0\n", + "1 1.0 0.0\n", + "2 0.0 1.0\n", + "3 0.0 1.0\n", + "4 0.0 0.0\n", + ".. ... ...\n", + "395 1.0 0.0\n", + "396 0.0 1.0\n", + "397 0.0 1.0\n", + "398 0.0 0.0\n", + "399 1.0 0.0\n", + "\n", + "[400 rows x 2 columns]" ] }, "execution_count": 15, @@ -488,280 +1107,37 @@ } ], "source": [ - "price = design.terms_[0]\n", - "design.build_columns(Carseats, price)" + "df, names = build_columns(MS.column_info_,\n", + " Carseats, \n", + " shelve_var)\n", + "df" ] }, { "cell_type": "markdown", - "id": "former-spring", - "metadata": {}, - "source": [ - "Note that `Variable` objects have a tuple of `variables` as well as an `encoder` attribute. The\n", - "tuple of `variables` first creates a concatenated dataframe from all corresponding variables and then\n", - "is run through `encoder.transform`. The `encoder.fit` method of each `Variable` is run once during \n", - "the call to `ModelSpec.fit`." - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "floral-liabilities", + "id": "63edf7a2-e776-45b0-b434-d676d7e13dbd", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "( Price Income UIncome[L] UIncome[M]\n", - " 0 120.0 73.0 0.0 1.0\n", - " 1 83.0 48.0 1.0 0.0\n", - " 2 80.0 35.0 1.0 0.0\n", - " 3 97.0 100.0 0.0 0.0\n", - " 4 128.0 64.0 0.0 1.0\n", - " .. ... ... ... 
...\n", - " 395 128.0 108.0 0.0 0.0\n", - " 396 120.0 23.0 1.0 0.0\n", - " 397 159.0 26.0 1.0 0.0\n", - " 398 95.0 79.0 0.0 1.0\n", - " 399 120.0 37.0 1.0 0.0\n", - " \n", - " [400 rows x 4 columns],\n", - " ['Price', 'Income', 'UIncome[L]', 'UIncome[M]'])" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], "source": [ - "from ISLP.models.model_spec import Variable\n", - "\n", - "new_var = Variable(('Price', 'Income', 'UIncome'), name='mynewvar', encoder=None)\n", - "design.build_columns(Carseats, new_var)" + "The design matrix is constructed by running through `terms_` and concatenating the corresponding columns." ] }, { "cell_type": "markdown", - "id": "reasonable-canadian", + "id": "former-spring", "metadata": {}, "source": [ - "Let's now transform these columns with an encoder. Within `ModelSpec` we will first build the\n", - "arrays above and then call `pca.fit` and finally `pca.transform` within `design.build_columns`." + "### `Feature` objects\n", + "\n", + "Note that `Feature` objects have a tuple of `variables` as well as an `encoder` attribute. The\n", + "tuple of `variables` first creates a concatenated dataframe from all corresponding variables and then\n", + "is run through `encoder.transform`. The `encoder.fit` method of each `Feature` is run once during \n", + "the call to `ModelSpec.fit`." ] }, { "cell_type": "code", - "execution_count": 17, - "id": "imported-measure", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but PCA was fitted with feature names\n", - " warnings.warn(\n" - ] - }, - { - "data": { - "text/plain": [ - "( mynewvar[0] mynewvar[1]\n", - " 0 -3.608693 -4.853177\n", - " 1 15.081506 35.708630\n", - " 2 27.422871 40.774250\n", - " 3 -33.973209 13.470489\n", - " 4 6.567316 -11.290100\n", - " .. ... 
...\n", - " 395 -36.846346 -18.415783\n", - " 396 45.741500 3.245602\n", - " 397 49.097533 -35.725355\n", - " 398 -13.577772 18.845139\n", - " 399 31.927566 0.978436\n", - " \n", - " [400 rows x 2 columns],\n", - " ['mynewvar[0]', 'mynewvar[1]'])" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from sklearn.decomposition import PCA\n", - "pca = PCA(n_components=2)\n", - "pca.fit(design.build_columns(Carseats, new_var)[0]) # this is done within `ModelSpec.fit`\n", - "pca_var = Variable(('Price', 'Income', 'UIncome'), name='mynewvar', encoder=pca)\n", - "design.build_columns(Carseats, pca_var)" - ] - }, - { - "cell_type": "markdown", - "id": "institutional-burden", - "metadata": {}, - "source": [ - "The elements of the `variables` attribute may be column identifiers ( `\"Price\"`), `Column` instances (`price`)\n", - "or `Variable` instances (`pca_var`)." - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "id": "western-bloom", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but PCA was fitted with feature names\n", - " warnings.warn(\n" - ] - }, - { - "data": { - "text/plain": [ - "( Price Price mynewvar[0] mynewvar[1]\n", - " 0 120.0 120.0 -3.608693 -4.853177\n", - " 1 83.0 83.0 15.081506 35.708630\n", - " 2 80.0 80.0 27.422871 40.774250\n", - " 3 97.0 97.0 -33.973209 13.470489\n", - " 4 128.0 128.0 6.567316 -11.290100\n", - " .. ... ... ... 
...\n", - " 395 128.0 128.0 -36.846346 -18.415783\n", - " 396 120.0 120.0 45.741500 3.245602\n", - " 397 159.0 159.0 49.097533 -35.725355\n", - " 398 95.0 95.0 -13.577772 18.845139\n", - " 399 120.0 120.0 31.927566 0.978436\n", - " \n", - " [400 rows x 4 columns],\n", - " ['Price', 'Price', 'mynewvar[0]', 'mynewvar[1]'])" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "fancy_var = Variable(('Price', price, pca_var), name='fancy', encoder=None)\n", - "design.build_columns(Carseats, fancy_var)" - ] - }, - { - "cell_type": "markdown", - "id": "ordinary-newman", - "metadata": {}, - "source": [ - "We can of course run PCA again on these features (if we wanted)." - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "modern-negotiation", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but PCA was fitted with feature names\n", - " warnings.warn(\n", - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but PCA was fitted with feature names\n", - " warnings.warn(\n", - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but PCA was fitted with feature names\n", - " warnings.warn(\n" - ] - }, - { - "data": { - "text/plain": [ - "( fancy_pca[0] fancy_pca[1]\n", - " 0 -6.951792 4.859283\n", - " 1 55.170148 -24.694875\n", - " 2 59.418556 -38.033572\n", - " 3 34.722389 28.922184\n", - " 4 -21.419184 -3.120673\n", - " .. ... 
...\n", - " 395 -18.257348 40.760122\n", - " 396 -10.546709 -45.021658\n", - " 397 -77.706359 -37.174379\n", - " 398 36.668694 7.730851\n", - " 399 -9.540535 -31.059122\n", - " \n", - " [400 rows x 2 columns],\n", - " ['fancy_pca[0]', 'fancy_pca[1]'])" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pca2 = PCA(n_components=2)\n", - "pca2.fit(design.build_columns(Carseats, fancy_var)[0]) # this is done within `ModelSpec.fit`\n", - "pca2_var = Variable(('Price', price, pca_var), name='fancy_pca', encoder=pca2)\n", - "design.build_columns(Carseats, pca2_var)" - ] - }, - { - "cell_type": "markdown", - "id": "private-shepherd", - "metadata": {}, - "source": [ - "## Building the design matrix\n", - "\n", - "With these notions in mind, the final design is essentially then" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "hollywood-union", - "metadata": {}, - "outputs": [], - "source": [ - "X_hand = np.column_stack([design.build_columns(Carseats, v)[0] for v in design.terms_])[:4]" - ] - }, - { - "cell_type": "markdown", - "id": "suffering-lover", - "metadata": {}, - "source": [ - "An intercept column is added if `design.intercept` is `True` and if the original argument to `transform` is\n", - "a dataframe the index is adjusted accordingly." 
- ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": "successful-express", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design.intercept" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "id": "banner-metadata", + "execution_count": 18, + "id": "floral-liabilities", "metadata": {}, "outputs": [ { @@ -785,1227 +1161,643 @@ " \n", " \n", " \n", - " intercept\n", " Price\n", " Income\n", + " OIncome\n", " \n", " \n", " \n", " \n", " 0\n", - " 1.0\n", - " 120\n", - " 73\n", + " 120.0\n", + " 73.0\n", + " 2.0\n", " \n", " \n", " 1\n", + " 83.0\n", + " 48.0\n", " 1.0\n", - " 83\n", - " 48\n", " \n", " \n", " 2\n", + " 80.0\n", + " 35.0\n", " 1.0\n", - " 80\n", - " 35\n", " \n", " \n", " 3\n", + " 97.0\n", + " 100.0\n", + " 0.0\n", + " \n", + " \n", + " 4\n", + " 128.0\n", + " 64.0\n", + " 2.0\n", + " \n", + " \n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " \n", + " \n", + " 395\n", + " 128.0\n", + " 108.0\n", + " 0.0\n", + " \n", + " \n", + " 396\n", + " 120.0\n", + " 23.0\n", + " 1.0\n", + " \n", + " \n", + " 397\n", + " 159.0\n", + " 26.0\n", + " 1.0\n", + " \n", + " \n", + " 398\n", + " 95.0\n", + " 79.0\n", + " 2.0\n", + " \n", + " \n", + " 399\n", + " 120.0\n", + " 37.0\n", " 1.0\n", - " 97\n", - " 100\n", " \n", " \n", "\n", + "

400 rows × 3 columns

\n", "" ], "text/plain": [ - " intercept Price Income\n", - "0 1.0 120 73\n", - "1 1.0 83 48\n", - "2 1.0 80 35\n", - "3 1.0 97 100" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design.transform(Carseats)[:4]" - ] - }, - { - "cell_type": "markdown", - "id": "absent-branch", - "metadata": {}, - "source": [ - "## Predicting\n", - "\n", - "Constructing the design matrix at any values is carried out by the `transform` method." - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "id": "naked-hollywood", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([12.65257604, 12.25873428])" + " Price Income OIncome\n", + "0 120.0 73.0 2.0\n", + "1 83.0 48.0 1.0\n", + "2 80.0 35.0 1.0\n", + "3 97.0 100.0 0.0\n", + "4 128.0 64.0 2.0\n", + ".. ... ... ...\n", + "395 128.0 108.0 0.0\n", + "396 120.0 23.0 1.0\n", + "397 159.0 26.0 1.0\n", + "398 95.0 79.0 2.0\n", + "399 120.0 37.0 1.0\n", + "\n", + "[400 rows x 3 columns]" ] }, - "execution_count": 23, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "new_data = pd.DataFrame({'Price':[10,20], 'Income':[40, 50]})\n", - "new_X = design.transform(new_data)\n", - "M.get_prediction(new_X).predicted_mean" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "id": "iraqi-divorce", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " 0 1 \n", - "12.65258 12.25873 \n" - ] - } - ], - "source": [ - "%%R -i new_data,Carseats\n", - "predict(lm(Sales ~ Price + Income, data=Carseats), new_data)" + "new_var = Feature(('Price', 'Income', 'OIncome'), name='mynewvar', encoder=None)\n", + "build_columns(MS.column_info_,\n", + " Carseats, \n", + " new_var)[0]" ] }, { "cell_type": "markdown", - "id": "signal-yahoo", + "id": "reasonable-canadian", "metadata": {}, "source": [ - "### Difference between using `pd.DataFrame` and `np.ndarray`\n", - "\n", - "If 
the `terms` only refer to a few columns of the data frame, the `transform` method only needs a dataframe with those columns.\n", - "\n", - "If we had used an `np.ndarray`, the column identifiers would be integers identifying specific columns so,\n", - "in order to work correctly, `transform` would need another `np.ndarray` where the columns have the same meaning." + "Let's now transform these columns with an encoder. Within `ModelSpec` we will first build the\n", + "arrays above and then call `pca.fit` and finally `pca.transform` within `design.build_columns`." ] }, { "cell_type": "code", - "execution_count": 25, - "id": "completed-surveillance", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[1.0, 120, 73],\n", - " [1.0, 83, 48],\n", - " [1.0, 80, 35],\n", - " [1.0, 97, 100]], dtype=object)" - ] - }, - "execution_count": 25, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Carseats_np = np.asarray(Carseats[['Price', 'ShelveLoc', 'US', 'Income']])\n", - "design_np = ModelSpec([0,3]).fit(Carseats_np)\n", - "design_np.transform(Carseats_np)[:4]" - ] - }, - { - "cell_type": "markdown", - "id": "undefined-sacrifice", - "metadata": {}, - "source": [ - "The following will fail for hopefully obvious reasons" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "id": "incredible-concert", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "shapes (2,2) and (3,) not aligned: 2 (dim 1) != 3 (dim 0)\n" - ] - } - ], - "source": [ - "try:\n", - " new_D = np.zeros((2,2))\n", - " new_D[:,0] = [10,20]\n", - " new_D[:,1] = [40,50]\n", - " M.get_prediction(new_D).predicted_mean\n", - "except ValueError as e:\n", - " print(e)" - ] - }, - { - "cell_type": "markdown", - "id": "allied-botswana", - "metadata": {}, - "source": [ - "Ultimately, `M` expects 3 columns for new predictions because it was fit\n", - "with a matrix having 3 columns (the first representing an 
intercept).\n", - "\n", - "We might be tempted to try as with the `pd.DataFrame` and produce\n", - "an `np.ndarray` with only the necessary variables." - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "id": "stunning-container", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "index 3 is out of bounds for axis 1 with size 2\n" - ] - } - ], - "source": [ - "try:\n", - " new_X = np.zeros((2,2))\n", - " new_X[:,0] = [10,20]\n", - " new_X[:,1] = [40,50]\n", - " new_D = design_np.transform(new_X)\n", - " M.get_prediction(new_D).predicted_mean\n", - "except IndexError as e:\n", - " print(e)" - ] - }, - { - "cell_type": "markdown", - "id": "specific-tobacco", - "metadata": {}, - "source": [ - "This fails because `design_np` is looking for column `3` from its `terms`:" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "id": "latin-publisher", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[Variable(variables=(0,), name='0', encoder=None, use_transform=True, pure_columns=True, override_encoder_colnames=False),\n", - " Variable(variables=(3,), name='3', encoder=None, use_transform=True, pure_columns=True, override_encoder_colnames=False)]" - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design_np.terms_" - ] - }, - { - "cell_type": "markdown", - "id": "rocky-franchise", - "metadata": {}, - "source": [ - "However, if we have an `np.ndarray` in which the first column indeed represents `Price` and the fourth indeed\n", - "represents `Income` then we can arrive at the correct answer by supplying such the array to `design_np.transform`:" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "id": "returning-matthew", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([12.65257604, 12.25873428])" - ] - }, - "execution_count": 29, - "metadata": {}, - "output_type": "execute_result" - } - 
], - "source": [ - "new_X = np.zeros((2,4))\n", - "new_X[:,0] = [10,20]\n", - "new_X[:,3] = [40,50]\n", - "new_D = design_np.transform(new_X)\n", - "M.get_prediction(new_D).predicted_mean" - ] - }, - { - "cell_type": "markdown", - "id": "sapphire-adelaide", - "metadata": {}, - "source": [ - "Given this subtlety about needing to supply arrays with identical column structure to `transform` when\n", - "using `np.ndarray` we presume that using a `pd.DataFrame` will be the more popular use case." - ] - }, - { - "cell_type": "markdown", - "id": "standing-involvement", - "metadata": {}, - "source": [ - "## A model with some categorical variables\n", - "\n", - "Categorical variables become `Column` instances with encoders." - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "id": "taken-university", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Column(idx='UIncome', name='UIncome', is_categorical=True, is_ordinal=False, columns=('UIncome[L]', 'UIncome[M]'), encoder=Contrast())" - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design = ModelSpec(['Population', 'Price', 'UIncome', 'ShelveLoc']).fit(Carseats)\n", - "design.column_info_['UIncome']" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "id": "rural-cycling", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['intercept', 'Population', 'Price', 'UIncome[L]', 'UIncome[M]',\n", - " 'ShelveLoc[Good]', 'ShelveLoc[Medium]'],\n", - " dtype='object')" - ] - }, - "execution_count": 31, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "X = design.fit_transform(Carseats)\n", - "X.columns" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "id": "former-trick", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "intercept 11.876012\n", - "Population 0.001163\n", - "Price -0.055725\n", - "UIncome[L] -1.042297\n", - "UIncome[M] 
-0.119123\n", - "ShelveLoc[Good] 4.999623\n", - "ShelveLoc[Medium] 1.964278\n", - "dtype: float64" - ] - }, - "execution_count": 32, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "sm.OLS(Y, X).fit().params" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "id": "specialized-processing", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " (Intercept) Population Price UIncomeM UIncomeH \n", - " 10.83371503 0.00116301 -0.05572469 0.92317388 1.04229679 \n", - " ShelveLocGood ShelveLocMedium \n", - " 4.99962319 1.96427771 \n" - ] - } - ], - "source": [ - "%%R\n", - "lm(Sales ~ Population + Price + UIncome + ShelveLoc, data=Carseats)$coef" - ] - }, - { - "cell_type": "markdown", - "id": "verified-administrator", - "metadata": {}, - "source": [ - "## Getting the encoding you want\n", - "\n", - "By default the level dropped by `ModelSpec` will be the first of the `categories_` values from \n", - "`sklearn.preprocessing.OneHotEncoder()`. We might wish to change this. It seems\n", - "as if the correct way to do this would be something like `Variable(('UIncome',), 'mynewencoding', new_encoder)`\n", - "where `new_encoder` would somehow drop the column we want dropped. 
\n", - "\n", - "However, when using the convenient identifier `UIncome` in the `variables` argument, this maps to the `Column` associated to `UIncome` within `design.column_info_`:" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "id": "limited-johns", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Column(idx='UIncome', name='UIncome', is_categorical=True, is_ordinal=False, columns=('UIncome[L]', 'UIncome[M]'), encoder=Contrast())" - ] - }, - "execution_count": 34, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design.column_info_['UIncome']" - ] - }, - { - "cell_type": "markdown", - "id": "saving-remainder", - "metadata": {}, - "source": [ - "This column already has an encoder and `Column` instances are immutable as named tuples. Further, there are times when \n", - "we may want to encode `UIncome` differently within the same model. In the model below the main effect of `UIncome` is encoded with two columns while in the interaction `UIncome` (see below) has three columns. 
This is a design of interest\n", - "and we need a way to allow different encodings of the same column of `Carseats`" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "id": "satisfied-harbor", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Call:\n", - "lm(formula = Sales ~ UIncome:ShelveLoc + UIncome, data = Carseats)\n", - "\n", - "Coefficients:\n", - " (Intercept) UIncomeM UIncomeH \n", - " 5.1317 0.1151 1.1561 \n", - " UIncomeL:ShelveLocGood UIncomeM:ShelveLocGood UIncomeH:ShelveLocGood \n", - " 4.5121 5.5752 3.7381 \n", - "UIncomeL:ShelveLocMedium UIncomeM:ShelveLocMedium UIncomeH:ShelveLocMedium \n", - " 1.2473 2.4782 1.5141 \n", - "\n" - ] - } - ], - "source": [ - "%%R\n", - "lm(Sales ~ UIncome:ShelveLoc + UIncome, data=Carseats)" - ] - }, - { - "cell_type": "markdown", - "id": "silver-wesley", - "metadata": {}, - "source": [ - " We can create a new \n", - "`Column` with the encoder we want. For categorical variables, there is a convenience function to do so." - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "id": "crazy-bikini", - "metadata": {}, - "outputs": [], - "source": [ - "from ISLP.models.model_spec import contrast\n", - "pref_encoding = contrast('UIncome', 'drop', 'L')" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "id": "accredited-barrier", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "( UIncome[M] UIncome[H]\n", - " 0 1.0 0.0\n", - " 1 0.0 0.0\n", - " 2 0.0 0.0\n", - " 3 0.0 1.0\n", - " 4 1.0 0.0\n", - " .. ... 
...\n", - " 395 0.0 1.0\n", - " 396 0.0 0.0\n", - " 397 0.0 0.0\n", - " 398 1.0 0.0\n", - " 399 0.0 0.0\n", - " \n", - " [400 rows x 2 columns],\n", - " ['UIncome[M]', 'UIncome[H]'])" - ] - }, - "execution_count": 37, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design.build_columns(Carseats, pref_encoding)" - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "id": "smaller-execution", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['intercept', 'Population', 'Price', 'UIncome[M]', 'UIncome[H]',\n", - " 'ShelveLoc[Good]', 'ShelveLoc[Medium]'],\n", - " dtype='object')" - ] - }, - "execution_count": 38, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design = ModelSpec(['Population', 'Price', pref_encoding, 'ShelveLoc']).fit(Carseats)\n", - "X = design.fit_transform(Carseats)\n", - "X.columns" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "id": "limited-center", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "intercept 10.833715\n", - "Population 0.001163\n", - "Price -0.055725\n", - "UIncome[M] 0.923174\n", - "UIncome[H] 1.042297\n", - "ShelveLoc[Good] 4.999623\n", - "ShelveLoc[Medium] 1.964278\n", - "dtype: float64" - ] - }, - "execution_count": 39, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "sm.OLS(Y, X).fit().params" - ] - }, - { - "cell_type": "code", - "execution_count": 40, - "id": "combined-relaxation", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " (Intercept) Population Price UIncomeM UIncomeH \n", - " 10.83371503 0.00116301 -0.05572469 0.92317388 1.04229679 \n", - " ShelveLocGood ShelveLocMedium \n", - " 4.99962319 1.96427771 \n" - ] - } - ], - "source": [ - "%%R\n", - "lm(Sales ~ Population + Price + UIncome + ShelveLoc, data=Carseats)$coef" - ] - }, - { - "cell_type": "markdown", - "id": "focal-determination", - "metadata": {}, - 
"source": [ - "## Interactions\n", - "\n", - "We've referred to interactions above. These are specified (by convenience) as tuples in the `terms` argument\n", - "to `ModelSpec`." - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "id": "earned-ready", + "execution_count": 20, + "id": "imported-measure", "metadata": {}, "outputs": [ { "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
mynewvar[0]mynewvar[1]
0-3.595740-4.850530
115.07040135.706773
227.41222840.772377
3-33.98304813.468087
46.580644-11.287452
.........
395-36.856308-18.418138
39645.7315203.243768
39749.087659-35.727136
398-13.56517818.847760
39931.9170720.976615
\n", + "

400 rows × 2 columns

\n", + "
" + ], "text/plain": [ - "intercept 7.866634\n", - "UIncome[L]:ShelveLoc[Good] 4.512054\n", - "UIncome[L]:ShelveLoc[Medium] 1.247275\n", - "UIncome[M]:ShelveLoc[Good] 5.575170\n", - "UIncome[M]:ShelveLoc[Medium] 2.478163\n", - "UIncome[L] -2.734895\n", - "UIncome[M] -2.619745\n", - "dtype: float64" + " mynewvar[0] mynewvar[1]\n", + "0 -3.595740 -4.850530\n", + "1 15.070401 35.706773\n", + "2 27.412228 40.772377\n", + "3 -33.983048 13.468087\n", + "4 6.580644 -11.287452\n", + ".. ... ...\n", + "395 -36.856308 -18.418138\n", + "396 45.731520 3.243768\n", + "397 49.087659 -35.727136\n", + "398 -13.565178 18.847760\n", + "399 31.917072 0.976615\n", + "\n", + "[400 rows x 2 columns]" ] }, - "execution_count": 41, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "design = ModelSpec([('UIncome', 'ShelveLoc'), 'UIncome'])\n", - "X = design.fit_transform(Carseats)\n", - "sm.OLS(Y, X).fit().params" + "from sklearn.decomposition import PCA\n", + "pca = PCA(n_components=2)\n", + "pca.fit(build_columns(MS.column_info_, Carseats, new_var)[0]) # this is done within `ModelSpec.fit`\n", + "pca_var = Feature(('Price', 'Income', 'OIncome'), name='mynewvar', encoder=pca)\n", + "build_columns(MS.column_info_,\n", + " Carseats, \n", + " pca_var)[0]" ] }, { "cell_type": "markdown", - "id": "prescribed-accessory", + "id": "institutional-burden", "metadata": {}, "source": [ - "The tuples in `terms` are converted to `Variable` in the formalized `terms_` attribute by creating a `Variable` with\n", - "`variables` set to the tuple and the encoder an `Interaction` encoder which (unsurprisingly) creates the interaction columns from the concatenated data frames of `UIncome` and `ShelveLoc`." + "The elements of the `variables` attribute may be column identifiers ( `\"Price\"`), `Column` instances (`price`)\n", + "or `Feature` instances (`pca_var`)." 
] }, { "cell_type": "code", - "execution_count": 42, - "id": "pacific-animal", + "execution_count": 21, + "id": "western-bloom", "metadata": {}, "outputs": [ { "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IncomePricemynewvar[0]mynewvar[1]
073.0120.0-3.595740-4.850530
148.083.015.07040135.706773
235.080.027.41222840.772377
3100.097.0-33.98304813.468087
464.0128.06.580644-11.287452
...............
395108.0128.0-36.856308-18.418138
39623.0120.045.7315203.243768
39726.0159.049.087659-35.727136
39879.095.0-13.56517818.847760
39937.0120.031.9170720.976615
\n", + "

400 rows × 4 columns

\n", + "
" + ], "text/plain": [ - "Variable(variables=('UIncome', 'ShelveLoc'), name='UIncome:ShelveLoc', encoder=Interaction(column_names={'ShelveLoc': ['ShelveLoc[Good]', 'ShelveLoc[Medium]'],\n", - " 'UIncome': ['UIncome[L]', 'UIncome[M]']},\n", - " columns={'ShelveLoc': range(2, 4), 'UIncome': range(0, 2)},\n", - " variables=['UIncome', 'ShelveLoc']), use_transform=True, pure_columns=False, override_encoder_colnames=False)" + " Income Price mynewvar[0] mynewvar[1]\n", + "0 73.0 120.0 -3.595740 -4.850530\n", + "1 48.0 83.0 15.070401 35.706773\n", + "2 35.0 80.0 27.412228 40.772377\n", + "3 100.0 97.0 -33.983048 13.468087\n", + "4 64.0 128.0 6.580644 -11.287452\n", + ".. ... ... ... ...\n", + "395 108.0 128.0 -36.856308 -18.418138\n", + "396 23.0 120.0 45.731520 3.243768\n", + "397 26.0 159.0 49.087659 -35.727136\n", + "398 79.0 95.0 -13.565178 18.847760\n", + "399 37.0 120.0 31.917072 0.976615\n", + "\n", + "[400 rows x 4 columns]" ] }, - "execution_count": 42, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "design.terms_[0]" + "price = MS.column_info_['Price']\n", + "fancy_var = Feature(('Income', price, pca_var), name='fancy', encoder=None)\n", + "build_columns(MS.column_info_,\n", + " Carseats, \n", + " fancy_var)[0]" ] }, { "cell_type": "markdown", - "id": "planned-wrestling", + "id": "e289feba-e3f5-48e0-9e29-cdd88d7f9923", "metadata": {}, "source": [ - "Comparing this to the previous `R` model." 
+ "## Predicting at new points" ] }, { "cell_type": "code", - "execution_count": 43, - "id": "given-testimony", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Call:\n", - "lm(formula = Sales ~ UIncome:ShelveLoc + UIncome, data = Carseats)\n", - "\n", - "Coefficients:\n", - " (Intercept) UIncomeM UIncomeH \n", - " 5.1317 0.1151 1.1561 \n", - " UIncomeL:ShelveLocGood UIncomeM:ShelveLocGood UIncomeH:ShelveLocGood \n", - " 4.5121 5.5752 3.7381 \n", - "UIncomeL:ShelveLocMedium UIncomeM:ShelveLocMedium UIncomeH:ShelveLocMedium \n", - " 1.2473 2.4782 1.5141 \n", - "\n" - ] - } - ], - "source": [ - "%%R\n", - "lm(Sales ~ UIncome:ShelveLoc + UIncome, data=Carseats)" - ] - }, - { - "cell_type": "markdown", - "id": "external-barrier", - "metadata": {}, - "source": [ - "We note a few important things:\n", - "\n", - "1. `R` has reorganized the columns of the design from the formula: although we wrote `UIncome:ShelveLoc` first these\n", - "columns have been built later. **`ModelSpec` builds columns in the order determined by `terms`!**\n", - "\n", - "2. As noted above, `R` has encoded `UIncome` differently in the main effect and in the interaction. For `ModelSpec`, the reference to `UIncome` always refers to the column in `design.column_info_` and will always build only the columns for `L` and `M`. **`ModelSpec` does no inspection of terms to decide how to encode categorical variables.**\n", - "\n", - "A few notes:\n", - "\n", - "- **Why not try to inspect the terms?** For any nontrivial formula in `R` with several categorical variables and interactions, predicting what columns will be produced from a given formula is not simple. **`ModelSpec` errs on the side of being explicit.**\n", - "\n", - "- **Is it impossible to build the design as `R` has?** No. An advanced user who *knows* they want the columns built as `R` has can do so (fairly) easily." 
- ] - }, - { - "cell_type": "code", - "execution_count": 44, - "id": "authentic-meditation", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "( UIncome[H] UIncome[L] UIncome[M]\n", - " 0 0.0 0.0 1.0\n", - " 1 0.0 1.0 0.0\n", - " 2 0.0 1.0 0.0\n", - " 3 1.0 0.0 0.0\n", - " 4 0.0 0.0 1.0\n", - " .. ... ... ...\n", - " 395 1.0 0.0 0.0\n", - " 396 0.0 1.0 0.0\n", - " 397 0.0 1.0 0.0\n", - " 398 0.0 0.0 1.0\n", - " 399 0.0 1.0 0.0\n", - " \n", - " [400 rows x 3 columns],\n", - " ['UIncome[H]', 'UIncome[L]', 'UIncome[M]'])" - ] - }, - "execution_count": 44, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "full_encoding = contrast('UIncome', None)\n", - "design.build_columns(Carseats, full_encoding)" - ] - }, - { - "cell_type": "code", - "execution_count": 45, - "id": "lucky-success", + "execution_count": 22, + "id": "6efed2fa-9e5d-429c-a8d9-ac544cab2b41", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "intercept 5.131739\n", - "UIncome[M] 0.115150\n", - "UIncome[H] 1.156118\n", - "UIncome[H]:ShelveLoc[Good] 3.738052\n", - "UIncome[H]:ShelveLoc[Medium] 1.514104\n", - "UIncome[L]:ShelveLoc[Good] 4.512054\n", - "UIncome[L]:ShelveLoc[Medium] 1.247275\n", - "UIncome[M]:ShelveLoc[Good] 5.575170\n", - "UIncome[M]:ShelveLoc[Medium] 2.478163\n", + "intercept 12.661546\n", + "Price -0.052213\n", + "Income 0.012829\n", "dtype: float64" ] }, - "execution_count": 45, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "design = ModelSpec([pref_encoding, (full_encoding, 'ShelveLoc')])\n", - "X = design.fit_transform(Carseats)\n", - "sm.OLS(Y, X).fit().params" + "MS = ModelSpec(['Price', 'Income']).fit(Carseats)\n", + "X = MS.transform(Carseats)\n", + "Y = Carseats['Sales']\n", + "M_ols = sm.OLS(Y, X).fit()\n", + "M_ols.params" ] }, { "cell_type": "markdown", - "id": "laden-beach", - "metadata": {}, - "source": [ - "## Special encodings\n", - "\n", - "For flexible models, we may want 
to consider transformations of features, i.e. polynomial\n", - "or spline transformations. Given transforms that follow the `fit/transform` paradigm\n", - "we can of course achieve this with a `Column` and an `encoder`. The `ISLP.transforms`\n", - "package includes a `Poly` transform" - ] - }, - { - "cell_type": "code", - "execution_count": 46, - "id": "copyrighted-luther", + "id": "e6b4609b-fcb2-4cc2-b630-509df4c87546", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Variable(variables=('Income',), name='poly(Income, 3)', encoder=Poly(degree=3), use_transform=True, pure_columns=False, override_encoder_colnames=True)" - ] - }, - "execution_count": 46, - "metadata": {}, - "output_type": "execute_result" - } - ], "source": [ - "from ISLP.models.model_spec import poly\n", - "poly('Income', 3)" + "As `ModelSpec` is a transformer, it can be evaluated at new feature values.\n", + "Constructing the design matrix at any values is carried out by the `transform` method." ] }, { "cell_type": "code", - "execution_count": 47, - "id": "threatened-marine", + "execution_count": 23, + "id": "8784b0e8-ce53-4a90-aee6-b935834295c7", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "intercept 5.440077\n", - "poly(Income, 3)[0] 10.036373\n", - "poly(Income, 3)[1] -2.799156\n", - "poly(Income, 3)[2] 2.399601\n", - "ShelveLoc[Good] 4.808133\n", - "ShelveLoc[Medium] 1.889533\n", - "dtype: float64" + "array([10.70130676, 10.307465 ])" ] }, - "execution_count": 47, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "design = ModelSpec([poly('Income', 3), 'ShelveLoc'])\n", - "X = design.fit_transform(Carseats)\n", - "sm.OLS(Y, X).fit().params" + "new_data = pd.DataFrame({'Price':[40, 50], 'Income':[10, 20]})\n", + "new_X = MS.transform(new_data)\n", + "M_ols.get_prediction(new_X).predicted_mean" ] }, { "cell_type": "markdown", - "id": "senior-spokesman", - "metadata": {}, - "source": [ - "Compare:" - ] - }, - { - 
"cell_type": "code", - "execution_count": 48, - "id": "prompt-fifteen", + "id": "signal-yahoo", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " (Intercept) poly(Income, 3)1 poly(Income, 3)2 poly(Income, 3)3 \n", - " 5.440077 10.036373 -2.799156 2.399601 \n", - " ShelveLocGood ShelveLocMedium \n", - " 4.808133 1.889533 \n" - ] - } - ], "source": [ - "%%R\n", - "lm(Sales ~ poly(Income, 3) + ShelveLoc, data=Carseats)$coef" + "## Using `np.ndarray`\n", + "\n", + "As the basic model is to concatenate columns extracted from a columnar data\n", + "representation, one *can* use `np.ndarray` as the column data. In this case,\n", + "columns will be selected by integer indices. \n", + "\n", + "### Caveats using `np.ndarray`\n", + "\n", + "If the `terms` only refer to a few columns of the data frame, the `transform` method only needs a dataframe with those columns.\n", + "However,\n", + "unless all features are floats, `np.ndarray` will default to a dtype of `object`, complicating issues.\n", + "\n", + "However, if we had used an `np.ndarray`, the column identifiers would be integers identifying specific columns so,\n", + "in order to work correctly, `transform` would need another `np.ndarray` where the columns have the same meaning. \n", + "\n", + "We illustrate this below, where we build a model from `Price` and `Income` for `Sales` and want to find predictions at new\n", + "values of `Price` and `Location`. We first find the predicitions using `pd.DataFrame` and then illustrate the difficulties\n", + "in using `np.ndarray`." 
] }, { "cell_type": "markdown", - "id": "better-christianity", + "id": "e7ffdd07-4d6b-4a4c-ab38-ad1270e85de6", "metadata": {}, "source": [ - "## Splines\n", - "\n", - "Support for natural and B-splines is also included" + "We will refit this model, using `ModelSpec` with an `np.ndarray` instead" ] }, { "cell_type": "code", - "execution_count": 49, - "id": "outstanding-performer", + "execution_count": 24, + "id": "4fec9030-7445-48be-a15f-2ac5a789e717", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "intercept 4.240421\n", - "ns(Income, df=5)[0] 1.468196\n", - "ns(Income, df=5)[1] 1.499471\n", - "ns(Income, df=5)[2] 1.152070\n", - "ns(Income, df=5)[3] 2.418398\n", - "ns(Income, df=5)[4] 1.804460\n", - "ShelveLoc[Good] 4.810449\n", - "ShelveLoc[Medium] 1.881095\n", - "dtype: float64" + "array([[ 1., 120., 73.],\n", + " [ 1., 83., 48.],\n", + " [ 1., 80., 35.],\n", + " ...,\n", + " [ 1., 159., 26.],\n", + " [ 1., 95., 79.],\n", + " [ 1., 120., 37.]])" ] }, - "execution_count": 49, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "from ISLP.models.model_spec import ns, bs, pca\n", - "design = ModelSpec([ns('Income', df=5), 'ShelveLoc'])\n", - "X = design.fit_transform(Carseats)\n", - "sm.OLS(Y, X).fit().params" - ] - }, - { - "cell_type": "code", - "execution_count": 50, - "id": "informative-spirituality", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " (Intercept) ns(Income, df = 5)1 ns(Income, df = 5)2 ns(Income, df = 5)3 \n", - " 4.240421 1.468196 1.499471 1.152070 \n", - "ns(Income, df = 5)4 ns(Income, df = 5)5 ShelveLocGood ShelveLocMedium \n", - " 2.418398 1.804460 4.810449 1.881095 \n" - ] - } - ], - "source": [ - "%%R\n", - "library(splines)\n", - "lm(Sales ~ ns(Income, df=5) + ShelveLoc, data=Carseats)$coef" + "Carseats_np = np.asarray(Carseats[['Price', 'Education', 'Income']])\n", + "MS_np = ModelSpec([0,2]).fit(Carseats_np)\n", + 
"MS_np.transform(Carseats_np)" ] }, { "cell_type": "code", - "execution_count": 51, - "id": "destroyed-complexity", + "execution_count": 25, + "id": "c864e365-2476-4ca6-9d27-625cac2b2271", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "intercept 3.495085\n", - "bs(Income, df=7, degree=2)[0] 1.813118\n", - "bs(Income, df=7, degree=2)[1] 0.961852\n", - "bs(Income, df=7, degree=2)[2] 2.471545\n", - "bs(Income, df=7, degree=2)[3] 2.158891\n", - "bs(Income, df=7, degree=2)[4] 2.091625\n", - "bs(Income, df=7, degree=2)[5] 2.600669\n", - "bs(Income, df=7, degree=2)[6] 2.843108\n", - "ShelveLoc[Good] 4.804919\n", - "ShelveLoc[Medium] 1.880337\n", + "const 12.661546\n", + "x1 -0.052213\n", + "x2 0.012829\n", "dtype: float64" ] }, - "execution_count": 51, + "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "design = ModelSpec([bs('Income', df=7, degree=2), 'ShelveLoc'])\n", - "X = design.fit_transform(Carseats)\n", - "sm.OLS(Y, X).fit().params" - ] - }, - { - "cell_type": "code", - "execution_count": 52, - "id": "incident-nicaragua", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " (Intercept) bs(Income, df = 7, degree = 2)1 \n", - " 3.4950851 1.8131176 \n", - "bs(Income, df = 7, degree = 2)2 bs(Income, df = 7, degree = 2)3 \n", - " 0.9618523 2.4715450 \n", - "bs(Income, df = 7, degree = 2)4 bs(Income, df = 7, degree = 2)5 \n", - " 2.1588908 2.0916252 \n", - "bs(Income, df = 7, degree = 2)6 bs(Income, df = 7, degree = 2)7 \n", - " 2.6006694 2.8431084 \n", - " ShelveLocGood ShelveLocMedium \n", - " 4.8049190 1.8803375 \n" - ] - } - ], - "source": [ - "%%R\n", - "lm(Sales ~ bs(Income, df=7, degree=2) + ShelveLoc, data=Carseats)$coef" + "M_ols_np = sm.OLS(Y, MS_np.transform(Carseats_np)).fit()\n", + "M_ols_np.params" ] }, { "cell_type": "markdown", - "id": "formal-medline", - "metadata": {}, - "source": [ - "## PCA" - ] - }, - { - "cell_type": "code", - "execution_count": 53, 
- "id": "general-joshua", + "id": "undefined-sacrifice", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but PCA was fitted with feature names\n", - " warnings.warn(\n", - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but PCA was fitted with feature names\n", - " warnings.warn(\n", - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but PCA was fitted with feature names\n", - " warnings.warn(\n" - ] - }, - { - "data": { - "text/plain": [ - "intercept 5.419405\n", - "pca(myvars, n_components=2)[0] -0.001131\n", - "pca(myvars, n_components=2)[1] -0.024217\n", - "ShelveLoc[Good] 4.816253\n", - "ShelveLoc[Medium] 1.924139\n", - "dtype: float64" - ] - }, - "execution_count": 53, - "metadata": {}, - "output_type": "execute_result" - } - ], "source": [ - "design = ModelSpec([pca(['Income', \n", - " 'Price', \n", - " 'Advertising', \n", - " 'Population'], \n", - " n_components=2, \n", - " name='myvars'), 'ShelveLoc'])\n", - "X = design.fit_transform(Carseats)\n", - "sm.OLS(Y, X).fit().params" + "Now, let's consider finding the design matrix at new points. \n", + "When using `pd.DataFrame` we only need to supply the `transform` method\n", + "a data frame with columns implicated in the `terms` argument (in this case, `Price` and `Income`). \n", + "\n", + "However, when using `np.ndarray` with integers as indices, `Price` was column 0 and `Income` was column 2. The only\n", + "sensible way to produce a return for predict is to extract its 0th and 2nd columns. 
Note this means\n", + "that the meaning of columns in an `np.ndarray` provided to `transform` essentially must be identical to those\n", + "passed to `fit`." ] }, { "cell_type": "code", - "execution_count": 54, - "id": "coordinate-calcium", + "execution_count": 26, + "id": "incredible-concert", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "\n", - "Call:\n", - "lm(formula = Sales ~ prcomp(cbind(Income, Price, Advertising, \n", - " Population))$x[, 1:2] + ShelveLoc, data = Carseats)\n", - "\n", - "Coefficients:\n", - " (Intercept) \n", - " 5.419405 \n", - "prcomp(cbind(Income, Price, Advertising, Population))$x[, 1:2]PC1 \n", - " 0.001131 \n", - "prcomp(cbind(Income, Price, Advertising, Population))$x[, 1:2]PC2 \n", - " -0.024217 \n", - " ShelveLocGood \n", - " 4.816253 \n", - " ShelveLocMedium \n", - " 1.924139 \n", - "\n" + "index 2 is out of bounds for axis 1 with size 2\n" ] } ], "source": [ - "%%R\n", - "lm(Sales ~ prcomp(cbind(Income, Price, Advertising, Population))$x[,1:2] + ShelveLoc, data=Carseats)" + "try:\n", + " new_D = np.array([[40,50], [10,20]]).T\n", + " new_X = MS_np.transform(new_D)\n", + "except IndexError as e:\n", + " print(e)" ] }, { "cell_type": "markdown", - "id": "foster-canvas", + "id": "allied-botswana", "metadata": {}, "source": [ - "It is of course common to scale before running PCA." + "Ultimately, `M` expects 3 columns for new predictions because it was fit\n", + "with a matrix having 3 columns (the first representing an intercept).\n", + "\n", + "We might be tempted to try as with the `pd.DataFrame` and produce\n", + "an `np.ndarray` with only the necessary variables." 
] }, { "cell_type": "code", - "execution_count": 55, - "id": "geographic-founder", + "execution_count": 27, + "id": "stunning-container", "metadata": {}, "outputs": [ { - "name": "stderr", + "name": "stdout", "output_type": "stream", "text": [ - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but StandardScaler was fitted with feature names\n", - " warnings.warn(\n", - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but StandardScaler was fitted with feature names\n", - " warnings.warn(\n", - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but StandardScaler was fitted with feature names\n", - " warnings.warn(\n" + "[[ 1. 40. 10.]\n", + " [ 1. 50. 20.]]\n" ] }, { "data": { "text/plain": [ - "intercept 5.352159\n", - "pca(myvars, n_components=2)[0] 0.446383\n", - "pca(myvars, n_components=2)[1] -1.219788\n", - "ShelveLoc[Good] 4.922780\n", - "ShelveLoc[Medium] 2.005617\n", - "dtype: float64" + "array([10.70130676, 10.307465 ])" ] }, - "execution_count": 55, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "design = ModelSpec([pca(['Income', \n", - " 'Price', \n", - " 'Advertising', \n", - " 'Population'], \n", - " n_components=2, \n", - " name='myvars',\n", - " scale=True), 'ShelveLoc'])\n", - "X = design.fit_transform(Carseats)\n", - "sm.OLS(Y, X).fit().params" - ] - }, - { - "cell_type": "code", - "execution_count": 56, - "id": "floral-packaging", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Call:\n", - "lm(formula = Sales ~ prcomp(cbind(Income, Price, Advertising, \n", - " Population), scale = TRUE)$x[, 1:2] + ShelveLoc, data = Carseats)\n", - "\n", - 
"Coefficients:\n", - " (Intercept) \n", - " 5.3522 \n", - "prcomp(cbind(Income, Price, Advertising, Population), scale = TRUE)$x[, 1:2]PC1 \n", - " 0.4469 \n", - "prcomp(cbind(Income, Price, Advertising, Population), scale = TRUE)$x[, 1:2]PC2 \n", - " -1.2213 \n", - " ShelveLocGood \n", - " 4.9228 \n", - " ShelveLocMedium \n", - " 2.0056 \n", - "\n" - ] - } - ], - "source": [ - "%%R\n", - "lm(Sales ~ prcomp(cbind(Income, Price, Advertising, Population), scale=TRUE)$x[,1:2] + ShelveLoc, data=Carseats)" + "new_D = np.array([[40,50], [np.nan, np.nan], [10,20]]).T\n", + "new_X = MS_np.transform(new_D)\n", + "print(new_X)\n", + "M_ols.get_prediction(new_X).predicted_mean" ] }, { "cell_type": "markdown", - "id": "social-cherry", - "metadata": {}, - "source": [ - "There will be some small differences in the coefficients due to `sklearn` use of `np.std(ddof=0)` instead\n", - "of `np.std(ddof=1)`." - ] - }, - { - "cell_type": "code", - "execution_count": 57, - "id": "another-glory", + "id": "specific-tobacco", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([ 0.44694166, -1.22131519])" - ] - }, - "execution_count": 57, - "metadata": {}, - "output_type": "execute_result" - } - ], "source": [ - "np.array(sm.OLS(Y, X).fit().params)[1:3] * np.sqrt(X.shape[0] / (X.shape[0]-1))" + "For more complicated design contructions ensuring the columns of `new_D` match that of the original data will be more cumbersome. We expect\n", + "then that `pd.DataFrame` (or a columnar data representation with similar API) will likely be easier to use with `ModelSpec`." 
] } ], @@ -2014,9 +1806,9 @@ "formats": "source/models///ipynb,jupyterbook/models///md:myst,jupyterbook/models///ipynb" }, "kernelspec": { - "display_name": "islp_test", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "islp_test" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -2028,7 +1820,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.13" + "version": "3.10.10" } }, "nbformat": 4, diff --git a/docs/jupyterbook/models/spec.md b/docs/jupyterbook/models/spec.md index fdf8c60..27bb3a4 100644 --- a/docs/jupyterbook/models/spec.md +++ b/docs/jupyterbook/models/spec.md @@ -5,490 +5,296 @@ jupytext: extension: .md format_name: myst format_version: 0.13 - jupytext_version: 1.14.1 + jupytext_version: 1.14.5 kernelspec: - display_name: islp_test + display_name: Python 3 (ipykernel) language: python - name: islp_test + name: python3 --- # Building design matrices with `ModelSpec` -Force rebuild +The `ISLP` package provides a facility to build design +matrices for regression and classification tasks. It provides similar functionality to the formula +notation of `R` though uses python objects rather than specification through the special formula syntax. + +Related tools include `patsy` and `ColumnTransformer` from `sklearn.compose`. + +Perhaps the most common use is to extract some columns from a `pd.DataFrame` and +produce a design matrix, optionally with an intercept. 
```{code-cell} ipython3 -x=4 -import numpy as np, pandas as pd -%load_ext rpy2.ipython +import pandas as pd +import numpy as np from ISLP import load_data -from ISLP.models import ModelSpec +from ISLP.models import (ModelSpec, + summarize, + Column, + Feature, + build_columns) import statsmodels.api as sm ``` ```{code-cell} ipython3 Carseats = load_data('Carseats') -%R -i Carseats Carseats.columns ``` -## Let's break up income into groups +We'll first build a design matrix that we can use to model `Sales` +in terms of the categorical variable `ShelveLoc` and `Price`. -```{code-cell} ipython3 -Carseats['OIncome'] = pd.cut(Carseats['Income'], - [0,50,90,200], - labels=['L','M','H']) -Carseats['OIncome'] -``` - -Let's also create an unordered version - -```{code-cell} ipython3 -Carseats['UIncome'] = pd.cut(Carseats['Income'], - [0,50,90,200], - labels=['L','M','H'], - ordered=False) -Carseats['UIncome'] -``` - -## A simple model - -```{code-cell} ipython3 -design = ModelSpec(['Price', 'Income']) -X = design.fit_transform(Carseats) -X.columns -``` - -```{code-cell} ipython3 -Y = Carseats['Sales'] -M = sm.OLS(Y, X).fit() -M.params -``` - -## Basic procedure - -The design matrix is built by cobbling together a set of columns and possibly transforming them. -A `pd.DataFrame` is essentially a list of columns. One of the first tasks done in `ModelSpec.fit` -is to inspect a dataframe for column info. The column `ShelveLoc` is categorical: +We see first that `ShelveLoc` is a categorical variable: ```{code-cell} ipython3 Carseats['ShelveLoc'] ``` -This is recognized by `ModelSpec` in the form of `Column` objects which are just named tuples with two methods -`get_columns` and `fit_encoder`. - -```{code-cell} ipython3 -design.column_info_['ShelveLoc'] -``` - -It recognized ordinal columns as well. 
- -```{code-cell} ipython3 -design.column_info_['OIncome'] -``` - -```{code-cell} ipython3 -income = design.column_info_['Income'] -cols, names = income.get_columns(Carseats) -(cols[:4], names) -``` - -## Encoding a column - -In building a design matrix we must extract columns from our dataframe (or `np.ndarray`). Categorical -variables usually are encoded by several columns, typically one less than the number of categories. -This task is handled by the `encoder` of the `Column`. The encoder must satisfy the `sklearn` transform -model, i.e. `fit` on some array and `transform` on future arrays. The `fit_encoder` method of `Column` fits -its encoder the first time data is passed to it. - -```{code-cell} ipython3 -shelve = design.column_info_['ShelveLoc'] -cols, names = shelve.get_columns(Carseats) -(cols[:4], names) -``` - -```{code-cell} ipython3 -oincome = design.column_info_['OIncome'] -oincome.get_columns(Carseats)[0][:4] -``` - -## The terms +This is recognized by `ModelSpec` and only 2 columns are added for the three levels. The +default behavior is to drop the first level of the categories. Later, +we will show other contrasts of the 3 columns can be produced. -The design matrix consists of several sets of columns. This is managed by the `ModelSpec` through -the `terms` argument which should be a sequence. The elements of `terms` are often -going to be strings (or tuples of strings for interactions, see below) but are converted to a -`Variable` object and stored in the `terms_` of the fitted `ModelSpec`. A `Variable` is just a named tuple. +This simple example below illustrates how the first argument (its `terms`) is +used to construct a design matrix. ```{code-cell} ipython3 -design.terms +MS = ModelSpec(['ShelveLoc', 'Price']) +X = MS.fit_transform(Carseats) +X.iloc[:10] ``` -```{code-cell} ipython3 -design.terms_ -``` - -While each `Column` can itself extract data, they are all promoted to `Variable` to be of a uniform type. 
A -`Variable` can also create columns through the `build_columns` method of `ModelSpec` +We note that a column has been added for the intercept by default. This can be changed using the +`intercept` argument. ```{code-cell} ipython3 -price = design.terms_[0] -design.build_columns(Carseats, price) +MS_no1 = ModelSpec(['ShelveLoc', 'Price'], intercept=False) +MS_no1.fit_transform(Carseats)[:10] ``` -Note that `Variable` objects have a tuple of `variables` as well as an `encoder` attribute. The -tuple of `variables` first creates a concatenated dataframe from all corresponding variables and then -is run through `encoder.transform`. The `encoder.fit` method of each `Variable` is run once during -the call to `ModelSpec.fit`. +We see that `ShelveLoc` still only contributes +two columns to the design. The `ModelSpec` object does no introspection of its arguments to effectively include an intercept term +in the column space of the design matrix. -```{code-cell} ipython3 -from ISLP.models.model_spec import Variable - -new_var = Variable(('Price', 'Income', 'UIncome'), name='mynewvar', encoder=None) -design.build_columns(Carseats, new_var) -``` - -Let's now transform these columns with an encoder. Within `ModelSpec` we will first build the -arrays above and then call `pca.fit` and finally `pca.transform` within `design.build_columns`. +To include this intercept via `ShelveLoc` we can use 3 columns to encode this categorical variable. Following the nomenclature of +`R`, we call this a `Contrast` of the categorical variable. ```{code-cell} ipython3 -from sklearn.decomposition import PCA -pca = PCA(n_components=2) -pca.fit(design.build_columns(Carseats, new_var)[0]) # this is done within `ModelSpec.fit` -pca_var = Variable(('Price', 'Income', 'UIncome'), name='mynewvar', encoder=pca) -design.build_columns(Carseats, pca_var) -``` - -The elements of the `variables` attribute may be column identifiers ( `"Price"`), `Column` instances (`price`) -or `Variable` instances (`pca_var`). 
- -```{code-cell} ipython3 -fancy_var = Variable(('Price', price, pca_var), name='fancy', encoder=None) -design.build_columns(Carseats, fancy_var) +from ISLP.models import contrast +shelve = contrast('ShelveLoc', None) +MS_contr = ModelSpec([shelve, 'Price'], intercept=False) +MS_contr.fit_transform(Carseats)[:10] ``` -We can of course run PCA again on these features (if we wanted). +This example above illustrates that columns need not be identified by name in `terms`. The basic +role of an item in the `terms` sequence is a description of how to extract a column +from a columnar data object, usually a `pd.DataFrame`. ```{code-cell} ipython3 -pca2 = PCA(n_components=2) -pca2.fit(design.build_columns(Carseats, fancy_var)[0]) # this is done within `ModelSpec.fit` -pca2_var = Variable(('Price', price, pca_var), name='fancy_pca', encoder=pca2) -design.build_columns(Carseats, pca2_var) +shelve ``` -## Building the design matrix - -With these notions in mind, the final design is essentially then +The `Column` object can be used to directly extract relevant columns from a `pd.DataFrame`. If the `encoder` field is not +`None`, then the extracted columns will be passed through `encoder`. +The `get_columns` method produces these columns as well as names for the columns. ```{code-cell} ipython3 -X_hand = np.column_stack([design.build_columns(Carseats, v)[0] for v in design.terms_])[:4] +shelve.get_columns(Carseats) ``` -An intercept column is added if `design.intercept` is `True` and if the original argument to `transform` is -a dataframe the index is adjusted accordingly. +Let's now fit a simple OLS model with this design. ```{code-cell} ipython3 -design.intercept -``` - -```{code-cell} ipython3 -design.transform(Carseats)[:4] +X = MS_contr.transform(Carseats) +Y = Carseats['Sales'] +M_ols = sm.OLS(Y, X).fit() +summarize(M_ols) ``` -## Predicting +## Interactions -Constructing the design matrix at any values is carried out by the `transform` method. 
+One of the common uses of formulae in `R` is to specify interactions between variables. +This is done in `ModelSpec` by including a tuple in the `terms` argument. ```{code-cell} ipython3 -new_data = pd.DataFrame({'Price':[10,20], 'Income':[40, 50]}) -new_X = design.transform(new_data) -M.get_prediction(new_X).predicted_mean +ModelSpec([(shelve, 'Price'), 'Price']).fit_transform(Carseats).iloc[:10] ``` -```{code-cell} ipython3 -%%R -i new_data,Carseats -predict(lm(Sales ~ Price + Income, data=Carseats), new_data) -``` +The above design matrix is clearly rank deficient, as `ModelSpec` has not inspected the formula +and attempted to produce a corresponding matrix that may or may not match a user's intent. -### Difference between using `pd.DataFrame` and `np.ndarray` ++++ -If the `terms` only refer to a few columns of the data frame, the `transform` method only needs a dataframe with those columns. +## Ordinal variables -If we had used an `np.ndarray`, the column identifiers would be integers identifying specific columns so, -in order to work correctly, `transform` would need another `np.ndarray` where the columns have the same meaning. +Ordinal variables are handled by a corresponding encoder) ```{code-cell} ipython3 -Carseats_np = np.asarray(Carseats[['Price', 'ShelveLoc', 'US', 'Income']]) -design_np = ModelSpec([0,3]).fit(Carseats_np) -design_np.transform(Carseats_np)[:4] +Carseats['OIncome'] = pd.cut(Carseats['Income'], + [0,50,90,200], + labels=['L','M','H']) +MS_order = ModelSpec(['OIncome']).fit(Carseats) ``` -The following will fail for hopefully obvious reasons +Part of the `fit` method of `ModelSpec` involves inspection of the columns of `Carseats`. 
+The results of that inspection can be found in the `column_info_` attribute: ```{code-cell} ipython3 -try: - new_D = np.zeros((2,2)) - new_D[:,0] = [10,20] - new_D[:,1] = [40,50] - M.get_prediction(new_D).predicted_mean -except ValueError as e: - print(e) +MS_order.column_info_ ``` -Ultimately, `M` expects 3 columns for new predictions because it was fit -with a matrix having 3 columns (the first representing an intercept). +## Structure of a `ModelSpec` -We might be tempted to try as with the `pd.DataFrame` and produce -an `np.ndarray` with only the necessary variables. +The first argument to `ModelSpec` is stored as the `terms` attribute. Under the hood, +this sequence is inspected to produce the `terms_` attribute which specify the objects +that will ultimately create the design matrix. ```{code-cell} ipython3 -try: - new_X = np.zeros((2,2)) - new_X[:,0] = [10,20] - new_X[:,1] = [40,50] - new_D = design_np.transform(new_X) - M.get_prediction(new_D).predicted_mean -except IndexError as e: - print(e) +MS = ModelSpec(['ShelveLoc', 'Price']) +MS.fit(Carseats) +MS.terms_ ``` -This fails because `design_np` is looking for column `3` from its `terms`: +Each element of `terms_` should be a `Feature` which describes a set of columns to be extracted from +a columnar data form as well as possible a possible encoder. 
```{code-cell} ipython3 -design_np.terms_ +shelve_var = MS.terms_[0] ``` -However, if we have an `np.ndarray` in which the first column indeed represents `Price` and the fourth indeed -represents `Income` then we can arrive at the correct answer by supplying such the array to `design_np.transform`: +We can find the columns associated to each term using the `build_columns` method of `ModelSpec`: ```{code-cell} ipython3 -new_X = np.zeros((2,4)) -new_X[:,0] = [10,20] -new_X[:,3] = [40,50] -new_D = design_np.transform(new_X) -M.get_prediction(new_D).predicted_mean +df, names = build_columns(MS.column_info_, + Carseats, + shelve_var) +df ``` -Given this subtlety about needing to supply arrays with identical column structure to `transform` when -using `np.ndarray` we presume that using a `pd.DataFrame` will be the more popular use case. +The design matrix is constructed by running through `terms_` and concatenating the corresponding columns. +++ -## A model with some categorical variables +### `Feature` objects -Categorical variables become `Column` instances with encoders. - -```{code-cell} ipython3 -design = ModelSpec(['Population', 'Price', 'UIncome', 'ShelveLoc']).fit(Carseats) -design.column_info_['UIncome'] -``` - -```{code-cell} ipython3 -X = design.fit_transform(Carseats) -X.columns -``` - -```{code-cell} ipython3 -sm.OLS(Y, X).fit().params -``` - -```{code-cell} ipython3 -%%R -lm(Sales ~ Population + Price + UIncome + ShelveLoc, data=Carseats)$coef -``` - -## Getting the encoding you want - -By default the level dropped by `ModelSpec` will be the first of the `categories_` values from -`sklearn.preprocessing.OneHotEncoder()`. We might wish to change this. It seems -as if the correct way to do this would be something like `Variable(('UIncome',), 'mynewencoding', new_encoder)` -where `new_encoder` would somehow drop the column we want dropped. 
- -However, when using the convenient identifier `UIncome` in the `variables` argument, this maps to the `Column` associated to `UIncome` within `design.column_info_`: - -```{code-cell} ipython3 -design.column_info_['UIncome'] -``` - -This column already has an encoder and `Column` instances are immutable as named tuples. Further, there are times when -we may want to encode `UIncome` differently within the same model. In the model below the main effect of `UIncome` is encoded with two columns while in the interaction `UIncome` (see below) has three columns. This is a design of interest -and we need a way to allow different encodings of the same column of `Carseats` - -```{code-cell} ipython3 -%%R -lm(Sales ~ UIncome:ShelveLoc + UIncome, data=Carseats) -``` - - We can create a new -`Column` with the encoder we want. For categorical variables, there is a convenience function to do so. - -```{code-cell} ipython3 -from ISLP.models.model_spec import contrast -pref_encoding = contrast('UIncome', 'drop', 'L') -``` - -```{code-cell} ipython3 -design.build_columns(Carseats, pref_encoding) -``` - -```{code-cell} ipython3 -design = ModelSpec(['Population', 'Price', pref_encoding, 'ShelveLoc']).fit(Carseats) -X = design.fit_transform(Carseats) -X.columns -``` - -```{code-cell} ipython3 -sm.OLS(Y, X).fit().params -``` +Note that `Feature` objects have a tuple of `variables` as well as an `encoder` attribute. The +tuple of `variables` first creates a concatenated dataframe from all corresponding variables and then +is run through `encoder.transform`. The `encoder.fit` method of each `Feature` is run once during +the call to `ModelSpec.fit`. ```{code-cell} ipython3 -%%R -lm(Sales ~ Population + Price + UIncome + ShelveLoc, data=Carseats)$coef +new_var = Feature(('Price', 'Income', 'OIncome'), name='mynewvar', encoder=None) +build_columns(MS.column_info_, + Carseats, + new_var)[0] ``` -## Interactions - -We've referred to interactions above. 
These are specified (by convenience) as tuples in the `terms` argument -to `ModelSpec`. +Let's now transform these columns with an encoder. Within `ModelSpec` we will first build the +arrays above and then call `pca.fit` and finally `pca.transform` within `design.build_columns`. ```{code-cell} ipython3 -design = ModelSpec([('UIncome', 'ShelveLoc'), 'UIncome']) -X = design.fit_transform(Carseats) -sm.OLS(Y, X).fit().params +from sklearn.decomposition import PCA +pca = PCA(n_components=2) +pca.fit(build_columns(MS.column_info_, Carseats, new_var)[0]) # this is done within `ModelSpec.fit` +pca_var = Feature(('Price', 'Income', 'OIncome'), name='mynewvar', encoder=pca) +build_columns(MS.column_info_, + Carseats, + pca_var)[0] ``` -The tuples in `terms` are converted to `Variable` in the formalized `terms_` attribute by creating a `Variable` with -`variables` set to the tuple and the encoder an `Interaction` encoder which (unsurprisingly) creates the interaction columns from the concatenated data frames of `UIncome` and `ShelveLoc`. +The elements of the `variables` attribute may be column identifiers ( `"Price"`), `Column` instances (`price`) +or `Feature` instances (`pca_var`). ```{code-cell} ipython3 -design.terms_[0] +price = MS.column_info_['Price'] +fancy_var = Feature(('Income', price, pca_var), name='fancy', encoder=None) +build_columns(MS.column_info_, + Carseats, + fancy_var)[0] ``` -Comparing this to the previous `R` model. +## Predicting at new points ```{code-cell} ipython3 -%%R -lm(Sales ~ UIncome:ShelveLoc + UIncome, data=Carseats) +MS = ModelSpec(['Price', 'Income']).fit(Carseats) +X = MS.transform(Carseats) +Y = Carseats['Sales'] +M_ols = sm.OLS(Y, X).fit() +M_ols.params ``` -We note a few important things: - -1. `R` has reorganized the columns of the design from the formula: although we wrote `UIncome:ShelveLoc` first these -columns have been built later. **`ModelSpec` builds columns in the order determined by `terms`!** - -2. 
As noted above, `R` has encoded `UIncome` differently in the main effect and in the interaction. For `ModelSpec`, the reference to `UIncome` always refers to the column in `design.column_info_` and will always build only the columns for `L` and `M`. **`ModelSpec` does no inspection of terms to decide how to encode categorical variables.** - -A few notes: - -- **Why not try to inspect the terms?** For any nontrivial formula in `R` with several categorical variables and interactions, predicting what columns will be produced from a given formula is not simple. **`ModelSpec` errs on the side of being explicit.** - -- **Is it impossible to build the design as `R` has?** No. An advanced user who *knows* they want the columns built as `R` has can do so (fairly) easily. - -```{code-cell} ipython3 -full_encoding = contrast('UIncome', None) -design.build_columns(Carseats, full_encoding) -``` +As `ModelSpec` is a transformer, it can be evaluated at new feature values. +Constructing the design matrix at any values is carried out by the `transform` method. ```{code-cell} ipython3 -design = ModelSpec([pref_encoding, (full_encoding, 'ShelveLoc')]) -X = design.fit_transform(Carseats) -sm.OLS(Y, X).fit().params +new_data = pd.DataFrame({'Price':[40, 50], 'Income':[10, 20]}) +new_X = MS.transform(new_data) +M_ols.get_prediction(new_X).predicted_mean ``` -## Special encodings - -For flexible models, we may want to consider transformations of features, i.e. polynomial -or spline transformations. Given transforms that follow the `fit/transform` paradigm -we can of course achieve this with a `Column` and an `encoder`. 
The `ISLP.transforms` -package includes a `Poly` transform - -```{code-cell} ipython3 -from ISLP.models.model_spec import poly -poly('Income', 3) -``` +## Using `np.ndarray` -```{code-cell} ipython3 -design = ModelSpec([poly('Income', 3), 'ShelveLoc']) -X = design.fit_transform(Carseats) -sm.OLS(Y, X).fit().params -``` +As the basic model is to concatenate columns extracted from a columnar data +representation, one *can* use `np.ndarray` as the column data. In this case, +columns will be selected by integer indices. -Compare: +### Caveats using `np.ndarray` -```{code-cell} ipython3 -%%R -lm(Sales ~ poly(Income, 3) + ShelveLoc, data=Carseats)$coef -``` +If the `terms` only refer to a few columns of the data frame, the `transform` method only needs a dataframe with those columns. +However, +unless all features are floats, `np.ndarray` will default to a dtype of `object`, complicating issues. -## Splines +However, if we had used an `np.ndarray`, the column identifiers would be integers identifying specific columns so, +in order to work correctly, `transform` would need another `np.ndarray` where the columns have the same meaning. -Support for natural and B-splines is also included +We illustrate this below, where we build a model from `Price` and `Income` for `Sales` and want to find predictions at new +values of `Price` and `Location`. We first find the predicitions using `pd.DataFrame` and then illustrate the difficulties +in using `np.ndarray`. 
-```{code-cell} ipython3 -from ISLP.models.model_spec import ns, bs, pca -design = ModelSpec([ns('Income', df=5), 'ShelveLoc']) -X = design.fit_transform(Carseats) -sm.OLS(Y, X).fit().params -``` ++++ -```{code-cell} ipython3 -%%R -library(splines) -lm(Sales ~ ns(Income, df=5) + ShelveLoc, data=Carseats)$coef -``` +We will refit this model, using `ModelSpec` with an `np.ndarray` instead ```{code-cell} ipython3 -design = ModelSpec([bs('Income', df=7, degree=2), 'ShelveLoc']) -X = design.fit_transform(Carseats) -sm.OLS(Y, X).fit().params +Carseats_np = np.asarray(Carseats[['Price', 'Education', 'Income']]) +MS_np = ModelSpec([0,2]).fit(Carseats_np) +MS_np.transform(Carseats_np) ``` ```{code-cell} ipython3 -%%R -lm(Sales ~ bs(Income, df=7, degree=2) + ShelveLoc, data=Carseats)$coef +M_ols_np = sm.OLS(Y, MS_np.transform(Carseats_np)).fit() +M_ols_np.params ``` -## PCA +Now, let's consider finding the design matrix at new points. +When using `pd.DataFrame` we only need to supply the `transform` method +a data frame with columns implicated in the `terms` argument (in this case, `Price` and `Income`). -```{code-cell} ipython3 -design = ModelSpec([pca(['Income', - 'Price', - 'Advertising', - 'Population'], - n_components=2, - name='myvars'), 'ShelveLoc']) -X = design.fit_transform(Carseats) -sm.OLS(Y, X).fit().params -``` +However, when using `np.ndarray` with integers as indices, `Price` was column 0 and `Income` was column 2. The only +sensible way to produce a return for predict is to extract its 0th and 2nd columns. Note this means +that the meaning of columns in an `np.ndarray` provided to `transform` essentially must be identical to those +passed to `fit`. ```{code-cell} ipython3 -%%R -lm(Sales ~ prcomp(cbind(Income, Price, Advertising, Population))$x[,1:2] + ShelveLoc, data=Carseats) +try: + new_D = np.array([[40,50], [10,20]]).T + new_X = MS_np.transform(new_D) +except IndexError as e: + print(e) ``` -It is of course common to scale before running PCA. 
+Ultimately, `M` expects 3 columns for new predictions because it was fit +with a matrix having 3 columns (the first representing an intercept). -```{code-cell} ipython3 -design = ModelSpec([pca(['Income', - 'Price', - 'Advertising', - 'Population'], - n_components=2, - name='myvars', - scale=True), 'ShelveLoc']) -X = design.fit_transform(Carseats) -sm.OLS(Y, X).fit().params -``` +We might be tempted to try as with the `pd.DataFrame` and produce +an `np.ndarray` with only the necessary variables. ```{code-cell} ipython3 -%%R -lm(Sales ~ prcomp(cbind(Income, Price, Advertising, Population), scale=TRUE)$x[,1:2] + ShelveLoc, data=Carseats) +new_D = np.array([[40,50], [np.nan, np.nan], [10,20]]).T +new_X = MS_np.transform(new_D) +print(new_X) +M_ols.get_prediction(new_X).predicted_mean ``` -There will be some small differences in the coefficients due to `sklearn` use of `np.std(ddof=0)` instead -of `np.std(ddof=1)`. - -```{code-cell} ipython3 -np.array(sm.OLS(Y, X).fit().params)[1:3] * np.sqrt(X.shape[0] / (X.shape[0]-1)) -``` +For more complicated design contructions ensuring the columns of `new_D` match that of the original data will be more cumbersome. We expect +then that `pd.DataFrame` (or a columnar data representation with similar API) will likely be easier to use with `ModelSpec`. 
diff --git a/docs/jupyterbook/models/submodels.ipynb b/docs/jupyterbook/models/submodels.ipynb deleted file mode 100644 index 777037a..0000000 --- a/docs/jupyterbook/models/submodels.ipynb +++ /dev/null @@ -1,3127 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "ee33d364", - "metadata": {}, - "source": [ - "# Building design matrices with `ModelSpec`\n", - "\n", - "Force rebuild" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "4c70fbaa", - "metadata": {}, - "outputs": [], - "source": [ - "x=4\n", - "import numpy as np, pandas as pd\n", - "%load_ext rpy2.ipython\n", - "\n", - "from ISLP import load_data\n", - "from ISLP.models import ModelSpec\n", - "\n", - "import statsmodels.api as sm" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "8a708215", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['Sales', 'CompPrice', 'Income', 'Advertising', 'Population', 'Price',\n", - " 'ShelveLoc', 'Age', 'Education', 'Urban', 'US'],\n", - " dtype='object')" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Carseats = load_data('Carseats')\n", - "%R -i Carseats\n", - "Carseats.columns" - ] - }, - { - "cell_type": "markdown", - "id": "dad5e991", - "metadata": {}, - "source": [ - "## Let's break up income into groups" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "ac7086a5", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 M\n", - "1 L\n", - "2 L\n", - "3 H\n", - "4 M\n", - " ..\n", - "395 H\n", - "396 L\n", - "397 L\n", - "398 M\n", - "399 L\n", - "Name: OIncome, Length: 400, dtype: category\n", - "Categories (3, object): ['L' < 'M' < 'H']" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Carseats['OIncome'] = pd.cut(Carseats['Income'], \n", - " [0,50,90,200], \n", - " labels=['L','M','H'])\n", - "Carseats['OIncome']" - ] - }, - { - 
"cell_type": "markdown", - "id": "261446c8", - "metadata": {}, - "source": [ - "Let's also create an unordered version" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "674bb806", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 M\n", - "1 L\n", - "2 L\n", - "3 H\n", - "4 M\n", - " ..\n", - "395 H\n", - "396 L\n", - "397 L\n", - "398 M\n", - "399 L\n", - "Name: UIncome, Length: 400, dtype: category\n", - "Categories (3, object): ['L', 'M', 'H']" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Carseats['UIncome'] = pd.cut(Carseats['Income'], \n", - " [0,50,90,200], \n", - " labels=['L','M','H'],\n", - " ordered=False)\n", - "Carseats['UIncome']" - ] - }, - { - "cell_type": "markdown", - "id": "8f030039", - "metadata": {}, - "source": [ - "## A simple model" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "40cd6c28", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['intercept', 'Price', 'Income'], dtype='object')" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design = ModelSpec(['Price', 'Income'])\n", - "X = design.fit_transform(Carseats)\n", - "X.columns" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "e65f5607", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "intercept 12.661546\n", - "Price -0.052213\n", - "Income 0.012829\n", - "dtype: float64" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Y = Carseats['Sales']\n", - "M = sm.OLS(Y, X).fit()\n", - "M.params" - ] - }, - { - "cell_type": "markdown", - "id": "29d9b55f", - "metadata": {}, - "source": [ - "## Basic procedure\n", - "\n", - "The design matrix is built by cobbling together a set of columns and possibly transforming them.\n", - "A `pd.DataFrame` is essentially a list of columns. 
One of the first tasks done in `ModelSpec.fit`\n", - "is to inspect a dataframe for column info. The column `ShelveLoc` is categorical:" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "cfbe5b92", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 Bad\n", - "1 Good\n", - "2 Medium\n", - "3 Medium\n", - "4 Bad\n", - " ... \n", - "395 Good\n", - "396 Medium\n", - "397 Medium\n", - "398 Bad\n", - "399 Good\n", - "Name: ShelveLoc, Length: 400, dtype: category\n", - "Categories (3, object): ['Bad', 'Good', 'Medium']" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Carseats['ShelveLoc']" - ] - }, - { - "cell_type": "markdown", - "id": "7092f666", - "metadata": {}, - "source": [ - "This is recognized by `ModelSpec` in the form of `Column` objects which are just named tuples with two methods\n", - "`get_columns` and `fit_encoder`." - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "e2d43844", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Column(idx='ShelveLoc', name='ShelveLoc', is_categorical=True, is_ordinal=False, columns=('ShelveLoc[Good]', 'ShelveLoc[Medium]'), encoder=Contrast())" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design.column_info_['ShelveLoc']" - ] - }, - { - "cell_type": "markdown", - "id": "46a01612", - "metadata": {}, - "source": [ - "It recognized ordinal columns as well." 
- ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "465a9326", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Column(idx='OIncome', name='OIncome', is_categorical=True, is_ordinal=True, columns=('OIncome',), encoder=OrdinalEncoder())" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design.column_info_['OIncome']" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "76f8480d", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(array([ 73, 48, 35, 100]), ('Income',))" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "income = design.column_info_['Income']\n", - "cols, names = income.get_columns(Carseats)\n", - "(cols[:4], names)" - ] - }, - { - "cell_type": "markdown", - "id": "25fcc1de", - "metadata": {}, - "source": [ - "## Encoding a column\n", - "\n", - "In building a design matrix we must extract columns from our dataframe (or `np.ndarray`). Categorical\n", - "variables usually are encoded by several columns, typically one less than the number of categories.\n", - "This task is handled by the `encoder` of the `Column`. The encoder must satisfy the `sklearn` transform\n", - "model, i.e. `fit` on some array and `transform` on future arrays. The `fit_encoder` method of `Column` fits\n", - "its encoder the first time data is passed to it." 
- ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "dfe6cc35", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(array([[0., 0.],\n", - " [1., 0.],\n", - " [0., 1.],\n", - " [0., 1.]]),\n", - " ['ShelveLoc[Good]', 'ShelveLoc[Medium]'])" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "shelve = design.column_info_['ShelveLoc']\n", - "cols, names = shelve.get_columns(Carseats)\n", - "(cols[:4], names)" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "8fc9779a", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[2.],\n", - " [1.],\n", - " [1.],\n", - " [0.]])" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "oincome = design.column_info_['OIncome']\n", - "oincome.get_columns(Carseats)[0][:4]" - ] - }, - { - "cell_type": "markdown", - "id": "8e04da60", - "metadata": {}, - "source": [ - "## The terms\n", - "\n", - "The design matrix consists of several sets of columns. This is managed by the `ModelSpec` through\n", - "the `terms` argument which should be a sequence. The elements of `terms` are often\n", - "going to be strings (or tuples of strings for interactions, see below) but are converted to a\n", - "`Variable` object and stored in the `terms_` of the fitted `ModelSpec`. A `Variable` is just a named tuple." 
- ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "c579dbce", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['Price', 'Income']" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design.terms" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "4587b8bd", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[Variable(variables=('Price',), name='Price', encoder=None, use_transform=True, pure_columns=True, override_encoder_colnames=False),\n", - " Variable(variables=('Income',), name='Income', encoder=None, use_transform=True, pure_columns=True, override_encoder_colnames=False)]" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design.terms_" - ] - }, - { - "cell_type": "markdown", - "id": "2595f0fa", - "metadata": {}, - "source": [ - "While each `Column` can itself extract data, they are all promoted to `Variable` to be of a uniform type. A\n", - "`Variable` can also create columns through the `build_columns` method of `ModelSpec`" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "03bd9366", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "( Price\n", - " 0 120\n", - " 1 83\n", - " 2 80\n", - " 3 97\n", - " 4 128\n", - " .. ...\n", - " 395 128\n", - " 396 120\n", - " 397 159\n", - " 398 95\n", - " 399 120\n", - " \n", - " [400 rows x 1 columns],\n", - " ['Price'])" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "price = design.terms_[0]\n", - "design.build_columns(Carseats, price)" - ] - }, - { - "cell_type": "markdown", - "id": "de04ca48", - "metadata": {}, - "source": [ - "Note that `Variable` objects have a tuple of `variables` as well as an `encoder` attribute. 
The\n", - "tuple of `variables` first creates a concatenated dataframe from all corresponding variables and then\n", - "is run through `encoder.transform`. The `encoder.fit` method of each `Variable` is run once during \n", - "the call to `ModelSpec.fit`." - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "a42af4c5", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "( Price Income UIncome[L] UIncome[M]\n", - " 0 120.0 73.0 0.0 1.0\n", - " 1 83.0 48.0 1.0 0.0\n", - " 2 80.0 35.0 1.0 0.0\n", - " 3 97.0 100.0 0.0 0.0\n", - " 4 128.0 64.0 0.0 1.0\n", - " .. ... ... ... ...\n", - " 395 128.0 108.0 0.0 0.0\n", - " 396 120.0 23.0 1.0 0.0\n", - " 397 159.0 26.0 1.0 0.0\n", - " 398 95.0 79.0 0.0 1.0\n", - " 399 120.0 37.0 1.0 0.0\n", - " \n", - " [400 rows x 4 columns],\n", - " ['Price', 'Income', 'UIncome[L]', 'UIncome[M]'])" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from ISLP.models.model_spec import Variable\n", - "\n", - "new_var = Variable(('Price', 'Income', 'UIncome'), name='mynewvar', encoder=None)\n", - "design.build_columns(Carseats, new_var)" - ] - }, - { - "cell_type": "markdown", - "id": "b146d0c0", - "metadata": {}, - "source": [ - "Let's now transform these columns with an encoder. Within `ModelSpec` we will first build the\n", - "arrays above and then call `pca.fit` and finally `pca.transform` within `design.build_columns`." 
- ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "b6c394a6", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but PCA was fitted with feature names\n", - " warnings.warn(\n" - ] - }, - { - "data": { - "text/plain": [ - "( mynewvar[0] mynewvar[1]\n", - " 0 -3.608693 -4.853177\n", - " 1 15.081506 35.708630\n", - " 2 27.422871 40.774250\n", - " 3 -33.973209 13.470489\n", - " 4 6.567316 -11.290100\n", - " .. ... ...\n", - " 395 -36.846346 -18.415783\n", - " 396 45.741500 3.245602\n", - " 397 49.097533 -35.725355\n", - " 398 -13.577772 18.845139\n", - " 399 31.927566 0.978436\n", - " \n", - " [400 rows x 2 columns],\n", - " ['mynewvar[0]', 'mynewvar[1]'])" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from sklearn.decomposition import PCA\n", - "pca = PCA(n_components=2)\n", - "pca.fit(design.build_columns(Carseats, new_var)[0]) # this is done within `ModelSpec.fit`\n", - "pca_var = Variable(('Price', 'Income', 'UIncome'), name='mynewvar', encoder=pca)\n", - "design.build_columns(Carseats, pca_var)" - ] - }, - { - "cell_type": "markdown", - "id": "3bb30a3f", - "metadata": {}, - "source": [ - "The elements of the `variables` attribute may be column identifiers ( `\"Price\"`), `Column` instances (`price`)\n", - "or `Variable` instances (`pca_var`)." 
- ] - }, - { - "cell_type": "code", - "execution_count": 18, - "id": "ea7770ff", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but PCA was fitted with feature names\n", - " warnings.warn(\n" - ] - }, - { - "data": { - "text/plain": [ - "( Price Price mynewvar[0] mynewvar[1]\n", - " 0 120.0 120.0 -3.608693 -4.853177\n", - " 1 83.0 83.0 15.081506 35.708630\n", - " 2 80.0 80.0 27.422871 40.774250\n", - " 3 97.0 97.0 -33.973209 13.470489\n", - " 4 128.0 128.0 6.567316 -11.290100\n", - " .. ... ... ... ...\n", - " 395 128.0 128.0 -36.846346 -18.415783\n", - " 396 120.0 120.0 45.741500 3.245602\n", - " 397 159.0 159.0 49.097533 -35.725355\n", - " 398 95.0 95.0 -13.577772 18.845139\n", - " 399 120.0 120.0 31.927566 0.978436\n", - " \n", - " [400 rows x 4 columns],\n", - " ['Price', 'Price', 'mynewvar[0]', 'mynewvar[1]'])" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "fancy_var = Variable(('Price', price, pca_var), name='fancy', encoder=None)\n", - "design.build_columns(Carseats, fancy_var)" - ] - }, - { - "cell_type": "markdown", - "id": "b2b4a01a", - "metadata": {}, - "source": [ - "We can of course run PCA again on these features (if we wanted)." 
- ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "21ad8b44", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but PCA was fitted with feature names\n", - " warnings.warn(\n", - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but PCA was fitted with feature names\n", - " warnings.warn(\n", - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but PCA was fitted with feature names\n", - " warnings.warn(\n" - ] - }, - { - "data": { - "text/plain": [ - "( fancy_pca[0] fancy_pca[1]\n", - " 0 -6.951792 4.859283\n", - " 1 55.170148 -24.694875\n", - " 2 59.418556 -38.033572\n", - " 3 34.722389 28.922184\n", - " 4 -21.419184 -3.120673\n", - " .. ... 
...\n", - " 395 -18.257348 40.760122\n", - " 396 -10.546709 -45.021658\n", - " 397 -77.706359 -37.174379\n", - " 398 36.668694 7.730851\n", - " 399 -9.540535 -31.059122\n", - " \n", - " [400 rows x 2 columns],\n", - " ['fancy_pca[0]', 'fancy_pca[1]'])" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pca2 = PCA(n_components=2)\n", - "pca2.fit(design.build_columns(Carseats, fancy_var)[0]) # this is done within `ModelSpec.fit`\n", - "pca2_var = Variable(('Price', price, pca_var), name='fancy_pca', encoder=pca2)\n", - "design.build_columns(Carseats, pca2_var)" - ] - }, - { - "cell_type": "markdown", - "id": "2262377d", - "metadata": {}, - "source": [ - "## Building the design matrix\n", - "\n", - "With these notions in mind, the final design is essentially then" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "1654ca47", - "metadata": {}, - "outputs": [], - "source": [ - "X_hand = np.column_stack([design.build_columns(Carseats, v)[0] for v in design.terms_])[:4]" - ] - }, - { - "cell_type": "markdown", - "id": "1db0e0a9", - "metadata": {}, - "source": [ - "An intercept column is added if `design.intercept` is `True` and if the original argument to `transform` is\n", - "a dataframe the index is adjusted accordingly." - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": "d20e8ea8", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design.intercept" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "id": "450fe910", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
interceptPriceIncome
01.012073
11.08348
21.08035
31.097100
\n", - "
" - ], - "text/plain": [ - " intercept Price Income\n", - "0 1.0 120 73\n", - "1 1.0 83 48\n", - "2 1.0 80 35\n", - "3 1.0 97 100" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design.transform(Carseats)[:4]" - ] - }, - { - "cell_type": "markdown", - "id": "0705ba6f", - "metadata": {}, - "source": [ - "## Predicting\n", - "\n", - "Constructing the design matrix at any values is carried out by the `transform` method." - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "id": "866c2863", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([12.65257604, 12.25873428])" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "new_data = pd.DataFrame({'Price':[10,20], 'Income':[40, 50]})\n", - "new_X = design.transform(new_data)\n", - "M.get_prediction(new_X).predicted_mean" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "id": "f2021166", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " 0 1 \n", - "12.65258 12.25873 \n" - ] - } - ], - "source": [ - "%%R -i new_data,Carseats\n", - "predict(lm(Sales ~ Price + Income, data=Carseats), new_data)" - ] - }, - { - "cell_type": "markdown", - "id": "20e1a31a", - "metadata": {}, - "source": [ - "### Difference between using `pd.DataFrame` and `np.ndarray`\n", - "\n", - "If the `terms` only refer to a few columns of the data frame, the `transform` method only needs a dataframe with those columns.\n", - "\n", - "If we had used an `np.ndarray`, the column identifiers would be integers identifying specific columns so,\n", - "in order to work correctly, `transform` would need another `np.ndarray` where the columns have the same meaning." 
- ] - }, - { - "cell_type": "code", - "execution_count": 25, - "id": "a5926ec9", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[1.0, 120, 73],\n", - " [1.0, 83, 48],\n", - " [1.0, 80, 35],\n", - " [1.0, 97, 100]], dtype=object)" - ] - }, - "execution_count": 25, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Carseats_np = np.asarray(Carseats[['Price', 'ShelveLoc', 'US', 'Income']])\n", - "design_np = ModelSpec([0,3]).fit(Carseats_np)\n", - "design_np.transform(Carseats_np)[:4]" - ] - }, - { - "cell_type": "markdown", - "id": "997a63cb", - "metadata": {}, - "source": [ - "The following will fail for hopefully obvious reasons" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "id": "40410c48", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "shapes (2,2) and (3,) not aligned: 2 (dim 1) != 3 (dim 0)\n" - ] - } - ], - "source": [ - "try:\n", - " new_D = np.zeros((2,2))\n", - " new_D[:,0] = [10,20]\n", - " new_D[:,1] = [40,50]\n", - " M.get_prediction(new_D).predicted_mean\n", - "except ValueError as e:\n", - " print(e)" - ] - }, - { - "cell_type": "markdown", - "id": "920203e9", - "metadata": {}, - "source": [ - "Ultimately, `M` expects 3 columns for new predictions because it was fit\n", - "with a matrix having 3 columns (the first representing an intercept).\n", - "\n", - "We might be tempted to try as with the `pd.DataFrame` and produce\n", - "an `np.ndarray` with only the necessary variables." 
- ] - }, - { - "cell_type": "code", - "execution_count": 27, - "id": "1061da77", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "index 3 is out of bounds for axis 1 with size 2\n" - ] - } - ], - "source": [ - "try:\n", - " new_X = np.zeros((2,2))\n", - " new_X[:,0] = [10,20]\n", - " new_X[:,1] = [40,50]\n", - " new_D = design_np.transform(new_X)\n", - " M.get_prediction(new_D).predicted_mean\n", - "except IndexError as e:\n", - " print(e)" - ] - }, - { - "cell_type": "markdown", - "id": "c6bfe001", - "metadata": {}, - "source": [ - "This fails because `design_np` is looking for column `3` from its `terms`:" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "id": "5ae6d25f", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[Variable(variables=(0,), name='0', encoder=None, use_transform=True, pure_columns=True, override_encoder_colnames=False),\n", - " Variable(variables=(3,), name='3', encoder=None, use_transform=True, pure_columns=True, override_encoder_colnames=False)]" - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design_np.terms_" - ] - }, - { - "cell_type": "markdown", - "id": "edd7ebeb", - "metadata": {}, - "source": [ - "However, if we have an `np.ndarray` in which the first column indeed represents `Price` and the fourth indeed\n", - "represents `Income` then we can arrive at the correct answer by supplying such the array to `design_np.transform`:" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "id": "9455e532", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([12.65257604, 12.25873428])" - ] - }, - "execution_count": 29, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "new_X = np.zeros((2,4))\n", - "new_X[:,0] = [10,20]\n", - "new_X[:,3] = [40,50]\n", - "new_D = design_np.transform(new_X)\n", - "M.get_prediction(new_D).predicted_mean" - ] 
- }, - { - "cell_type": "markdown", - "id": "fd726791", - "metadata": {}, - "source": [ - "Given this subtlety about needing to supply arrays with identical column structure to `transform` when\n", - "using `np.ndarray` we presume that using a `pd.DataFrame` will be the more popular use case." - ] - }, - { - "cell_type": "markdown", - "id": "967d9ebc", - "metadata": {}, - "source": [ - "## A model with some categorical variables\n", - "\n", - "Categorical variables become `Column` instances with encoders." - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "id": "d0429b56", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Column(idx='UIncome', name='UIncome', is_categorical=True, is_ordinal=False, columns=('UIncome[L]', 'UIncome[M]'), encoder=Contrast())" - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design = ModelSpec(['Population', 'Price', 'UIncome', 'ShelveLoc']).fit(Carseats)\n", - "design.column_info_['UIncome']" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "id": "415e3fd0", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['intercept', 'Population', 'Price', 'UIncome[L]', 'UIncome[M]',\n", - " 'ShelveLoc[Good]', 'ShelveLoc[Medium]'],\n", - " dtype='object')" - ] - }, - "execution_count": 31, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "X = design.fit_transform(Carseats)\n", - "X.columns" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "id": "8a99c3a5", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "intercept 11.876012\n", - "Population 0.001163\n", - "Price -0.055725\n", - "UIncome[L] -1.042297\n", - "UIncome[M] -0.119123\n", - "ShelveLoc[Good] 4.999623\n", - "ShelveLoc[Medium] 1.964278\n", - "dtype: float64" - ] - }, - "execution_count": 32, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "sm.OLS(Y, X).fit().params" - 
] - }, - { - "cell_type": "code", - "execution_count": 33, - "id": "9250a28a", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " (Intercept) Population Price UIncomeM UIncomeH \n", - " 10.83371503 0.00116301 -0.05572469 0.92317388 1.04229679 \n", - " ShelveLocGood ShelveLocMedium \n", - " 4.99962319 1.96427771 \n" - ] - } - ], - "source": [ - "%%R\n", - "lm(Sales ~ Population + Price + UIncome + ShelveLoc, data=Carseats)$coef" - ] - }, - { - "cell_type": "markdown", - "id": "fe90c12c", - "metadata": {}, - "source": [ - "## Getting the encoding you want\n", - "\n", - "By default the level dropped by `ModelSpec` will be the first of the `categories_` values from \n", - "`sklearn.preprocessing.OneHotEncoder()`. We might wish to change this. It seems\n", - "as if the correct way to do this would be something like `Variable(('UIncome',), 'mynewencoding', new_encoder)`\n", - "where `new_encoder` would somehow drop the column we want dropped. \n", - "\n", - "However, when using the convenient identifier `UIncome` in the `variables` argument, this maps to the `Column` associated to `UIncome` within `design.column_info_`:" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "id": "0546ec84", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Column(idx='UIncome', name='UIncome', is_categorical=True, is_ordinal=False, columns=('UIncome[L]', 'UIncome[M]'), encoder=Contrast())" - ] - }, - "execution_count": 34, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design.column_info_['UIncome']" - ] - }, - { - "cell_type": "markdown", - "id": "6ec4fe65", - "metadata": {}, - "source": [ - "This column already has an encoder and `Column` instances are immutable as named tuples. Further, there are times when \n", - "we may want to encode `UIncome` differently within the same model. 
In the model below the main effect of `UIncome` is encoded with two columns while in the interaction `UIncome` (see below) has three columns. This is a design of interest\n", - "and we need a way to allow different encodings of the same column of `Carseats`" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "id": "61e7f56e", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Call:\n", - "lm(formula = Sales ~ UIncome:ShelveLoc + UIncome, data = Carseats)\n", - "\n", - "Coefficients:\n", - " (Intercept) UIncomeM UIncomeH \n", - " 5.1317 0.1151 1.1561 \n", - " UIncomeL:ShelveLocGood UIncomeM:ShelveLocGood UIncomeH:ShelveLocGood \n", - " 4.5121 5.5752 3.7381 \n", - "UIncomeL:ShelveLocMedium UIncomeM:ShelveLocMedium UIncomeH:ShelveLocMedium \n", - " 1.2473 2.4782 1.5141 \n", - "\n" - ] - } - ], - "source": [ - "%%R\n", - "lm(Sales ~ UIncome:ShelveLoc + UIncome, data=Carseats)" - ] - }, - { - "cell_type": "markdown", - "id": "802ed854", - "metadata": {}, - "source": [ - " We can create a new \n", - "`Column` with the encoder we want. For categorical variables, there is a convenience function to do so." - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "id": "82d7a01d", - "metadata": {}, - "outputs": [], - "source": [ - "from ISLP.models.model_spec import contrast\n", - "pref_encoding = contrast('UIncome', 'drop', 'L')" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "id": "e26849a1", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "( UIncome[M] UIncome[H]\n", - " 0 1.0 0.0\n", - " 1 0.0 0.0\n", - " 2 0.0 0.0\n", - " 3 0.0 1.0\n", - " 4 1.0 0.0\n", - " .. ... 
...\n", - " 395 0.0 1.0\n", - " 396 0.0 0.0\n", - " 397 0.0 0.0\n", - " 398 1.0 0.0\n", - " 399 0.0 0.0\n", - " \n", - " [400 rows x 2 columns],\n", - " ['UIncome[M]', 'UIncome[H]'])" - ] - }, - "execution_count": 37, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design.build_columns(Carseats, pref_encoding)" - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "id": "2fc4cd8c", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['intercept', 'Population', 'Price', 'UIncome[M]', 'UIncome[H]',\n", - " 'ShelveLoc[Good]', 'ShelveLoc[Medium]'],\n", - " dtype='object')" - ] - }, - "execution_count": 38, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design = ModelSpec(['Population', 'Price', pref_encoding, 'ShelveLoc']).fit(Carseats)\n", - "X = design.fit_transform(Carseats)\n", - "X.columns" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "id": "49e33d41", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "intercept 10.833715\n", - "Population 0.001163\n", - "Price -0.055725\n", - "UIncome[M] 0.923174\n", - "UIncome[H] 1.042297\n", - "ShelveLoc[Good] 4.999623\n", - "ShelveLoc[Medium] 1.964278\n", - "dtype: float64" - ] - }, - "execution_count": 39, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "sm.OLS(Y, X).fit().params" - ] - }, - { - "cell_type": "code", - "execution_count": 40, - "id": "ce018fdf", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " (Intercept) Population Price UIncomeM UIncomeH \n", - " 10.83371503 0.00116301 -0.05572469 0.92317388 1.04229679 \n", - " ShelveLocGood ShelveLocMedium \n", - " 4.99962319 1.96427771 \n" - ] - } - ], - "source": [ - "%%R\n", - "lm(Sales ~ Population + Price + UIncome + ShelveLoc, data=Carseats)$coef" - ] - }, - { - "cell_type": "markdown", - "id": "2d42b822", - "metadata": {}, - "source": [ - "## Interactions\n", - 
"\n", - "We've referred to interactions above. These are specified (by convenience) as tuples in the `terms` argument\n", - "to `ModelSpec`." - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "id": "fbb3e3ba", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "intercept 7.866634\n", - "UIncome[L]:ShelveLoc[Good] 4.512054\n", - "UIncome[L]:ShelveLoc[Medium] 1.247275\n", - "UIncome[M]:ShelveLoc[Good] 5.575170\n", - "UIncome[M]:ShelveLoc[Medium] 2.478163\n", - "UIncome[L] -2.734895\n", - "UIncome[M] -2.619745\n", - "dtype: float64" - ] - }, - "execution_count": 41, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design = ModelSpec([('UIncome', 'ShelveLoc'), 'UIncome'])\n", - "X = design.fit_transform(Carseats)\n", - "sm.OLS(Y, X).fit().params" - ] - }, - { - "cell_type": "markdown", - "id": "f9a7d4ad", - "metadata": {}, - "source": [ - "The tuples in `terms` are converted to `Variable` in the formalized `terms_` attribute by creating a `Variable` with\n", - "`variables` set to the tuple and the encoder an `Interaction` encoder which (unsurprisingly) creates the interaction columns from the concatenated data frames of `UIncome` and `ShelveLoc`." 
- ] - }, - { - "cell_type": "code", - "execution_count": 42, - "id": "5a6f8e69", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Variable(variables=('UIncome', 'ShelveLoc'), name='UIncome:ShelveLoc', encoder=Interaction(column_names={'ShelveLoc': ['ShelveLoc[Good]', 'ShelveLoc[Medium]'],\n", - " 'UIncome': ['UIncome[L]', 'UIncome[M]']},\n", - " columns={'ShelveLoc': range(2, 4), 'UIncome': range(0, 2)},\n", - " variables=['UIncome', 'ShelveLoc']), use_transform=True, pure_columns=False, override_encoder_colnames=False)" - ] - }, - "execution_count": 42, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design.terms_[0]" - ] - }, - { - "cell_type": "markdown", - "id": "98eef5c8", - "metadata": {}, - "source": [ - "Comparing this to the previous `R` model." - ] - }, - { - "cell_type": "code", - "execution_count": 43, - "id": "58c99601", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Call:\n", - "lm(formula = Sales ~ UIncome:ShelveLoc + UIncome, data = Carseats)\n", - "\n", - "Coefficients:\n", - " (Intercept) UIncomeM UIncomeH \n", - " 5.1317 0.1151 1.1561 \n", - " UIncomeL:ShelveLocGood UIncomeM:ShelveLocGood UIncomeH:ShelveLocGood \n", - " 4.5121 5.5752 3.7381 \n", - "UIncomeL:ShelveLocMedium UIncomeM:ShelveLocMedium UIncomeH:ShelveLocMedium \n", - " 1.2473 2.4782 1.5141 \n", - "\n" - ] - } - ], - "source": [ - "%%R\n", - "lm(Sales ~ UIncome:ShelveLoc + UIncome, data=Carseats)" - ] - }, - { - "cell_type": "markdown", - "id": "9c979d7e", - "metadata": {}, - "source": [ - "We note a few important things:\n", - "\n", - "1. `R` has reorganized the columns of the design from the formula: although we wrote `UIncome:ShelveLoc` first these\n", - "columns have been built later. **`ModelSpec` builds columns in the order determined by `terms`!**\n", - "\n", - "2. As noted above, `R` has encoded `UIncome` differently in the main effect and in the interaction. 
For `ModelSpec`, the reference to `UIncome` always refers to the column in `design.column_info_` and will always build only the columns for `L` and `M`. **`ModelSpec` does no inspection of terms to decide how to encode categorical variables.**\n", - "\n", - "A few notes:\n", - "\n", - "- **Why not try to inspect the terms?** For any nontrivial formula in `R` with several categorical variables and interactions, predicting what columns will be produced from a given formula is not simple. **`ModelSpec` errs on the side of being explicit.**\n", - "\n", - "- **Is it impossible to build the design as `R` has?** No. An advanced user who *knows* they want the columns built as `R` has can do so (fairly) easily." - ] - }, - { - "cell_type": "code", - "execution_count": 44, - "id": "0cb3b63a", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "( UIncome[H] UIncome[L] UIncome[M]\n", - " 0 0.0 0.0 1.0\n", - " 1 0.0 1.0 0.0\n", - " 2 0.0 1.0 0.0\n", - " 3 1.0 0.0 0.0\n", - " 4 0.0 0.0 1.0\n", - " .. ... ... 
...\n", - " 395 1.0 0.0 0.0\n", - " 396 0.0 1.0 0.0\n", - " 397 0.0 1.0 0.0\n", - " 398 0.0 0.0 1.0\n", - " 399 0.0 1.0 0.0\n", - " \n", - " [400 rows x 3 columns],\n", - " ['UIncome[H]', 'UIncome[L]', 'UIncome[M]'])" - ] - }, - "execution_count": 44, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "full_encoding = contrast('UIncome', None)\n", - "design.build_columns(Carseats, full_encoding)" - ] - }, - { - "cell_type": "code", - "execution_count": 45, - "id": "272098d7", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "intercept 5.131739\n", - "UIncome[M] 0.115150\n", - "UIncome[H] 1.156118\n", - "UIncome[H]:ShelveLoc[Good] 3.738052\n", - "UIncome[H]:ShelveLoc[Medium] 1.514104\n", - "UIncome[L]:ShelveLoc[Good] 4.512054\n", - "UIncome[L]:ShelveLoc[Medium] 1.247275\n", - "UIncome[M]:ShelveLoc[Good] 5.575170\n", - "UIncome[M]:ShelveLoc[Medium] 2.478163\n", - "dtype: float64" - ] - }, - "execution_count": 45, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design = ModelSpec([pref_encoding, (full_encoding, 'ShelveLoc')])\n", - "X = design.fit_transform(Carseats)\n", - "sm.OLS(Y, X).fit().params" - ] - }, - { - "cell_type": "markdown", - "id": "fe05c471", - "metadata": {}, - "source": [ - "## Special encodings\n", - "\n", - "For flexible models, we may want to consider transformations of features, i.e. polynomial\n", - "or spline transformations. Given transforms that follow the `fit/transform` paradigm\n", - "we can of course achieve this with a `Column` and an `encoder`. 
The `ISLP.transforms`\n", - "package includes a `Poly` transform" - ] - }, - { - "cell_type": "code", - "execution_count": 46, - "id": "67062299", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Variable(variables=('Income',), name='poly(Income, 3, )', encoder=Poly(degree=3), use_transform=True, pure_columns=False, override_encoder_colnames=True)" - ] - }, - "execution_count": 46, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from ISLP.models.model_spec import poly\n", - "poly('Income', 3)" - ] - }, - { - "cell_type": "code", - "execution_count": 47, - "id": "df5e5b4d", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "intercept 5.440077\n", - "poly(Income, 3, )[0] 10.036373\n", - "poly(Income, 3, )[1] -2.799156\n", - "poly(Income, 3, )[2] 2.399601\n", - "ShelveLoc[Good] 4.808133\n", - "ShelveLoc[Medium] 1.889533\n", - "dtype: float64" - ] - }, - "execution_count": 47, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design = ModelSpec([poly('Income', 3), 'ShelveLoc'])\n", - "X = design.fit_transform(Carseats)\n", - "sm.OLS(Y, X).fit().params" - ] - }, - { - "cell_type": "markdown", - "id": "01be9c13", - "metadata": {}, - "source": [ - "Compare:" - ] - }, - { - "cell_type": "code", - "execution_count": 48, - "id": "3244d6f6", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " (Intercept) poly(Income, 3)1 poly(Income, 3)2 poly(Income, 3)3 \n", - " 5.440077 10.036373 -2.799156 2.399601 \n", - " ShelveLocGood ShelveLocMedium \n", - " 4.808133 1.889533 \n" - ] - } - ], - "source": [ - "%%R\n", - "lm(Sales ~ poly(Income, 3) + ShelveLoc, data=Carseats)$coef" - ] - }, - { - "cell_type": "markdown", - "id": "8ad5bb1d", - "metadata": {}, - "source": [ - "## Splines\n", - "\n", - "Support for natural and B-splines is also included" - ] - }, - { - "cell_type": "code", - "execution_count": 49, - "id": "6a6f4358", - "metadata": 
{}, - "outputs": [ - { - "data": { - "text/plain": [ - "intercept 4.240421\n", - "ns(Income, , df=5)[0] 1.468196\n", - "ns(Income, , df=5)[1] 1.499471\n", - "ns(Income, , df=5)[2] 1.152070\n", - "ns(Income, , df=5)[3] 2.418398\n", - "ns(Income, , df=5)[4] 1.804460\n", - "ShelveLoc[Good] 4.810449\n", - "ShelveLoc[Medium] 1.881095\n", - "dtype: float64" - ] - }, - "execution_count": 49, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from ISLP.models.model_spec import ns, bs, pca\n", - "design = ModelSpec([ns('Income', df=5), 'ShelveLoc'])\n", - "X = design.fit_transform(Carseats)\n", - "sm.OLS(Y, X).fit().params" - ] - }, - { - "cell_type": "code", - "execution_count": 50, - "id": "fb740953", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " (Intercept) ns(Income, df = 5)1 ns(Income, df = 5)2 ns(Income, df = 5)3 \n", - " 4.240421 1.468196 1.499471 1.152070 \n", - "ns(Income, df = 5)4 ns(Income, df = 5)5 ShelveLocGood ShelveLocMedium \n", - " 2.418398 1.804460 4.810449 1.881095 \n" - ] - } - ], - "source": [ - "%%R\n", - "library(splines)\n", - "lm(Sales ~ ns(Income, df=5) + ShelveLoc, data=Carseats)$coef" - ] - }, - { - "cell_type": "code", - "execution_count": 51, - "id": "fe1bf7fe", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "intercept 3.495085\n", - "bs(Income, , df=7, degree=2)[0] 1.813118\n", - "bs(Income, , df=7, degree=2)[1] 0.961852\n", - "bs(Income, , df=7, degree=2)[2] 2.471545\n", - "bs(Income, , df=7, degree=2)[3] 2.158891\n", - "bs(Income, , df=7, degree=2)[4] 2.091625\n", - "bs(Income, , df=7, degree=2)[5] 2.600669\n", - "bs(Income, , df=7, degree=2)[6] 2.843108\n", - "ShelveLoc[Good] 4.804919\n", - "ShelveLoc[Medium] 1.880337\n", - "dtype: float64" - ] - }, - "execution_count": 51, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design = ModelSpec([bs('Income', df=7, degree=2), 'ShelveLoc'])\n", - "X = 
design.fit_transform(Carseats)\n", - "sm.OLS(Y, X).fit().params" - ] - }, - { - "cell_type": "code", - "execution_count": 52, - "id": "86e966e0", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " (Intercept) bs(Income, df = 7, degree = 2)1 \n", - " 3.4950851 1.8131176 \n", - "bs(Income, df = 7, degree = 2)2 bs(Income, df = 7, degree = 2)3 \n", - " 0.9618523 2.4715450 \n", - "bs(Income, df = 7, degree = 2)4 bs(Income, df = 7, degree = 2)5 \n", - " 2.1588908 2.0916252 \n", - "bs(Income, df = 7, degree = 2)6 bs(Income, df = 7, degree = 2)7 \n", - " 2.6006694 2.8431084 \n", - " ShelveLocGood ShelveLocMedium \n", - " 4.8049190 1.8803375 \n" - ] - } - ], - "source": [ - "%%R\n", - "lm(Sales ~ bs(Income, df=7, degree=2) + ShelveLoc, data=Carseats)$coef" - ] - }, - { - "cell_type": "markdown", - "id": "877d4784", - "metadata": {}, - "source": [ - "## PCA" - ] - }, - { - "cell_type": "code", - "execution_count": 53, - "id": "8ba6cb20", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but PCA was fitted with feature names\n", - " warnings.warn(\n", - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but PCA was fitted with feature names\n", - " warnings.warn(\n", - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but PCA was fitted with feature names\n", - " warnings.warn(\n" - ] - }, - { - "data": { - "text/plain": [ - "intercept 5.419405\n", - "pca(myvars, , n_components=2)[0] -0.001131\n", - "pca(myvars, , n_components=2)[1] -0.024217\n", - "ShelveLoc[Good] 4.816253\n", - "ShelveLoc[Medium] 1.924139\n", - "dtype: float64" - ] - }, - 
"execution_count": 53, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design = ModelSpec([pca(['Income', \n", - " 'Price', \n", - " 'Advertising', \n", - " 'Population'], \n", - " n_components=2, \n", - " name='myvars'), 'ShelveLoc'])\n", - "X = design.fit_transform(Carseats)\n", - "sm.OLS(Y, X).fit().params" - ] - }, - { - "cell_type": "code", - "execution_count": 54, - "id": "f0319e51", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Call:\n", - "lm(formula = Sales ~ prcomp(cbind(Income, Price, Advertising, \n", - " Population))$x[, 1:2] + ShelveLoc, data = Carseats)\n", - "\n", - "Coefficients:\n", - " (Intercept) \n", - " 5.419405 \n", - "prcomp(cbind(Income, Price, Advertising, Population))$x[, 1:2]PC1 \n", - " 0.001131 \n", - "prcomp(cbind(Income, Price, Advertising, Population))$x[, 1:2]PC2 \n", - " -0.024217 \n", - " ShelveLocGood \n", - " 4.816253 \n", - " ShelveLocMedium \n", - " 1.924139 \n", - "\n" - ] - } - ], - "source": [ - "%%R\n", - "lm(Sales ~ prcomp(cbind(Income, Price, Advertising, Population))$x[,1:2] + ShelveLoc, data=Carseats)" - ] - }, - { - "cell_type": "markdown", - "id": "1f55086a", - "metadata": {}, - "source": [ - "It is of course common to scale before running PCA." 
- ] - }, - { - "cell_type": "code", - "execution_count": 55, - "id": "bbe9e004", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but StandardScaler was fitted with feature names\n", - " warnings.warn(\n", - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but StandardScaler was fitted with feature names\n", - " warnings.warn(\n", - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but StandardScaler was fitted with feature names\n", - " warnings.warn(\n" - ] - }, - { - "data": { - "text/plain": [ - "intercept 5.352159\n", - "pca(myvars, , n_components=2)[0] 0.446383\n", - "pca(myvars, , n_components=2)[1] -1.219788\n", - "ShelveLoc[Good] 4.922780\n", - "ShelveLoc[Medium] 2.005617\n", - "dtype: float64" - ] - }, - "execution_count": 55, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design = ModelSpec([pca(['Income', \n", - " 'Price', \n", - " 'Advertising', \n", - " 'Population'], \n", - " n_components=2, \n", - " name='myvars',\n", - " scale=True), 'ShelveLoc'])\n", - "X = design.fit_transform(Carseats)\n", - "sm.OLS(Y, X).fit().params" - ] - }, - { - "cell_type": "code", - "execution_count": 56, - "id": "d78c02e4", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Call:\n", - "lm(formula = Sales ~ prcomp(cbind(Income, Price, Advertising, \n", - " Population), scale = TRUE)$x[, 1:2] + ShelveLoc, data = Carseats)\n", - "\n", - "Coefficients:\n", - " (Intercept) \n", - " 5.3522 \n", - "prcomp(cbind(Income, Price, Advertising, Population), scale = TRUE)$x[, 1:2]PC1 \n", - " 0.4469 \n", - 
"prcomp(cbind(Income, Price, Advertising, Population), scale = TRUE)$x[, 1:2]PC2 \n", - " -1.2213 \n", - " ShelveLocGood \n", - " 4.9228 \n", - " ShelveLocMedium \n", - " 2.0056 \n", - "\n" - ] - } - ], - "source": [ - "%%R\n", - "lm(Sales ~ prcomp(cbind(Income, Price, Advertising, Population), scale=TRUE)$x[,1:2] + ShelveLoc, data=Carseats)" - ] - }, - { - "cell_type": "markdown", - "id": "8a03c603", - "metadata": {}, - "source": [ - "There will be some small differences in the coefficients due to `sklearn` use of `np.std(ddof=0)` instead\n", - "of `np.std(ddof=1)`." - ] - }, - { - "cell_type": "code", - "execution_count": 57, - "id": "f8215cef", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([ 0.44694166, -1.22131519])" - ] - }, - "execution_count": 57, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "np.array(sm.OLS(Y, X).fit().params)[1:3] * np.sqrt(X.shape[0] / (X.shape[0]-1))" - ] - }, - { - "cell_type": "markdown", - "id": "a15d0ead", - "metadata": {}, - "source": [ - "## Submodels\n", - "\n", - "We can build submodels as well, even if the terms do not appear in the original `terms` argument.\n", - "Fundamentally, the terms just need to be able to have the `design.build_columns` work for us to be\n", - "able to build a design matrix. The initial inspection of the columns of `Carseats` has created\n", - "a column for `US`, hence we can build this submodel." - ] - }, - { - "cell_type": "code", - "execution_count": 58, - "id": "d58c6244", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
interceptUS[Yes]
01.01.0
11.01.0
21.01.0
31.01.0
41.00.0
.........
3951.01.0
3961.01.0
3971.01.0
3981.01.0
3991.01.0
\n", - "

400 rows × 2 columns

\n", - "
" - ], - "text/plain": [ - " intercept US[Yes]\n", - "0 1.0 1.0\n", - "1 1.0 1.0\n", - "2 1.0 1.0\n", - "3 1.0 1.0\n", - "4 1.0 0.0\n", - ".. ... ...\n", - "395 1.0 1.0\n", - "396 1.0 1.0\n", - "397 1.0 1.0\n", - "398 1.0 1.0\n", - "399 1.0 1.0\n", - "\n", - "[400 rows x 2 columns]" - ] - }, - "execution_count": 58, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design = ModelSpec(['UIncome', 'ShelveLoc', 'Price']).fit(Carseats)\n", - "design.build_submodel(Carseats, ['US'])" - ] - }, - { - "cell_type": "markdown", - "id": "9365ba27", - "metadata": {}, - "source": [ - "## ANOVA \n", - "\n", - "For a given `terms` argument, there as a natural sequence of models, namely those specified by `[terms[:i] for i in range(len(terms)+1]`." - ] - }, - { - "cell_type": "code", - "execution_count": 59, - "id": "332ab454", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Index(['intercept'], dtype='object')\n", - "Index(['intercept', 'ShelveLoc[Good]', 'ShelveLoc[Medium]'], dtype='object')\n", - "Index(['intercept', 'ShelveLoc[Good]', 'ShelveLoc[Medium]', 'Price'], dtype='object')\n", - "Index(['intercept', 'ShelveLoc[Good]', 'ShelveLoc[Medium]', 'Price',\n", - " 'UIncome[L]', 'UIncome[M]'],\n", - " dtype='object')\n", - "Index(['intercept', 'ShelveLoc[Good]', 'ShelveLoc[Medium]', 'Price',\n", - " 'UIncome[L]', 'UIncome[M]', 'US[Yes]'],\n", - " dtype='object')\n" - ] - } - ], - "source": [ - "design = ModelSpec(['ShelveLoc', 'Price', 'UIncome', 'US']).fit(Carseats)\n", - "for D in design.build_sequence(Carseats):\n", - " print(D.columns)" - ] - }, - { - "cell_type": "code", - "execution_count": 60, - "id": "f6cfd031", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
df_residssrdf_diffss_diffFPr(>F)
0399.03182.2746980.0NaNNaNNaN
1397.02172.7435552.01009.531143153.0108585.452815e-50
2396.01455.6407021.0717.102853217.3771921.583751e-39
3394.01378.9159382.076.72476411.6288851.239031e-05
4393.01296.4627001.082.45323824.9942578.678832e-07
\n", - "
" - ], - "text/plain": [ - " df_resid ssr df_diff ss_diff F Pr(>F)\n", - "0 399.0 3182.274698 0.0 NaN NaN NaN\n", - "1 397.0 2172.743555 2.0 1009.531143 153.010858 5.452815e-50\n", - "2 396.0 1455.640702 1.0 717.102853 217.377192 1.583751e-39\n", - "3 394.0 1378.915938 2.0 76.724764 11.628885 1.239031e-05\n", - "4 393.0 1296.462700 1.0 82.453238 24.994257 8.678832e-07" - ] - }, - "execution_count": 60, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "sm.stats.anova_lm(*(sm.OLS(Y, D).fit() for D in design.build_sequence(Carseats) ))" - ] - }, - { - "cell_type": "code", - "execution_count": 61, - "id": "11c4aee8", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Analysis of Variance Table\n", - "\n", - "Response: Sales\n", - " Df Sum Sq Mean Sq F value Pr(>F) \n", - "ShelveLoc 2 1009.53 504.77 153.011 < 2.2e-16 ***\n", - "Price 1 717.10 717.10 217.377 < 2.2e-16 ***\n", - "UIncome 2 76.72 38.36 11.629 1.240e-05 ***\n", - "US 1 82.45 82.45 24.994 8.679e-07 ***\n", - "Residuals 393 1296.46 3.30 \n", - "---\n", - "Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1\n" - ] - } - ], - "source": [ - "%%R\n", - "anova(lm(Sales ~ ShelveLoc + Price + UIncome + US, data=Carseats))" - ] - }, - { - "cell_type": "markdown", - "id": "9a4e6e63", - "metadata": {}, - "source": [ - "Recall that `ModelSpec` does not inspect `terms` to reorder based on degree of \n", - "interaction as `R` does:" - ] - }, - { - "cell_type": "code", - "execution_count": 62, - "id": "6e7bf361", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
df_residssrdf_diffss_diffFPr(>F)
0399.03182.2746980.0NaNNaNNaN
1393.02059.3764136.01122.89828435.9400471.175738e-34
2391.02036.0445962.023.3318172.2403101.077900e-01
\n", - "
" - ], - "text/plain": [ - " df_resid ssr df_diff ss_diff F Pr(>F)\n", - "0 399.0 3182.274698 0.0 NaN NaN NaN\n", - "1 393.0 2059.376413 6.0 1122.898284 35.940047 1.175738e-34\n", - "2 391.0 2036.044596 2.0 23.331817 2.240310 1.077900e-01" - ] - }, - "execution_count": 62, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design = ModelSpec([(full_encoding, 'ShelveLoc'), pref_encoding]).fit(Carseats)\n", - "sm.stats.anova_lm(*(sm.OLS(Y, D).fit() for D in design.build_sequence(Carseats) ))" - ] - }, - { - "cell_type": "code", - "execution_count": 63, - "id": "ed7d4bfa", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Analysis of Variance Table\n", - "\n", - "Response: Sales\n", - " Df Sum Sq Mean Sq F value Pr(>F) \n", - "UIncome 2 61.92 30.962 5.9458 0.002859 ** \n", - "UIncome:ShelveLoc 6 1084.31 180.718 34.7049 < 2.2e-16 ***\n", - "Residuals 391 2036.04 5.207 \n", - "---\n", - "Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1\n" - ] - } - ], - "source": [ - "%%R\n", - "anova(lm(Sales ~ UIncome:ShelveLoc + UIncome, data=Carseats))" - ] - }, - { - "cell_type": "markdown", - "id": "0350da34", - "metadata": {}, - "source": [ - "To agree with `R` we must order `terms` as `R` will." - ] - }, - { - "cell_type": "code", - "execution_count": 64, - "id": "5ddaf87c", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
df_residssrdf_diffss_diffFPr(>F)
0399.03182.2746980.0NaNNaNNaN
1397.03120.3513822.061.9233165.9458462.855424e-03
2391.02036.0445966.01084.30678534.7048681.346561e-33
\n", - "
" - ], - "text/plain": [ - " df_resid ssr df_diff ss_diff F Pr(>F)\n", - "0 399.0 3182.274698 0.0 NaN NaN NaN\n", - "1 397.0 3120.351382 2.0 61.923316 5.945846 2.855424e-03\n", - "2 391.0 2036.044596 6.0 1084.306785 34.704868 1.346561e-33" - ] - }, - "execution_count": 64, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design = ModelSpec([pref_encoding, (full_encoding, 'ShelveLoc')]).fit(Carseats)\n", - "sm.stats.anova_lm(*(sm.OLS(Y, D).fit() for D in design.build_sequence(Carseats)))" - ] - }, - { - "cell_type": "markdown", - "id": "1ef70ce3", - "metadata": {}, - "source": [ - "## More complicated interactions\n", - "\n", - "Can we have an interaction of a polynomial effect with a categorical? Absolutely" - ] - }, - { - "cell_type": "code", - "execution_count": 65, - "id": "a1a14742", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Analysis of Variance Table\n", - "\n", - "Response: Sales\n", - " Df Sum Sq Mean Sq F value Pr(>F) \n", - "UIncome 2 61.92 30.9617 4.0310 0.01851 *\n", - "UIncome:poly(Income, 3) 9 79.72 8.8581 1.1533 0.32408 \n", - "UIncome:US 3 83.51 27.8367 3.6242 0.01324 *\n", - "Residuals 385 2957.12 7.6808 \n", - "---\n", - "Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1\n" - ] - } - ], - "source": [ - "%%R\n", - "anova(lm(Sales ~ UIncome + poly(Income, 3):UIncome + UIncome:US, data=Carseats))" - ] - }, - { - "cell_type": "markdown", - "id": "a909be1a", - "metadata": {}, - "source": [ - "To match `R` we note that it has used its inspection rules to encode `UIncome` with 3 levels\n", - "for the two interactions." 
- ] - }, - { - "cell_type": "code", - "execution_count": 66, - "id": "ae286cf3", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "intercept 65.978856\n", - "UIncome[M] -60.159607\n", - "UIncome[H] -147.276154\n", - "poly(Income, 3, )[0]:UIncome[H] 1957.694387\n", - "poly(Income, 3, )[0]:UIncome[L] 1462.060650\n", - "poly(Income, 3, )[0]:UIncome[M] 83.035153\n", - "poly(Income, 3, )[1]:UIncome[H] -984.494570\n", - "poly(Income, 3, )[1]:UIncome[L] 881.537647\n", - "poly(Income, 3, )[1]:UIncome[M] -18.006234\n", - "poly(Income, 3, )[2]:UIncome[H] 207.614692\n", - "poly(Income, 3, )[2]:UIncome[L] 217.190749\n", - "poly(Income, 3, )[2]:UIncome[M] 34.065434\n", - "UIncome[H]:US 0.903404\n", - "UIncome[L]:US 0.895538\n", - "UIncome[M]:US 1.048728\n", - "dtype: float64" - ] - }, - "execution_count": 66, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "p3 = poly('Income', 3)\n", - "design = ModelSpec([pref_encoding, (p3, full_encoding), (full_encoding, 'US')]).fit(Carseats)\n", - "X = design.transform(Carseats)\n", - "sm.OLS(Y, X).fit().params" - ] - }, - { - "cell_type": "code", - "execution_count": 67, - "id": "236ab2d2", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
df_residssrdf_diffss_diffFPr(>F)
0399.03182.2746980.0NaNNaNNaN
1397.03120.3513822.061.9233164.0310320.018488
2388.03040.6285599.079.7228231.1532730.324049
3385.02957.1184443.083.5101153.6241810.013244
\n", - "
" - ], - "text/plain": [ - " df_resid ssr df_diff ss_diff F Pr(>F)\n", - "0 399.0 3182.274698 0.0 NaN NaN NaN\n", - "1 397.0 3120.351382 2.0 61.923316 4.031032 0.018488\n", - "2 388.0 3040.628559 9.0 79.722823 1.153273 0.324049\n", - "3 385.0 2957.118444 3.0 83.510115 3.624181 0.013244" - ] - }, - "execution_count": 67, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "sm.stats.anova_lm(*(sm.OLS(Y, D).fit() for D in design.build_sequence(Carseats)))" - ] - }, - { - "cell_type": "markdown", - "id": "0a45c720", - "metadata": {}, - "source": [ - "## Grouping columns for ANOVA\n", - "\n", - "The `Variable` construct can be used to group\n", - "variables together to get custom sequences of models for `anova_lm`." - ] - }, - { - "cell_type": "code", - "execution_count": 68, - "id": "f36c1b3b", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Index(['intercept'], dtype='object')\n", - "Index(['intercept', 'Price', 'UIncome[M]', 'UIncome[H]'], dtype='object')\n", - "Index(['intercept', 'Price', 'UIncome[M]', 'UIncome[H]', 'US[Yes]',\n", - " 'Advertising'],\n", - " dtype='object')\n" - ] - } - ], - "source": [ - "group1 = Variable(('Price', pref_encoding), 'group1', None)\n", - "group2 = Variable(('US', 'Advertising'), 'group2', None)\n", - "design = ModelSpec([group1, group2]).fit(Carseats)\n", - "for D in design.build_sequence(Carseats):\n", - " print(D.columns)" - ] - }, - { - "cell_type": "code", - "execution_count": 69, - "id": "3daf7638", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
df_residssrdf_diffss_diffFPr(>F)
0399.03182.2746980.0NaNNaNNaN
1396.02508.1877883.0674.08691039.3048412.970412e-22
2394.02252.3963432.0255.79144522.3721356.267562e-10
\n", - "
" - ], - "text/plain": [ - " df_resid ssr df_diff ss_diff F Pr(>F)\n", - "0 399.0 3182.274698 0.0 NaN NaN NaN\n", - "1 396.0 2508.187788 3.0 674.086910 39.304841 2.970412e-22\n", - "2 394.0 2252.396343 2.0 255.791445 22.372135 6.267562e-10" - ] - }, - "execution_count": 69, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "sm.stats.anova_lm(*(sm.OLS(Y, D).fit() for D in design.build_sequence(Carseats)))" - ] - }, - { - "cell_type": "markdown", - "id": "46c1ace8", - "metadata": {}, - "source": [ - "It is not clear this is simple to do in `R` as the formula object expands all parentheses." - ] - }, - { - "cell_type": "code", - "execution_count": 70, - "id": "0b87e430", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Analysis of Variance Table\n", - "\n", - "Response: Sales\n", - " Df Sum Sq Mean Sq F value Pr(>F) \n", - "Price 1 630.03 630.03 110.2079 < 2.2e-16 ***\n", - "UIncome 2 44.06 22.03 3.8533 0.02201 * \n", - "US 1 121.88 121.88 21.3196 5.270e-06 ***\n", - "Advertising 1 133.91 133.91 23.4247 1.868e-06 ***\n", - "Residuals 394 2252.40 5.72 \n", - "---\n", - "Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1\n" - ] - } - ], - "source": [ - "%%R\n", - "anova(lm(Sales ~ (Price + UIncome) + (US + Advertising), data=Carseats))" - ] - }, - { - "cell_type": "markdown", - "id": "7c137360", - "metadata": {}, - "source": [ - "It can be done by building up the models\n", - "by hand and likely is possible to be done programmatically but it seems not obvious." 
- ] - }, - { - "cell_type": "code", - "execution_count": 71, - "id": "b678d323", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Analysis of Variance Table\n", - "\n", - "Model 1: Sales ~ 1\n", - "Model 2: Sales ~ Price + UIncome\n", - "Model 3: Sales ~ Price + UIncome + US + Advertising\n", - " Res.Df RSS Df Sum of Sq F Pr(>F) \n", - "1 399 3182.3 \n", - "2 396 2508.2 3 674.09 39.305 < 2.2e-16 ***\n", - "3 394 2252.4 2 255.79 22.372 6.268e-10 ***\n", - "---\n", - "Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1\n" - ] - } - ], - "source": [ - "%%R\n", - "M1 = lm(Sales ~ 1, data=Carseats)\n", - "M2 = lm(Sales ~ Price + UIncome, data=Carseats)\n", - "M3 = lm(Sales ~ Price + UIncome + US + Advertising, data=Carseats)\n", - "anova(M1, M2, M3)" - ] - }, - { - "cell_type": "markdown", - "id": "b0388949", - "metadata": {}, - "source": [ - "## Alternative anova\n", - "\n", - "Another common ANOVA table involves dropping each term in succession from the model and comparing\n", - "to the full model." 
- ] - }, - { - "cell_type": "code", - "execution_count": 72, - "id": "ac5b916a", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'intercept'}\n", - " df_resid ssr df_diff ss_diff F Pr(>F)\n", - "0 395.0 4417.273517 0.0 NaN NaN NaN\n", - "1 394.0 2252.396343 1.0 2164.877175 378.690726 1.359177e-59\n", - "{'Price', 'UIncome[H]', 'UIncome[M]'}\n", - " df_resid ssr df_diff ss_diff F Pr(>F)\n", - "0 397.0 2950.808154 0.0 NaN NaN NaN\n", - "1 394.0 2252.396343 3.0 698.411811 40.723184 6.077848e-23\n", - "{'US[Yes]', 'Advertising'}\n", - " df_resid ssr df_diff ss_diff F Pr(>F)\n", - "0 396.0 2508.187788 0.0 NaN NaN NaN\n", - "1 394.0 2252.396343 2.0 255.791445 22.372135 6.267562e-10\n" - ] - } - ], - "source": [ - "Dfull = design.transform(Carseats)\n", - "Mfull = sm.OLS(Y, Dfull).fit()\n", - "for i, D in enumerate(design.build_sequence(Carseats, anova_type='drop')):\n", - " if i == 0:\n", - " D0 = D\n", - " print(set(D.columns) ^ set(Dfull.columns))\n", - " print(sm.stats.anova_lm(sm.OLS(Y, D).fit(), Mfull))" - ] - }, - { - "cell_type": "code", - "execution_count": 73, - "id": "a0c71948", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Analysis of Variance Table\n", - "\n", - "Model 1: Sales ~ US + Advertising\n", - "Model 2: Sales ~ Price + UIncome + US + Advertising\n", - " Res.Df RSS Df Sum of Sq F Pr(>F) \n", - "1 397 2950.8 \n", - "2 394 2252.4 3 698.41 40.723 < 2.2e-16 ***\n", - "---\n", - "Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1\n", - "Analysis of Variance Table\n", - "\n", - "Model 1: Sales ~ Price + UIncome\n", - "Model 2: Sales ~ Price + UIncome + US + Advertising\n", - " Res.Df RSS Df Sum of Sq F Pr(>F) \n", - "1 396 2508.2 \n", - "2 394 2252.4 2 255.79 22.372 6.268e-10 ***\n", - "---\n", - "Signif. 
codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1\n" - ] - } - ], - "source": [ - "%%R\n", - "M1 = lm(Sales ~ Price + UIncome + US + Advertising, data=Carseats)\n", - "M2 = lm(Sales ~ US + Advertising, data=Carseats)\n", - "print(anova(M2, M1))\n", - "M3 = lm(Sales ~ Price + UIncome, data=Carseats)\n", - "print(anova(M3, M1))" - ] - }, - { - "cell_type": "markdown", - "id": "a5e4880d", - "metadata": {}, - "source": [ - "The comparison without the intercept here is actually very hard to achieve in `R` with `anova` due to its inspection\n", - "of the formula." - ] - }, - { - "cell_type": "code", - "execution_count": 74, - "id": "4b383401", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Analysis of Variance Table\n", - "\n", - "Model 1: Sales ~ Price + UIncome + US + Advertising - 1\n", - "Model 2: Sales ~ Price + UIncome + US + Advertising\n", - " Res.Df RSS Df Sum of Sq F Pr(>F)\n", - "1 394 2252.4 \n", - "2 394 2252.4 0 9.0949e-13 \n" - ] - } - ], - "source": [ - "%%R\n", - "M1 = lm(Sales ~ Price + UIncome + US + Advertising, data=Carseats)\n", - "M4 = lm(Sales ~ Price + UIncome + US + Advertising - 1, data=Carseats)\n", - "print(anova(M4, M1))" - ] - }, - { - "cell_type": "markdown", - "id": "72d7c83b", - "metadata": {}, - "source": [ - "It can be found with `summary`." - ] - }, - { - "cell_type": "code", - "execution_count": 75, - "id": "4d5ce789", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Call:\n", - "lm(formula = Sales ~ Price + UIncome + US + Advertising, data = Carseats)\n", - "\n", - "Residuals:\n", - " Min 1Q Median 3Q Max \n", - "-7.4437 -1.6351 -0.0932 1.4920 6.8076 \n", - "\n", - "Coefficients:\n", - " Estimate Std. Error t value Pr(>|t|) \n", - "(Intercept) 12.520356 0.643390 19.460 < 2e-16 ***\n", - "Price -0.054000 0.005072 -10.647 < 2e-16 ***\n", - "UIncomeM 0.548906 0.281693 1.949 0.0521 . 
\n", - "UIncomeH 0.708219 0.322028 2.199 0.0284 * \n", - "USYes 0.024181 0.343246 0.070 0.9439 \n", - "Advertising 0.119509 0.024692 4.840 1.87e-06 ***\n", - "---\n", - "Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1\n", - "\n", - "Residual standard error: 2.391 on 394 degrees of freedom\n", - "Multiple R-squared: 0.2922,\tAdjusted R-squared: 0.2832 \n", - "F-statistic: 32.53 on 5 and 394 DF, p-value: < 2.2e-16\n", - "\n" - ] - } - ], - "source": [ - "%%R\n", - "summary(M1)" - ] - }, - { - "cell_type": "code", - "execution_count": 76, - "id": "56b82d02", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(378.690726, 378.69160000000005)" - ] - }, - "execution_count": 76, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "378.690726, 19.46**2" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "872f645c-1d6f-4d08-9eec-2b80276bc82c", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "jupytext": { - "formats": "source/models///ipynb,jupyterbook/models///md:myst,jupyterbook/models///ipynb" - }, - "kernelspec": { - "display_name": "islp_test", - "language": "python", - "name": "islp_test" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.13" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/docs/jupyterbook/models/submodels.md b/docs/jupyterbook/models/submodels.md deleted file mode 100644 index c2a97fd..0000000 --- a/docs/jupyterbook/models/submodels.md +++ /dev/null @@ -1,652 +0,0 @@ ---- -jupytext: - formats: source/models///ipynb,jupyterbook/models///md:myst,jupyterbook/models///ipynb - text_representation: - extension: .md - format_name: myst - format_version: 0.13 - jupytext_version: 1.14.1 -kernelspec: - display_name: islp_test - language: 
python - name: islp_test ---- - -# Building design matrices with `ModelSpec` - -Force rebuild - -```{code-cell} ipython3 -x=4 -import numpy as np, pandas as pd -%load_ext rpy2.ipython - -from ISLP import load_data -from ISLP.models import ModelSpec - -import statsmodels.api as sm -``` - -```{code-cell} ipython3 -Carseats = load_data('Carseats') -%R -i Carseats -Carseats.columns -``` - -## Let's break up income into groups - -```{code-cell} ipython3 -Carseats['OIncome'] = pd.cut(Carseats['Income'], - [0,50,90,200], - labels=['L','M','H']) -Carseats['OIncome'] -``` - -Let's also create an unordered version - -```{code-cell} ipython3 -Carseats['UIncome'] = pd.cut(Carseats['Income'], - [0,50,90,200], - labels=['L','M','H'], - ordered=False) -Carseats['UIncome'] -``` - -## A simple model - -```{code-cell} ipython3 -design = ModelSpec(['Price', 'Income']) -X = design.fit_transform(Carseats) -X.columns -``` - -```{code-cell} ipython3 -Y = Carseats['Sales'] -M = sm.OLS(Y, X).fit() -M.params -``` - -## Basic procedure - -The design matrix is built by cobbling together a set of columns and possibly transforming them. -A `pd.DataFrame` is essentially a list of columns. One of the first tasks done in `ModelSpec.fit` -is to inspect a dataframe for column info. The column `ShelveLoc` is categorical: - -```{code-cell} ipython3 -Carseats['ShelveLoc'] -``` - -This is recognized by `ModelSpec` in the form of `Column` objects which are just named tuples with two methods -`get_columns` and `fit_encoder`. - -```{code-cell} ipython3 -design.column_info_['ShelveLoc'] -``` - -It recognized ordinal columns as well. - -```{code-cell} ipython3 -design.column_info_['OIncome'] -``` - -```{code-cell} ipython3 -income = design.column_info_['Income'] -cols, names = income.get_columns(Carseats) -(cols[:4], names) -``` - -## Encoding a column - -In building a design matrix we must extract columns from our dataframe (or `np.ndarray`). 
Categorical -variables usually are encoded by several columns, typically one less than the number of categories. -This task is handled by the `encoder` of the `Column`. The encoder must satisfy the `sklearn` transform -model, i.e. `fit` on some array and `transform` on future arrays. The `fit_encoder` method of `Column` fits -its encoder the first time data is passed to it. - -```{code-cell} ipython3 -shelve = design.column_info_['ShelveLoc'] -cols, names = shelve.get_columns(Carseats) -(cols[:4], names) -``` - -```{code-cell} ipython3 -oincome = design.column_info_['OIncome'] -oincome.get_columns(Carseats)[0][:4] -``` - -## The terms - -The design matrix consists of several sets of columns. This is managed by the `ModelSpec` through -the `terms` argument which should be a sequence. The elements of `terms` are often -going to be strings (or tuples of strings for interactions, see below) but are converted to a -`Variable` object and stored in the `terms_` of the fitted `ModelSpec`. A `Variable` is just a named tuple. - -```{code-cell} ipython3 -design.terms -``` - -```{code-cell} ipython3 -design.terms_ -``` - -While each `Column` can itself extract data, they are all promoted to `Variable` to be of a uniform type. A -`Variable` can also create columns through the `build_columns` method of `ModelSpec` - -```{code-cell} ipython3 -price = design.terms_[0] -design.build_columns(Carseats, price) -``` - -Note that `Variable` objects have a tuple of `variables` as well as an `encoder` attribute. The -tuple of `variables` first creates a concatenated dataframe from all corresponding variables and then -is run through `encoder.transform`. The `encoder.fit` method of each `Variable` is run once during -the call to `ModelSpec.fit`. 
- -```{code-cell} ipython3 -from ISLP.models.model_spec import Variable - -new_var = Variable(('Price', 'Income', 'UIncome'), name='mynewvar', encoder=None) -design.build_columns(Carseats, new_var) -``` - -Let's now transform these columns with an encoder. Within `ModelSpec` we will first build the -arrays above and then call `pca.fit` and finally `pca.transform` within `design.build_columns`. - -```{code-cell} ipython3 -from sklearn.decomposition import PCA -pca = PCA(n_components=2) -pca.fit(design.build_columns(Carseats, new_var)[0]) # this is done within `ModelSpec.fit` -pca_var = Variable(('Price', 'Income', 'UIncome'), name='mynewvar', encoder=pca) -design.build_columns(Carseats, pca_var) -``` - -The elements of the `variables` attribute may be column identifiers ( `"Price"`), `Column` instances (`price`) -or `Variable` instances (`pca_var`). - -```{code-cell} ipython3 -fancy_var = Variable(('Price', price, pca_var), name='fancy', encoder=None) -design.build_columns(Carseats, fancy_var) -``` - -We can of course run PCA again on these features (if we wanted). - -```{code-cell} ipython3 -pca2 = PCA(n_components=2) -pca2.fit(design.build_columns(Carseats, fancy_var)[0]) # this is done within `ModelSpec.fit` -pca2_var = Variable(('Price', price, pca_var), name='fancy_pca', encoder=pca2) -design.build_columns(Carseats, pca2_var) -``` - -## Building the design matrix - -With these notions in mind, the final design is essentially then - -```{code-cell} ipython3 -X_hand = np.column_stack([design.build_columns(Carseats, v)[0] for v in design.terms_])[:4] -``` - -An intercept column is added if `design.intercept` is `True` and if the original argument to `transform` is -a dataframe the index is adjusted accordingly. - -```{code-cell} ipython3 -design.intercept -``` - -```{code-cell} ipython3 -design.transform(Carseats)[:4] -``` - -## Predicting - -Constructing the design matrix at any values is carried out by the `transform` method. 
- -```{code-cell} ipython3 -new_data = pd.DataFrame({'Price':[10,20], 'Income':[40, 50]}) -new_X = design.transform(new_data) -M.get_prediction(new_X).predicted_mean -``` - -```{code-cell} ipython3 -%%R -i new_data,Carseats -predict(lm(Sales ~ Price + Income, data=Carseats), new_data) -``` - -### Difference between using `pd.DataFrame` and `np.ndarray` - -If the `terms` only refer to a few columns of the data frame, the `transform` method only needs a dataframe with those columns. - -If we had used an `np.ndarray`, the column identifiers would be integers identifying specific columns so, -in order to work correctly, `transform` would need another `np.ndarray` where the columns have the same meaning. - -```{code-cell} ipython3 -Carseats_np = np.asarray(Carseats[['Price', 'ShelveLoc', 'US', 'Income']]) -design_np = ModelSpec([0,3]).fit(Carseats_np) -design_np.transform(Carseats_np)[:4] -``` - -The following will fail for hopefully obvious reasons - -```{code-cell} ipython3 -try: - new_D = np.zeros((2,2)) - new_D[:,0] = [10,20] - new_D[:,1] = [40,50] - M.get_prediction(new_D).predicted_mean -except ValueError as e: - print(e) -``` - -Ultimately, `M` expects 3 columns for new predictions because it was fit -with a matrix having 3 columns (the first representing an intercept). - -We might be tempted to try as with the `pd.DataFrame` and produce -an `np.ndarray` with only the necessary variables. 
- -```{code-cell} ipython3 -try: - new_X = np.zeros((2,2)) - new_X[:,0] = [10,20] - new_X[:,1] = [40,50] - new_D = design_np.transform(new_X) - M.get_prediction(new_D).predicted_mean -except IndexError as e: - print(e) -``` - -This fails because `design_np` is looking for column `3` from its `terms`: - -```{code-cell} ipython3 -design_np.terms_ -``` - -However, if we have an `np.ndarray` in which the first column indeed represents `Price` and the fourth indeed -represents `Income` then we can arrive at the correct answer by supplying such the array to `design_np.transform`: - -```{code-cell} ipython3 -new_X = np.zeros((2,4)) -new_X[:,0] = [10,20] -new_X[:,3] = [40,50] -new_D = design_np.transform(new_X) -M.get_prediction(new_D).predicted_mean -``` - -Given this subtlety about needing to supply arrays with identical column structure to `transform` when -using `np.ndarray` we presume that using a `pd.DataFrame` will be the more popular use case. - -+++ - -## A model with some categorical variables - -Categorical variables become `Column` instances with encoders. - -```{code-cell} ipython3 -design = ModelSpec(['Population', 'Price', 'UIncome', 'ShelveLoc']).fit(Carseats) -design.column_info_['UIncome'] -``` - -```{code-cell} ipython3 -X = design.fit_transform(Carseats) -X.columns -``` - -```{code-cell} ipython3 -sm.OLS(Y, X).fit().params -``` - -```{code-cell} ipython3 -%%R -lm(Sales ~ Population + Price + UIncome + ShelveLoc, data=Carseats)$coef -``` - -## Getting the encoding you want - -By default the level dropped by `ModelSpec` will be the first of the `categories_` values from -`sklearn.preprocessing.OneHotEncoder()`. We might wish to change this. It seems -as if the correct way to do this would be something like `Variable(('UIncome',), 'mynewencoding', new_encoder)` -where `new_encoder` would somehow drop the column we want dropped. 
- -However, when using the convenient identifier `UIncome` in the `variables` argument, this maps to the `Column` associated to `UIncome` within `design.column_info_`: - -```{code-cell} ipython3 -design.column_info_['UIncome'] -``` - -This column already has an encoder and `Column` instances are immutable as named tuples. Further, there are times when -we may want to encode `UIncome` differently within the same model. In the model below the main effect of `UIncome` is encoded with two columns while in the interaction `UIncome` (see below) has three columns. This is a design of interest -and we need a way to allow different encodings of the same column of `Carseats` - -```{code-cell} ipython3 -%%R -lm(Sales ~ UIncome:ShelveLoc + UIncome, data=Carseats) -``` - - We can create a new -`Column` with the encoder we want. For categorical variables, there is a convenience function to do so. - -```{code-cell} ipython3 -from ISLP.models.model_spec import contrast -pref_encoding = contrast('UIncome', 'drop', 'L') -``` - -```{code-cell} ipython3 -design.build_columns(Carseats, pref_encoding) -``` - -```{code-cell} ipython3 -design = ModelSpec(['Population', 'Price', pref_encoding, 'ShelveLoc']).fit(Carseats) -X = design.fit_transform(Carseats) -X.columns -``` - -```{code-cell} ipython3 -sm.OLS(Y, X).fit().params -``` - -```{code-cell} ipython3 -%%R -lm(Sales ~ Population + Price + UIncome + ShelveLoc, data=Carseats)$coef -``` - -## Interactions - -We've referred to interactions above. These are specified (by convenience) as tuples in the `terms` argument -to `ModelSpec`. 
- -```{code-cell} ipython3 -design = ModelSpec([('UIncome', 'ShelveLoc'), 'UIncome']) -X = design.fit_transform(Carseats) -sm.OLS(Y, X).fit().params -``` - -The tuples in `terms` are converted to `Variable` in the formalized `terms_` attribute by creating a `Variable` with -`variables` set to the tuple and the encoder an `Interaction` encoder which (unsurprisingly) creates the interaction columns from the concatenated data frames of `UIncome` and `ShelveLoc`. - -```{code-cell} ipython3 -design.terms_[0] -``` - -Comparing this to the previous `R` model. - -```{code-cell} ipython3 -%%R -lm(Sales ~ UIncome:ShelveLoc + UIncome, data=Carseats) -``` - -We note a few important things: - -1. `R` has reorganized the columns of the design from the formula: although we wrote `UIncome:ShelveLoc` first these -columns have been built later. **`ModelSpec` builds columns in the order determined by `terms`!** - -2. As noted above, `R` has encoded `UIncome` differently in the main effect and in the interaction. For `ModelSpec`, the reference to `UIncome` always refers to the column in `design.column_info_` and will always build only the columns for `L` and `M`. **`ModelSpec` does no inspection of terms to decide how to encode categorical variables.** - -A few notes: - -- **Why not try to inspect the terms?** For any nontrivial formula in `R` with several categorical variables and interactions, predicting what columns will be produced from a given formula is not simple. **`ModelSpec` errs on the side of being explicit.** - -- **Is it impossible to build the design as `R` has?** No. An advanced user who *knows* they want the columns built as `R` has can do so (fairly) easily. 
- -```{code-cell} ipython3 -full_encoding = contrast('UIncome', None) -design.build_columns(Carseats, full_encoding) -``` - -```{code-cell} ipython3 -design = ModelSpec([pref_encoding, (full_encoding, 'ShelveLoc')]) -X = design.fit_transform(Carseats) -sm.OLS(Y, X).fit().params -``` - -## Special encodings - -For flexible models, we may want to consider transformations of features, i.e. polynomial -or spline transformations. Given transforms that follow the `fit/transform` paradigm -we can of course achieve this with a `Column` and an `encoder`. The `ISLP.transforms` -package includes a `Poly` transform - -```{code-cell} ipython3 -from ISLP.models.model_spec import poly -poly('Income', 3) -``` - -```{code-cell} ipython3 -design = ModelSpec([poly('Income', 3), 'ShelveLoc']) -X = design.fit_transform(Carseats) -sm.OLS(Y, X).fit().params -``` - -Compare: - -```{code-cell} ipython3 -%%R -lm(Sales ~ poly(Income, 3) + ShelveLoc, data=Carseats)$coef -``` - -## Splines - -Support for natural and B-splines is also included - -```{code-cell} ipython3 -from ISLP.models.model_spec import ns, bs, pca -design = ModelSpec([ns('Income', df=5), 'ShelveLoc']) -X = design.fit_transform(Carseats) -sm.OLS(Y, X).fit().params -``` - -```{code-cell} ipython3 -%%R -library(splines) -lm(Sales ~ ns(Income, df=5) + ShelveLoc, data=Carseats)$coef -``` - -```{code-cell} ipython3 -design = ModelSpec([bs('Income', df=7, degree=2), 'ShelveLoc']) -X = design.fit_transform(Carseats) -sm.OLS(Y, X).fit().params -``` - -```{code-cell} ipython3 -%%R -lm(Sales ~ bs(Income, df=7, degree=2) + ShelveLoc, data=Carseats)$coef -``` - -## PCA - -```{code-cell} ipython3 -design = ModelSpec([pca(['Income', - 'Price', - 'Advertising', - 'Population'], - n_components=2, - name='myvars'), 'ShelveLoc']) -X = design.fit_transform(Carseats) -sm.OLS(Y, X).fit().params -``` - -```{code-cell} ipython3 -%%R -lm(Sales ~ prcomp(cbind(Income, Price, Advertising, Population))$x[,1:2] + ShelveLoc, data=Carseats) -``` - -It is 
of course common to scale before running PCA. - -```{code-cell} ipython3 -design = ModelSpec([pca(['Income', - 'Price', - 'Advertising', - 'Population'], - n_components=2, - name='myvars', - scale=True), 'ShelveLoc']) -X = design.fit_transform(Carseats) -sm.OLS(Y, X).fit().params -``` - -```{code-cell} ipython3 -%%R -lm(Sales ~ prcomp(cbind(Income, Price, Advertising, Population), scale=TRUE)$x[,1:2] + ShelveLoc, data=Carseats) -``` - -There will be some small differences in the coefficients due to `sklearn` use of `np.std(ddof=0)` instead -of `np.std(ddof=1)`. - -```{code-cell} ipython3 -np.array(sm.OLS(Y, X).fit().params)[1:3] * np.sqrt(X.shape[0] / (X.shape[0]-1)) -``` - -## Submodels - -We can build submodels as well, even if the terms do not appear in the original `terms` argument. -Fundamentally, the terms just need to be able to have the `design.build_columns` work for us to be -able to build a design matrix. The initial inspection of the columns of `Carseats` has created -a column for `US`, hence we can build this submodel. - -```{code-cell} ipython3 -design = ModelSpec(['UIncome', 'ShelveLoc', 'Price']).fit(Carseats) -design.build_submodel(Carseats, ['US']) -``` - -## ANOVA - -For a given `terms` argument, there as a natural sequence of models, namely those specified by `[terms[:i] for i in range(len(terms)+1]`. 
- -```{code-cell} ipython3 -design = ModelSpec(['ShelveLoc', 'Price', 'UIncome', 'US']).fit(Carseats) -for D in design.build_sequence(Carseats): - print(D.columns) -``` - -```{code-cell} ipython3 -sm.stats.anova_lm(*(sm.OLS(Y, D).fit() for D in design.build_sequence(Carseats) )) -``` - -```{code-cell} ipython3 -%%R -anova(lm(Sales ~ ShelveLoc + Price + UIncome + US, data=Carseats)) -``` - -Recall that `ModelSpec` does not inspect `terms` to reorder based on degree of -interaction as `R` does: - -```{code-cell} ipython3 -design = ModelSpec([(full_encoding, 'ShelveLoc'), pref_encoding]).fit(Carseats) -sm.stats.anova_lm(*(sm.OLS(Y, D).fit() for D in design.build_sequence(Carseats) )) -``` - -```{code-cell} ipython3 -%%R -anova(lm(Sales ~ UIncome:ShelveLoc + UIncome, data=Carseats)) -``` - -To agree with `R` we must order `terms` as `R` will. - -```{code-cell} ipython3 -design = ModelSpec([pref_encoding, (full_encoding, 'ShelveLoc')]).fit(Carseats) -sm.stats.anova_lm(*(sm.OLS(Y, D).fit() for D in design.build_sequence(Carseats))) -``` - -## More complicated interactions - -Can we have an interaction of a polynomial effect with a categorical? Absolutely - -```{code-cell} ipython3 -%%R -anova(lm(Sales ~ UIncome + poly(Income, 3):UIncome + UIncome:US, data=Carseats)) -``` - -To match `R` we note that it has used its inspection rules to encode `UIncome` with 3 levels -for the two interactions. - -```{code-cell} ipython3 -p3 = poly('Income', 3) -design = ModelSpec([pref_encoding, (p3, full_encoding), (full_encoding, 'US')]).fit(Carseats) -X = design.transform(Carseats) -sm.OLS(Y, X).fit().params -``` - -```{code-cell} ipython3 -sm.stats.anova_lm(*(sm.OLS(Y, D).fit() for D in design.build_sequence(Carseats))) -``` - -## Grouping columns for ANOVA - -The `Variable` construct can be used to group -variables together to get custom sequences of models for `anova_lm`. 
- -```{code-cell} ipython3 -group1 = Variable(('Price', pref_encoding), 'group1', None) -group2 = Variable(('US', 'Advertising'), 'group2', None) -design = ModelSpec([group1, group2]).fit(Carseats) -for D in design.build_sequence(Carseats): - print(D.columns) -``` - -```{code-cell} ipython3 -sm.stats.anova_lm(*(sm.OLS(Y, D).fit() for D in design.build_sequence(Carseats))) -``` - -It is not clear this is simple to do in `R` as the formula object expands all parentheses. - -```{code-cell} ipython3 -%%R -anova(lm(Sales ~ (Price + UIncome) + (US + Advertising), data=Carseats)) -``` - -It can be done by building up the models -by hand and likely is possible to be done programmatically but it seems not obvious. - -```{code-cell} ipython3 -%%R -M1 = lm(Sales ~ 1, data=Carseats) -M2 = lm(Sales ~ Price + UIncome, data=Carseats) -M3 = lm(Sales ~ Price + UIncome + US + Advertising, data=Carseats) -anova(M1, M2, M3) -``` - -## Alternative anova - -Another common ANOVA table involves dropping each term in succession from the model and comparing -to the full model. - -```{code-cell} ipython3 -Dfull = design.transform(Carseats) -Mfull = sm.OLS(Y, Dfull).fit() -for i, D in enumerate(design.build_sequence(Carseats, anova_type='drop')): - if i == 0: - D0 = D - print(set(D.columns) ^ set(Dfull.columns)) - print(sm.stats.anova_lm(sm.OLS(Y, D).fit(), Mfull)) -``` - -```{code-cell} ipython3 -%%R -M1 = lm(Sales ~ Price + UIncome + US + Advertising, data=Carseats) -M2 = lm(Sales ~ US + Advertising, data=Carseats) -print(anova(M2, M1)) -M3 = lm(Sales ~ Price + UIncome, data=Carseats) -print(anova(M3, M1)) -``` - -The comparison without the intercept here is actually very hard to achieve in `R` with `anova` due to its inspection -of the formula. - -```{code-cell} ipython3 -%%R -M1 = lm(Sales ~ Price + UIncome + US + Advertising, data=Carseats) -M4 = lm(Sales ~ Price + UIncome + US + Advertising - 1, data=Carseats) -print(anova(M4, M1)) -``` - -It can be found with `summary`. 
- -```{code-cell} ipython3 -%%R -summary(M1) -``` - -```{code-cell} ipython3 -378.690726, 19.46**2 -``` - -```{code-cell} ipython3 - -``` diff --git a/docs/jupyterbook/transforms/PCA.ipynb b/docs/jupyterbook/transforms/PCA.ipynb index d8b41f3..ec1e0ae 100644 --- a/docs/jupyterbook/transforms/PCA.ipynb +++ b/docs/jupyterbook/transforms/PCA.ipynb @@ -19,9 +19,14 @@ "outputs": [], "source": [ "import numpy as np\n", + "from sklearn.decomposition import PCA\n", + "\n", "from ISLP import load_data\n", - "from ISLP.models import ModelSpec, pca, Variable, derived_variable\n", - "from sklearn.decomposition import PCA" + "from ISLP.models import (ModelSpec, \n", + " pca, \n", + " Feature, \n", + " derived_feature,\n", + " build_columns)" ] }, { @@ -71,7 +76,7 @@ "id": "fff603bf", "metadata": {}, "source": [ - "Suppose we want to make a `Variable` representing the first 3 principal components of the\n", + "Suppose we want to make a `Feature` representing the first 3 principal components of the\n", " features `['CompPrice', 'Income', 'Advertising', 'Population', 'Price']`." ] }, @@ -80,8 +85,8 @@ "id": "eab49ad1-3957-478f-8a76-28a8f58551e9", "metadata": {}, "source": [ - "We first make a `Variable` that represents these five features columns, then `pca`\n", - "can be used to compute a new `Variable` that returns the first three principal components." + "We first make a `Feature` that represents these five features columns, then `pca`\n", + "can be used to compute a new `Feature` that returns the first three principal components." 
] }, { @@ -91,7 +96,7 @@ "metadata": {}, "outputs": [], "source": [ - "grouped = Variable(('CompPrice', 'Income', 'Advertising', 'Population', 'Price'), name='grouped', encoder=None)\n", + "grouped = Feature(('CompPrice', 'Income', 'Advertising', 'Population', 'Price'), name='grouped', encoder=None)\n", "sklearn_pca = PCA(n_components=3, whiten=True)" ] }, @@ -100,7 +105,7 @@ "id": "b45655a3-393d-4b4c-b754-cda61ed0e014", "metadata": {}, "source": [ - "We can now fit `sklearn_pca` and create our new variable." + "We can now fit `sklearn_pca` and create our new feature." ] }, { @@ -108,175 +113,18 @@ "execution_count": 5, "id": "6cfe8861-ad07-47b9-95d1-5d5513ff6fbe", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but PCA was fitted with feature names\n", - " warnings.warn(\n" - ] - } - ], + "outputs": [], "source": [ - "sklearn_pca.fit(design.build_columns(Carseats, grouped)[0]) \n", - "pca_var = derived_variable(['CompPrice', 'Income', 'Advertising', 'Population', 'Price'],\n", + "grouped_features = build_columns(design.column_info_,\n", + " Carseats,\n", + " grouped)[0]\n", + "sklearn_pca.fit(grouped_features) \n", + "pca_var = derived_feature(['CompPrice', 'Income', 'Advertising', 'Population', 'Price'],\n", " name='pca(grouped)', encoder=sklearn_pca)\n", - "derived_features, _ = design.build_columns(Carseats, pca_var)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "aeb47184-9e15-4a6e-b60a-916f5ff89063", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
CompPriceIncomeAdvertisingPopulationPrice
01387311276120
1111481626083
2113351026980
3117100446697
4141643340128
..................
39513810817203128
39613923337120
3971622612368159
39810079728495
39913437027120
\n", - "

400 rows × 5 columns

\n", - "
" - ], - "text/plain": [ - " CompPrice Income Advertising Population Price\n", - "0 138 73 11 276 120\n", - "1 111 48 16 260 83\n", - "2 113 35 10 269 80\n", - "3 117 100 4 466 97\n", - "4 141 64 3 340 128\n", - ".. ... ... ... ... ...\n", - "395 138 108 17 203 128\n", - "396 139 23 3 37 120\n", - "397 162 26 12 368 159\n", - "398 100 79 7 284 95\n", - "399 134 37 0 27 120\n", - "\n", - "[400 rows x 5 columns]" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design.build_columns(Carseats, grouped)[0]" + "derived_features, _ = build_columns(design.column_info_,\n", + " Carseats, \n", + " pca_var,\n", + " encoders=design.encoders_)" ] }, { @@ -291,7 +139,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "id": "9f4b0955", "metadata": {}, "outputs": [], @@ -304,22 +152,10 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "id": "6b382699-eb86-457f-8e91-09a63eb21d49", "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but PCA was fitted with feature names\n", - " warnings.warn(\n", - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but PCA was fitted with feature names\n", - " warnings.warn(\n", - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but PCA was fitted with feature names\n", - " warnings.warn(\n" - ] - }, { "data": { "text/plain": [ @@ -329,7 +165,7 @@ " dtype='object')" ] }, - "execution_count": 8, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -350,7 +186,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 
8, "id": "4a8d9b28", "metadata": {}, "outputs": [], @@ -361,7 +197,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 13, "id": "6efa6c67-86e1-4f51-86c2-25c838a90bf4", "metadata": {}, "outputs": [ @@ -371,7 +207,7 @@ "(4.073428490498941e-14, 0.0)" ] }, - "execution_count": 10, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -386,9 +222,9 @@ "formats": "source/transforms///ipynb,jupyterbook/transforms///md:myst,jupyterbook/transforms///ipynb" }, "kernelspec": { - "display_name": "islp_test", + "display_name": "python3", "language": "python", - "name": "islp_test" + "name": "python3" }, "language_info": { "codemirror_mode": { diff --git a/docs/jupyterbook/transforms/PCA.md b/docs/jupyterbook/transforms/PCA.md index b9ba769..6b1a77f 100644 --- a/docs/jupyterbook/transforms/PCA.md +++ b/docs/jupyterbook/transforms/PCA.md @@ -5,11 +5,11 @@ jupytext: extension: .md format_name: myst format_version: 0.13 - jupytext_version: 1.14.1 + jupytext_version: 1.14.5 kernelspec: - display_name: islp_test + display_name: python3 language: python - name: islp_test + name: python3 --- # Derived features: using PCA on a subset of columns @@ -19,9 +19,14 @@ construction of transformers applied to features. ```{code-cell} ipython3 import numpy as np -from ISLP import load_data -from ISLP.models import ModelSpec, pca, Variable, derived_variable from sklearn.decomposition import PCA + +from ISLP import load_data +from ISLP.models import (ModelSpec, + pca, + Feature, + derived_feature, + build_columns) ``` ```{code-cell} ipython3 @@ -35,30 +40,32 @@ Let's create a `ModelSpec` that is aware of all of the relevant columns. 
design = ModelSpec(Carseats.columns.drop(['Sales'])).fit(Carseats) ``` -Suppose we want to make a `Variable` representing the first 3 principal components of the +Suppose we want to make a `Feature` representing the first 3 principal components of the features `['CompPrice', 'Income', 'Advertising', 'Population', 'Price']`. +++ -We first make a `Variable` that represents these five features columns, then `pca` -can be used to compute a new `Variable` that returns the first three principal components. +We first make a `Feature` that represents these five features columns, then `pca` +can be used to compute a new `Feature` that returns the first three principal components. ```{code-cell} ipython3 -grouped = Variable(('CompPrice', 'Income', 'Advertising', 'Population', 'Price'), name='grouped', encoder=None) +grouped = Feature(('CompPrice', 'Income', 'Advertising', 'Population', 'Price'), name='grouped', encoder=None) sklearn_pca = PCA(n_components=3, whiten=True) ``` -We can now fit `sklearn_pca` and create our new variable. +We can now fit `sklearn_pca` and create our new feature. 
```{code-cell} ipython3 -sklearn_pca.fit(design.build_columns(Carseats, grouped)[0]) -pca_var = derived_variable(['CompPrice', 'Income', 'Advertising', 'Population', 'Price'], +grouped_features = build_columns(design.column_info_, + Carseats, + grouped)[0] +sklearn_pca.fit(grouped_features) +pca_var = derived_feature(['CompPrice', 'Income', 'Advertising', 'Population', 'Price'], name='pca(grouped)', encoder=sklearn_pca) -derived_features, _ = design.build_columns(Carseats, pca_var) -``` - -```{code-cell} ipython3 -design.build_columns(Carseats, grouped)[0] +derived_features, _ = build_columns(design.column_info_, + Carseats, + pca_var, + encoders=design.encoders_) ``` ## Helper function diff --git a/docs/jupyterbook/transforms/poly.ipynb b/docs/jupyterbook/transforms/poly.ipynb index 54d7b4e..45c862e 100644 --- a/docs/jupyterbook/transforms/poly.ipynb +++ b/docs/jupyterbook/transforms/poly.ipynb @@ -168,7 +168,7 @@ "source": [ "## Underlying model\n", "\n", - "If we look at `quartic`, we see it is a `Variable`, i.e. it can be used to produce a set of columns\n", + "If we look at `quartic`, we see it is a `Feature`, i.e. it can be used to produce a set of columns\n", "in a design matrix when it is a term used in creating the `ModelSpec`.\n", "\n", "Its encoder is `Poly(degree=4)`. 
This is a special `sklearn` transform that expects a single column\n", @@ -319,9 +319,9 @@ "formats": "source/transforms///ipynb,jupyterbook/transforms///md:myst,jupyterbook/transforms///ipynb" }, "kernelspec": { - "display_name": "islp_test", + "display_name": "python3", "language": "python", - "name": "islp_test" + "name": "python3" }, "language_info": { "codemirror_mode": { diff --git a/docs/jupyterbook/transforms/poly.md b/docs/jupyterbook/transforms/poly.md index 45e0e3d..e5aef11 100644 --- a/docs/jupyterbook/transforms/poly.md +++ b/docs/jupyterbook/transforms/poly.md @@ -5,11 +5,11 @@ jupytext: extension: .md format_name: myst format_version: 0.13 - jupytext_version: 1.14.1 + jupytext_version: 1.14.5 kernelspec: - display_name: islp_test + display_name: python3 language: python - name: islp_test + name: python3 --- # Polynomial features @@ -66,7 +66,7 @@ np.linalg.norm(ISLP_features - R_features) ## Underlying model -If we look at `quartic`, we see it is a `Variable`, i.e. it can be used to produce a set of columns +If we look at `quartic`, we see it is a `Feature`, i.e. it can be used to produce a set of columns in a design matrix when it is a term used in creating the `ModelSpec`. Its encoder is `Poly(degree=4)`. 
This is a special `sklearn` transform that expects a single column diff --git a/docs/jupyterbook/transforms/splines.ipynb b/docs/jupyterbook/transforms/splines.ipynb index f28d786..399b0be 100644 --- a/docs/jupyterbook/transforms/splines.ipynb +++ b/docs/jupyterbook/transforms/splines.ipynb @@ -310,9 +310,9 @@ "formats": "source/transforms///ipynb,jupyterbook/transforms///md:myst,jupyterbook/transforms///ipynb" }, "kernelspec": { - "display_name": "islp_test", + "display_name": "python3", "language": "python", - "name": "islp_test" + "name": "python3" }, "language_info": { "codemirror_mode": { diff --git a/docs/jupyterbook/transforms/splines.md b/docs/jupyterbook/transforms/splines.md index f14bc17..de0ee3d 100644 --- a/docs/jupyterbook/transforms/splines.md +++ b/docs/jupyterbook/transforms/splines.md @@ -5,11 +5,11 @@ jupytext: extension: .md format_name: myst format_version: 0.13 - jupytext_version: 1.14.1 + jupytext_version: 1.14.5 kernelspec: - display_name: islp_test + display_name: python3 language: python - name: islp_test + name: python3 --- # Spline features diff --git a/docs/make_notebooks.py b/docs/make_notebooks.py new file mode 100644 index 0000000..cfea244 --- /dev/null +++ b/docs/make_notebooks.py @@ -0,0 +1,107 @@ +''' +Run notebooks in an isolated environment specified by a requirements.txt file +''' + +from hashlib import md5 +import tempfile +import os +from argparse import ArgumentParser + + +parser = ArgumentParser() +parser.add_argument('--requirements', + default='requirements.txt') +parser.add_argument('labs', + metavar='N', + type=str, + nargs='+') +parser.add_argument('--python', + default='3.10') +parser.add_argument('--tarball', + default=None, + dest='tarball') +parser.add_argument('--inplace', + default=False, + action='store_true', + help='run notebooks in place?') +parser.add_argument('--timeout', + default=5000, + help='preprocessor timeout') +parser.add_argument('--env_tag', + default='') + +def 
make_notebooks(requirements='requirements.txt', + srcs=[], + dests=[], + tarball='', + inplace=False, + tmpdir='', + python='3.10', + timeout=5000, # should be enough for Ch10 + env_tag='', + ): + + if tarball and inplace: + raise ValueError('tarball option expects notebooks in a tmpdir, while inplace does not copy to a tmpdir') + + md5_ = md5() + md5_.update(open(requirements, 'rb').read()); + hash_ = md5_.hexdigest()[:8] + + env_name = f'isolated_env_{hash_}' + env_tag + + setup_cmd = f''' + conda create -n {env_name} python={python} -y; + conda run -n {env_name} pip install -r {requirements} jupyter jupytext; + ''' + + print(setup_cmd) + os.system(setup_cmd) + + # may need to up "ulimit -n 4096" + archive_files = [] + for src_, dest_ in zip(srcs, dests): + if src_ != dest_: + os.system(f'cp {src_} {dest_}') + name = os.path.split(dest_)[1] + build_cmd = f'''conda run -n {env_name} jupyter nbconvert --inplace --execute --ExecutePreprocessor.timeout={timeout} {dest_} ''' + if '02' in name: + build_cmd += ' --allow-errors ' + + print(build_cmd) + os.system(build_cmd) + archive_files.append(name) + + archive_files = ' '.join(archive_files) + + if tarball: + tarball = os.path.abspath(tarball) + tarball_cmd = f''' + cd {tmpdir}; tar -cvzf {tarball} {archive_files} + ''' + print(tarball_cmd) + os.system(tarball_cmd) + + os.system(f'conda env remove -n {env_name}') + +if __name__ == '__main__': + + args = parser.parse_args() + srcs = [os.path.abspath(l) for l in args.labs] + + tmpdir = tempfile.mkdtemp() + + if args.inplace: + dests = srcs + else: + dests = [os.path.join(tmpdir, os.path.split(l)[1]) for l in args.labs] + + make_notebooks(requirements=os.path.abspath(args.requirements), + srcs=srcs, + dests=dests, + inplace=args.inplace, + tmpdir=tmpdir, + python=args.python, + tarball=args.tarball, + timeout=args.timeout, + env_tag=args.env_tag) diff --git a/docs/requirements.txt b/docs/requirements.txt index 68ef4bc..10bce0e 100644 --- a/docs/requirements.txt +++ 
b/docs/requirements.txt @@ -1,3 +1,7 @@ texext numpydoc myst_nb +sphinx-book-theme +rpy2 +sphinx_rtd_theme +jupytext diff --git a/docs/source/.ipynb_checkpoints/imdb-checkpoint.ipynb b/docs/source/.ipynb_checkpoints/imdb-checkpoint.ipynb deleted file mode 100644 index c78ca44..0000000 --- a/docs/source/.ipynb_checkpoints/imdb-checkpoint.ipynb +++ /dev/null @@ -1,271 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "3eff5ba8", - "metadata": {}, - "source": [ - "# Creating a clean IMDB dataset\n", - "\n", - "Running this example requires `keras`. Use `pip install keras` to install if necessary." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "53925437", - "metadata": {}, - "outputs": [], - "source": [ - "import pickle" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "a855c7c0", - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "from scipy.sparse import coo_matrix, save_npz\n", - "import torch" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "fe16fa84", - "metadata": {}, - "outputs": [], - "source": [ - "from keras.datasets import imdb\n", - "from tensorflow.keras.preprocessing.sequence import pad_sequences\n" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "0369a36a", - "metadata": {}, - "outputs": [], - "source": [ - "# the 3 is for three terms: \n", - "num_words = 10000+3\n", - "((S_train, Y_train), \n", - " (S_test, Y_test)) = imdb.load_data(num_words=num_words)" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "9e84d7e3", - "metadata": {}, - "outputs": [], - "source": [ - "Y_train = Y_train.astype(np.float32)\n", - "Y_test = Y_test.astype(np.float32)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "1a737737", - "metadata": {}, - "outputs": [], - "source": [ - "def one_hot(sequences, ncol):\n", - " idx, vals = [], []\n", - " for i, s in enumerate(sequences):\n", - " idx.extend({(i,v):1 for v in 
s}.keys())\n", - " idx = np.array(idx).T\n", - " vals = np.ones(idx.shape[1], dtype=np.float32)\n", - " tens = torch.sparse_coo_tensor(indices=idx,\n", - " values=vals,\n", - " size=(len(sequences), ncol))\n", - " return tens.coalesce()" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "f08ad327", - "metadata": {}, - "outputs": [], - "source": [ - "X_train, L_train = one_hot(S_train, num_words), Y_train\n", - "X_test = one_hot(S_test, num_words)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "98481bbb", - "metadata": {}, - "outputs": [], - "source": [ - "def convert_sparse_tensor(X):\n", - " idx = np.asarray(X.indices())\n", - " vals = np.asarray(X.values())\n", - " return coo_matrix((vals,\n", - " (idx[0],\n", - " idx[1])),\n", - " shape=X.shape).tocsr()" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "5a17bd62", - "metadata": {}, - "outputs": [], - "source": [ - "X_train_s = convert_sparse_tensor(X_train)\n", - "X_test_s = convert_sparse_tensor(X_test)" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "ca57aea4", - "metadata": {}, - "outputs": [], - "source": [ - "X_train_d = torch.tensor(X_train_s.todense())\n", - "X_test_d = torch.tensor(X_test_s.todense())" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "3d017780", - "metadata": {}, - "outputs": [], - "source": [ - "torch.save(X_train_d, 'IMDB_X_train.tensor')\n", - "torch.save(X_test_d, 'IMDB_X_test.tensor')" - ] - }, - { - "cell_type": "markdown", - "id": "f9bb0163", - "metadata": {}, - "source": [ - "save the sparse matrices" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "23afd3e5", - "metadata": {}, - "outputs": [], - "source": [ - "save_npz('IMDB_X_test.npz', X_test_s)\n", - "save_npz('IMDB_X_train.npz', X_train_s)" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "d33568d1", - "metadata": {}, - "outputs": [], - "source": [ - "np.save('IMDB_Y_test.npy', 
Y_test)\n", - "np.save('IMDB_Y_train.npy', L_train)" - ] - }, - { - "cell_type": "markdown", - "id": "f9110984", - "metadata": {}, - "source": [ - "save and pickle the word index" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "ff44a0b4", - "metadata": {}, - "outputs": [], - "source": [ - "word_index = imdb.get_word_index()\n", - "lookup = {(i+3):w for w, i in word_index.items()}\n", - "lookup[0] = \"\"\n", - "lookup[1] = \"\"\n", - "lookup[2] = \"\"\n", - "lookup[4] = \"\"" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "1486c640", - "metadata": {}, - "outputs": [], - "source": [ - "pickle.dump(lookup, open('IMDB_word_index.pkl', 'bw'))" - ] - }, - { - "cell_type": "markdown", - "id": "57e606c5", - "metadata": {}, - "source": [ - "create the padded representations" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "3ab7a4ac", - "metadata": {}, - "outputs": [], - "source": [ - "(S_train,\n", - " S_test) = [torch.tensor(pad_sequences(S, maxlen=500, value=0))\n", - " for S in [S_train,\n", - " S_test]]" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "55cb2d49", - "metadata": {}, - "outputs": [], - "source": [ - "torch.save(S_train, 'IMDB_S_train.tensor')\n", - "torch.save(S_test, 'IMDB_S_test.tensor')" - ] - } - ], - "metadata": { - "jupytext": { - "cell_metadata_filter": "-all", - "formats": "py:percent,ipynb,md:myst", - "main_language": "python" - }, - "kernelspec": { - "display_name": "islp_test", - "language": "python", - "name": "islp_test" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.13" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/docs/source/api/gen.rst b/docs/source/api/gen.rst index 2539220..fb3bec5 100644 --- a/docs/source/api/gen.rst +++ 
b/docs/source/api/gen.rst @@ -6,7 +6,6 @@ generated/ISLP.bart.bart generated/ISLP.bart.likelihood generated/ISLP.bart.particle_tree - generated/ISLP.bart.tmpbart generated/ISLP.bart.tree generated/ISLP.cluster generated/ISLP.models diff --git a/docs/source/api/generated/ISLP.bart.tmpbart.rst b/docs/source/api/generated/ISLP.bart.tmpbart.rst deleted file mode 100644 index b72117a..0000000 --- a/docs/source/api/generated/ISLP.bart.tmpbart.rst +++ /dev/null @@ -1,42 +0,0 @@ -.. AUTO-GENERATED FILE -- DO NOT EDIT! - -bart.tmpbart -============ - -Module: :mod:`bart.tmpbart` ---------------------------- -Inheritance diagram for ``ISLP.bart.tmpbart``: - -.. inheritance-diagram:: ISLP.bart.tmpbart - :parts: 3 - -.. automodule:: ISLP.bart.tmpbart - -.. currentmodule:: ISLP.bart.tmpbart - -Classes -------- - -:class:`BART` -~~~~~~~~~~~~~ - - -.. autoclass:: BART - :members: - :undoc-members: - :show-inheritance: - :inherited-members: - - .. automethod:: __init__ - -:class:`SampleSplittingVariable` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - -.. autoclass:: SampleSplittingVariable - :members: - :undoc-members: - :show-inheritance: - :inherited-members: - - .. automethod:: __init__ diff --git a/docs/source/api/generated/ISLP.models.model_spec.rst b/docs/source/api/generated/ISLP.models.model_spec.rst index c379253..d457e3a 100644 --- a/docs/source/api/generated/ISLP.models.model_spec.rst +++ b/docs/source/api/generated/ISLP.models.model_spec.rst @@ -29,11 +29,11 @@ Classes .. automethod:: __init__ -:class:`ModelSpec` -~~~~~~~~~~~~~~~~~~ +:class:`Feature` +~~~~~~~~~~~~~~~~ -.. autoclass:: ModelSpec +.. autoclass:: Feature :members: :undoc-members: :show-inheritance: @@ -41,11 +41,11 @@ Classes .. automethod:: __init__ -:class:`Variable` -~~~~~~~~~~~~~~~~~ +:class:`ModelSpec` +~~~~~~~~~~~~~~~~~~ -.. autoclass:: Variable +.. autoclass:: ModelSpec :members: :undoc-members: :show-inheritance: @@ -63,10 +63,13 @@ Functions .. autofunction:: ISLP.models.model_spec.build_columns +.. 
autofunction:: ISLP.models.model_spec.build_model + + .. autofunction:: ISLP.models.model_spec.contrast -.. autofunction:: ISLP.models.model_spec.derived_variable +.. autofunction:: ISLP.models.model_spec.derived_feature .. autofunction:: ISLP.models.model_spec.fit_encoder diff --git a/docs/source/api/index.rst b/docs/source/api/index.rst index 4734cda..8aededd 100644 --- a/docs/source/api/index.rst +++ b/docs/source/api/index.rst @@ -1,12 +1,7 @@ -.. _api-index: +ISLP reference +-------------- -##### - API -##### -.. only:: html +.. toctree:: - :Release: |version| - :Date: |today| - -.. include:: gen.rst + gen diff --git a/docs/source/conf.py b/docs/source/conf.py index 5da3dda..546d74f 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -2,12 +2,32 @@ # -- Project information +import json +import os + project = 'ISLP' copyright = '2023, ISLP authors' author = 'Jonathan Taylor' -release = '0.1' -version = '0.1.0' +import ISLP +version = ISLP.__version__ + +import __main__ +dirname = os.path.split(__file__)[0] +print(dirname, 'dirname') + +docs_version = json.loads(open(os.path.join(dirname, 'docs_version.json')).read()) +lab_version = docs_version['labs'] + +myst_enable_extensions = ['substitution'] + +myst_substitutions = { + "ISLP_lab_link": f"[ISLP_labs/{lab_version}](https://github.com/intro-stat-learning/ISLP_labs/tree/{lab_version})", + "ISLP_zip_link": f"[ISLP_labs/{lab_version}.zip](https://github.com/intro-stat-learning/ISLP_labs/archive/refs/tags/{lab_version}.zip)", + "ISLP_binder_code": f"[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/intro-stat-learning/ISLP_labs/{lab_version})", + "ISLP_lab_version": "[ISLP/{0}](https://github.com/intro-stat-learning/ISLP/tree/{0})".format(docs_version['library']) + } +myst_number_code_blocks = ['python', 'ipython3'] # -- General configuration @@ -27,7 +47,16 @@ graphviz_dot = '/opt/homebrew/bin/dot' numpydoc_class_members_toctree = False -nb_execution_mode = "cache" 
+nb_execution_mode = "auto" +nb_execution_timeout = 60*20 #*100 +# labs will be built with specific commits of ISLP/ISLP_labs +# we want Ch06 run to exlucde the warnings +nb_execution_excludepatterns = (['imdb.ipynb'] + + [f'Ch{i:02d}*' for i in range(2, 14)]) +print('exclude patterns', nb_execution_excludepatterns) +nb_execution_allow_errors = True + +#nb_kernel_rgx_aliases = {'python3': "islp_test"} intersphinx_mapping = { 'python': ('https://docs.python.org/3/', None), @@ -42,7 +71,19 @@ # -- Options for HTML output -html_theme = 'sphinx_rtd_theme' +html_theme = "sphinx_book_theme" +html_theme_options = { + "repository_url": "https://github.com/intro-stat-learning/ISLP.git", + "use_repository_button": True, +} +html_title = "Introduction to Statistical Learning (Python)" +html_logo = "logo.png" + +source_suffix = { + '.rst': 'restructuredtext', + '.ipynb': 'myst-nb', + '.myst': 'myst-nb', +} # -- Options for EPUB output epub_show_urls = 'footnote' diff --git a/docs/source/datasets/Auto.ipynb b/docs/source/datasets/Auto.ipynb index b88ea02..b588844 100644 --- a/docs/source/datasets/Auto.ipynb +++ b/docs/source/datasets/Auto.ipynb @@ -44,7 +44,14 @@ "cell_type": "code", "execution_count": null, "id": "182ea1d1", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:43.883548Z", + "iopub.status.busy": "2023-07-26T12:47:43.883261Z", + "iopub.status.idle": "2023-07-26T12:47:44.433075Z", + "shell.execute_reply": "2023-07-26T12:47:44.432801Z" + } + }, "outputs": [], "source": [ "from ISLP import load_data\n", @@ -56,7 +63,14 @@ "cell_type": "code", "execution_count": null, "id": "979abd7e", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:44.434662Z", + "iopub.status.busy": "2023-07-26T12:47:44.434558Z", + "iopub.status.idle": "2023-07-26T12:47:44.436577Z", + "shell.execute_reply": "2023-07-26T12:47:44.436322Z" + } + }, "outputs": [], "source": [ "Auto.shape" @@ -66,7 +80,14 @@ 
"cell_type": "code", "execution_count": null, "id": "7444c0f0", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:44.438047Z", + "iopub.status.busy": "2023-07-26T12:47:44.437943Z", + "iopub.status.idle": "2023-07-26T12:47:44.439951Z", + "shell.execute_reply": "2023-07-26T12:47:44.439712Z" + } + }, "outputs": [], "source": [ "Auto.columns" @@ -76,7 +97,14 @@ "cell_type": "code", "execution_count": null, "id": "59b6e919", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:44.441257Z", + "iopub.status.busy": "2023-07-26T12:47:44.441161Z", + "iopub.status.idle": "2023-07-26T12:47:44.449658Z", + "shell.execute_reply": "2023-07-26T12:47:44.449426Z" + } + }, "outputs": [], "source": [ "Auto.describe().iloc[:,:4]" @@ -91,6 +119,18 @@ "display_name": "python3", "language": "python", "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.17" } }, "nbformat": 4, diff --git a/docs/source/datasets/Bikeshare.ipynb b/docs/source/datasets/Bikeshare.ipynb index ddb1053..ab42024 100644 --- a/docs/source/datasets/Bikeshare.ipynb +++ b/docs/source/datasets/Bikeshare.ipynb @@ -56,7 +56,14 @@ "cell_type": "code", "execution_count": null, "id": "bcdb89b6", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:19.462730Z", + "iopub.status.busy": "2023-07-26T12:47:19.461535Z", + "iopub.status.idle": "2023-07-26T12:47:20.022610Z", + "shell.execute_reply": "2023-07-26T12:47:20.022326Z" + } + }, "outputs": [], "source": [ "from ISLP import load_data\n", @@ -68,7 +75,14 @@ "cell_type": "code", "execution_count": null, "id": "72075fb0", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:20.024144Z", + 
"iopub.status.busy": "2023-07-26T12:47:20.024034Z", + "iopub.status.idle": "2023-07-26T12:47:20.026016Z", + "shell.execute_reply": "2023-07-26T12:47:20.025777Z" + } + }, "outputs": [], "source": [ "Bikeshare.shape" @@ -78,7 +92,14 @@ "cell_type": "code", "execution_count": null, "id": "45396d69", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:20.027480Z", + "iopub.status.busy": "2023-07-26T12:47:20.027378Z", + "iopub.status.idle": "2023-07-26T12:47:20.029427Z", + "shell.execute_reply": "2023-07-26T12:47:20.029199Z" + } + }, "outputs": [], "source": [ "Bikeshare.columns" @@ -88,7 +109,14 @@ "cell_type": "code", "execution_count": null, "id": "26c24d9a", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:20.030734Z", + "iopub.status.busy": "2023-07-26T12:47:20.030638Z", + "iopub.status.idle": "2023-07-26T12:47:20.042031Z", + "shell.execute_reply": "2023-07-26T12:47:20.041787Z" + } + }, "outputs": [], "source": [ "Bikeshare.describe().iloc[:,:4]" @@ -105,6 +133,18 @@ "display_name": "python3", "language": "python", "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.17" } }, "nbformat": 4, diff --git a/docs/source/datasets/Boston.ipynb b/docs/source/datasets/Boston.ipynb index 569f5b4..027585a 100644 --- a/docs/source/datasets/Boston.ipynb +++ b/docs/source/datasets/Boston.ipynb @@ -49,7 +49,14 @@ "cell_type": "code", "execution_count": null, "id": "b8bb96f0", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:31.625524Z", + "iopub.status.busy": "2023-07-26T12:47:31.625196Z", + "iopub.status.idle": "2023-07-26T12:47:32.177553Z", + "shell.execute_reply": "2023-07-26T12:47:32.177240Z" + } + }, "outputs": [], "source": [ "from 
ISLP import load_data\n", @@ -61,7 +68,14 @@ "cell_type": "code", "execution_count": null, "id": "ab4b03f8", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:32.179272Z", + "iopub.status.busy": "2023-07-26T12:47:32.179157Z", + "iopub.status.idle": "2023-07-26T12:47:32.181230Z", + "shell.execute_reply": "2023-07-26T12:47:32.180964Z" + } + }, "outputs": [], "source": [ "Boston.shape" @@ -71,7 +85,14 @@ "cell_type": "code", "execution_count": null, "id": "74890e1f", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:32.182653Z", + "iopub.status.busy": "2023-07-26T12:47:32.182557Z", + "iopub.status.idle": "2023-07-26T12:47:32.184501Z", + "shell.execute_reply": "2023-07-26T12:47:32.184276Z" + } + }, "outputs": [], "source": [ "Boston.columns" @@ -81,7 +102,14 @@ "cell_type": "code", "execution_count": null, "id": "90ecf46f", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:32.185826Z", + "iopub.status.busy": "2023-07-26T12:47:32.185735Z", + "iopub.status.idle": "2023-07-26T12:47:32.198310Z", + "shell.execute_reply": "2023-07-26T12:47:32.198074Z" + } + }, "outputs": [], "source": [ "Boston.describe()" @@ -98,6 +126,18 @@ "display_name": "python3", "language": "python", "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.17" } }, "nbformat": 4, diff --git a/docs/source/datasets/BrainCancer.ipynb b/docs/source/datasets/BrainCancer.ipynb index cb75946..89e8b2c 100644 --- a/docs/source/datasets/BrainCancer.ipynb +++ b/docs/source/datasets/BrainCancer.ipynb @@ -39,7 +39,14 @@ "cell_type": "code", "execution_count": null, "id": "519fa8cf", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": 
"2023-07-26T12:47:09.619445Z", + "iopub.status.busy": "2023-07-26T12:47:09.618768Z", + "iopub.status.idle": "2023-07-26T12:47:10.149955Z", + "shell.execute_reply": "2023-07-26T12:47:10.149508Z" + } + }, "outputs": [], "source": [ "from ISLP import load_data\n", @@ -51,7 +58,14 @@ "cell_type": "code", "execution_count": null, "id": "ac7f1920", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:10.151658Z", + "iopub.status.busy": "2023-07-26T12:47:10.151541Z", + "iopub.status.idle": "2023-07-26T12:47:10.153944Z", + "shell.execute_reply": "2023-07-26T12:47:10.153658Z" + } + }, "outputs": [], "source": [ "BrainCancer.shape" @@ -61,7 +75,14 @@ "cell_type": "code", "execution_count": null, "id": "64b3177f", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:10.155433Z", + "iopub.status.busy": "2023-07-26T12:47:10.155323Z", + "iopub.status.idle": "2023-07-26T12:47:10.157819Z", + "shell.execute_reply": "2023-07-26T12:47:10.157458Z" + } + }, "outputs": [], "source": [ "BrainCancer.columns" @@ -71,7 +92,14 @@ "cell_type": "code", "execution_count": null, "id": "8132496d", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:10.159542Z", + "iopub.status.busy": "2023-07-26T12:47:10.159420Z", + "iopub.status.idle": "2023-07-26T12:47:10.166890Z", + "shell.execute_reply": "2023-07-26T12:47:10.166610Z" + } + }, "outputs": [], "source": [ "BrainCancer.describe()" @@ -81,7 +109,14 @@ "cell_type": "code", "execution_count": null, "id": "ed04719d", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:10.168420Z", + "iopub.status.busy": "2023-07-26T12:47:10.168324Z", + "iopub.status.idle": "2023-07-26T12:47:10.171157Z", + "shell.execute_reply": "2023-07-26T12:47:10.170862Z" + } + }, "outputs": [], "source": [ "BrainCancer['diagnosis'].value_counts()" @@ -98,6 +133,18 @@ "display_name": "python3", "language": "python", 
"name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.17" } }, "nbformat": 4, diff --git a/docs/source/datasets/Caravan.ipynb b/docs/source/datasets/Caravan.ipynb index f093422..ab39457 100644 --- a/docs/source/datasets/Caravan.ipynb +++ b/docs/source/datasets/Caravan.ipynb @@ -27,7 +27,14 @@ "cell_type": "code", "execution_count": null, "id": "1f9a6aaa", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:12.041705Z", + "iopub.status.busy": "2023-07-26T12:47:12.040979Z", + "iopub.status.idle": "2023-07-26T12:47:12.637566Z", + "shell.execute_reply": "2023-07-26T12:47:12.637297Z" + } + }, "outputs": [], "source": [ "from ISLP import load_data\n", @@ -39,7 +46,14 @@ "cell_type": "code", "execution_count": null, "id": "88755969", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:12.639146Z", + "iopub.status.busy": "2023-07-26T12:47:12.639031Z", + "iopub.status.idle": "2023-07-26T12:47:12.640881Z", + "shell.execute_reply": "2023-07-26T12:47:12.640666Z" + } + }, "outputs": [], "source": [ "Caravan.shape" @@ -49,7 +63,14 @@ "cell_type": "code", "execution_count": null, "id": "52ea2641", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:12.642281Z", + "iopub.status.busy": "2023-07-26T12:47:12.642186Z", + "iopub.status.idle": "2023-07-26T12:47:12.644243Z", + "shell.execute_reply": "2023-07-26T12:47:12.644020Z" + } + }, "outputs": [], "source": [ "Caravan.columns[:20]" @@ -66,6 +87,18 @@ "display_name": "python3", "language": "python", "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + 
"nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.17" } }, "nbformat": 4, diff --git a/docs/source/datasets/Carseats.ipynb b/docs/source/datasets/Carseats.ipynb index dfd36d4..92ff1b4 100644 --- a/docs/source/datasets/Carseats.ipynb +++ b/docs/source/datasets/Carseats.ipynb @@ -37,7 +37,14 @@ "cell_type": "code", "execution_count": null, "id": "984643c9", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:26.781289Z", + "iopub.status.busy": "2023-07-26T12:47:26.780964Z", + "iopub.status.idle": "2023-07-26T12:47:27.314225Z", + "shell.execute_reply": "2023-07-26T12:47:27.313885Z" + } + }, "outputs": [], "source": [ "from ISLP import load_data\n", @@ -49,7 +56,14 @@ "cell_type": "code", "execution_count": null, "id": "663f5f6a", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:27.316055Z", + "iopub.status.busy": "2023-07-26T12:47:27.315854Z", + "iopub.status.idle": "2023-07-26T12:47:27.318176Z", + "shell.execute_reply": "2023-07-26T12:47:27.317912Z" + } + }, "outputs": [], "source": [ "Carseats.shape" @@ -59,7 +73,14 @@ "cell_type": "code", "execution_count": null, "id": "386299b2", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:27.319606Z", + "iopub.status.busy": "2023-07-26T12:47:27.319504Z", + "iopub.status.idle": "2023-07-26T12:47:27.321648Z", + "shell.execute_reply": "2023-07-26T12:47:27.321403Z" + } + }, "outputs": [], "source": [ "Carseats.columns" @@ -69,7 +90,14 @@ "cell_type": "code", "execution_count": null, "id": "5c8c69c8", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:27.323205Z", + "iopub.status.busy": "2023-07-26T12:47:27.323091Z", + "iopub.status.idle": "2023-07-26T12:47:27.331921Z", + "shell.execute_reply": "2023-07-26T12:47:27.331627Z" + } + }, "outputs": [], "source": [ "Carseats.describe().iloc[:,:4]" @@ -86,6 +114,18 @@ "display_name": 
"python3", "language": "python", "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.17" } }, "nbformat": 4, diff --git a/docs/source/datasets/College.ipynb b/docs/source/datasets/College.ipynb index af1027d..27a4d1d 100644 --- a/docs/source/datasets/College.ipynb +++ b/docs/source/datasets/College.ipynb @@ -58,7 +58,14 @@ "cell_type": "code", "execution_count": null, "id": "680ceb3e", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:17.006699Z", + "iopub.status.busy": "2023-07-26T12:47:17.006226Z", + "iopub.status.idle": "2023-07-26T12:47:17.561114Z", + "shell.execute_reply": "2023-07-26T12:47:17.560739Z" + } + }, "outputs": [], "source": [ "from ISLP import load_data\n", @@ -70,7 +77,14 @@ "cell_type": "code", "execution_count": null, "id": "ccdf3e4f", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:17.563075Z", + "iopub.status.busy": "2023-07-26T12:47:17.562947Z", + "iopub.status.idle": "2023-07-26T12:47:17.565074Z", + "shell.execute_reply": "2023-07-26T12:47:17.564824Z" + } + }, "outputs": [], "source": [ "College.shape" @@ -80,7 +94,14 @@ "cell_type": "code", "execution_count": null, "id": "09f59747", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:17.566389Z", + "iopub.status.busy": "2023-07-26T12:47:17.566297Z", + "iopub.status.idle": "2023-07-26T12:47:17.568257Z", + "shell.execute_reply": "2023-07-26T12:47:17.568025Z" + } + }, "outputs": [], "source": [ "College.columns" @@ -90,7 +111,14 @@ "cell_type": "code", "execution_count": null, "id": "6a48dfd5", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:17.569585Z", + "iopub.status.busy": 
"2023-07-26T12:47:17.569492Z", + "iopub.status.idle": "2023-07-26T12:47:17.582384Z", + "shell.execute_reply": "2023-07-26T12:47:17.582154Z" + } + }, "outputs": [], "source": [ "College.describe().iloc[:,:4]" @@ -107,6 +135,18 @@ "display_name": "python3", "language": "python", "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.17" } }, "nbformat": 4, diff --git a/docs/source/datasets/Credit.ipynb b/docs/source/datasets/Credit.ipynb index f5e51a9..d604aaa 100644 --- a/docs/source/datasets/Credit.ipynb +++ b/docs/source/datasets/Credit.ipynb @@ -43,7 +43,14 @@ "cell_type": "code", "execution_count": null, "id": "c4895446", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:39.024610Z", + "iopub.status.busy": "2023-07-26T12:47:39.024341Z", + "iopub.status.idle": "2023-07-26T12:47:39.593395Z", + "shell.execute_reply": "2023-07-26T12:47:39.593133Z" + } + }, "outputs": [], "source": [ "from ISLP import load_data\n", @@ -55,7 +62,14 @@ "cell_type": "code", "execution_count": null, "id": "c738c66b", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:39.595074Z", + "iopub.status.busy": "2023-07-26T12:47:39.594871Z", + "iopub.status.idle": "2023-07-26T12:47:39.596893Z", + "shell.execute_reply": "2023-07-26T12:47:39.596667Z" + } + }, "outputs": [], "source": [ "Credit.shape" @@ -65,7 +79,14 @@ "cell_type": "code", "execution_count": null, "id": "d612f5a7", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:39.598266Z", + "iopub.status.busy": "2023-07-26T12:47:39.598173Z", + "iopub.status.idle": "2023-07-26T12:47:39.600134Z", + "shell.execute_reply": "2023-07-26T12:47:39.599913Z" + } + }, "outputs": [], "source": [ "Credit.columns" @@ 
-75,7 +96,14 @@ "cell_type": "code", "execution_count": null, "id": "45633b1a", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:39.601442Z", + "iopub.status.busy": "2023-07-26T12:47:39.601344Z", + "iopub.status.idle": "2023-07-26T12:47:39.609927Z", + "shell.execute_reply": "2023-07-26T12:47:39.609656Z" + } + }, "outputs": [], "source": [ "Credit.describe().iloc[:,:4]" @@ -92,6 +120,18 @@ "display_name": "python3", "language": "python", "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.17" } }, "nbformat": 4, diff --git a/docs/source/datasets/Default.ipynb b/docs/source/datasets/Default.ipynb index 64357ef..8023d39 100644 --- a/docs/source/datasets/Default.ipynb +++ b/docs/source/datasets/Default.ipynb @@ -27,7 +27,14 @@ "cell_type": "code", "execution_count": null, "id": "ab810dee", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:36.566964Z", + "iopub.status.busy": "2023-07-26T12:47:36.566691Z", + "iopub.status.idle": "2023-07-26T12:47:37.127499Z", + "shell.execute_reply": "2023-07-26T12:47:37.127183Z" + } + }, "outputs": [], "source": [ "from ISLP import load_data\n", @@ -39,7 +46,14 @@ "cell_type": "code", "execution_count": null, "id": "086ef3a2", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:37.129114Z", + "iopub.status.busy": "2023-07-26T12:47:37.129003Z", + "iopub.status.idle": "2023-07-26T12:47:37.131023Z", + "shell.execute_reply": "2023-07-26T12:47:37.130778Z" + } + }, "outputs": [], "source": [ "Default.shape" @@ -49,7 +63,14 @@ "cell_type": "code", "execution_count": null, "id": "6600c13b", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:37.132471Z", + 
"iopub.status.busy": "2023-07-26T12:47:37.132373Z", + "iopub.status.idle": "2023-07-26T12:47:37.134281Z", + "shell.execute_reply": "2023-07-26T12:47:37.134067Z" + } + }, "outputs": [], "source": [ "Default.columns" @@ -59,7 +80,14 @@ "cell_type": "code", "execution_count": null, "id": "09e98840", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:37.135578Z", + "iopub.status.busy": "2023-07-26T12:47:37.135480Z", + "iopub.status.idle": "2023-07-26T12:47:37.141213Z", + "shell.execute_reply": "2023-07-26T12:47:37.140974Z" + } + }, "outputs": [], "source": [ "Default.describe()" @@ -69,7 +97,14 @@ "cell_type": "code", "execution_count": null, "id": "425f0cb1", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:37.142597Z", + "iopub.status.busy": "2023-07-26T12:47:37.142519Z", + "iopub.status.idle": "2023-07-26T12:47:37.145148Z", + "shell.execute_reply": "2023-07-26T12:47:37.144915Z" + } + }, "outputs": [], "source": [ "Default['student'].value_counts()" @@ -86,6 +121,18 @@ "display_name": "python3", "language": "python", "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.17" } }, "nbformat": 4, diff --git a/docs/source/datasets/Fund.ipynb b/docs/source/datasets/Fund.ipynb index fce1859..2e5dcb5 100644 --- a/docs/source/datasets/Fund.ipynb +++ b/docs/source/datasets/Fund.ipynb @@ -15,7 +15,14 @@ "cell_type": "code", "execution_count": null, "id": "5eba8e49", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:46:59.809785Z", + "iopub.status.busy": "2023-07-26T12:46:59.809389Z", + "iopub.status.idle": "2023-07-26T12:47:00.410897Z", + "shell.execute_reply": "2023-07-26T12:47:00.410627Z" + } + }, "outputs": [], "source": [ "from ISLP 
import load_data\n", @@ -27,7 +34,14 @@ "cell_type": "code", "execution_count": null, "id": "ced3b335", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:00.412492Z", + "iopub.status.busy": "2023-07-26T12:47:00.412385Z", + "iopub.status.idle": "2023-07-26T12:47:00.414444Z", + "shell.execute_reply": "2023-07-26T12:47:00.414168Z" + } + }, "outputs": [], "source": [ "Fund.shape" @@ -37,7 +51,14 @@ "cell_type": "code", "execution_count": null, "id": "bfff1ac6", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:00.415891Z", + "iopub.status.busy": "2023-07-26T12:47:00.415789Z", + "iopub.status.idle": "2023-07-26T12:47:00.417755Z", + "shell.execute_reply": "2023-07-26T12:47:00.417529Z" + } + }, "outputs": [], "source": [ "Fund.columns" @@ -54,6 +75,18 @@ "display_name": "python3", "language": "python", "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.17" } }, "nbformat": 4, diff --git a/docs/source/datasets/Hitters.ipynb b/docs/source/datasets/Hitters.ipynb index 6f261cd..5af634c 100644 --- a/docs/source/datasets/Hitters.ipynb +++ b/docs/source/datasets/Hitters.ipynb @@ -64,7 +64,14 @@ "cell_type": "code", "execution_count": null, "id": "4fa187f0", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:34.072657Z", + "iopub.status.busy": "2023-07-26T12:47:34.072382Z", + "iopub.status.idle": "2023-07-26T12:47:34.654518Z", + "shell.execute_reply": "2023-07-26T12:47:34.654230Z" + } + }, "outputs": [], "source": [ "from ISLP import load_data\n", @@ -76,7 +83,14 @@ "cell_type": "code", "execution_count": null, "id": "04535ffb", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:34.656071Z", + 
"iopub.status.busy": "2023-07-26T12:47:34.655969Z", + "iopub.status.idle": "2023-07-26T12:47:34.657899Z", + "shell.execute_reply": "2023-07-26T12:47:34.657674Z" + } + }, "outputs": [], "source": [ "Hitters.shape" @@ -86,7 +100,14 @@ "cell_type": "code", "execution_count": null, "id": "6875aac6", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:34.659335Z", + "iopub.status.busy": "2023-07-26T12:47:34.659236Z", + "iopub.status.idle": "2023-07-26T12:47:34.661182Z", + "shell.execute_reply": "2023-07-26T12:47:34.660944Z" + } + }, "outputs": [], "source": [ "Hitters.columns" @@ -96,7 +117,14 @@ "cell_type": "code", "execution_count": null, "id": "9e2cffc8", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:34.662645Z", + "iopub.status.busy": "2023-07-26T12:47:34.662543Z", + "iopub.status.idle": "2023-07-26T12:47:34.674958Z", + "shell.execute_reply": "2023-07-26T12:47:34.674698Z" + } + }, "outputs": [], "source": [ "Hitters.describe().iloc[:,:4]" @@ -113,6 +141,18 @@ "display_name": "python3", "language": "python", "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.17" } }, "nbformat": 4, diff --git a/docs/source/datasets/Khan.ipynb b/docs/source/datasets/Khan.ipynb index f12a5ca..c1ce7bf 100644 --- a/docs/source/datasets/Khan.ipynb +++ b/docs/source/datasets/Khan.ipynb @@ -43,7 +43,14 @@ "cell_type": "code", "execution_count": null, "id": "bfda6cad", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:46:53.879692Z", + "iopub.status.busy": "2023-07-26T12:46:53.879072Z", + "iopub.status.idle": "2023-07-26T12:46:54.473904Z", + "shell.execute_reply": "2023-07-26T12:46:54.473562Z" + } + }, "outputs": [], "source": [ "from ISLP import 
load_data\n", @@ -55,7 +62,14 @@ "cell_type": "code", "execution_count": null, "id": "70514dc5", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:46:54.475443Z", + "iopub.status.busy": "2023-07-26T12:46:54.475340Z", + "iopub.status.idle": "2023-07-26T12:46:54.477103Z", + "shell.execute_reply": "2023-07-26T12:46:54.476883Z" + } + }, "outputs": [], "source": [ "for X in ['xtest', 'xtrain']:\n", @@ -66,7 +80,14 @@ "cell_type": "code", "execution_count": null, "id": "e9df5de8", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:46:54.478408Z", + "iopub.status.busy": "2023-07-26T12:46:54.478336Z", + "iopub.status.idle": "2023-07-26T12:46:54.480540Z", + "shell.execute_reply": "2023-07-26T12:46:54.480299Z" + } + }, "outputs": [], "source": [ "for Y in ['ytest', 'ytrain']:\n", @@ -84,6 +105,18 @@ "display_name": "python3", "language": "python", "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.17" } }, "nbformat": 4, diff --git a/docs/source/datasets/NCI60.ipynb b/docs/source/datasets/NCI60.ipynb index bbb576f..b38f981 100644 --- a/docs/source/datasets/NCI60.ipynb +++ b/docs/source/datasets/NCI60.ipynb @@ -26,7 +26,14 @@ "cell_type": "code", "execution_count": null, "id": "c88c2eaf", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:07.189429Z", + "iopub.status.busy": "2023-07-26T12:47:07.188891Z", + "iopub.status.idle": "2023-07-26T12:47:07.734853Z", + "shell.execute_reply": "2023-07-26T12:47:07.734392Z" + } + }, "outputs": [], "source": [ "from ISLP import load_data\n", @@ -38,7 +45,14 @@ "cell_type": "code", "execution_count": null, "id": "0e6279ad", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": 
"2023-07-26T12:47:07.736643Z", + "iopub.status.busy": "2023-07-26T12:47:07.736477Z", + "iopub.status.idle": "2023-07-26T12:47:07.740295Z", + "shell.execute_reply": "2023-07-26T12:47:07.739954Z" + } + }, "outputs": [], "source": [ "NCI60['labels'].value_counts()" @@ -48,7 +62,14 @@ "cell_type": "code", "execution_count": null, "id": "ed5ddd2f", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:07.741963Z", + "iopub.status.busy": "2023-07-26T12:47:07.741866Z", + "iopub.status.idle": "2023-07-26T12:47:07.744496Z", + "shell.execute_reply": "2023-07-26T12:47:07.744146Z" + } + }, "outputs": [], "source": [ "NCI60['data'].shape" @@ -65,6 +86,18 @@ "display_name": "python3", "language": "python", "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.17" } }, "nbformat": 4, diff --git a/docs/source/datasets/NYSE.ipynb b/docs/source/datasets/NYSE.ipynb index 5f9dbd5..4fb6ea5 100644 --- a/docs/source/datasets/NYSE.ipynb +++ b/docs/source/datasets/NYSE.ipynb @@ -33,7 +33,14 @@ "cell_type": "code", "execution_count": null, "id": "fcff6c95", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:24.365935Z", + "iopub.status.busy": "2023-07-26T12:47:24.365648Z", + "iopub.status.idle": "2023-07-26T12:47:24.910157Z", + "shell.execute_reply": "2023-07-26T12:47:24.909886Z" + } + }, "outputs": [], "source": [ "from ISLP import load_data\n", @@ -45,7 +52,14 @@ "cell_type": "code", "execution_count": null, "id": "84426961", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:24.911976Z", + "iopub.status.busy": "2023-07-26T12:47:24.911859Z", + "iopub.status.idle": "2023-07-26T12:47:24.913899Z", + "shell.execute_reply": "2023-07-26T12:47:24.913685Z" + } 
+ }, "outputs": [], "source": [ "NYSE.shape" @@ -55,7 +69,14 @@ "cell_type": "code", "execution_count": null, "id": "e6194a8c", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:24.915295Z", + "iopub.status.busy": "2023-07-26T12:47:24.915180Z", + "iopub.status.idle": "2023-07-26T12:47:24.917209Z", + "shell.execute_reply": "2023-07-26T12:47:24.916991Z" + } + }, "outputs": [], "source": [ "NYSE.columns" @@ -65,7 +86,14 @@ "cell_type": "code", "execution_count": null, "id": "0c7bf3d7", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:24.918571Z", + "iopub.status.busy": "2023-07-26T12:47:24.918468Z", + "iopub.status.idle": "2023-07-26T12:47:24.924914Z", + "shell.execute_reply": "2023-07-26T12:47:24.924671Z" + } + }, "outputs": [], "source": [ "NYSE.describe()" @@ -82,6 +110,18 @@ "display_name": "python3", "language": "python", "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.17" } }, "nbformat": 4, diff --git a/docs/source/datasets/OJ.ipynb b/docs/source/datasets/OJ.ipynb index e18a4de..55ffeb9 100644 --- a/docs/source/datasets/OJ.ipynb +++ b/docs/source/datasets/OJ.ipynb @@ -61,7 +61,14 @@ "cell_type": "code", "execution_count": null, "id": "609742da", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:14.553008Z", + "iopub.status.busy": "2023-07-26T12:47:14.551694Z", + "iopub.status.idle": "2023-07-26T12:47:15.102658Z", + "shell.execute_reply": "2023-07-26T12:47:15.102334Z" + } + }, "outputs": [], "source": [ "from ISLP import load_data\n", @@ -73,7 +80,14 @@ "cell_type": "code", "execution_count": null, "id": "6f195dcd", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": 
"2023-07-26T12:47:15.104419Z", + "iopub.status.busy": "2023-07-26T12:47:15.104301Z", + "iopub.status.idle": "2023-07-26T12:47:15.106415Z", + "shell.execute_reply": "2023-07-26T12:47:15.106177Z" + } + }, "outputs": [], "source": [ "OJ.shape" @@ -83,7 +97,14 @@ "cell_type": "code", "execution_count": null, "id": "aaafb83b", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:15.107821Z", + "iopub.status.busy": "2023-07-26T12:47:15.107723Z", + "iopub.status.idle": "2023-07-26T12:47:15.109747Z", + "shell.execute_reply": "2023-07-26T12:47:15.109486Z" + } + }, "outputs": [], "source": [ "OJ.columns" @@ -93,7 +114,14 @@ "cell_type": "code", "execution_count": null, "id": "774dfa86", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:15.111066Z", + "iopub.status.busy": "2023-07-26T12:47:15.110974Z", + "iopub.status.idle": "2023-07-26T12:47:15.123225Z", + "shell.execute_reply": "2023-07-26T12:47:15.122965Z" + } + }, "outputs": [], "source": [ "OJ.describe().iloc[:,:4]" @@ -110,6 +138,18 @@ "display_name": "python3", "language": "python", "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.17" } }, "nbformat": 4, diff --git a/docs/source/datasets/Portfolio.ipynb b/docs/source/datasets/Portfolio.ipynb index 6d6a60d..1a6d711 100644 --- a/docs/source/datasets/Portfolio.ipynb +++ b/docs/source/datasets/Portfolio.ipynb @@ -22,7 +22,14 @@ "cell_type": "code", "execution_count": null, "id": "3adff220", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:02.309375Z", + "iopub.status.busy": "2023-07-26T12:47:02.308873Z", + "iopub.status.idle": "2023-07-26T12:47:02.849537Z", + "shell.execute_reply": "2023-07-26T12:47:02.849247Z" + } + }, 
"outputs": [], "source": [ "from ISLP import load_data\n", @@ -34,7 +41,14 @@ "cell_type": "code", "execution_count": null, "id": "b02a9e67", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:02.851392Z", + "iopub.status.busy": "2023-07-26T12:47:02.851244Z", + "iopub.status.idle": "2023-07-26T12:47:02.853779Z", + "shell.execute_reply": "2023-07-26T12:47:02.853348Z" + } + }, "outputs": [], "source": [ "Portfolio.shape" @@ -44,7 +58,14 @@ "cell_type": "code", "execution_count": null, "id": "3e83a0ed", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:02.855660Z", + "iopub.status.busy": "2023-07-26T12:47:02.855540Z", + "iopub.status.idle": "2023-07-26T12:47:02.858065Z", + "shell.execute_reply": "2023-07-26T12:47:02.857779Z" + } + }, "outputs": [], "source": [ "Portfolio.columns" @@ -54,7 +75,14 @@ "cell_type": "code", "execution_count": null, "id": "3ebec412", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:02.859606Z", + "iopub.status.busy": "2023-07-26T12:47:02.859503Z", + "iopub.status.idle": "2023-07-26T12:47:02.865754Z", + "shell.execute_reply": "2023-07-26T12:47:02.865418Z" + } + }, "outputs": [], "source": [ "Portfolio.describe()" @@ -71,6 +99,18 @@ "display_name": "python3", "language": "python", "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.17" } }, "nbformat": 4, diff --git a/docs/source/datasets/Publication.ipynb b/docs/source/datasets/Publication.ipynb index a4a6dfa..de4a449 100644 --- a/docs/source/datasets/Publication.ipynb +++ b/docs/source/datasets/Publication.ipynb @@ -45,7 +45,14 @@ "cell_type": "code", "execution_count": null, "id": "61d7c2b3", - "metadata": {}, + "metadata": { + "execution": { + 
"iopub.execute_input": "2023-07-26T12:47:29.196850Z", + "iopub.status.busy": "2023-07-26T12:47:29.196559Z", + "iopub.status.idle": "2023-07-26T12:47:29.727827Z", + "shell.execute_reply": "2023-07-26T12:47:29.727421Z" + } + }, "outputs": [], "source": [ "from ISLP import load_data\n", @@ -57,7 +64,14 @@ "cell_type": "code", "execution_count": null, "id": "4d72460d", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:29.729844Z", + "iopub.status.busy": "2023-07-26T12:47:29.729686Z", + "iopub.status.idle": "2023-07-26T12:47:29.732275Z", + "shell.execute_reply": "2023-07-26T12:47:29.732008Z" + } + }, "outputs": [], "source": [ "Publication.shape" @@ -67,7 +81,14 @@ "cell_type": "code", "execution_count": null, "id": "fd34224c", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:29.734028Z", + "iopub.status.busy": "2023-07-26T12:47:29.733885Z", + "iopub.status.idle": "2023-07-26T12:47:29.736365Z", + "shell.execute_reply": "2023-07-26T12:47:29.736014Z" + } + }, "outputs": [], "source": [ "Publication.columns" @@ -77,7 +98,14 @@ "cell_type": "code", "execution_count": null, "id": "51bfb0aa", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:29.738169Z", + "iopub.status.busy": "2023-07-26T12:47:29.738046Z", + "iopub.status.idle": "2023-07-26T12:47:29.747027Z", + "shell.execute_reply": "2023-07-26T12:47:29.746722Z" + } + }, "outputs": [], "source": [ "Publication.describe().iloc[:,:4]" @@ -94,6 +122,18 @@ "display_name": "python3", "language": "python", "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.17" } }, "nbformat": 4, diff --git a/docs/source/datasets/Smarket.ipynb b/docs/source/datasets/Smarket.ipynb index 
cced2a9..0be4dd9 100644 --- a/docs/source/datasets/Smarket.ipynb +++ b/docs/source/datasets/Smarket.ipynb @@ -41,7 +41,14 @@ "cell_type": "code", "execution_count": null, "id": "3d920337", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:21.928355Z", + "iopub.status.busy": "2023-07-26T12:47:21.927766Z", + "iopub.status.idle": "2023-07-26T12:47:22.480597Z", + "shell.execute_reply": "2023-07-26T12:47:22.480297Z" + } + }, "outputs": [], "source": [ "from ISLP import load_data\n", @@ -53,7 +60,14 @@ "cell_type": "code", "execution_count": null, "id": "25d90138", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:22.482125Z", + "iopub.status.busy": "2023-07-26T12:47:22.482016Z", + "iopub.status.idle": "2023-07-26T12:47:22.484017Z", + "shell.execute_reply": "2023-07-26T12:47:22.483801Z" + } + }, "outputs": [], "source": [ "Smarket.shape" @@ -63,7 +77,14 @@ "cell_type": "code", "execution_count": null, "id": "0e8c57de", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:22.485456Z", + "iopub.status.busy": "2023-07-26T12:47:22.485359Z", + "iopub.status.idle": "2023-07-26T12:47:22.487416Z", + "shell.execute_reply": "2023-07-26T12:47:22.487186Z" + } + }, "outputs": [], "source": [ "Smarket.columns" @@ -73,7 +94,14 @@ "cell_type": "code", "execution_count": null, "id": "2d455f1e", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:22.488803Z", + "iopub.status.busy": "2023-07-26T12:47:22.488706Z", + "iopub.status.idle": "2023-07-26T12:47:22.497401Z", + "shell.execute_reply": "2023-07-26T12:47:22.497165Z" + } + }, "outputs": [], "source": [ "Smarket.describe().iloc[:,-4:]" @@ -90,6 +118,18 @@ "display_name": "python3", "language": "python", "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + 
"name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.17" } }, "nbformat": 4, diff --git a/docs/source/datasets/USArrests.ipynb b/docs/source/datasets/USArrests.ipynb index 1107424..d860098 100644 --- a/docs/source/datasets/USArrests.ipynb +++ b/docs/source/datasets/USArrests.ipynb @@ -28,9 +28,16 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "feab45d4-ce30-4ea9-800c-bbe9e7c11f6d", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:46:56.351520Z", + "iopub.status.busy": "2023-07-26T12:46:56.350481Z", + "iopub.status.idle": "2023-07-26T12:46:58.021100Z", + "shell.execute_reply": "2023-07-26T12:46:58.019698Z" + } + }, "outputs": [], "source": [ "from statsmodels.datasets import get_rdataset\n", @@ -39,157 +46,51 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "bdfffad4-6ab1-45da-8d62-8a7c4326fb24", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(50, 4)" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:46:58.027241Z", + "iopub.status.busy": "2023-07-26T12:46:58.026857Z", + "iopub.status.idle": "2023-07-26T12:46:58.034424Z", + "shell.execute_reply": "2023-07-26T12:46:58.033781Z" } - ], + }, + "outputs": [], "source": [ "USArrests.shape" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "02f28a67-e8b9-4a17-ad0d-88672e1de26d", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['Murder', 'Assault', 'UrbanPop', 'Rape'], dtype='object')" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:46:58.038173Z", + "iopub.status.busy": "2023-07-26T12:46:58.037943Z", + "iopub.status.idle": "2023-07-26T12:46:58.041828Z", + 
"shell.execute_reply": "2023-07-26T12:46:58.041345Z" } - ], + }, + "outputs": [], "source": [ "USArrests.columns" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "711db396-64d6-4fbd-9be4-bebe4117216f", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
MurderAssaultUrbanPopRape
count50.0000050.00000050.00000050.000000
mean7.78800170.76000065.54000021.232000
std4.3555183.33766114.4747639.366385
min0.8000045.00000032.0000007.300000
25%4.07500109.00000054.50000015.075000
50%7.25000159.00000066.00000020.100000
75%11.25000249.00000077.75000026.175000
max17.40000337.00000091.00000046.000000
\n", - "
" - ], - "text/plain": [ - " Murder Assault UrbanPop Rape\n", - "count 50.00000 50.000000 50.000000 50.000000\n", - "mean 7.78800 170.760000 65.540000 21.232000\n", - "std 4.35551 83.337661 14.474763 9.366385\n", - "min 0.80000 45.000000 32.000000 7.300000\n", - "25% 4.07500 109.000000 54.500000 15.075000\n", - "50% 7.25000 159.000000 66.000000 20.100000\n", - "75% 11.25000 249.000000 77.750000 26.175000\n", - "max 17.40000 337.000000 91.000000 46.000000" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:46:58.044543Z", + "iopub.status.busy": "2023-07-26T12:46:58.044381Z", + "iopub.status.idle": "2023-07-26T12:46:58.057559Z", + "shell.execute_reply": "2023-07-26T12:46:58.057142Z" } - ], + }, + "outputs": [], "source": [ "USArrests.describe()" ] @@ -216,7 +117,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.13" + "version": "3.9.17" } }, "nbformat": 4, diff --git a/docs/source/datasets/Wage.ipynb b/docs/source/datasets/Wage.ipynb index b95d853..28bb484 100644 --- a/docs/source/datasets/Wage.ipynb +++ b/docs/source/datasets/Wage.ipynb @@ -53,7 +53,14 @@ "cell_type": "code", "execution_count": null, "id": "6832d321", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:04.731864Z", + "iopub.status.busy": "2023-07-26T12:47:04.731413Z", + "iopub.status.idle": "2023-07-26T12:47:05.295785Z", + "shell.execute_reply": "2023-07-26T12:47:05.295452Z" + } + }, "outputs": [], "source": [ "from ISLP import load_data\n", @@ -65,7 +72,14 @@ "cell_type": "code", "execution_count": null, "id": "1c1ad3f3", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:05.297482Z", + "iopub.status.busy": "2023-07-26T12:47:05.297357Z", + "iopub.status.idle": "2023-07-26T12:47:05.299508Z", + "shell.execute_reply": "2023-07-26T12:47:05.299247Z" + } + }, 
"outputs": [], "source": [ "Wage.shape" @@ -75,7 +89,14 @@ "cell_type": "code", "execution_count": null, "id": "d56ab6a4", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:05.300989Z", + "iopub.status.busy": "2023-07-26T12:47:05.300875Z", + "iopub.status.idle": "2023-07-26T12:47:05.303024Z", + "shell.execute_reply": "2023-07-26T12:47:05.302786Z" + } + }, "outputs": [], "source": [ "Wage.columns" @@ -85,7 +106,14 @@ "cell_type": "code", "execution_count": null, "id": "5f021939", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:05.304606Z", + "iopub.status.busy": "2023-07-26T12:47:05.304487Z", + "iopub.status.idle": "2023-07-26T12:47:05.311771Z", + "shell.execute_reply": "2023-07-26T12:47:05.311522Z" + } + }, "outputs": [], "source": [ "Wage.describe()" @@ -102,6 +130,18 @@ "display_name": "python3", "language": "python", "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.17" } }, "nbformat": 4, diff --git a/docs/source/datasets/Weekly.ipynb b/docs/source/datasets/Weekly.ipynb index 69f26d6..15a1050 100644 --- a/docs/source/datasets/Weekly.ipynb +++ b/docs/source/datasets/Weekly.ipynb @@ -41,7 +41,14 @@ "cell_type": "code", "execution_count": null, "id": "d19dd431", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:41.468580Z", + "iopub.status.busy": "2023-07-26T12:47:41.468291Z", + "iopub.status.idle": "2023-07-26T12:47:41.999679Z", + "shell.execute_reply": "2023-07-26T12:47:41.999341Z" + } + }, "outputs": [], "source": [ "from ISLP import load_data\n", @@ -53,7 +60,14 @@ "cell_type": "code", "execution_count": null, "id": "17d2cda4", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": 
"2023-07-26T12:47:42.002632Z", + "iopub.status.busy": "2023-07-26T12:47:42.002470Z", + "iopub.status.idle": "2023-07-26T12:47:42.004871Z", + "shell.execute_reply": "2023-07-26T12:47:42.004611Z" + } + }, "outputs": [], "source": [ "Weekly.shape" @@ -63,7 +77,14 @@ "cell_type": "code", "execution_count": null, "id": "f822715b", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:42.006534Z", + "iopub.status.busy": "2023-07-26T12:47:42.006422Z", + "iopub.status.idle": "2023-07-26T12:47:42.008496Z", + "shell.execute_reply": "2023-07-26T12:47:42.008187Z" + } + }, "outputs": [], "source": [ "Weekly.columns" @@ -73,7 +94,14 @@ "cell_type": "code", "execution_count": null, "id": "9a5f4d04", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:47:42.010010Z", + "iopub.status.busy": "2023-07-26T12:47:42.009911Z", + "iopub.status.idle": "2023-07-26T12:47:42.019036Z", + "shell.execute_reply": "2023-07-26T12:47:42.018706Z" + } + }, "outputs": [], "source": [ "Weekly.describe().iloc[:,:4]" @@ -98,6 +126,18 @@ "display_name": "python3", "language": "python", "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.17" } }, "nbformat": 4, diff --git a/docs/source/docs_version.json b/docs/source/docs_version.json new file mode 100644 index 0000000..d6217ce --- /dev/null +++ b/docs/source/docs_version.json @@ -0,0 +1,4 @@ +{"labs": "v2.2", + "library": "v0.4.0", + "comment":"labs should be version of ISLP pointed to in ISLP_labs/README.md, library version should be explicitly marked in ISLP_labs/requirements.txt; don't forget to strip warnings!!!!!!!!" 
+} diff --git a/docs/source/helpers/cluster.ipynb b/docs/source/helpers/cluster.ipynb index 56cf3d8..4aa7de3 100644 --- a/docs/source/helpers/cluster.ipynb +++ b/docs/source/helpers/cluster.ipynb @@ -8,14 +8,21 @@ "# Clustering\n", "\n", "This module has a single function, used to help visualize a dendrogram from a\n", - "hierarchical clustering." + "hierarchical clustering. The function is based on this example from [sklearn.cluster](https://scikit-learn.org/stable/auto_examples/cluster/plot_agglomerative_dendrogram.html)." ] }, { "cell_type": "code", "execution_count": null, "id": "d5df152d", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:46:42.214971Z", + "iopub.status.busy": "2023-07-26T12:46:42.214537Z", + "iopub.status.idle": "2023-07-26T12:46:42.860533Z", + "shell.execute_reply": "2023-07-26T12:46:42.860243Z" + } + }, "outputs": [], "source": [ "import numpy as np\n", @@ -36,7 +43,14 @@ "cell_type": "code", "execution_count": null, "id": "0135c1fb", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:46:42.862401Z", + "iopub.status.busy": "2023-07-26T12:46:42.862250Z", + "iopub.status.idle": "2023-07-26T12:46:42.864336Z", + "shell.execute_reply": "2023-07-26T12:46:42.864118Z" + } + }, "outputs": [], "source": [ "rng = np.random.default_rng(1)\n", @@ -56,7 +70,14 @@ "cell_type": "code", "execution_count": null, "id": "17c52650", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:46:42.865831Z", + "iopub.status.busy": "2023-07-26T12:46:42.865731Z", + "iopub.status.idle": "2023-07-26T12:46:42.867386Z", + "shell.execute_reply": "2023-07-26T12:46:42.867147Z" + } + }, "outputs": [], "source": [ "clust = AgglomerativeClustering(distance_threshold=0,\n", @@ -68,7 +89,14 @@ "cell_type": "code", "execution_count": null, "id": "a3ae2622", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:46:42.868746Z", + 
"iopub.status.busy": "2023-07-26T12:46:42.868668Z", + "iopub.status.idle": "2023-07-26T12:46:42.872497Z", + "shell.execute_reply": "2023-07-26T12:46:42.872240Z" + } + }, "outputs": [], "source": [ "clust.fit(X)" @@ -86,7 +114,14 @@ "cell_type": "code", "execution_count": null, "id": "64e726a4", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:46:42.873930Z", + "iopub.status.busy": "2023-07-26T12:46:42.873845Z", + "iopub.status.idle": "2023-07-26T12:46:43.195508Z", + "shell.execute_reply": "2023-07-26T12:46:43.195084Z" + } + }, "outputs": [], "source": [ "linkage = compute_linkage(clust)\n", @@ -101,9 +136,21 @@ "main_language": "python" }, "kernelspec": { - "display_name": "python3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.17" } }, "nbformat": 4, diff --git a/docs/source/helpers/pygam.ipynb b/docs/source/helpers/pygam.ipynb index aab61d1..b452294 100644 --- a/docs/source/helpers/pygam.ipynb +++ b/docs/source/helpers/pygam.ipynb @@ -16,7 +16,14 @@ "cell_type": "code", "execution_count": null, "id": "9a52fb27", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:46:47.820912Z", + "iopub.status.busy": "2023-07-26T12:46:47.820490Z", + "iopub.status.idle": "2023-07-26T12:46:48.577304Z", + "shell.execute_reply": "2023-07-26T12:46:48.577007Z" + } + }, "outputs": [], "source": [ "import numpy as np\n", @@ -46,7 +53,14 @@ "cell_type": "code", "execution_count": null, "id": "4bddce77", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:46:48.579295Z", + "iopub.status.busy": "2023-07-26T12:46:48.579114Z", + "iopub.status.idle": "2023-07-26T12:46:48.581608Z", + 
"shell.execute_reply": "2023-07-26T12:46:48.581355Z" + } + }, "outputs": [], "source": [ "rng = np.random.default_rng(1)\n", @@ -69,7 +83,14 @@ "cell_type": "code", "execution_count": null, "id": "3f8946e0", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:46:48.583287Z", + "iopub.status.busy": "2023-07-26T12:46:48.583187Z", + "iopub.status.idle": "2023-07-26T12:46:48.618486Z", + "shell.execute_reply": "2023-07-26T12:46:48.614888Z" + } + }, "outputs": [], "source": [ "terms = [s(f, lam=0.01) for f in range(3)]\n", @@ -91,7 +112,14 @@ "cell_type": "code", "execution_count": null, "id": "c5b38706", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:46:48.624580Z", + "iopub.status.busy": "2023-07-26T12:46:48.624177Z", + "iopub.status.idle": "2023-07-26T12:46:48.814238Z", + "shell.execute_reply": "2023-07-26T12:46:48.808746Z" + } + }, "outputs": [], "source": [ "ax = plot(gam, 0)" @@ -109,7 +137,14 @@ "cell_type": "code", "execution_count": null, "id": "e4d2b6f0", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:46:48.825281Z", + "iopub.status.busy": "2023-07-26T12:46:48.824327Z", + "iopub.status.idle": "2023-07-26T12:46:48.897739Z", + "shell.execute_reply": "2023-07-26T12:46:48.897447Z" + } + }, "outputs": [], "source": [ "ax.scatter(X[:,0], \n", @@ -131,7 +166,14 @@ "cell_type": "code", "execution_count": null, "id": "82374baa", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:46:48.899404Z", + "iopub.status.busy": "2023-07-26T12:46:48.899288Z", + "iopub.status.idle": "2023-07-26T12:46:48.916570Z", + "shell.execute_reply": "2023-07-26T12:46:48.915079Z" + } + }, "outputs": [], "source": [ "[degrees_of_freedom(X,\n", @@ -153,7 +195,14 @@ "cell_type": "code", "execution_count": null, "id": "0576d1f3", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:46:48.924539Z", 
+ "iopub.status.busy": "2023-07-26T12:46:48.924174Z", + "iopub.status.idle": "2023-07-26T12:46:48.955630Z", + "shell.execute_reply": "2023-07-26T12:46:48.954722Z" + } + }, "outputs": [], "source": [ "lam_vals = [approx_lam(X,\n", @@ -174,7 +223,14 @@ "cell_type": "code", "execution_count": null, "id": "3a8b546e", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:46:48.961056Z", + "iopub.status.busy": "2023-07-26T12:46:48.960521Z", + "iopub.status.idle": "2023-07-26T12:46:48.989331Z", + "shell.execute_reply": "2023-07-26T12:46:48.987244Z" + } + }, "outputs": [], "source": [ "fixed_terms = [s(f, lam=l) for \n", @@ -189,7 +245,14 @@ "cell_type": "code", "execution_count": null, "id": "f2cfbea2", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:46:48.995461Z", + "iopub.status.busy": "2023-07-26T12:46:48.994945Z", + "iopub.status.idle": "2023-07-26T12:46:49.130069Z", + "shell.execute_reply": "2023-07-26T12:46:49.129127Z" + } + }, "outputs": [], "source": [ "ax = plot(fixed_gam, 0)\n", @@ -210,6 +273,18 @@ "display_name": "python3", "language": "python", "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.17" } }, "nbformat": 4, diff --git a/docs/source/helpers/survival.ipynb b/docs/source/helpers/survival.ipynb index 7cb30a3..f90123e 100644 --- a/docs/source/helpers/survival.ipynb +++ b/docs/source/helpers/survival.ipynb @@ -15,7 +15,14 @@ "cell_type": "code", "execution_count": null, "id": "0932cabc", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:46:45.058072Z", + "iopub.status.busy": "2023-07-26T12:46:45.057742Z", + "iopub.status.idle": "2023-07-26T12:46:45.657730Z", + "shell.execute_reply": "2023-07-26T12:46:45.657332Z" + } + 
}, "outputs": [], "source": [ "import numpy as np\n", @@ -40,7 +47,14 @@ "cell_type": "code", "execution_count": null, "id": "d82896bb", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:46:45.659634Z", + "iopub.status.busy": "2023-07-26T12:46:45.659493Z", + "iopub.status.idle": "2023-07-26T12:46:45.661327Z", + "shell.execute_reply": "2023-07-26T12:46:45.661109Z" + } + }, "outputs": [], "source": [ "cum_haz = lambda t: t\n", @@ -51,7 +65,14 @@ "cell_type": "code", "execution_count": null, "id": "c9f9d590", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:46:45.662631Z", + "iopub.status.busy": "2023-07-26T12:46:45.662534Z", + "iopub.status.idle": "2023-07-26T12:46:45.672267Z", + "shell.execute_reply": "2023-07-26T12:46:45.672017Z" + } + }, "outputs": [], "source": [ "T = np.array([sim_time(np.log(2), cum_haz, rng) for _ in range(500)])" @@ -69,7 +90,14 @@ "cell_type": "code", "execution_count": null, "id": "2d8478dc", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:46:45.673768Z", + "iopub.status.busy": "2023-07-26T12:46:45.673685Z", + "iopub.status.idle": "2023-07-26T12:46:45.934676Z", + "shell.execute_reply": "2023-07-26T12:46:45.934321Z" + } + }, "outputs": [], "source": [ "kmf = KaplanMeierFitter(label=\"Simulated data\")\n", @@ -111,6 +139,18 @@ "display_name": "python3", "language": "python", "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.17" } }, "nbformat": 4, diff --git a/docs/source/helpers/svm.ipynb b/docs/source/helpers/svm.ipynb index 593d840..eb950b5 100644 --- a/docs/source/helpers/svm.ipynb +++ b/docs/source/helpers/svm.ipynb @@ -14,7 +14,14 @@ "cell_type": "code", "execution_count": null, "id": "2746a357", 
- "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:46:51.026740Z", + "iopub.status.busy": "2023-07-26T12:46:51.026289Z", + "iopub.status.idle": "2023-07-26T12:46:51.779743Z", + "shell.execute_reply": "2023-07-26T12:46:51.779280Z" + } + }, "outputs": [], "source": [ "import numpy as np\n", @@ -34,7 +41,14 @@ "cell_type": "code", "execution_count": null, "id": "4728535b", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:46:51.781697Z", + "iopub.status.busy": "2023-07-26T12:46:51.781546Z", + "iopub.status.idle": "2023-07-26T12:46:51.783810Z", + "shell.execute_reply": "2023-07-26T12:46:51.783514Z" + } + }, "outputs": [], "source": [ "rng = np.random.default_rng(1)\n", @@ -56,7 +70,14 @@ "cell_type": "code", "execution_count": null, "id": "74da6860", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:46:51.785373Z", + "iopub.status.busy": "2023-07-26T12:46:51.785272Z", + "iopub.status.idle": "2023-07-26T12:46:51.789605Z", + "shell.execute_reply": "2023-07-26T12:46:51.789351Z" + } + }, "outputs": [], "source": [ "svm = SVC(kernel='linear')\n", @@ -67,7 +88,14 @@ "cell_type": "code", "execution_count": null, "id": "d87b6f75", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:46:51.790987Z", + "iopub.status.busy": "2023-07-26T12:46:51.790907Z", + "iopub.status.idle": "2023-07-26T12:46:51.883284Z", + "shell.execute_reply": "2023-07-26T12:46:51.882993Z" + } + }, "outputs": [], "source": [ "plot(X, Y, svm)" @@ -89,7 +117,14 @@ "cell_type": "code", "execution_count": null, "id": "bc58956a", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2023-07-26T12:46:51.884984Z", + "iopub.status.busy": "2023-07-26T12:46:51.884867Z", + "iopub.status.idle": "2023-07-26T12:46:52.011375Z", + "shell.execute_reply": "2023-07-26T12:46:52.011081Z" + } + }, "outputs": [], "source": [ "plot(X, Y, svm, 
features=(3, 4))" @@ -106,6 +141,18 @@ "display_name": "python3", "language": "python", "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.17" } }, "nbformat": 4, diff --git a/docs/source/imdb.ipynb b/docs/source/imdb.ipynb index 1718a58..d9ba5cb 100644 --- a/docs/source/imdb.ipynb +++ b/docs/source/imdb.ipynb @@ -5,71 +5,109 @@ "id": "50f2b809", "metadata": {}, "source": [ - "# Creating a clean IMDB dataset\n", + "# Creating IMDB dataset from `keras` version\n", + "\n", + "This script details how the `IMDB` data in `ISLP` was constructed.\n", "\n", "Running this example requires `keras`. Use `pip install keras` to install if necessary." ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "d920bb2e", "metadata": {}, "outputs": [], "source": [ - "import pickle" + "import pickle\n", + "import numpy as np\n", + "from scipy.sparse import coo_matrix, save_npz\n", + "import torch\n", + "from keras.datasets import imdb\n", + "from tensorflow.keras.preprocessing.sequence import pad_sequences" ] }, { - "cell_type": "code", - "execution_count": null, - "id": "e507f1fb", + "cell_type": "markdown", + "id": "eaf27f0c-0cb0-4ad5-8775-d138e3f20933", "metadata": {}, - "outputs": [], "source": [ - "import numpy as np\n", - "from scipy.sparse import coo_matrix, save_npz\n", - "import torch" + "We first load the data using `keras`, limiting focus to the 10000 most commmon words." 
] }, { "cell_type": "code", - "execution_count": null, - "id": "b94d3f35", + "execution_count": 2, + "id": "29f0e01e", "metadata": {}, "outputs": [], "source": [ - "from keras.datasets import imdb\n", - "from tensorflow.keras.preprocessing.sequence import pad_sequences" + "# the 3 is for three terms: \n", + "num_words = 10000+3\n", + "((S_train, L_train), \n", + " (S_test, L_test)) = imdb.load_data(num_words=num_words)" + ] + }, + { + "cell_type": "markdown", + "id": "9020ab27-cc62-4b86-85ba-80a94ff692de", + "metadata": {}, + "source": [ + "The object `S_train` is effectively a list in which each document has been encoded into a sequence of\n", + "values from 0 to 10002." ] }, { "cell_type": "code", - "execution_count": null, - "id": "29f0e01e", + "execution_count": 3, + "id": "e27564c4-320f-42b6-9f2e-2a2afdebefcf", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "[1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# the 3 is for three terms: \n", - "num_words = 10000+3\n", - "((S_train, Y_train), \n", - " (S_test, Y_test)) = imdb.load_data(num_words=num_words)" + "S_train[0][:10]" + ] + }, + { + "cell_type": "markdown", + "id": "15f039fe-faed-4884-a725-1c51d6c8d4d4", + "metadata": {}, + "source": [ + "We'll use `np.float32` as that is the common precision used in `torch`." ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "6cc3c3cb", "metadata": {}, "outputs": [], "source": [ - "Y_train = Y_train.astype(np.float32)\n", - "Y_test = Y_test.astype(np.float32)" + "L_train = L_train.astype(np.float32)\n", + "L_test = L_test.astype(np.float32)" + ] + }, + { + "cell_type": "markdown", + "id": "005679bc-4337-4757-831e-f9a6ea50f6aa", + "metadata": {}, + "source": [ + "We will use a one-hot encoding that captures whether or not a given word appears in a given review." 
] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "7b6d1098", "metadata": {}, "outputs": [], @@ -88,18 +126,30 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "afcdc8b2", "metadata": {}, "outputs": [], "source": [ - "X_train, L_train = one_hot(S_train, num_words), Y_train\n", + "X_train = one_hot(S_train, num_words)\n", "X_test = one_hot(S_test, num_words)" ] }, + { + "cell_type": "markdown", + "id": "a67e299d-8774-4758-8953-77afdce775ab", + "metadata": {}, + "source": [ + "## Store as sparse tensors\n", + "\n", + "We see later in the lab that the dense representation is faster. Nevertheless,\n", + "let's store the one-hot representation as sparse `torch` tensors \n", + "as well as sparse `scipy` matrices." + ] + }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "b19366ea", "metadata": {}, "outputs": [], @@ -115,7 +165,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "b45ae6d1", "metadata": {}, "outputs": [], @@ -126,7 +176,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "a47d6eb6", "metadata": {}, "outputs": [], @@ -137,7 +187,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "id": "d1b37b37", "metadata": {}, "outputs": [], @@ -151,12 +201,12 @@ "id": "1119823a", "metadata": {}, "source": [ - "save the sparse matrices" + "### Save as sparse `scipy` matrices" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "id": "6cb6bfdf", "metadata": {}, "outputs": [], @@ -167,12 +217,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "id": "eac1c2ae", "metadata": {}, "outputs": [], "source": [ - "np.save('IMDB_Y_test.npy', Y_test)\n", + "np.save('IMDB_Y_test.npy', L_test)\n", "np.save('IMDB_Y_train.npy', L_train)" ] }, @@ -181,12 +231,14 @@ "id": "25c128e3", "metadata": {}, "source": [ - "save and 
pickle the word index" + "## Save and pickle the word index\n", + "\n", + "We'll also want to store a lookup table to convert representations such as `S_train[0]` into words" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "id": "8458bf67", "metadata": {}, "outputs": [], @@ -199,9 +251,46 @@ "lookup[4] = \"\"" ] }, + { + "cell_type": "markdown", + "id": "5e62ebff-2575-4d35-b46c-51c6f7598efc", + "metadata": {}, + "source": [ + "Let's look at our first training document:" + ] + }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, + "id": "2aaefdf8-0a49-4bdb-8b40-55665283c8a8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\" this film was just brilliant casting location scenery story direction everyone's really suited part they played and you\"" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "' '.join([lookup[i] for i in S_train[0][:20]])" + ] + }, + { + "cell_type": "markdown", + "id": "0e985a73-bfd9-42bd-a523-3dc6e223d602", + "metadata": {}, + "source": [ + "We save this lookup table so it can be loaded later " + ] + }, + { + "cell_type": "code", + "execution_count": 15, "id": "d95252de", "metadata": {}, "outputs": [], @@ -214,12 +303,15 @@ "id": "b3d900b9", "metadata": {}, "source": [ - "create the padded representations" + "## Padded representations\n", + "\n", + "For some of the recurrent models, we'll need sequences of common lengths, padded if necessary.\n", + "Here, we pad up to a maximum length of 500, filling the remaining entries with 0." ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "id": "637b3c5e", "metadata": {}, "outputs": [], @@ -230,9 +322,17 @@ " S_test]]" ] }, + { + "cell_type": "markdown", + "id": "a6218300-b355-44cc-b7fb-4bff81211aa6", + "metadata": {}, + "source": [ + "Finally, we save these for later use in the deep learning lab." 
+ ] + }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "id": "bac69f88", "metadata": {}, "outputs": [], @@ -245,13 +345,24 @@ "metadata": { "jupytext": { "cell_metadata_filter": "-all", - "formats": "source///ipynb,jupyterbook///md:myst,jupyterbook///ipynb", - "main_language": "python" + "formats": "md,ipynb" }, "kernelspec": { - "display_name": "python3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.17" } }, "nbformat": 4, diff --git a/docs/source/index.rst b/docs/source/index.rst index 2c80bdc..44b40fc 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -3,8 +3,7 @@ Welcome to ISLP documentation! .. automodule:: ISLP -Check out the :doc:`installation` section for further information. - +See the :doc:`api/index` Contents -------- @@ -16,5 +15,6 @@ Contents transforms models helpers + labs imdb - api/index + diff --git a/docs/source/installation.myst b/docs/source/installation.myst new file mode 100644 index 0000000..5fba989 --- /dev/null +++ b/docs/source/installation.myst @@ -0,0 +1,119 @@ +--- +file_format: mystnb +kernelspec: + name: python3 + display_name: python3 +--- + + +# Install instructions + +We generally recommend creating a [conda](https://anaconda.org) environment to isolate any code +from other dependencies. The `ISLP` package does not have unusual dependencies, but this is still +good practice. + +## Mac OS X / Linux + +To create a Python conda environment in a Mac OS X or Linux environment run: + +```{code-cell} ipython3 +--- +tags: [skip-execution] +--- +conda create --name islp python +``` + +Current conda should have this at least 3.9. If not, replace `python` +with `python=3.10`, `python=3.11` or `python=3.12`. 
+To run python +code in this environment, you must activate it: + +```{code-cell} ipython3 +--- +tags: [skip-execution] +--- +conda activate islp +``` + +## Windows + +On windows, create a `Python` environment called `islp` in the Anaconda app. This can be done by selecting `Environments` on the left hand side of the app's screen. After creating the environment, open a terminal within that environment by clicking on the "Play" button. + +# Installing `ISLP` + +Having completed the steps above, we use `pip` to install the `ISLP` package: + +```{code-cell} ipython3 +--- +tags: [skip-execution] +--- +pip install ISLP +``` + +## Frozen environment + +```{attention} + +Python packages change frequently. The labs here are built +with {{ ISLP_lab_link }}. Visit the lab git repo for specific instructions +to install the frozen environment. +``` + +## Torch requirements + +The `ISLP` labs use `torch` and various related packages for the lab +on deep learning. Most of the requirements are included in the requirements for `ISLP` though the labs +also use `torchinfo` and `torchvision`. These will be installed by the `requirements.txt` above. + +```{attention} +Because +`torch` and related libraries change frequently, you will note that we +have pinned the versions at specific versions that were used to make +current verisons of the labs. +``` + +## Jupyter + +```{attention} +If using the Anaconda App, `jupyter` can be installed with a GUI. Use +the GUI install instead of the `pip` install below. +``` + +### Mac OS X + +```{attention} + +If you are using the Anaconda GUI, it is recommended that you install JupyterLab through the GUI +and skip the step below. Installing both through the GUI and `pip` may result in conflicts and +a broken JupyterLab. + +If you have installed JupyterLab in your environment via the GUI, the above call `pip install ISLP` may be made within +any running notebook within that environment. 
+``` + +If JupyterLab is not already installed, run the following after having activated your `islp` environment: + +```{code-cell} ipython3 +--- +tags: [skip-execution] +--- +pip install jupyterlab +``` + +### Windows + +Either use the same `pip` command above or install JupyterLab from the +`Home` tab. Ensure that the environment is your `islp` +environment. This information appears near the top left in the +Anaconda `Home` page. + +# Google Colab + +The notebooks for the labs can be run in [Google +Colab](https://colab.research.google.com) with a few caveats: + +- Labs that use files in the filesystem will require one to mount your + Google Drive. See Google's [help](https://colab.research.google.com/notebooks/io.ipynb). + +- The packages will have to be reinstalled each time a new runtime is started. +For most labs, inserting `pip install ISLP` at the top of the notebook will suffice, though Colab will ask you to restart after installation. diff --git a/docs/source/installation.rst b/docs/source/installation.rst deleted file mode 100644 index 981b1ae..0000000 --- a/docs/source/installation.rst +++ /dev/null @@ -1,18 +0,0 @@ -Usage -===== - -.. _installation: - -Installation ------------- - -To use ISLP, first install it using pip: - -.. code-block:: console - - (.venv) $ pip install ISLP - -Creating recipes ----------------- - -BLAH diff --git a/docs/source/labs.myst b/docs/source/labs.myst new file mode 100644 index 0000000..b33bd3d --- /dev/null +++ b/docs/source/labs.myst @@ -0,0 +1,58 @@ +--- +file_format: mystnb +kernelspec: + name: python3 + display_name: python3 +myst_number_code_blocks: python +--- + +# Labs + +{{ ISLP_binder_code }} + +The current version of the labs for `ISLP` are included here. + + +## Package versions + + +```{attention} + +Python packages change frequently. The labs here are built +with {{ ISLP_lab_link }}. Visit the lab git repo for specific instructions +to install the frozen environment. 
+ + +A zip file containig all the labs and data files can be downloaded +here {{ ISLP_zip_link }}. + +``` + +```{warning} +The version of the `ISLP` library used to build these labs +may differ slightly from the one documented here. +The labs are built with {{ ISLP_lab_version }}. + +The [Binder](http://mybinder.org) link above will run {{ ISLP_lab_link }} with +library version {{ ISLP_lab_version }}. + +``` + + +```{toctree} +maxdepth: 1 + +labs/Ch02-statlearn-lab +labs/Ch03-linreg-lab +labs/Ch04-classification-lab +labs/Ch05-resample-lab +labs/Ch06-varselect-lab +labs/Ch07-nonlin-lab +labs/Ch08-baggboost-lab +labs/Ch09-svm-lab +labs/Ch10-deeplearning-lab +labs/Ch11-surv-lab +labs/Ch12-unsup-lab +labs/Ch13-multiple-lab +``` + diff --git a/docs/source/logo.png b/docs/source/logo.png new file mode 100644 index 0000000..237c1cd Binary files /dev/null and b/docs/source/logo.png differ diff --git a/docs/source/models.rst b/docs/source/models.rst index b34581f..5f9e5c8 100644 --- a/docs/source/models.rst +++ b/docs/source/models.rst @@ -4,8 +4,8 @@ Tools for regression models .. toctree:: models/spec - models/derived - models/submodels models/selection + models/anova + diff --git a/docs/source/models/anova.ipynb b/docs/source/models/anova.ipynb new file mode 100644 index 0000000..41e8bcb --- /dev/null +++ b/docs/source/models/anova.ipynb @@ -0,0 +1,648 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "ee33d364", + "metadata": {}, + "source": [ + "# ANOVA using `ModelSpec`\n", + "\n", + "\n", + "In this lab we illustrate how to run create specific ANOVA analyses\n", + "using `ModelSpec`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "4c70fbaa", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "from statsmodels.api import OLS\n", + "from statsmodels.stats.anova import anova_lm\n", + "\n", + "from ISLP import load_data\n", + "from ISLP.models import (ModelSpec,\n", + " derived_feature,\n", + " summarize)" + ] + }, + { + "cell_type": "markdown", + "id": "333a49cf", + "metadata": {}, + "source": [ + "We will carry out two simple ANOVA analyses of the `Hitters` data.\n", + "We wish to predict a baseball player’s `Salary` on the\n", + "basis of various statistics associated with performance in the\n", + "previous year." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "8a708215", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "59" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Hitters = load_data('Hitters')\n", + "np.isnan(Hitters['Salary']).sum()" + ] + }, + { + "cell_type": "markdown", + "id": "dad5e991", + "metadata": {}, + "source": [ + " \n", + " We see that `Salary` is missing for 59 players. The\n", + "`dropna()` method of data frames removes all of the rows that have missing\n", + "values in any variable (by default --- see `Hitters.dropna?`)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "ac7086a5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['AtBat', 'Hits', 'HmRun', 'Runs', 'RBI', 'Walks', 'Years', 'CAtBat',\n", + " 'CHits', 'CHmRun', 'CRuns', 'CRBI', 'CWalks', 'League', 'Division',\n", + " 'PutOuts', 'Assists', 'Errors', 'Salary', 'NewLeague'],\n", + " dtype='object')" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Hitters = Hitters.dropna()\n", + "Hitters.columns" + ] + }, + { + "cell_type": "markdown", + "id": "1a0a3521-be74-40df-a404-3895d80a11dc", + "metadata": {}, + "source": [ + "## Grouping variables\n", + "\n", + "A look at the [description](https://islp.readthedocs.io/en/latest/datasets/Hitters.html) of the data shows\n", + "that there are both career and 1986 offensive stats, as well as some defensive stats.\n", + "\n", + "Let's group the offensive into recent and career offensive stats, as well as a group of defensive variables." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "a215e43b-7bc8-4bdd-91cf-40d717cd7978", + "metadata": {}, + "outputs": [], + "source": [ + "confounders = derived_feature(['Division', 'League', 'NewLeague'],\n", + " name='confounders')\n", + "offense_career = derived_feature(['CAtBat', 'CHits', 'CHmRun', 'CRuns', 'CRBI', 'CWalks'],\n", + " name='offense_career')\n", + "offense_1986 = derived_feature(['AtBat', 'Hits', 'HmRun', 'Runs', 'RBI', 'Walks'],\n", + " name='offense_1986')\n", + "defense_1986 = derived_feature(['PutOuts', 'Assists', 'Errors'],\n", + " name='defense_1986')" + ] + }, + { + "cell_type": "markdown", + "id": "aa15fd0c-1e8a-431e-8425-c61da8439976", + "metadata": {}, + "source": [ + "We'll first do a sequential ANOVA where terms are added sequentially" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "40cd6c28", + "metadata": {}, + "outputs": [], + "source": [ + "design = ModelSpec([confounders, offense_career, defense_1986, offense_1986]).fit(Hitters)\n", + "Y = np.array(Hitters['Salary'])\n", + "X = design.transform(Hitters)" + ] + }, + { + "cell_type": "markdown", + "id": "074120b1", + "metadata": {}, + "source": [ + "Along with a score we need to specify the search strategy. This is done through the object\n", + "`Stepwise()` in the `ISLP.models` package. The method `Stepwise.first_peak()`\n", + "runs forward stepwise until any further additions to the model do not result\n", + "in an improvement in the evaluation score. Similarly, the method `Stepwise.fixed_steps()`\n", + "runs a fixed number of steps of stepwise search." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "e65f5607", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
coefstd errtP>|t|
intercept148.218773.5952.0140.045
Division[W]-116.040440.188-2.8870.004
League[N]63.750379.0060.8070.421
NewLeague[N]-24.398978.843-0.3090.757
CAtBat-0.18870.120-1.5720.117
CHits0.16360.6650.2460.806
CHmRun-0.15171.612-0.0940.925
CRuns1.47160.7471.9710.050
CRBI0.80210.6911.1610.247
CWalks-0.81240.327-2.4810.014
PutOuts0.28270.0773.6610.000
Assists0.37550.2201.7050.089
Errors-3.29404.377-0.7530.452
AtBat-1.95090.624-3.1250.002
Hits7.43952.3633.1480.002
HmRun4.34496.1900.7020.483
Runs-2.33122.971-0.7850.433
RBI-1.06702.595-0.4110.681
Walks6.21961.8253.4090.001
\n", + "
" + ], + "text/plain": [ + " coef std err t P>|t|\n", + "intercept 148.2187 73.595 2.014 0.045\n", + "Division[W] -116.0404 40.188 -2.887 0.004\n", + "League[N] 63.7503 79.006 0.807 0.421\n", + "NewLeague[N] -24.3989 78.843 -0.309 0.757\n", + "CAtBat -0.1887 0.120 -1.572 0.117\n", + "CHits 0.1636 0.665 0.246 0.806\n", + "CHmRun -0.1517 1.612 -0.094 0.925\n", + "CRuns 1.4716 0.747 1.971 0.050\n", + "CRBI 0.8021 0.691 1.161 0.247\n", + "CWalks -0.8124 0.327 -2.481 0.014\n", + "PutOuts 0.2827 0.077 3.661 0.000\n", + "Assists 0.3755 0.220 1.705 0.089\n", + "Errors -3.2940 4.377 -0.753 0.452\n", + "AtBat -1.9509 0.624 -3.125 0.002\n", + "Hits 7.4395 2.363 3.148 0.002\n", + "HmRun 4.3449 6.190 0.702 0.483\n", + "Runs -2.3312 2.971 -0.785 0.433\n", + "RBI -1.0670 2.595 -0.411 0.681\n", + "Walks 6.2196 1.825 3.409 0.001" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "M = OLS(Y, X).fit()\n", + "summarize(M)" + ] + }, + { + "cell_type": "markdown", + "id": "29d9b55f", + "metadata": {}, + "source": [ + "We'll first produce the sequential, or Type I ANOVA results. This builds up a model sequentially and compares\n", + "two successive models." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "cfbe5b92", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
df_residssrdf_diffss_diffFPr(>F)
intercept262.05.331911e+070.0NaNNaNNaN
confounders259.05.131263e+073.02.006478e+066.7411472.144265e-04
offense_career253.03.059130e+076.02.072134e+0734.8086561.470455e-30
defense_1986250.02.730614e+073.03.285156e+0611.0371117.880207e-07
offense_1986244.02.420857e+076.03.097572e+065.2034444.648586e-05
\n", + "
" + ], + "text/plain": [ + " df_resid ssr df_diff ss_diff F \\\n", + "intercept 262.0 5.331911e+07 0.0 NaN NaN \n", + "confounders 259.0 5.131263e+07 3.0 2.006478e+06 6.741147 \n", + "offense_career 253.0 3.059130e+07 6.0 2.072134e+07 34.808656 \n", + "defense_1986 250.0 2.730614e+07 3.0 3.285156e+06 11.037111 \n", + "offense_1986 244.0 2.420857e+07 6.0 3.097572e+06 5.203444 \n", + "\n", + " Pr(>F) \n", + "intercept NaN \n", + "confounders 2.144265e-04 \n", + "offense_career 1.470455e-30 \n", + "defense_1986 7.880207e-07 \n", + "offense_1986 4.648586e-05 " + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = anova_lm(*[OLS(Y, D).fit() for D in design.build_sequence(Hitters, anova_type='sequential')])\n", + "df.index = design.names\n", + "df" + ] + }, + { + "cell_type": "markdown", + "id": "7092f666", + "metadata": {}, + "source": [ + "We can similarly compute the Type II ANOVA results which drops each term and compares to the full model." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "e2d43844", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
df_residssrdf_diffss_diffFPr(>F)
intercept244.02.420857e+071.04.024254e+054.0560764.511037e-02
confounders244.02.420857e+073.09.661738e+053.2460462.261572e-02
offense_career244.02.420857e+076.01.051025e+0717.6555965.701196e-17
defense_1986244.02.420857e+073.01.467933e+064.9318032.415732e-03
offense_1986244.02.420857e+076.03.097572e+065.2034444.648586e-05
\n", + "
" + ], + "text/plain": [ + " df_resid ssr df_diff ss_diff F \\\n", + "intercept 244.0 2.420857e+07 1.0 4.024254e+05 4.056076 \n", + "confounders 244.0 2.420857e+07 3.0 9.661738e+05 3.246046 \n", + "offense_career 244.0 2.420857e+07 6.0 1.051025e+07 17.655596 \n", + "defense_1986 244.0 2.420857e+07 3.0 1.467933e+06 4.931803 \n", + "offense_1986 244.0 2.420857e+07 6.0 3.097572e+06 5.203444 \n", + "\n", + " Pr(>F) \n", + "intercept 4.511037e-02 \n", + "confounders 2.261572e-02 \n", + "offense_career 5.701196e-17 \n", + "defense_1986 2.415732e-03 \n", + "offense_1986 4.648586e-05 " + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "D_full = design.transform(Hitters)\n", + "OLS_full = OLS(Y, D_full).fit()\n", + "dfs = []\n", + "for d in design.build_sequence(Hitters, anova_type='drop'):\n", + " dfs.append(anova_lm(OLS(Y,d).fit(), OLS_full).iloc[1:])\n", + "df = pd.concat(dfs)\n", + "df.index = design.names\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "362709ae-9558-4c4c-8f5e-f8388caf631d", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "jupytext": { + "formats": "source/models///ipynb,jupyterbook/models///md:myst,jupyterbook/models///ipynb" + }, + "kernelspec": { + "display_name": "python3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/source/models/derived.ipynb b/docs/source/models/derived.ipynb deleted file mode 100644 index cc1b0ac..0000000 --- a/docs/source/models/derived.ipynb +++ /dev/null @@ -1,2125 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "38217f02", - "metadata": {}, - "source": [ - "# Building 
design matrices with `ModelSpec`\n", - "\n", - "Force rebuild" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "3107d1f9", - "metadata": {}, - "outputs": [], - "source": [ - "x=4\n", - "import numpy as np, pandas as pd\n", - "%load_ext rpy2.ipython\n", - "\n", - "from ISLP import load_data\n", - "from ISLP.models import ModelSpec\n", - "\n", - "import statsmodels.api as sm" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "cdc46a4e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['Sales', 'CompPrice', 'Income', 'Advertising', 'Population', 'Price',\n", - " 'ShelveLoc', 'Age', 'Education', 'Urban', 'US'],\n", - " dtype='object')" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Carseats = load_data('Carseats')\n", - "%R -i Carseats\n", - "Carseats.columns" - ] - }, - { - "cell_type": "markdown", - "id": "e0a2a83a", - "metadata": {}, - "source": [ - "## Let's break up income into groups" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "68b40caf", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 M\n", - "1 L\n", - "2 L\n", - "3 H\n", - "4 M\n", - " ..\n", - "395 H\n", - "396 L\n", - "397 L\n", - "398 M\n", - "399 L\n", - "Name: OIncome, Length: 400, dtype: category\n", - "Categories (3, object): ['L' < 'M' < 'H']" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Carseats['OIncome'] = pd.cut(Carseats['Income'], \n", - " [0,50,90,200], \n", - " labels=['L','M','H'])\n", - "Carseats['OIncome']" - ] - }, - { - "cell_type": "markdown", - "id": "35558d88", - "metadata": {}, - "source": [ - "Let's also create an unordered version" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "e5e81a95", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 M\n", - "1 L\n", - "2 L\n", - "3 H\n", - "4 M\n", - " ..\n", - 
"395 H\n", - "396 L\n", - "397 L\n", - "398 M\n", - "399 L\n", - "Name: UIncome, Length: 400, dtype: category\n", - "Categories (3, object): ['L', 'M', 'H']" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Carseats['UIncome'] = pd.cut(Carseats['Income'], \n", - " [0,50,90,200], \n", - " labels=['L','M','H'],\n", - " ordered=False)\n", - "Carseats['UIncome']" - ] - }, - { - "cell_type": "markdown", - "id": "4bbf9e13", - "metadata": {}, - "source": [ - "## A simple model" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "1ad729b3", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['intercept', 'Price', 'Income'], dtype='object')" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design = ModelSpec(['Price', 'Income'])\n", - "X = design.fit_transform(Carseats)\n", - "X.columns" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "d05e9ec8", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "intercept 12.661546\n", - "Price -0.052213\n", - "Income 0.012829\n", - "dtype: float64" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Y = Carseats['Sales']\n", - "M = sm.OLS(Y, X).fit()\n", - "M.params" - ] - }, - { - "cell_type": "markdown", - "id": "b4e9ee33", - "metadata": {}, - "source": [ - "## Basic procedure\n", - "\n", - "The design matrix is built by cobbling together a set of columns and possibly transforming them.\n", - "A `pd.DataFrame` is essentially a list of columns. One of the first tasks done in `ModelSpec.fit`\n", - "is to inspect a dataframe for column info. 
The column `ShelveLoc` is categorical:" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "64ac65d3", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 Bad\n", - "1 Good\n", - "2 Medium\n", - "3 Medium\n", - "4 Bad\n", - " ... \n", - "395 Good\n", - "396 Medium\n", - "397 Medium\n", - "398 Bad\n", - "399 Good\n", - "Name: ShelveLoc, Length: 400, dtype: category\n", - "Categories (3, object): ['Bad', 'Good', 'Medium']" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Carseats['ShelveLoc']" - ] - }, - { - "cell_type": "markdown", - "id": "620f0e01", - "metadata": {}, - "source": [ - "This is recognized by `ModelSpec` in the form of `Column` objects which are just named tuples with two methods\n", - "`get_columns` and `fit_encoder`." - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "77b898e0", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Column(idx='ShelveLoc', name='ShelveLoc', is_categorical=True, is_ordinal=False, columns=('ShelveLoc[Good]', 'ShelveLoc[Medium]'), encoder=Contrast())" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design.column_info_['ShelveLoc']" - ] - }, - { - "cell_type": "markdown", - "id": "4580a6bf", - "metadata": {}, - "source": [ - "It recognized ordinal columns as well." 
- ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "c2dab855", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Column(idx='OIncome', name='OIncome', is_categorical=True, is_ordinal=True, columns=('OIncome',), encoder=OrdinalEncoder())" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design.column_info_['OIncome']" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "5e7963d6", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(array([ 73, 48, 35, 100]), ('Income',))" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "income = design.column_info_['Income']\n", - "cols, names = income.get_columns(Carseats)\n", - "(cols[:4], names)" - ] - }, - { - "cell_type": "markdown", - "id": "6b689966", - "metadata": {}, - "source": [ - "## Encoding a column\n", - "\n", - "In building a design matrix we must extract columns from our dataframe (or `np.ndarray`). Categorical\n", - "variables usually are encoded by several columns, typically one less than the number of categories.\n", - "This task is handled by the `encoder` of the `Column`. The encoder must satisfy the `sklearn` transform\n", - "model, i.e. `fit` on some array and `transform` on future arrays. The `fit_encoder` method of `Column` fits\n", - "its encoder the first time data is passed to it." 
- ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "ff3b96b6", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(array([[0., 0.],\n", - " [1., 0.],\n", - " [0., 1.],\n", - " [0., 1.]]),\n", - " ['ShelveLoc[Good]', 'ShelveLoc[Medium]'])" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "shelve = design.column_info_['ShelveLoc']\n", - "cols, names = shelve.get_columns(Carseats)\n", - "(cols[:4], names)" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "7e87da20", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[2.],\n", - " [1.],\n", - " [1.],\n", - " [0.]])" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "oincome = design.column_info_['OIncome']\n", - "oincome.get_columns(Carseats)[0][:4]" - ] - }, - { - "cell_type": "markdown", - "id": "4f2030ac", - "metadata": {}, - "source": [ - "## The terms\n", - "\n", - "The design matrix consists of several sets of columns. This is managed by the `ModelSpec` through\n", - "the `terms` argument which should be a sequence. The elements of `terms` are often\n", - "going to be strings (or tuples of strings for interactions, see below) but are converted to a\n", - "`Variable` object and stored in the `terms_` of the fitted `ModelSpec`. A `Variable` is just a named tuple." 
- ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "27fc4fb3", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['Price', 'Income']" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design.terms" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "16316981", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[Variable(variables=('Price',), name='Price', encoder=None, use_transform=True, pure_columns=True, override_encoder_colnames=False),\n", - " Variable(variables=('Income',), name='Income', encoder=None, use_transform=True, pure_columns=True, override_encoder_colnames=False)]" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design.terms_" - ] - }, - { - "cell_type": "markdown", - "id": "ef3f2bd0", - "metadata": {}, - "source": [ - "While each `Column` can itself extract data, they are all promoted to `Variable` to be of a uniform type. A\n", - "`Variable` can also create columns through the `build_columns` method of `ModelSpec`" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "dd9c7fa6", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "( Price\n", - " 0 120\n", - " 1 83\n", - " 2 80\n", - " 3 97\n", - " 4 128\n", - " .. ...\n", - " 395 128\n", - " 396 120\n", - " 397 159\n", - " 398 95\n", - " 399 120\n", - " \n", - " [400 rows x 1 columns],\n", - " ['Price'])" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "price = design.terms_[0]\n", - "design.build_columns(Carseats, price)" - ] - }, - { - "cell_type": "markdown", - "id": "5fc4cc45", - "metadata": {}, - "source": [ - "Note that `Variable` objects have a tuple of `variables` as well as an `encoder` attribute. 
The\n", - "tuple of `variables` first creates a concatenated dataframe from all corresponding variables and then\n", - "is run through `encoder.transform`. The `encoder.fit` method of each `Variable` is run once during \n", - "the call to `ModelSpec.fit`." - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "49d7fb46", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "( Price Income UIncome[L] UIncome[M]\n", - " 0 120.0 73.0 0.0 1.0\n", - " 1 83.0 48.0 1.0 0.0\n", - " 2 80.0 35.0 1.0 0.0\n", - " 3 97.0 100.0 0.0 0.0\n", - " 4 128.0 64.0 0.0 1.0\n", - " .. ... ... ... ...\n", - " 395 128.0 108.0 0.0 0.0\n", - " 396 120.0 23.0 1.0 0.0\n", - " 397 159.0 26.0 1.0 0.0\n", - " 398 95.0 79.0 0.0 1.0\n", - " 399 120.0 37.0 1.0 0.0\n", - " \n", - " [400 rows x 4 columns],\n", - " ['Price', 'Income', 'UIncome[L]', 'UIncome[M]'])" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from ISLP.models.model_spec import Variable\n", - "\n", - "new_var = Variable(('Price', 'Income', 'UIncome'), name='mynewvar', encoder=None)\n", - "design.build_columns(Carseats, new_var)" - ] - }, - { - "cell_type": "markdown", - "id": "bdfc0fe9", - "metadata": {}, - "source": [ - "Let's now transform these columns with an encoder. Within `ModelSpec` we will first build the\n", - "arrays above and then call `pca.fit` and finally `pca.transform` within `design.build_columns`." 
- ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "cf6f3f4c", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but PCA was fitted with feature names\n", - " warnings.warn(\n" - ] - }, - { - "data": { - "text/plain": [ - "( mynewvar[0] mynewvar[1]\n", - " 0 -3.608693 -4.853177\n", - " 1 15.081506 35.708630\n", - " 2 27.422871 40.774250\n", - " 3 -33.973209 13.470489\n", - " 4 6.567316 -11.290100\n", - " .. ... ...\n", - " 395 -36.846346 -18.415783\n", - " 396 45.741500 3.245602\n", - " 397 49.097533 -35.725355\n", - " 398 -13.577772 18.845139\n", - " 399 31.927566 0.978436\n", - " \n", - " [400 rows x 2 columns],\n", - " ['mynewvar[0]', 'mynewvar[1]'])" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from sklearn.decomposition import PCA\n", - "pca = PCA(n_components=2)\n", - "pca.fit(design.build_columns(Carseats, new_var)[0]) # this is done within `ModelSpec.fit`\n", - "pca_var = Variable(('Price', 'Income', 'UIncome'), name='mynewvar', encoder=pca)\n", - "design.build_columns(Carseats, pca_var)" - ] - }, - { - "cell_type": "markdown", - "id": "1552d19a", - "metadata": {}, - "source": [ - "The elements of the `variables` attribute may be column identifiers ( `\"Price\"`), `Column` instances (`price`)\n", - "or `Variable` instances (`pca_var`)." 
- ] - }, - { - "cell_type": "code", - "execution_count": 18, - "id": "12d955dd", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but PCA was fitted with feature names\n", - " warnings.warn(\n" - ] - }, - { - "data": { - "text/plain": [ - "( Price Price mynewvar[0] mynewvar[1]\n", - " 0 120.0 120.0 -3.608693 -4.853177\n", - " 1 83.0 83.0 15.081506 35.708630\n", - " 2 80.0 80.0 27.422871 40.774250\n", - " 3 97.0 97.0 -33.973209 13.470489\n", - " 4 128.0 128.0 6.567316 -11.290100\n", - " .. ... ... ... ...\n", - " 395 128.0 128.0 -36.846346 -18.415783\n", - " 396 120.0 120.0 45.741500 3.245602\n", - " 397 159.0 159.0 49.097533 -35.725355\n", - " 398 95.0 95.0 -13.577772 18.845139\n", - " 399 120.0 120.0 31.927566 0.978436\n", - " \n", - " [400 rows x 4 columns],\n", - " ['Price', 'Price', 'mynewvar[0]', 'mynewvar[1]'])" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "fancy_var = Variable(('Price', price, pca_var), name='fancy', encoder=None)\n", - "design.build_columns(Carseats, fancy_var)" - ] - }, - { - "cell_type": "markdown", - "id": "f5ea292d", - "metadata": {}, - "source": [ - "We can of course run PCA again on these features (if we wanted)." 
- ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "ae2af29b", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but PCA was fitted with feature names\n", - " warnings.warn(\n", - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but PCA was fitted with feature names\n", - " warnings.warn(\n", - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but PCA was fitted with feature names\n", - " warnings.warn(\n" - ] - }, - { - "data": { - "text/plain": [ - "( fancy_pca[0] fancy_pca[1]\n", - " 0 -6.951792 4.859283\n", - " 1 55.170148 -24.694875\n", - " 2 59.418556 -38.033572\n", - " 3 34.722389 28.922184\n", - " 4 -21.419184 -3.120673\n", - " .. ... 
...\n", - " 395 -18.257348 40.760122\n", - " 396 -10.546709 -45.021658\n", - " 397 -77.706359 -37.174379\n", - " 398 36.668694 7.730851\n", - " 399 -9.540535 -31.059122\n", - " \n", - " [400 rows x 2 columns],\n", - " ['fancy_pca[0]', 'fancy_pca[1]'])" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pca2 = PCA(n_components=2)\n", - "pca2.fit(design.build_columns(Carseats, fancy_var)[0]) # this is done within `ModelSpec.fit`\n", - "pca2_var = Variable(('Price', price, pca_var), name='fancy_pca', encoder=pca2)\n", - "design.build_columns(Carseats, pca2_var)" - ] - }, - { - "cell_type": "markdown", - "id": "57305dbe", - "metadata": {}, - "source": [ - "## Building the design matrix\n", - "\n", - "With these notions in mind, the final design is essentially then" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "89656ec4", - "metadata": {}, - "outputs": [], - "source": [ - "X_hand = np.column_stack([design.build_columns(Carseats, v)[0] for v in design.terms_])[:4]" - ] - }, - { - "cell_type": "markdown", - "id": "f6cb8167", - "metadata": {}, - "source": [ - "An intercept column is added if `design.intercept` is `True` and if the original argument to `transform` is\n", - "a dataframe the index is adjusted accordingly." - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": "547cb625", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design.intercept" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "id": "ff5b41d5", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
interceptPriceIncome
01.012073
11.08348
21.08035
31.097100
\n", - "
" - ], - "text/plain": [ - " intercept Price Income\n", - "0 1.0 120 73\n", - "1 1.0 83 48\n", - "2 1.0 80 35\n", - "3 1.0 97 100" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design.transform(Carseats)[:4]" - ] - }, - { - "cell_type": "markdown", - "id": "932759cf", - "metadata": {}, - "source": [ - "## Predicting\n", - "\n", - "Constructing the design matrix at any values is carried out by the `transform` method." - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "id": "e2190b00", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([12.65257604, 12.25873428])" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "new_data = pd.DataFrame({'Price':[10,20], 'Income':[40, 50]})\n", - "new_X = design.transform(new_data)\n", - "M.get_prediction(new_X).predicted_mean" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "id": "6545c5da", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " 0 1 \n", - "12.65258 12.25873 \n" - ] - } - ], - "source": [ - "%%R -i new_data,Carseats\n", - "predict(lm(Sales ~ Price + Income, data=Carseats), new_data)" - ] - }, - { - "cell_type": "markdown", - "id": "cd088b51", - "metadata": {}, - "source": [ - "### Difference between using `pd.DataFrame` and `np.ndarray`\n", - "\n", - "If the `terms` only refer to a few columns of the data frame, the `transform` method only needs a dataframe with those columns.\n", - "\n", - "If we had used an `np.ndarray`, the column identifiers would be integers identifying specific columns so,\n", - "in order to work correctly, `transform` would need another `np.ndarray` where the columns have the same meaning." 
- ] - }, - { - "cell_type": "code", - "execution_count": 25, - "id": "8f37ae20", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[1.0, 120, 73],\n", - " [1.0, 83, 48],\n", - " [1.0, 80, 35],\n", - " [1.0, 97, 100]], dtype=object)" - ] - }, - "execution_count": 25, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Carseats_np = np.asarray(Carseats[['Price', 'ShelveLoc', 'US', 'Income']])\n", - "design_np = ModelSpec([0,3]).fit(Carseats_np)\n", - "design_np.transform(Carseats_np)[:4]" - ] - }, - { - "cell_type": "markdown", - "id": "184aefc2", - "metadata": {}, - "source": [ - "The following will fail for hopefully obvious reasons" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "id": "e4134980", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "shapes (2,2) and (3,) not aligned: 2 (dim 1) != 3 (dim 0)\n" - ] - } - ], - "source": [ - "try:\n", - " new_D = np.zeros((2,2))\n", - " new_D[:,0] = [10,20]\n", - " new_D[:,1] = [40,50]\n", - " M.get_prediction(new_D).predicted_mean\n", - "except ValueError as e:\n", - " print(e)" - ] - }, - { - "cell_type": "markdown", - "id": "53808f3b", - "metadata": {}, - "source": [ - "Ultimately, `M` expects 3 columns for new predictions because it was fit\n", - "with a matrix having 3 columns (the first representing an intercept).\n", - "\n", - "We might be tempted to try as with the `pd.DataFrame` and produce\n", - "an `np.ndarray` with only the necessary variables." 
- ] - }, - { - "cell_type": "code", - "execution_count": 27, - "id": "62059c57", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "index 3 is out of bounds for axis 1 with size 2\n" - ] - } - ], - "source": [ - "try:\n", - " new_X = np.zeros((2,2))\n", - " new_X[:,0] = [10,20]\n", - " new_X[:,1] = [40,50]\n", - " new_D = design_np.transform(new_X)\n", - " M.get_prediction(new_D).predicted_mean\n", - "except IndexError as e:\n", - " print(e)" - ] - }, - { - "cell_type": "markdown", - "id": "ded12f69", - "metadata": {}, - "source": [ - "This fails because `design_np` is looking for column `3` from its `terms`:" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "id": "fbb509d1", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[Variable(variables=(0,), name='0', encoder=None, use_transform=True, pure_columns=True, override_encoder_colnames=False),\n", - " Variable(variables=(3,), name='3', encoder=None, use_transform=True, pure_columns=True, override_encoder_colnames=False)]" - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design_np.terms_" - ] - }, - { - "cell_type": "markdown", - "id": "f01391e4", - "metadata": {}, - "source": [ - "However, if we have an `np.ndarray` in which the first column indeed represents `Price` and the fourth indeed\n", - "represents `Income` then we can arrive at the correct answer by supplying such the array to `design_np.transform`:" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "id": "10df55ae", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([12.65257604, 12.25873428])" - ] - }, - "execution_count": 29, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "new_X = np.zeros((2,4))\n", - "new_X[:,0] = [10,20]\n", - "new_X[:,3] = [40,50]\n", - "new_D = design_np.transform(new_X)\n", - "M.get_prediction(new_D).predicted_mean" - ] 
- }, - { - "cell_type": "markdown", - "id": "b43099fb", - "metadata": {}, - "source": [ - "Given this subtlety about needing to supply arrays with identical column structure to `transform` when\n", - "using `np.ndarray` we presume that using a `pd.DataFrame` will be the more popular use case." - ] - }, - { - "cell_type": "markdown", - "id": "50bce64d", - "metadata": {}, - "source": [ - "## A model with some categorical variables\n", - "\n", - "Categorical variables become `Column` instances with encoders." - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "id": "2eb2ff16", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Column(idx='UIncome', name='UIncome', is_categorical=True, is_ordinal=False, columns=('UIncome[L]', 'UIncome[M]'), encoder=Contrast())" - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design = ModelSpec(['Population', 'Price', 'UIncome', 'ShelveLoc']).fit(Carseats)\n", - "design.column_info_['UIncome']" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "id": "6686dff8", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['intercept', 'Population', 'Price', 'UIncome[L]', 'UIncome[M]',\n", - " 'ShelveLoc[Good]', 'ShelveLoc[Medium]'],\n", - " dtype='object')" - ] - }, - "execution_count": 31, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "X = design.fit_transform(Carseats)\n", - "X.columns" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "id": "0e0eafd7", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "intercept 11.876012\n", - "Population 0.001163\n", - "Price -0.055725\n", - "UIncome[L] -1.042297\n", - "UIncome[M] -0.119123\n", - "ShelveLoc[Good] 4.999623\n", - "ShelveLoc[Medium] 1.964278\n", - "dtype: float64" - ] - }, - "execution_count": 32, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "sm.OLS(Y, X).fit().params" - 
] - }, - { - "cell_type": "code", - "execution_count": 33, - "id": "43cce209", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " (Intercept) Population Price UIncomeM UIncomeH \n", - " 10.83371503 0.00116301 -0.05572469 0.92317388 1.04229679 \n", - " ShelveLocGood ShelveLocMedium \n", - " 4.99962319 1.96427771 \n" - ] - } - ], - "source": [ - "%%R\n", - "lm(Sales ~ Population + Price + UIncome + ShelveLoc, data=Carseats)$coef" - ] - }, - { - "cell_type": "markdown", - "id": "99bf408e", - "metadata": {}, - "source": [ - "## Getting the encoding you want\n", - "\n", - "By default the level dropped by `ModelSpec` will be the first of the `categories_` values from \n", - "`sklearn.preprocessing.OneHotEncoder()`. We might wish to change this. It seems\n", - "as if the correct way to do this would be something like `Variable(('UIncome',), 'mynewencoding', new_encoder)`\n", - "where `new_encoder` would somehow drop the column we want dropped. \n", - "\n", - "However, when using the convenient identifier `UIncome` in the `variables` argument, this maps to the `Column` associated to `UIncome` within `design.column_info_`:" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "id": "11c19ebf", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Column(idx='UIncome', name='UIncome', is_categorical=True, is_ordinal=False, columns=('UIncome[L]', 'UIncome[M]'), encoder=Contrast())" - ] - }, - "execution_count": 34, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design.column_info_['UIncome']" - ] - }, - { - "cell_type": "markdown", - "id": "4b48e5d2", - "metadata": {}, - "source": [ - "This column already has an encoder and `Column` instances are immutable as named tuples. Further, there are times when \n", - "we may want to encode `UIncome` differently within the same model. 
In the model below the main effect of `UIncome` is encoded with two columns while in the interaction `UIncome` (see below) has three columns. This is a design of interest\n", - "and we need a way to allow different encodings of the same column of `Carseats`" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "id": "81f641ba", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Call:\n", - "lm(formula = Sales ~ UIncome:ShelveLoc + UIncome, data = Carseats)\n", - "\n", - "Coefficients:\n", - " (Intercept) UIncomeM UIncomeH \n", - " 5.1317 0.1151 1.1561 \n", - " UIncomeL:ShelveLocGood UIncomeM:ShelveLocGood UIncomeH:ShelveLocGood \n", - " 4.5121 5.5752 3.7381 \n", - "UIncomeL:ShelveLocMedium UIncomeM:ShelveLocMedium UIncomeH:ShelveLocMedium \n", - " 1.2473 2.4782 1.5141 \n", - "\n" - ] - } - ], - "source": [ - "%%R\n", - "lm(Sales ~ UIncome:ShelveLoc + UIncome, data=Carseats)" - ] - }, - { - "cell_type": "markdown", - "id": "79f7eb4d", - "metadata": {}, - "source": [ - " We can create a new \n", - "`Column` with the encoder we want. For categorical variables, there is a convenience function to do so." - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "id": "2afb3b5d", - "metadata": {}, - "outputs": [], - "source": [ - "from ISLP.models.model_spec import contrast\n", - "pref_encoding = contrast('UIncome', 'drop', 'L')" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "id": "c44692ab", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "( UIncome[M] UIncome[H]\n", - " 0 1.0 0.0\n", - " 1 0.0 0.0\n", - " 2 0.0 0.0\n", - " 3 0.0 1.0\n", - " 4 1.0 0.0\n", - " .. ... 
...\n", - " 395 0.0 1.0\n", - " 396 0.0 0.0\n", - " 397 0.0 0.0\n", - " 398 1.0 0.0\n", - " 399 0.0 0.0\n", - " \n", - " [400 rows x 2 columns],\n", - " ['UIncome[M]', 'UIncome[H]'])" - ] - }, - "execution_count": 37, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design.build_columns(Carseats, pref_encoding)" - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "id": "c0bfb2a5", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['intercept', 'Population', 'Price', 'UIncome[M]', 'UIncome[H]',\n", - " 'ShelveLoc[Good]', 'ShelveLoc[Medium]'],\n", - " dtype='object')" - ] - }, - "execution_count": 38, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design = ModelSpec(['Population', 'Price', pref_encoding, 'ShelveLoc']).fit(Carseats)\n", - "X = design.fit_transform(Carseats)\n", - "X.columns" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "id": "d263056c", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "intercept 10.833715\n", - "Population 0.001163\n", - "Price -0.055725\n", - "UIncome[M] 0.923174\n", - "UIncome[H] 1.042297\n", - "ShelveLoc[Good] 4.999623\n", - "ShelveLoc[Medium] 1.964278\n", - "dtype: float64" - ] - }, - "execution_count": 39, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "sm.OLS(Y, X).fit().params" - ] - }, - { - "cell_type": "code", - "execution_count": 40, - "id": "edf0dc68", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " (Intercept) Population Price UIncomeM UIncomeH \n", - " 10.83371503 0.00116301 -0.05572469 0.92317388 1.04229679 \n", - " ShelveLocGood ShelveLocMedium \n", - " 4.99962319 1.96427771 \n" - ] - } - ], - "source": [ - "%%R\n", - "lm(Sales ~ Population + Price + UIncome + ShelveLoc, data=Carseats)$coef" - ] - }, - { - "cell_type": "markdown", - "id": "82071a54", - "metadata": {}, - "source": [ - "## Interactions\n", - 
"\n", - "We've referred to interactions above. These are specified (by convenience) as tuples in the `terms` argument\n", - "to `ModelSpec`." - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "id": "cd18a4a4", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "intercept 7.866634\n", - "UIncome[L]:ShelveLoc[Good] 4.512054\n", - "UIncome[L]:ShelveLoc[Medium] 1.247275\n", - "UIncome[M]:ShelveLoc[Good] 5.575170\n", - "UIncome[M]:ShelveLoc[Medium] 2.478163\n", - "UIncome[L] -2.734895\n", - "UIncome[M] -2.619745\n", - "dtype: float64" - ] - }, - "execution_count": 41, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design = ModelSpec([('UIncome', 'ShelveLoc'), 'UIncome'])\n", - "X = design.fit_transform(Carseats)\n", - "sm.OLS(Y, X).fit().params" - ] - }, - { - "cell_type": "markdown", - "id": "229fa32d", - "metadata": {}, - "source": [ - "The tuples in `terms` are converted to `Variable` in the formalized `terms_` attribute by creating a `Variable` with\n", - "`variables` set to the tuple and the encoder an `Interaction` encoder which (unsurprisingly) creates the interaction columns from the concatenated data frames of `UIncome` and `ShelveLoc`." 
- ] - }, - { - "cell_type": "code", - "execution_count": 42, - "id": "b8c52dbb", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Variable(variables=('UIncome', 'ShelveLoc'), name='UIncome:ShelveLoc', encoder=Interaction(column_names={'ShelveLoc': ['ShelveLoc[Good]', 'ShelveLoc[Medium]'],\n", - " 'UIncome': ['UIncome[L]', 'UIncome[M]']},\n", - " columns={'ShelveLoc': range(2, 4), 'UIncome': range(0, 2)},\n", - " variables=['UIncome', 'ShelveLoc']), use_transform=True, pure_columns=False, override_encoder_colnames=False)" - ] - }, - "execution_count": 42, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design.terms_[0]" - ] - }, - { - "cell_type": "markdown", - "id": "e7f93464", - "metadata": {}, - "source": [ - "Comparing this to the previous `R` model." - ] - }, - { - "cell_type": "code", - "execution_count": 43, - "id": "4094c01f", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Call:\n", - "lm(formula = Sales ~ UIncome:ShelveLoc + UIncome, data = Carseats)\n", - "\n", - "Coefficients:\n", - " (Intercept) UIncomeM UIncomeH \n", - " 5.1317 0.1151 1.1561 \n", - " UIncomeL:ShelveLocGood UIncomeM:ShelveLocGood UIncomeH:ShelveLocGood \n", - " 4.5121 5.5752 3.7381 \n", - "UIncomeL:ShelveLocMedium UIncomeM:ShelveLocMedium UIncomeH:ShelveLocMedium \n", - " 1.2473 2.4782 1.5141 \n", - "\n" - ] - } - ], - "source": [ - "%%R\n", - "lm(Sales ~ UIncome:ShelveLoc + UIncome, data=Carseats)" - ] - }, - { - "cell_type": "markdown", - "id": "d448c9ca", - "metadata": {}, - "source": [ - "We note a few important things:\n", - "\n", - "1. `R` has reorganized the columns of the design from the formula: although we wrote `UIncome:ShelveLoc` first these\n", - "columns have been built later. **`ModelSpec` builds columns in the order determined by `terms`!**\n", - "\n", - "2. As noted above, `R` has encoded `UIncome` differently in the main effect and in the interaction. 
For `ModelSpec`, the reference to `UIncome` always refers to the column in `design.column_info_` and will always build only the columns for `L` and `M`. **`ModelSpec` does no inspection of terms to decide how to encode categorical variables.**\n", - "\n", - "A few notes:\n", - "\n", - "- **Why not try to inspect the terms?** For any nontrivial formula in `R` with several categorical variables and interactions, predicting what columns will be produced from a given formula is not simple. **`ModelSpec` errs on the side of being explicit.**\n", - "\n", - "- **Is it impossible to build the design as `R` has?** No. An advanced user who *knows* they want the columns built as `R` has can do so (fairly) easily." - ] - }, - { - "cell_type": "code", - "execution_count": 44, - "id": "634e05c6", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "( UIncome[H] UIncome[L] UIncome[M]\n", - " 0 0.0 0.0 1.0\n", - " 1 0.0 1.0 0.0\n", - " 2 0.0 1.0 0.0\n", - " 3 1.0 0.0 0.0\n", - " 4 0.0 0.0 1.0\n", - " .. ... ... 
...\n", - " 395 1.0 0.0 0.0\n", - " 396 0.0 1.0 0.0\n", - " 397 0.0 1.0 0.0\n", - " 398 0.0 0.0 1.0\n", - " 399 0.0 1.0 0.0\n", - " \n", - " [400 rows x 3 columns],\n", - " ['UIncome[H]', 'UIncome[L]', 'UIncome[M]'])" - ] - }, - "execution_count": 44, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "full_encoding = contrast('UIncome', None)\n", - "design.build_columns(Carseats, full_encoding)" - ] - }, - { - "cell_type": "code", - "execution_count": 45, - "id": "4c09c93f", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "intercept 5.131739\n", - "UIncome[M] 0.115150\n", - "UIncome[H] 1.156118\n", - "UIncome[H]:ShelveLoc[Good] 3.738052\n", - "UIncome[H]:ShelveLoc[Medium] 1.514104\n", - "UIncome[L]:ShelveLoc[Good] 4.512054\n", - "UIncome[L]:ShelveLoc[Medium] 1.247275\n", - "UIncome[M]:ShelveLoc[Good] 5.575170\n", - "UIncome[M]:ShelveLoc[Medium] 2.478163\n", - "dtype: float64" - ] - }, - "execution_count": 45, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design = ModelSpec([pref_encoding, (full_encoding, 'ShelveLoc')])\n", - "X = design.fit_transform(Carseats)\n", - "sm.OLS(Y, X).fit().params" - ] - }, - { - "cell_type": "markdown", - "id": "48c1989f", - "metadata": {}, - "source": [ - "## Special encodings\n", - "\n", - "For flexible models, we may want to consider transformations of features, i.e. polynomial\n", - "or spline transformations. Given transforms that follow the `fit/transform` paradigm\n", - "we can of course achieve this with a `Column` and an `encoder`. 
The `ISLP.transforms`\n", - "package includes a `Poly` transform" - ] - }, - { - "cell_type": "code", - "execution_count": 46, - "id": "85a28d87", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Variable(variables=('Income',), name='poly(Income, 3)', encoder=Poly(degree=3), use_transform=True, pure_columns=False, override_encoder_colnames=True)" - ] - }, - "execution_count": 46, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from ISLP.models.model_spec import poly\n", - "poly('Income', 3)" - ] - }, - { - "cell_type": "code", - "execution_count": 47, - "id": "e17c8a9d", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "intercept 5.440077\n", - "poly(Income, 3)[0] 10.036373\n", - "poly(Income, 3)[1] -2.799156\n", - "poly(Income, 3)[2] 2.399601\n", - "ShelveLoc[Good] 4.808133\n", - "ShelveLoc[Medium] 1.889533\n", - "dtype: float64" - ] - }, - "execution_count": 47, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design = ModelSpec([poly('Income', 3), 'ShelveLoc'])\n", - "X = design.fit_transform(Carseats)\n", - "sm.OLS(Y, X).fit().params" - ] - }, - { - "cell_type": "markdown", - "id": "944f56d6", - "metadata": {}, - "source": [ - "Compare:" - ] - }, - { - "cell_type": "code", - "execution_count": 48, - "id": "1889caca", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " (Intercept) poly(Income, 3)1 poly(Income, 3)2 poly(Income, 3)3 \n", - " 5.440077 10.036373 -2.799156 2.399601 \n", - " ShelveLocGood ShelveLocMedium \n", - " 4.808133 1.889533 \n" - ] - } - ], - "source": [ - "%%R\n", - "lm(Sales ~ poly(Income, 3) + ShelveLoc, data=Carseats)$coef" - ] - }, - { - "cell_type": "markdown", - "id": "bd4dca31", - "metadata": {}, - "source": [ - "## Splines\n", - "\n", - "Support for natural and B-splines is also included" - ] - }, - { - "cell_type": "code", - "execution_count": 49, - "id": "70fae990", - "metadata": {}, - 
"outputs": [], - "source": [ - "from ISLP.models.model_spec import ns, bs, pca" - ] - }, - { - "cell_type": "markdown", - "id": "2d812694", - "metadata": {}, - "source": [ - "## Custom encoding\n", - "\n", - "Instead of PCA we might run some clustering on some features and then uses the clusters to\n", - "create new features. This can be done with `derived_variable`. Indeed, `pca`, `ns` and `bs` are all examples\n", - "of this." - ] - }, - { - "cell_type": "code", - "execution_count": 50, - "id": "8e5d2305", - "metadata": {}, - "outputs": [], - "source": [ - "from ISLP.models.model_spec import derived_variable, Contrast" - ] - }, - { - "cell_type": "code", - "execution_count": 51, - "id": "8a40c663", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([1, 1, 2, 1, 2, 1, 0, 1, 0, 0, 0, 1, 2, 2, 0, 1, 2, 1, 0, 0, 0, 2,\n", - " 2, 2, 1, 2, 1, 0, 0, 1, 0, 1, 2, 1, 2, 0, 0, 2, 2, 2, 0, 2, 0, 2,\n", - " 0, 2, 0, 0, 2, 0, 1, 0, 2, 0, 0, 0, 0, 0, 1, 0, 1, 2, 2, 0, 1, 2,\n", - " 0, 1, 1, 2, 1, 1, 2, 0, 0, 1, 1, 0, 2, 0, 1, 0, 0, 2, 2, 0, 1, 2,\n", - " 2, 2, 2, 2, 0, 2, 0, 2, 2, 0, 1, 2, 0, 0, 2, 0, 0, 1, 2, 0, 1, 0,\n", - " 0, 1, 0, 2, 0, 2, 0, 2, 0, 0, 1, 1, 0, 0, 0, 0, 0, 2, 0, 0, 1, 0,\n", - " 0, 0, 2, 1, 0, 2, 1, 1, 1, 2, 0, 0, 2, 0, 2, 1, 0, 0, 0, 1, 2, 2,\n", - " 1, 0, 2, 2, 0, 2, 2, 2, 2, 0, 0, 2, 1, 0, 0, 1, 1, 1, 0, 0, 2, 0,\n", - " 1, 0, 0, 2, 1, 0, 2, 1, 2, 1, 0, 2, 2, 1, 1, 2, 2, 0, 1, 1, 2, 2,\n", - " 1, 0, 0, 0, 2, 0, 0, 2, 0, 0, 2, 2, 2, 1, 1, 0, 0, 1, 2, 2, 1, 1,\n", - " 1, 2, 0, 2, 2, 2, 2, 0, 1, 0, 0, 0, 0, 1, 1, 2, 1, 2, 2, 0, 0, 0,\n", - " 2, 2, 2, 2, 1, 0, 0, 0, 1, 0, 0, 2, 1, 0, 2, 1, 2, 1, 1, 2, 1, 2,\n", - " 2, 2, 1, 1, 0, 2, 2, 2, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 2,\n", - " 1, 2, 2, 1, 1, 0, 1, 0, 0, 1, 2, 1, 2, 1, 0, 0, 1, 1, 1, 1, 2, 0,\n", - " 1, 0, 1, 1, 0, 1, 2, 2, 2, 2, 1, 1, 1, 2, 2, 1, 2, 0, 2, 1, 0, 1,\n", - " 2, 1, 1, 2, 1, 1, 2, 2, 2, 2, 2, 0, 1, 2, 0, 2, 0, 2, 1, 1, 1, 1,\n", - " 1, 1, 2, 0, 0, 0, 0, 1, 0, 
2, 0, 2, 1, 2, 1, 0, 2, 1, 1, 0, 2, 2,\n", - " 2, 2, 1, 2, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 2, 0, 0, 1, 0, 1, 1,\n", - " 2, 2, 0, 2], dtype=int32)" - ] - }, - "execution_count": 51, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from sklearn.cluster import KMeans\n", - "from sklearn.pipeline import make_pipeline\n", - "from sklearn.preprocessing import StandardScaler\n", - "cluster = make_pipeline(StandardScaler(), KMeans(n_clusters=3, random_state=0))\n", - "group = Variable(('Income', 'Price', 'Advertising', 'Population'), 'group', None)\n", - "X = design.build_submodel(Carseats, [group]).drop('intercept', axis=1)\n", - "cluster.fit(X.values)\n", - "cluster.predict(X.values)" - ] - }, - { - "cell_type": "markdown", - "id": "9bc38836", - "metadata": {}, - "source": [ - "For clustering, we often want to use the `predict` method rather than the `transform` method. If the ultimate\n", - "features all use `transform` then the do not even need to use these two calls to `make_pipeline`." - ] - }, - { - "cell_type": "code", - "execution_count": 52, - "id": "8ceab9b6", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but StandardScaler was fitted with feature names\n", - " warnings.warn(\n", - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but StandardScaler was fitted with feature names\n", - " warnings.warn(\n", - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but StandardScaler was fitted with feature names\n", - " warnings.warn(\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
interceptmyclus
01.01
11.01
21.02
31.01
41.02
.........
3951.01
3961.02
3971.02
3981.00
3991.02
\n", - "

400 rows × 2 columns

\n", - "
" - ], - "text/plain": [ - " intercept myclus\n", - "0 1.0 1\n", - "1 1.0 1\n", - "2 1.0 2\n", - "3 1.0 1\n", - "4 1.0 2\n", - ".. ... ...\n", - "395 1.0 1\n", - "396 1.0 2\n", - "397 1.0 2\n", - "398 1.0 0\n", - "399 1.0 2\n", - "\n", - "[400 rows x 2 columns]" - ] - }, - "execution_count": 52, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "cluster2 = make_pipeline(StandardScaler(), KMeans(n_clusters=3, random_state=0))\n", - "cluster_var = derived_variable(['Income', 'Price', 'Advertising', 'Population'], \n", - " name='myclus', \n", - " encoder=cluster2,\n", - " use_transform=False)\n", - "design = ModelSpec([cluster_var]).fit(Carseats)\n", - "design.transform(Carseats)" - ] - }, - { - "cell_type": "markdown", - "id": "1f9b2630", - "metadata": {}, - "source": [ - "Somewhat clunkily, we can make this a categorical variable by creating a `Variable` with a\n", - "categorical encoder." - ] - }, - { - "cell_type": "code", - "execution_count": 53, - "id": "ffde00a5", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Variable(variables=(Variable(variables=('Income', 'Price', 'Advertising', 'Population'), name='myclus', encoder=Pipeline(steps=[('standardscaler', StandardScaler()),\n", - " ('kmeans', KMeans(n_clusters=3, random_state=0))]), use_transform=False, pure_columns=False, override_encoder_colnames=True),), name='mynewcat', encoder=Contrast(), use_transform=True, pure_columns=False, override_encoder_colnames=False)" - ] - }, - "execution_count": 53, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "cluster2 = make_pipeline(StandardScaler(), KMeans(n_clusters=3, random_state=0))\n", - "cluster_var = derived_variable(['Income', 'Price', 'Advertising', 'Population'], \n", - " name='myclus', \n", - " encoder=cluster2,\n", - " use_transform=False)\n", - "cat_cluster = Variable((cluster_var,), name='mynewcat', encoder=Contrast(method='drop'))\n", - "cat_cluster" - ] - }, - { - "cell_type": 
"code", - "execution_count": 54, - "id": "5afeab7c", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but StandardScaler was fitted with feature names\n", - " warnings.warn(\n", - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but StandardScaler was fitted with feature names\n", - " warnings.warn(\n", - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but StandardScaler was fitted with feature names\n", - " warnings.warn(\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
intercept12
01.01.00.0
11.01.00.0
21.00.01.0
31.01.00.0
41.00.01.0
............
3951.01.00.0
3961.00.01.0
3971.00.01.0
3981.00.00.0
3991.00.01.0
\n", - "

400 rows × 3 columns

\n", - "
" - ], - "text/plain": [ - " intercept 1 2\n", - "0 1.0 1.0 0.0\n", - "1 1.0 1.0 0.0\n", - "2 1.0 0.0 1.0\n", - "3 1.0 1.0 0.0\n", - "4 1.0 0.0 1.0\n", - ".. ... ... ...\n", - "395 1.0 1.0 0.0\n", - "396 1.0 0.0 1.0\n", - "397 1.0 0.0 1.0\n", - "398 1.0 0.0 0.0\n", - "399 1.0 0.0 1.0\n", - "\n", - "[400 rows x 3 columns]" - ] - }, - "execution_count": 54, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design = ModelSpec([cat_cluster]).fit(Carseats)\n", - "\n", - "design.transform(Carseats)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e24d5637-80fb-49bf-ac10-8ff68cb8bd8f", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "jupytext": { - "formats": "source/models///ipynb,jupyterbook/models///md:myst,jupyterbook/models///ipynb" - }, - "kernelspec": { - "display_name": "python3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.13" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/docs/source/models/selection.ipynb b/docs/source/models/selection.ipynb index 3a7d002..fd66d95 100644 --- a/docs/source/models/selection.ipynb +++ b/docs/source/models/selection.ipynb @@ -2,2723 +2,259 @@ "cells": [ { "cell_type": "markdown", - "id": "72bae06a", + "id": "247387ec-1477-42e6-9e69-cad1cacb5721", "metadata": {}, "source": [ - "# Model selection using `ModelSpec`" + "# Model selection using `ModelSpec`\n", + "\n", + "\n", + "In this lab we illustrate how to run forward stepwise model selection\n", + "using the model specification capability of `ModelSpec`." 
] }, { "cell_type": "code", "execution_count": 1, - "id": "ae6bd850", + "id": "4720bb2a-6bec-4e91-a57e-9689aa4f0532", "metadata": {}, "outputs": [], "source": [ - "import numpy as np, pandas as pd\n", - "%load_ext rpy2.ipython\n", - "\n", + "import numpy as np\n", + "import pandas as pd\n", + "from statsmodels.api import OLS\n", "from ISLP import load_data\n", - "from ISLP.models import ModelSpec\n", - "\n", - "import statsmodels.api as sm" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "5ac10e72", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['Sales', 'CompPrice', 'Income', 'Advertising', 'Population', 'Price',\n", - " 'ShelveLoc', 'Age', 'Education', 'Urban', 'US'],\n", - " dtype='object')" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Carseats = load_data('Carseats')\n", - "%R -i Carseats\n", - "Carseats.columns" - ] - }, - { - "cell_type": "markdown", - "id": "80a586d9", - "metadata": {}, - "source": [ - "## Let's break up income into groups" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "850356ba", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 M\n", - "1 L\n", - "2 L\n", - "3 H\n", - "4 M\n", - " ..\n", - "395 H\n", - "396 L\n", - "397 L\n", - "398 M\n", - "399 L\n", - "Name: OIncome, Length: 400, dtype: category\n", - "Categories (3, object): ['L' < 'M' < 'H']" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Carseats['OIncome'] = pd.cut(Carseats['Income'], \n", - " [0,50,90,200], \n", - " labels=['L','M','H'])\n", - "Carseats['OIncome']" - ] - }, - { - "cell_type": "markdown", - "id": "e24def3a", - "metadata": {}, - "source": [ - "Let's also create an unordered version" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "edf83080", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 M\n", - "1 L\n", 
- "2 L\n", - "3 H\n", - "4 M\n", - " ..\n", - "395 H\n", - "396 L\n", - "397 L\n", - "398 M\n", - "399 L\n", - "Name: UIncome, Length: 400, dtype: category\n", - "Categories (3, object): ['L', 'M', 'H']" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Carseats['UIncome'] = pd.cut(Carseats['Income'], \n", - " [0,50,90,200], \n", - " labels=['L','M','H'],\n", - " ordered=False)\n", - "Carseats['UIncome']" - ] - }, - { - "cell_type": "markdown", - "id": "aa22bb9c", - "metadata": {}, - "source": [ - "## A simple model" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "38d92522", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['intercept', 'Price', 'Income'], dtype='object')" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design = ModelSpec(['Price', 'Income'])\n", - "X = design.fit_transform(Carseats)\n", - "X.columns" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "cfc2056f", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "intercept 12.661546\n", - "Price -0.052213\n", - "Income 0.012829\n", - "dtype: float64" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Y = Carseats['Sales']\n", - "M = sm.OLS(Y, X).fit()\n", - "M.params" - ] - }, - { - "cell_type": "markdown", - "id": "4674c345", - "metadata": {}, - "source": [ - "## Basic procedure\n", - "\n", - "The design matrix is built by cobbling together a set of columns and possibly transforming them.\n", - "A `pd.DataFrame` is essentially a list of columns. One of the first tasks done in `ModelSpec.fit`\n", - "is to inspect a dataframe for column info. 
The column `ShelveLoc` is categorical:" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "5688f0ad", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 Bad\n", - "1 Good\n", - "2 Medium\n", - "3 Medium\n", - "4 Bad\n", - " ... \n", - "395 Good\n", - "396 Medium\n", - "397 Medium\n", - "398 Bad\n", - "399 Good\n", - "Name: ShelveLoc, Length: 400, dtype: category\n", - "Categories (3, object): ['Bad', 'Good', 'Medium']" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Carseats['ShelveLoc']" + "from ISLP.models import (ModelSpec,\n", + " Stepwise,\n", + " sklearn_selected)" ] }, { "cell_type": "markdown", - "id": "4ae28ffa", + "id": "1c224240-ce8b-47f3-a85a-052c43038b26", "metadata": {}, "source": [ - "This is recognized by `ModelSpec` in the form of `Column` objects which are just named tuples with two methods\n", - "`get_columns` and `fit_encoder`." + "### Forward Selection\n", + " \n", + "We will apply the forward-selection approach to the `Hitters` \n", + "data. We wish to predict a baseball player’s `Salary` on the\n", + "basis of various statistics associated with performance in the\n", + "previous year." ] }, { "cell_type": "code", - "execution_count": 8, - "id": "5f8926fd", + "execution_count": 2, + "id": "2adc66cc", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "Column(idx='ShelveLoc', name='ShelveLoc', is_categorical=True, is_ordinal=False, columns=('ShelveLoc[Good]', 'ShelveLoc[Medium]'), encoder=Contrast())" + "59" ] }, - "execution_count": 8, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "design.column_info_['ShelveLoc']" + "Hitters = load_data('Hitters')\n", + "np.isnan(Hitters['Salary']).sum()" ] }, { "cell_type": "markdown", - "id": "966f53a5", - "metadata": {}, - "source": [ - "It recognized ordinal columns as well." 
- ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "a137fa1e", + "id": "40c9a484", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Column(idx='OIncome', name='OIncome', is_categorical=True, is_ordinal=True, columns=('OIncome',), encoder=OrdinalEncoder())" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], "source": [ - "design.column_info_['OIncome']" + " \n", + " We see that `Salary` is missing for 59 players. The\n", + "`dropna()` method of data frames removes all of the rows that have missing\n", + "values in any variable (by default --- see `Hitters.dropna?`)." ] }, { "cell_type": "code", - "execution_count": 10, - "id": "3390dcb0", + "execution_count": 3, + "id": "1869fdab", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "(array([ 73, 48, 35, 100]), ('Income',))" + "(263, 20)" ] }, - "execution_count": 10, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "income = design.column_info_['Income']\n", - "cols, names = income.get_columns(Carseats)\n", - "(cols[:4], names)" + "Hitters = Hitters.dropna()\n", + "Hitters.shape" ] }, { "cell_type": "markdown", - "id": "b6667415", - "metadata": {}, - "source": [ - "## Encoding a column\n", - "\n", - "In building a design matrix we must extract columns from our dataframe (or `np.ndarray`). Categorical\n", - "variables usually are encoded by several columns, typically one less than the number of categories.\n", - "This task is handled by the `encoder` of the `Column`. The encoder must satisfy the `sklearn` transform\n", - "model, i.e. `fit` on some array and `transform` on future arrays. The `fit_encoder` method of `Column` fits\n", - "its encoder the first time data is passed to it." 
- ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "a1b42dbd", + "id": "0a1fe9e6", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(array([[0., 0.],\n", - " [1., 0.],\n", - " [0., 1.],\n", - " [0., 1.]]),\n", - " ['ShelveLoc[Good]', 'ShelveLoc[Medium]'])" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], "source": [ - "shelve = design.column_info_['ShelveLoc']\n", - "cols, names = shelve.get_columns(Carseats)\n", - "(cols[:4], names)" + "We first choose the best model using forward selection based on AIC. This score\n", + "is not built in as a metric to `sklearn`. We therefore define a function to compute it ourselves, and use\n", + "it as a scorer. By default, `sklearn` tries to maximize a score, hence\n", + " our scoring function computes the negative AIC statistic." ] }, { "cell_type": "code", - "execution_count": 12, - "id": "31367988", + "execution_count": 4, + "id": "76bd8110", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[2.],\n", - " [1.],\n", - " [1.],\n", - " [0.]])" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "oincome = design.column_info_['OIncome']\n", - "oincome.get_columns(Carseats)[0][:4]" + "def negAIC(estimator, X, Y):\n", + " \"Negative AIC\"\n", + " n, p = X.shape\n", + " Yhat = estimator.predict(X)\n", + " MSE = np.mean((Y - Yhat)**2)\n", + " return n + n * np.log(MSE) + 2 * (p + 1)\n", + " " ] }, { "cell_type": "markdown", - "id": "751c1487", - "metadata": {}, - "source": [ - "## The terms\n", - "\n", - "The design matrix consists of several sets of columns. This is managed by the `ModelSpec` through\n", - "the `terms` argument which should be a sequence. 
The elements of `terms` are often\n", - "going to be strings (or tuples of strings for interactions, see below) but are converted to a\n", - "`Variable` object and stored in the `terms_` of the fitted `ModelSpec`. A `Variable` is just a named tuple." - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "6e2b6155", + "id": "14ba6f49", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['Price', 'Income']" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], "source": [ - "design.terms" + "We need to estimate the residual variance $\\sigma^2$, which is the first argument in our scoring function above.\n", + "We will fit the biggest model, using all the variables, and estimate $\\sigma^2$ based on its MSE." ] }, { "cell_type": "code", - "execution_count": 14, - "id": "d3e669da", + "execution_count": 5, + "id": "94e10f35", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[Variable(variables=('Price',), name='Price', encoder=None, use_transform=True, pure_columns=True, override_encoder_colnames=False),\n", - " Variable(variables=('Income',), name='Income', encoder=None, use_transform=True, pure_columns=True, override_encoder_colnames=False)]" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "design.terms_" + "design = ModelSpec(Hitters.columns.drop('Salary')).fit(Hitters)\n", + "Y = np.array(Hitters['Salary'])\n", + "X = design.transform(Hitters)" ] }, { "cell_type": "markdown", - "id": "fb0a45c9", + "id": "afdda5f2", "metadata": {}, "source": [ - "While each `Column` can itself extract data, they are all promoted to `Variable` to be of a uniform type. A\n", - "`Variable` can also create columns through the `build_columns` method of `ModelSpec`" + "Along with a score we need to specify the search strategy. This is done through the object\n", + "`Stepwise()` in the `ISLP.models` package. 
The method `Stepwise.first_peak()`\n", + "runs forward stepwise until any further additions to the model do not result\n", + "in an improvement in the evaluation score. Similarly, the method `Stepwise.fixed_steps()`\n", + "runs a fixed number of steps of stepwise search." ] }, { "cell_type": "code", - "execution_count": 15, - "id": "554c67cb", + "execution_count": 6, + "id": "048c8500", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "( Price\n", - " 0 120\n", - " 1 83\n", - " 2 80\n", - " 3 97\n", - " 4 128\n", - " .. ...\n", - " 395 128\n", - " 396 120\n", - " 397 159\n", - " 398 95\n", - " 399 120\n", - " \n", - " [400 rows x 1 columns],\n", - " ['Price'])" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "price = design.terms_[0]\n", - "design.build_columns(Carseats, price)" + "strategy = Stepwise.first_peak(design,\n", + " direction='forward',\n", + " max_terms=len(design.terms))" ] }, { "cell_type": "markdown", - "id": "06956a6f", + "id": "e0c0af0e", "metadata": {}, "source": [ - "Note that `Variable` objects have a tuple of `variables` as well as an `encoder` attribute. The\n", - "tuple of `variables` first creates a concatenated dataframe from all corresponding variables and then\n", - "is run through `encoder.transform`. The `encoder.fit` method of each `Variable` is run once during \n", - "the call to `ModelSpec.fit`." + " \n", + "We now fit a linear regression model with `Salary` as outcome using forward\n", + "selection. To do so, we use the function `sklearn_selected()` from the `ISLP.models` package. This takes\n", + "a model from `statsmodels` along with a search strategy and selects a model with its\n", + "`fit` method. Without specifying a `scoring` argument, the score defaults to MSE, and so all 19 variables will be\n", + "selected." 
] }, { "cell_type": "code", - "execution_count": 16, - "id": "dd434884", + "execution_count": 7, + "id": "26f09fe9", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "( Price Income UIncome[L] UIncome[M]\n", - " 0 120.0 73.0 0.0 1.0\n", - " 1 83.0 48.0 1.0 0.0\n", - " 2 80.0 35.0 1.0 0.0\n", - " 3 97.0 100.0 0.0 0.0\n", - " 4 128.0 64.0 0.0 1.0\n", - " .. ... ... ... ...\n", - " 395 128.0 108.0 0.0 0.0\n", - " 396 120.0 23.0 1.0 0.0\n", - " 397 159.0 26.0 1.0 0.0\n", - " 398 95.0 79.0 0.0 1.0\n", - " 399 120.0 37.0 1.0 0.0\n", - " \n", - " [400 rows x 4 columns],\n", - " ['Price', 'Income', 'UIncome[L]', 'UIncome[M]'])" + "('Assists',\n", + " 'AtBat',\n", + " 'CAtBat',\n", + " 'CHits',\n", + " 'CHmRun',\n", + " 'CRBI',\n", + " 'CRuns',\n", + " 'CWalks',\n", + " 'Division',\n", + " 'Errors',\n", + " 'Hits',\n", + " 'HmRun',\n", + " 'League',\n", + " 'NewLeague',\n", + " 'PutOuts',\n", + " 'RBI',\n", + " 'Runs',\n", + " 'Walks',\n", + " 'Years')" ] }, - "execution_count": 16, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "from ISLP.models.model_spec import Variable\n", - "\n", - "new_var = Variable(('Price', 'Income', 'UIncome'), name='mynewvar', encoder=None)\n", - "design.build_columns(Carseats, new_var)" + "hitters_MSE = sklearn_selected(OLS,\n", + " strategy)\n", + "hitters_MSE.fit(Hitters, Y)\n", + "hitters_MSE.selected_state_" ] }, { "cell_type": "markdown", - "id": "5cdb088c", + "id": "4acf4792", "metadata": {}, "source": [ - "Let's now transform these columns with an encoder. Within `ModelSpec` we will first build the\n", - "arrays above and then call `pca.fit` and finally `pca.transform` within `design.build_columns`." + " Using `neg_Cp` results in a smaller model, as expected, with just 4variables selected." 
] }, { "cell_type": "code", - "execution_count": 17, - "id": "519a642e", + "execution_count": 8, + "id": "a825f4d8", "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but PCA was fitted with feature names\n", - " warnings.warn(\n" - ] - }, { "data": { "text/plain": [ - "( mynewvar[0] mynewvar[1]\n", - " 0 -3.608693 -4.853177\n", - " 1 15.081506 35.708630\n", - " 2 27.422871 40.774250\n", - " 3 -33.973209 13.470489\n", - " 4 6.567316 -11.290100\n", - " .. ... ...\n", - " 395 -36.846346 -18.415783\n", - " 396 45.741500 3.245602\n", - " 397 49.097533 -35.725355\n", - " 398 -13.577772 18.845139\n", - " 399 31.927566 0.978436\n", - " \n", - " [400 rows x 2 columns],\n", - " ['mynewvar[0]', 'mynewvar[1]'])" + "('Assists', 'Errors', 'League', 'NewLeague')" ] }, - "execution_count": 17, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "from sklearn.decomposition import PCA\n", - "pca = PCA(n_components=2)\n", - "pca.fit(design.build_columns(Carseats, new_var)[0]) # this is done within `ModelSpec.fit`\n", - "pca_var = Variable(('Price', 'Income', 'UIncome'), name='mynewvar', encoder=pca)\n", - "design.build_columns(Carseats, pca_var)" - ] - }, - { - "cell_type": "markdown", - "id": "403921a2", - "metadata": {}, - "source": [ - "The elements of the `variables` attribute may be column identifiers ( `\"Price\"`), `Column` instances (`price`)\n", - "or `Variable` instances (`pca_var`)." 
+ "hitters_Cp = sklearn_selected(OLS,\n", + " strategy,\n", + " scoring=negAIC)\n", + "hitters_Cp.fit(Hitters, Y)\n", + "hitters_Cp.selected_state_" ] - }, - { - "cell_type": "code", - "execution_count": 18, - "id": "b422cde1", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but PCA was fitted with feature names\n", - " warnings.warn(\n" - ] - }, - { - "data": { - "text/plain": [ - "( Price Price mynewvar[0] mynewvar[1]\n", - " 0 120.0 120.0 -3.608693 -4.853177\n", - " 1 83.0 83.0 15.081506 35.708630\n", - " 2 80.0 80.0 27.422871 40.774250\n", - " 3 97.0 97.0 -33.973209 13.470489\n", - " 4 128.0 128.0 6.567316 -11.290100\n", - " .. ... ... ... ...\n", - " 395 128.0 128.0 -36.846346 -18.415783\n", - " 396 120.0 120.0 45.741500 3.245602\n", - " 397 159.0 159.0 49.097533 -35.725355\n", - " 398 95.0 95.0 -13.577772 18.845139\n", - " 399 120.0 120.0 31.927566 0.978436\n", - " \n", - " [400 rows x 4 columns],\n", - " ['Price', 'Price', 'mynewvar[0]', 'mynewvar[1]'])" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "fancy_var = Variable(('Price', price, pca_var), name='fancy', encoder=None)\n", - "design.build_columns(Carseats, fancy_var)" - ] - }, - { - "cell_type": "markdown", - "id": "53e38f57", - "metadata": {}, - "source": [ - "We can of course run PCA again on these features (if we wanted)." 
- ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "6347acb6", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but PCA was fitted with feature names\n", - " warnings.warn(\n", - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but PCA was fitted with feature names\n", - " warnings.warn(\n", - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but PCA was fitted with feature names\n", - " warnings.warn(\n" - ] - }, - { - "data": { - "text/plain": [ - "( fancy_pca[0] fancy_pca[1]\n", - " 0 -6.951792 4.859283\n", - " 1 55.170148 -24.694875\n", - " 2 59.418556 -38.033572\n", - " 3 34.722389 28.922184\n", - " 4 -21.419184 -3.120673\n", - " .. ... 
...\n", - " 395 -18.257348 40.760122\n", - " 396 -10.546709 -45.021658\n", - " 397 -77.706359 -37.174379\n", - " 398 36.668694 7.730851\n", - " 399 -9.540535 -31.059122\n", - " \n", - " [400 rows x 2 columns],\n", - " ['fancy_pca[0]', 'fancy_pca[1]'])" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pca2 = PCA(n_components=2)\n", - "pca2.fit(design.build_columns(Carseats, fancy_var)[0]) # this is done within `ModelSpec.fit`\n", - "pca2_var = Variable(('Price', price, pca_var), name='fancy_pca', encoder=pca2)\n", - "design.build_columns(Carseats, pca2_var)" - ] - }, - { - "cell_type": "markdown", - "id": "08b5ddb0", - "metadata": {}, - "source": [ - "## Building the design matrix\n", - "\n", - "With these notions in mind, the final design is essentially then" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "a8eb3e33", - "metadata": {}, - "outputs": [], - "source": [ - "X_hand = np.column_stack([design.build_columns(Carseats, v)[0] for v in design.terms_])[:4]" - ] - }, - { - "cell_type": "markdown", - "id": "97912337", - "metadata": {}, - "source": [ - "An intercept column is added if `design.intercept` is `True` and if the original argument to `transform` is\n", - "a dataframe the index is adjusted accordingly." - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": "72b5e629", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design.intercept" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "id": "8a457e3e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
interceptPriceIncome
01.012073
11.08348
21.08035
31.097100
\n", - "
" - ], - "text/plain": [ - " intercept Price Income\n", - "0 1.0 120 73\n", - "1 1.0 83 48\n", - "2 1.0 80 35\n", - "3 1.0 97 100" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design.transform(Carseats)[:4]" - ] - }, - { - "cell_type": "markdown", - "id": "8624ab8c", - "metadata": {}, - "source": [ - "## Predicting\n", - "\n", - "Constructing the design matrix at any values is carried out by the `transform` method." - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "id": "6052765e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([12.65257604, 12.25873428])" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "new_data = pd.DataFrame({'Price':[10,20], 'Income':[40, 50]})\n", - "new_X = design.transform(new_data)\n", - "M.get_prediction(new_X).predicted_mean" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "id": "9158de59", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " 0 1 \n", - "12.65258 12.25873 \n" - ] - } - ], - "source": [ - "%%R -i new_data,Carseats\n", - "predict(lm(Sales ~ Price + Income, data=Carseats), new_data)" - ] - }, - { - "cell_type": "markdown", - "id": "9608bed3", - "metadata": {}, - "source": [ - "### Difference between using `pd.DataFrame` and `np.ndarray`\n", - "\n", - "If the `terms` only refer to a few columns of the data frame, the `transform` method only needs a dataframe with those columns.\n", - "\n", - "If we had used an `np.ndarray`, the column identifiers would be integers identifying specific columns so,\n", - "in order to work correctly, `transform` would need another `np.ndarray` where the columns have the same meaning." 
- ] - }, - { - "cell_type": "code", - "execution_count": 25, - "id": "f0b8120f", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[1.0, 120, 73],\n", - " [1.0, 83, 48],\n", - " [1.0, 80, 35],\n", - " [1.0, 97, 100]], dtype=object)" - ] - }, - "execution_count": 25, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Carseats_np = np.asarray(Carseats[['Price', 'ShelveLoc', 'US', 'Income']])\n", - "design_np = ModelSpec([0,3]).fit(Carseats_np)\n", - "design_np.transform(Carseats_np)[:4]" - ] - }, - { - "cell_type": "markdown", - "id": "270a02a6", - "metadata": {}, - "source": [ - "The following will fail for hopefully obvious reasons" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "id": "4ffbce7e", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "shapes (2,2) and (3,) not aligned: 2 (dim 1) != 3 (dim 0)\n" - ] - } - ], - "source": [ - "try:\n", - " new_D = np.zeros((2,2))\n", - " new_D[:,0] = [10,20]\n", - " new_D[:,1] = [40,50]\n", - " M.get_prediction(new_D).predicted_mean\n", - "except ValueError as e:\n", - " print(e)" - ] - }, - { - "cell_type": "markdown", - "id": "bc5ff62b", - "metadata": {}, - "source": [ - "Ultimately, `M` expects 3 columns for new predictions because it was fit\n", - "with a matrix having 3 columns (the first representing an intercept).\n", - "\n", - "We might be tempted to try as with the `pd.DataFrame` and produce\n", - "an `np.ndarray` with only the necessary variables." 
- ] - }, - { - "cell_type": "code", - "execution_count": 27, - "id": "34dae1e9", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "index 3 is out of bounds for axis 1 with size 2\n" - ] - } - ], - "source": [ - "try:\n", - " new_X = np.zeros((2,2))\n", - " new_X[:,0] = [10,20]\n", - " new_X[:,1] = [40,50]\n", - " new_D = design_np.transform(new_X)\n", - " M.get_prediction(new_D).predicted_mean\n", - "except IndexError as e:\n", - " print(e)" - ] - }, - { - "cell_type": "markdown", - "id": "7e9da262", - "metadata": {}, - "source": [ - "This fails because `design_np` is looking for column `3` from its `terms`:" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "id": "938b9430", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[Variable(variables=(0,), name='0', encoder=None, use_transform=True, pure_columns=True, override_encoder_colnames=False),\n", - " Variable(variables=(3,), name='3', encoder=None, use_transform=True, pure_columns=True, override_encoder_colnames=False)]" - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design_np.terms_" - ] - }, - { - "cell_type": "markdown", - "id": "083e9529", - "metadata": {}, - "source": [ - "However, if we have an `np.ndarray` in which the first column indeed represents `Price` and the fourth indeed\n", - "represents `Income` then we can arrive at the correct answer by supplying such the array to `design_np.transform`:" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "id": "d413a9fe", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([12.65257604, 12.25873428])" - ] - }, - "execution_count": 29, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "new_X = np.zeros((2,4))\n", - "new_X[:,0] = [10,20]\n", - "new_X[:,3] = [40,50]\n", - "new_D = design_np.transform(new_X)\n", - "M.get_prediction(new_D).predicted_mean" - ] 
- }, - { - "cell_type": "markdown", - "id": "0f4b508b", - "metadata": {}, - "source": [ - "Given this subtlety about needing to supply arrays with identical column structure to `transform` when\n", - "using `np.ndarray` we presume that using a `pd.DataFrame` will be the more popular use case." - ] - }, - { - "cell_type": "markdown", - "id": "8bcbd973", - "metadata": {}, - "source": [ - "## A model with some categorical variables\n", - "\n", - "Categorical variables become `Column` instances with encoders." - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "id": "cf13f72e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Column(idx='UIncome', name='UIncome', is_categorical=True, is_ordinal=False, columns=('UIncome[L]', 'UIncome[M]'), encoder=Contrast())" - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design = ModelSpec(['Population', 'Price', 'UIncome', 'ShelveLoc']).fit(Carseats)\n", - "design.column_info_['UIncome']" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "id": "c1fa0a90", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['intercept', 'Population', 'Price', 'UIncome[L]', 'UIncome[M]',\n", - " 'ShelveLoc[Good]', 'ShelveLoc[Medium]'],\n", - " dtype='object')" - ] - }, - "execution_count": 31, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "X = design.fit_transform(Carseats)\n", - "X.columns" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "id": "b28aa313", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "intercept 11.876012\n", - "Population 0.001163\n", - "Price -0.055725\n", - "UIncome[L] -1.042297\n", - "UIncome[M] -0.119123\n", - "ShelveLoc[Good] 4.999623\n", - "ShelveLoc[Medium] 1.964278\n", - "dtype: float64" - ] - }, - "execution_count": 32, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "sm.OLS(Y, X).fit().params" - 
] - }, - { - "cell_type": "code", - "execution_count": 33, - "id": "aa764acc", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " (Intercept) Population Price UIncomeM UIncomeH \n", - " 10.83371503 0.00116301 -0.05572469 0.92317388 1.04229679 \n", - " ShelveLocGood ShelveLocMedium \n", - " 4.99962319 1.96427771 \n" - ] - } - ], - "source": [ - "%%R\n", - "lm(Sales ~ Population + Price + UIncome + ShelveLoc, data=Carseats)$coef" - ] - }, - { - "cell_type": "markdown", - "id": "31876a29", - "metadata": {}, - "source": [ - "## Getting the encoding you want\n", - "\n", - "By default the level dropped by `ModelSpec` will be the first of the `categories_` values from \n", - "`sklearn.preprocessing.OneHotEncoder()`. We might wish to change this. It seems\n", - "as if the correct way to do this would be something like `Variable(('UIncome',), 'mynewencoding', new_encoder)`\n", - "where `new_encoder` would somehow drop the column we want dropped. \n", - "\n", - "However, when using the convenient identifier `UIncome` in the `variables` argument, this maps to the `Column` associated to `UIncome` within `design.column_info_`:" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "id": "bac2643c", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Column(idx='UIncome', name='UIncome', is_categorical=True, is_ordinal=False, columns=('UIncome[L]', 'UIncome[M]'), encoder=Contrast())" - ] - }, - "execution_count": 34, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design.column_info_['UIncome']" - ] - }, - { - "cell_type": "markdown", - "id": "1485735d", - "metadata": {}, - "source": [ - "This column already has an encoder and `Column` instances are immutable as named tuples. Further, there are times when \n", - "we may want to encode `UIncome` differently within the same model. 
In the model below the main effect of `UIncome` is encoded with two columns while in the interaction `UIncome` (see below) has three columns. This is a design of interest\n", - "and we need a way to allow different encodings of the same column of `Carseats`" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "id": "3987c5d6", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Call:\n", - "lm(formula = Sales ~ UIncome:ShelveLoc + UIncome, data = Carseats)\n", - "\n", - "Coefficients:\n", - " (Intercept) UIncomeM UIncomeH \n", - " 5.1317 0.1151 1.1561 \n", - " UIncomeL:ShelveLocGood UIncomeM:ShelveLocGood UIncomeH:ShelveLocGood \n", - " 4.5121 5.5752 3.7381 \n", - "UIncomeL:ShelveLocMedium UIncomeM:ShelveLocMedium UIncomeH:ShelveLocMedium \n", - " 1.2473 2.4782 1.5141 \n", - "\n" - ] - } - ], - "source": [ - "%%R\n", - "lm(Sales ~ UIncome:ShelveLoc + UIncome, data=Carseats)" - ] - }, - { - "cell_type": "markdown", - "id": "7a6631c9", - "metadata": {}, - "source": [ - " We can create a new \n", - "`Column` with the encoder we want. For categorical variables, there is a convenience function to do so." - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "id": "83a9b94e", - "metadata": {}, - "outputs": [], - "source": [ - "from ISLP.models.model_spec import contrast\n", - "pref_encoding = contrast('UIncome', 'drop', 'L')" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "id": "f0ffabea", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "( UIncome[M] UIncome[H]\n", - " 0 1.0 0.0\n", - " 1 0.0 0.0\n", - " 2 0.0 0.0\n", - " 3 0.0 1.0\n", - " 4 1.0 0.0\n", - " .. ... 
...\n", - " 395 0.0 1.0\n", - " 396 0.0 0.0\n", - " 397 0.0 0.0\n", - " 398 1.0 0.0\n", - " 399 0.0 0.0\n", - " \n", - " [400 rows x 2 columns],\n", - " ['UIncome[M]', 'UIncome[H]'])" - ] - }, - "execution_count": 37, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design.build_columns(Carseats, pref_encoding)" - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "id": "4a5fdc64", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['intercept', 'Population', 'Price', 'UIncome[M]', 'UIncome[H]',\n", - " 'ShelveLoc[Good]', 'ShelveLoc[Medium]'],\n", - " dtype='object')" - ] - }, - "execution_count": 38, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design = ModelSpec(['Population', 'Price', pref_encoding, 'ShelveLoc']).fit(Carseats)\n", - "X = design.fit_transform(Carseats)\n", - "X.columns" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "id": "ae7e3bd2", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "intercept 10.833715\n", - "Population 0.001163\n", - "Price -0.055725\n", - "UIncome[M] 0.923174\n", - "UIncome[H] 1.042297\n", - "ShelveLoc[Good] 4.999623\n", - "ShelveLoc[Medium] 1.964278\n", - "dtype: float64" - ] - }, - "execution_count": 39, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "sm.OLS(Y, X).fit().params" - ] - }, - { - "cell_type": "code", - "execution_count": 40, - "id": "c12ac3df", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " (Intercept) Population Price UIncomeM UIncomeH \n", - " 10.83371503 0.00116301 -0.05572469 0.92317388 1.04229679 \n", - " ShelveLocGood ShelveLocMedium \n", - " 4.99962319 1.96427771 \n" - ] - } - ], - "source": [ - "%%R\n", - "lm(Sales ~ Population + Price + UIncome + ShelveLoc, data=Carseats)$coef" - ] - }, - { - "cell_type": "markdown", - "id": "53bf8aef", - "metadata": {}, - "source": [ - "## Interactions\n", - 
"\n", - "We've referred to interactions above. These are specified (by convenience) as tuples in the `terms` argument\n", - "to `ModelSpec`." - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "id": "47723bce", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "intercept 7.866634\n", - "UIncome[L]:ShelveLoc[Good] 4.512054\n", - "UIncome[L]:ShelveLoc[Medium] 1.247275\n", - "UIncome[M]:ShelveLoc[Good] 5.575170\n", - "UIncome[M]:ShelveLoc[Medium] 2.478163\n", - "UIncome[L] -2.734895\n", - "UIncome[M] -2.619745\n", - "dtype: float64" - ] - }, - "execution_count": 41, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design = ModelSpec([('UIncome', 'ShelveLoc'), 'UIncome'])\n", - "X = design.fit_transform(Carseats)\n", - "sm.OLS(Y, X).fit().params" - ] - }, - { - "cell_type": "markdown", - "id": "86060622", - "metadata": {}, - "source": [ - "The tuples in `terms` are converted to `Variable` in the formalized `terms_` attribute by creating a `Variable` with\n", - "`variables` set to the tuple and the encoder an `Interaction` encoder which (unsurprisingly) creates the interaction columns from the concatenated data frames of `UIncome` and `ShelveLoc`." 
- ] - }, - { - "cell_type": "code", - "execution_count": 42, - "id": "d7a2ab9b", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Variable(variables=('UIncome', 'ShelveLoc'), name='UIncome:ShelveLoc', encoder=Interaction(column_names={'ShelveLoc': ['ShelveLoc[Good]', 'ShelveLoc[Medium]'],\n", - " 'UIncome': ['UIncome[L]', 'UIncome[M]']},\n", - " columns={'ShelveLoc': range(2, 4), 'UIncome': range(0, 2)},\n", - " variables=['UIncome', 'ShelveLoc']), use_transform=True, pure_columns=False, override_encoder_colnames=False)" - ] - }, - "execution_count": 42, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design.terms_[0]" - ] - }, - { - "cell_type": "markdown", - "id": "2a5e7f6b", - "metadata": {}, - "source": [ - "Comparing this to the previous `R` model." - ] - }, - { - "cell_type": "code", - "execution_count": 43, - "id": "bbb02036", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Call:\n", - "lm(formula = Sales ~ UIncome:ShelveLoc + UIncome, data = Carseats)\n", - "\n", - "Coefficients:\n", - " (Intercept) UIncomeM UIncomeH \n", - " 5.1317 0.1151 1.1561 \n", - " UIncomeL:ShelveLocGood UIncomeM:ShelveLocGood UIncomeH:ShelveLocGood \n", - " 4.5121 5.5752 3.7381 \n", - "UIncomeL:ShelveLocMedium UIncomeM:ShelveLocMedium UIncomeH:ShelveLocMedium \n", - " 1.2473 2.4782 1.5141 \n", - "\n" - ] - } - ], - "source": [ - "%%R\n", - "lm(Sales ~ UIncome:ShelveLoc + UIncome, data=Carseats)" - ] - }, - { - "cell_type": "markdown", - "id": "89106a85", - "metadata": {}, - "source": [ - "We note a few important things:\n", - "\n", - "1. `R` has reorganized the columns of the design from the formula: although we wrote `UIncome:ShelveLoc` first these\n", - "columns have been built later. **`ModelSpec` builds columns in the order determined by `terms`!**\n", - "\n", - "2. As noted above, `R` has encoded `UIncome` differently in the main effect and in the interaction. 
For `ModelSpec`, the reference to `UIncome` always refers to the column in `design.column_info_` and will always build only the columns for `L` and `M`. **`ModelSpec` does no inspection of terms to decide how to encode categorical variables.**\n", - "\n", - "A few notes:\n", - "\n", - "- **Why not try to inspect the terms?** For any nontrivial formula in `R` with several categorical variables and interactions, predicting what columns will be produced from a given formula is not simple. **`ModelSpec` errs on the side of being explicit.**\n", - "\n", - "- **Is it impossible to build the design as `R` has?** No. An advanced user who *knows* they want the columns built as `R` has can do so (fairly) easily." - ] - }, - { - "cell_type": "code", - "execution_count": 44, - "id": "151f3fee", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "( UIncome[H] UIncome[L] UIncome[M]\n", - " 0 0.0 0.0 1.0\n", - " 1 0.0 1.0 0.0\n", - " 2 0.0 1.0 0.0\n", - " 3 1.0 0.0 0.0\n", - " 4 0.0 0.0 1.0\n", - " .. ... ... 
...\n", - " 395 1.0 0.0 0.0\n", - " 396 0.0 1.0 0.0\n", - " 397 0.0 1.0 0.0\n", - " 398 0.0 0.0 1.0\n", - " 399 0.0 1.0 0.0\n", - " \n", - " [400 rows x 3 columns],\n", - " ['UIncome[H]', 'UIncome[L]', 'UIncome[M]'])" - ] - }, - "execution_count": 44, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "full_encoding = contrast('UIncome', None)\n", - "design.build_columns(Carseats, full_encoding)" - ] - }, - { - "cell_type": "code", - "execution_count": 45, - "id": "945ce7bc", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "intercept 5.131739\n", - "UIncome[M] 0.115150\n", - "UIncome[H] 1.156118\n", - "UIncome[H]:ShelveLoc[Good] 3.738052\n", - "UIncome[H]:ShelveLoc[Medium] 1.514104\n", - "UIncome[L]:ShelveLoc[Good] 4.512054\n", - "UIncome[L]:ShelveLoc[Medium] 1.247275\n", - "UIncome[M]:ShelveLoc[Good] 5.575170\n", - "UIncome[M]:ShelveLoc[Medium] 2.478163\n", - "dtype: float64" - ] - }, - "execution_count": 45, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design = ModelSpec([pref_encoding, (full_encoding, 'ShelveLoc')])\n", - "X = design.fit_transform(Carseats)\n", - "sm.OLS(Y, X).fit().params" - ] - }, - { - "cell_type": "markdown", - "id": "450b94dd", - "metadata": {}, - "source": [ - "## Special encodings\n", - "\n", - "For flexible models, we may want to consider transformations of features, i.e. polynomial\n", - "or spline transformations. Given transforms that follow the `fit/transform` paradigm\n", - "we can of course achieve this with a `Column` and an `encoder`. 
The `ISLP.transforms`\n", - "package includes a `Poly` transform" - ] - }, - { - "cell_type": "code", - "execution_count": 46, - "id": "18d5c1c8", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Variable(variables=('Income',), name='poly(Income, 3, )', encoder=Poly(degree=3), use_transform=True, pure_columns=False, override_encoder_colnames=True)" - ] - }, - "execution_count": 46, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from ISLP.models.model_spec import poly\n", - "poly('Income', 3)" - ] - }, - { - "cell_type": "code", - "execution_count": 47, - "id": "46c7d911", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "intercept 5.440077\n", - "poly(Income, 3, )[0] 10.036373\n", - "poly(Income, 3, )[1] -2.799156\n", - "poly(Income, 3, )[2] 2.399601\n", - "ShelveLoc[Good] 4.808133\n", - "ShelveLoc[Medium] 1.889533\n", - "dtype: float64" - ] - }, - "execution_count": 47, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design = ModelSpec([poly('Income', 3), 'ShelveLoc'])\n", - "X = design.fit_transform(Carseats)\n", - "sm.OLS(Y, X).fit().params" - ] - }, - { - "cell_type": "markdown", - "id": "99bf13a1", - "metadata": {}, - "source": [ - "Compare:" - ] - }, - { - "cell_type": "code", - "execution_count": 48, - "id": "7606facd", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " (Intercept) poly(Income, 3)1 poly(Income, 3)2 poly(Income, 3)3 \n", - " 5.440077 10.036373 -2.799156 2.399601 \n", - " ShelveLocGood ShelveLocMedium \n", - " 4.808133 1.889533 \n" - ] - } - ], - "source": [ - "%%R\n", - "lm(Sales ~ poly(Income, 3) + ShelveLoc, data=Carseats)$coef" - ] - }, - { - "cell_type": "markdown", - "id": "a4931031", - "metadata": {}, - "source": [ - "## Splines\n", - "\n", - "Support for natural and B-splines is also included" - ] - }, - { - "cell_type": "code", - "execution_count": 49, - "id": "1c1bf5f3", - "metadata": 
{}, - "outputs": [ - { - "data": { - "text/plain": [ - "intercept 4.240421\n", - "ns(Income, , df=5)[0] 1.468196\n", - "ns(Income, , df=5)[1] 1.499471\n", - "ns(Income, , df=5)[2] 1.152070\n", - "ns(Income, , df=5)[3] 2.418398\n", - "ns(Income, , df=5)[4] 1.804460\n", - "ShelveLoc[Good] 4.810449\n", - "ShelveLoc[Medium] 1.881095\n", - "dtype: float64" - ] - }, - "execution_count": 49, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from ISLP.models.model_spec import ns, bs, pca\n", - "design = ModelSpec([ns('Income', df=5), 'ShelveLoc'])\n", - "X = design.fit_transform(Carseats)\n", - "sm.OLS(Y, X).fit().params" - ] - }, - { - "cell_type": "code", - "execution_count": 50, - "id": "8c24254b", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " (Intercept) ns(Income, df = 5)1 ns(Income, df = 5)2 ns(Income, df = 5)3 \n", - " 4.240421 1.468196 1.499471 1.152070 \n", - "ns(Income, df = 5)4 ns(Income, df = 5)5 ShelveLocGood ShelveLocMedium \n", - " 2.418398 1.804460 4.810449 1.881095 \n" - ] - } - ], - "source": [ - "%%R\n", - "library(splines)\n", - "lm(Sales ~ ns(Income, df=5) + ShelveLoc, data=Carseats)$coef" - ] - }, - { - "cell_type": "code", - "execution_count": 51, - "id": "f9d6c4a7", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "intercept 3.495085\n", - "bs(Income, , df=7, degree=2)[0] 1.813118\n", - "bs(Income, , df=7, degree=2)[1] 0.961852\n", - "bs(Income, , df=7, degree=2)[2] 2.471545\n", - "bs(Income, , df=7, degree=2)[3] 2.158891\n", - "bs(Income, , df=7, degree=2)[4] 2.091625\n", - "bs(Income, , df=7, degree=2)[5] 2.600669\n", - "bs(Income, , df=7, degree=2)[6] 2.843108\n", - "ShelveLoc[Good] 4.804919\n", - "ShelveLoc[Medium] 1.880337\n", - "dtype: float64" - ] - }, - "execution_count": 51, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design = ModelSpec([bs('Income', df=7, degree=2), 'ShelveLoc'])\n", - "X = 
design.fit_transform(Carseats)\n", - "sm.OLS(Y, X).fit().params" - ] - }, - { - "cell_type": "code", - "execution_count": 52, - "id": "0bf1726a", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " (Intercept) bs(Income, df = 7, degree = 2)1 \n", - " 3.4950851 1.8131176 \n", - "bs(Income, df = 7, degree = 2)2 bs(Income, df = 7, degree = 2)3 \n", - " 0.9618523 2.4715450 \n", - "bs(Income, df = 7, degree = 2)4 bs(Income, df = 7, degree = 2)5 \n", - " 2.1588908 2.0916252 \n", - "bs(Income, df = 7, degree = 2)6 bs(Income, df = 7, degree = 2)7 \n", - " 2.6006694 2.8431084 \n", - " ShelveLocGood ShelveLocMedium \n", - " 4.8049190 1.8803375 \n" - ] - } - ], - "source": [ - "%%R\n", - "lm(Sales ~ bs(Income, df=7, degree=2) + ShelveLoc, data=Carseats)$coef" - ] - }, - { - "cell_type": "markdown", - "id": "914df4cf", - "metadata": {}, - "source": [ - "## PCA" - ] - }, - { - "cell_type": "code", - "execution_count": 53, - "id": "cc22e780", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but PCA was fitted with feature names\n", - " warnings.warn(\n", - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but PCA was fitted with feature names\n", - " warnings.warn(\n", - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but PCA was fitted with feature names\n", - " warnings.warn(\n" - ] - }, - { - "data": { - "text/plain": [ - "intercept 5.419405\n", - "pca(myvars, , n_components=2)[0] -0.001131\n", - "pca(myvars, , n_components=2)[1] -0.024217\n", - "ShelveLoc[Good] 4.816253\n", - "ShelveLoc[Medium] 1.924139\n", - "dtype: float64" - ] - }, - 
"execution_count": 53, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design = ModelSpec([pca(['Income', \n", - " 'Price', \n", - " 'Advertising', \n", - " 'Population'], \n", - " n_components=2, \n", - " name='myvars'), 'ShelveLoc'])\n", - "X = design.fit_transform(Carseats)\n", - "sm.OLS(Y, X).fit().params" - ] - }, - { - "cell_type": "code", - "execution_count": 54, - "id": "de571e61", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Call:\n", - "lm(formula = Sales ~ prcomp(cbind(Income, Price, Advertising, \n", - " Population))$x[, 1:2] + ShelveLoc, data = Carseats)\n", - "\n", - "Coefficients:\n", - " (Intercept) \n", - " 5.419405 \n", - "prcomp(cbind(Income, Price, Advertising, Population))$x[, 1:2]PC1 \n", - " 0.001131 \n", - "prcomp(cbind(Income, Price, Advertising, Population))$x[, 1:2]PC2 \n", - " -0.024217 \n", - " ShelveLocGood \n", - " 4.816253 \n", - " ShelveLocMedium \n", - " 1.924139 \n", - "\n" - ] - } - ], - "source": [ - "%%R\n", - "lm(Sales ~ prcomp(cbind(Income, Price, Advertising, Population))$x[,1:2] + ShelveLoc, data=Carseats)" - ] - }, - { - "cell_type": "markdown", - "id": "0a103b5a", - "metadata": {}, - "source": [ - "It is of course common to scale before running PCA." 
- ] - }, - { - "cell_type": "code", - "execution_count": 55, - "id": "95ca42f5", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but StandardScaler was fitted with feature names\n", - " warnings.warn(\n", - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but StandardScaler was fitted with feature names\n", - " warnings.warn(\n", - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but StandardScaler was fitted with feature names\n", - " warnings.warn(\n" - ] - }, - { - "data": { - "text/plain": [ - "intercept 5.352159\n", - "pca(myvars, , n_components=2)[0] 0.446383\n", - "pca(myvars, , n_components=2)[1] -1.219788\n", - "ShelveLoc[Good] 4.922780\n", - "ShelveLoc[Medium] 2.005617\n", - "dtype: float64" - ] - }, - "execution_count": 55, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design = ModelSpec([pca(['Income', \n", - " 'Price', \n", - " 'Advertising', \n", - " 'Population'], \n", - " n_components=2, \n", - " name='myvars',\n", - " scale=True), 'ShelveLoc'])\n", - "X = design.fit_transform(Carseats)\n", - "sm.OLS(Y, X).fit().params" - ] - }, - { - "cell_type": "code", - "execution_count": 56, - "id": "0dc22e35", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Call:\n", - "lm(formula = Sales ~ prcomp(cbind(Income, Price, Advertising, \n", - " Population), scale = TRUE)$x[, 1:2] + ShelveLoc, data = Carseats)\n", - "\n", - "Coefficients:\n", - " (Intercept) \n", - " 5.3522 \n", - "prcomp(cbind(Income, Price, Advertising, Population), scale = TRUE)$x[, 1:2]PC1 \n", - " 0.4469 \n", - 
"prcomp(cbind(Income, Price, Advertising, Population), scale = TRUE)$x[, 1:2]PC2 \n", - " -1.2213 \n", - " ShelveLocGood \n", - " 4.9228 \n", - " ShelveLocMedium \n", - " 2.0056 \n", - "\n" - ] - } - ], - "source": [ - "%%R\n", - "lm(Sales ~ prcomp(cbind(Income, Price, Advertising, Population), scale=TRUE)$x[,1:2] + ShelveLoc, data=Carseats)" - ] - }, - { - "cell_type": "markdown", - "id": "70347ee9", - "metadata": {}, - "source": [ - "There will be some small differences in the coefficients due to `sklearn` use of `np.std(ddof=0)` instead\n", - "of `np.std(ddof=1)`." - ] - }, - { - "cell_type": "code", - "execution_count": 57, - "id": "aa0c2f2e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([ 0.44694166, -1.22131519])" - ] - }, - "execution_count": 57, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "np.array(sm.OLS(Y, X).fit().params)[1:3] * np.sqrt(X.shape[0] / (X.shape[0]-1))" - ] - }, - { - "cell_type": "markdown", - "id": "ab05c497", - "metadata": {}, - "source": [ - "## Model selection\n", - "\n", - "Another task requiring different design matrices is model selection. Manipulating\n", - "the `terms` attribute of a `ModelSpec` (or more precisely its more uniform version `terms_`)\n", - "can clearly allow for both exhaustive and stepwise model selection." 
- ] - }, - { - "cell_type": "code", - "execution_count": 58, - "id": "9505c178", - "metadata": {}, - "outputs": [], - "source": [ - "from ISLP.models.strategy import (Stepwise, \n", - " min_max)\n", - "from ISLP.models.generic_selector import FeatureSelector" - ] - }, - { - "cell_type": "markdown", - "id": "020c2532", - "metadata": {}, - "source": [ - "### Best subsets" - ] - }, - { - "cell_type": "code", - "execution_count": 59, - "id": "f9aba6db", - "metadata": {}, - "outputs": [], - "source": [ - "design = ModelSpec(['Price', \n", - " 'UIncome', \n", - " 'Advertising', \n", - " 'US', \n", - " 'Income',\n", - " 'ShelveLoc',\n", - " 'Education',\n", - " 'Urban']).fit(Carseats)\n", - "strategy = min_max(design,\n", - " min_terms=0,\n", - " max_terms=3)" - ] - }, - { - "cell_type": "code", - "execution_count": 60, - "id": "91144a3d", - "metadata": {}, - "outputs": [], - "source": [ - "from sklearn.linear_model import LinearRegression\n", - "selector = FeatureSelector(LinearRegression(fit_intercept=False),\n", - " strategy,\n", - " scoring='neg_mean_squared_error')" - ] - }, - { - "cell_type": "code", - "execution_count": 61, - "id": "ae3cb2eb", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 61, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "selector.fit(Carseats, Y)" - ] - }, - { - "cell_type": "code", - "execution_count": 62, - "id": "e63b2744", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "('Price', 'Advertising', 'ShelveLoc')" - ] - }, - "execution_count": 62, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "selector.selected_state_" - ] - }, - { - "cell_type": "code", - "execution_count": 63, - "id": "0a774b48", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "dict_keys([(), ('Price',), ('UIncome',), ('Advertising',), ('US',), ('Income',), ('ShelveLoc',), ('Education',), ('Urban',), ('Price', 'UIncome'), 
('Price', 'Advertising'), ('Price', 'US'), ('Price', 'Income'), ('Price', 'ShelveLoc'), ('Price', 'Education'), ('Price', 'Urban'), ('UIncome', 'Advertising'), ('UIncome', 'US'), ('UIncome', 'Income'), ('UIncome', 'ShelveLoc'), ('UIncome', 'Education'), ('UIncome', 'Urban'), ('Advertising', 'US'), ('Advertising', 'Income'), ('Advertising', 'ShelveLoc'), ('Advertising', 'Education'), ('Advertising', 'Urban'), ('US', 'Income'), ('US', 'ShelveLoc'), ('US', 'Education'), ('US', 'Urban'), ('Income', 'ShelveLoc'), ('Income', 'Education'), ('Income', 'Urban'), ('ShelveLoc', 'Education'), ('ShelveLoc', 'Urban'), ('Education', 'Urban'), ('Price', 'UIncome', 'Advertising'), ('Price', 'UIncome', 'US'), ('Price', 'UIncome', 'Income'), ('Price', 'UIncome', 'ShelveLoc'), ('Price', 'UIncome', 'Education'), ('Price', 'UIncome', 'Urban'), ('Price', 'Advertising', 'US'), ('Price', 'Advertising', 'Income'), ('Price', 'Advertising', 'ShelveLoc'), ('Price', 'Advertising', 'Education'), ('Price', 'Advertising', 'Urban'), ('Price', 'US', 'Income'), ('Price', 'US', 'ShelveLoc'), ('Price', 'US', 'Education'), ('Price', 'US', 'Urban'), ('Price', 'Income', 'ShelveLoc'), ('Price', 'Income', 'Education'), ('Price', 'Income', 'Urban'), ('Price', 'ShelveLoc', 'Education'), ('Price', 'ShelveLoc', 'Urban'), ('Price', 'Education', 'Urban'), ('UIncome', 'Advertising', 'US'), ('UIncome', 'Advertising', 'Income'), ('UIncome', 'Advertising', 'ShelveLoc'), ('UIncome', 'Advertising', 'Education'), ('UIncome', 'Advertising', 'Urban'), ('UIncome', 'US', 'Income'), ('UIncome', 'US', 'ShelveLoc'), ('UIncome', 'US', 'Education'), ('UIncome', 'US', 'Urban'), ('UIncome', 'Income', 'ShelveLoc'), ('UIncome', 'Income', 'Education'), ('UIncome', 'Income', 'Urban'), ('UIncome', 'ShelveLoc', 'Education'), ('UIncome', 'ShelveLoc', 'Urban'), ('UIncome', 'Education', 'Urban'), ('Advertising', 'US', 'Income'), ('Advertising', 'US', 'ShelveLoc'), ('Advertising', 'US', 'Education'), ('Advertising', 'US', 'Urban'), 
('Advertising', 'Income', 'ShelveLoc'), ('Advertising', 'Income', 'Education'), ('Advertising', 'Income', 'Urban'), ('Advertising', 'ShelveLoc', 'Education'), ('Advertising', 'ShelveLoc', 'Urban'), ('Advertising', 'Education', 'Urban'), ('US', 'Income', 'ShelveLoc'), ('US', 'Income', 'Education'), ('US', 'Income', 'Urban'), ('US', 'ShelveLoc', 'Education'), ('US', 'ShelveLoc', 'Urban'), ('US', 'Education', 'Urban'), ('Income', 'ShelveLoc', 'Education'), ('Income', 'ShelveLoc', 'Urban'), ('Income', 'Education', 'Urban'), ('ShelveLoc', 'Education', 'Urban')])" - ] - }, - "execution_count": 63, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "selector.results_.keys()" - ] - }, - { - "cell_type": "code", - "execution_count": 64, - "id": "0ca1f28c", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "('Price', 'Advertising', 'Income')" - ] - }, - "execution_count": 64, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "strategy = min_max(design,\n", - " min_terms=0,\n", - " max_terms=3,\n", - " lower_terms=['Price'],\n", - " upper_terms=['Price', 'Income', 'Advertising'])\n", - "selector = FeatureSelector(LinearRegression(fit_intercept=False),\n", - " strategy,\n", - " scoring='neg_mean_squared_error')\n", - "selector.fit(Carseats, Y)\n", - "selector.selected_state_" - ] - }, - { - "cell_type": "code", - "execution_count": 65, - "id": "5c6732fa", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "dict_keys([('Price',), ('Price', 'Advertising'), ('Price', 'Income'), ('Price', 'Advertising', 'Income')])" - ] - }, - "execution_count": 65, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "selector.results_.keys()" - ] - }, - { - "cell_type": "markdown", - "id": "7bb6fcc3", - "metadata": {}, - "source": [ - "### Stepwise selection" - ] - }, - { - "cell_type": "code", - "execution_count": 66, - "id": "9985d0fc", - "metadata": {}, - "outputs": [ - { - 
"data": { - "text/plain": [ - "('Advertising', 'Income', 'Price', 'ShelveLoc')" - ] - }, - "execution_count": 66, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "strategy = Stepwise.first_peak(design,\n", - " min_terms=0,\n", - " max_terms=6,\n", - " lower_terms=['Price'],\n", - " upper_terms=['Price', 'Income', 'Advertising', 'ShelveLoc', 'UIncome', 'US'\n", - " 'Education', 'Urban'])\n", - "selector = FeatureSelector(LinearRegression(fit_intercept=False),\n", - " strategy,\n", - " scoring='neg_mean_squared_error',\n", - " cv=3)\n", - "selector.fit(Carseats, Y)\n", - "selector.selected_state_" - ] - }, - { - "cell_type": "code", - "execution_count": 67, - "id": "d3cf3e9b", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "dict_keys([(), ('Price',), ('Price', 'UIncome'), ('Advertising', 'Price'), ('Income', 'Price'), ('Price', 'ShelveLoc'), ('Price', 'Urban'), ('Price', 'ShelveLoc', 'UIncome'), ('Advertising', 'Price', 'ShelveLoc'), ('Income', 'Price', 'ShelveLoc'), ('Price', 'ShelveLoc', 'Urban'), ('Advertising', 'Price', 'ShelveLoc', 'UIncome'), ('Advertising', 'Income', 'Price', 'ShelveLoc'), ('Advertising', 'Price', 'ShelveLoc', 'Urban'), ('Advertising', 'Income', 'Price', 'ShelveLoc', 'UIncome'), ('Advertising', 'Income', 'Price', 'ShelveLoc', 'Urban')])" - ] - }, - "execution_count": 67, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "selector.results_.keys()" - ] - }, - { - "cell_type": "code", - "execution_count": 68, - "id": "dd43ea7c", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{(): -8.055847677297269,\n", - " ('Price',): -6.514630258019962,\n", - " ('Price', 'UIncome'): -6.621654905418576,\n", - " ('Advertising', 'Price'): -5.825225309857156,\n", - " ('Income', 'Price'): -6.455432795910743,\n", - " ('Price', 'ShelveLoc'): -3.780183168075897,\n", - " ('Price', 'Urban'): -6.5430157266926114,\n", - " ('Price', 'ShelveLoc', 'UIncome'): 
-3.6938729706475004,\n", - " ('Advertising', 'Price', 'ShelveLoc'): -3.2067316025050645,\n", - " ('Income', 'Price', 'ShelveLoc'): -3.634698914456587,\n", - " ('Price', 'ShelveLoc', 'Urban'): -3.776148947585277,\n", - " ('Advertising', 'Price', 'ShelveLoc', 'UIncome'): -3.1240961493998642,\n", - " ('Advertising', 'Income', 'Price', 'ShelveLoc'): -3.0801704971796244,\n", - " ('Advertising', 'Price', 'ShelveLoc', 'Urban'): -3.207569489139369,\n", - " ('Advertising',\n", - " 'Income',\n", - " 'Price',\n", - " 'ShelveLoc',\n", - " 'UIncome'): -3.1048826894036115,\n", - " ('Advertising', 'Income', 'Price', 'ShelveLoc', 'Urban'): -3.0867130108677423}" - ] - }, - "execution_count": 68, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "selector.results_" - ] - }, - { - "cell_type": "code", - "execution_count": 69, - "id": "7c026f0a", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "('Advertising', 'Income', 'Price', 'ShelveLoc')" - ] - }, - "execution_count": 69, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "selector.selected_state_" - ] - }, - { - "cell_type": "markdown", - "id": "b4b89d04", - "metadata": {}, - "source": [ - "### Enforcing constraints\n", - "\n", - "In models with interactions, we may often want to impose constraints on interactions and main effects.\n", - "This can be achieved here by use of a `validator` that checks whether a given model is valid.\n", - "\n", - "Suppose we want to have the following constraint: `ShelveLoc` may not be in the model unless\n", - "`Price` is in the following model." 
- ] - }, - { - "cell_type": "code", - "execution_count": 70, - "id": "1c1e31d0", - "metadata": {}, - "outputs": [], - "source": [ - "design = ModelSpec(['Price', \n", - " 'Advertising', \n", - " 'Income',\n", - " 'ShelveLoc']).fit(Carseats)" - ] - }, - { - "cell_type": "markdown", - "id": "be929807", - "metadata": {}, - "source": [ - "The constraints are described with a boolean matrix with `(i,j)` as `j` is a child of `i`: so `j` should not\n", - "be in the model when `i` is not and enforced with a callable `validator` that evaluates each candidate state.\n", - "\n", - "Both `min_max_strategy` and `step_strategy` accept a `validator` argument." - ] - }, - { - "cell_type": "code", - "execution_count": 71, - "id": "c075b1b7", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "dict_keys([(), ('Price',), ('Advertising',), ('Income',), ('Price', 'Advertising'), ('Price', 'Income'), ('Price', 'ShelveLoc'), ('Advertising', 'Income'), ('Price', 'Advertising', 'Income'), ('Price', 'Advertising', 'ShelveLoc'), ('Price', 'Income', 'ShelveLoc'), ('Price', 'Advertising', 'Income', 'ShelveLoc')])" - ] - }, - "execution_count": 71, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from ISLP.models.strategy import validator_from_constraints\n", - "constraints = np.zeros((4, 4))\n", - "constraints[0,3] = 1\n", - "strategy = min_max(design,\n", - " min_terms=0,\n", - " max_terms=4,\n", - " validator=validator_from_constraints(design,\n", - " constraints))\n", - "selector = FeatureSelector(LinearRegression(fit_intercept=False),\n", - " strategy,\n", - " scoring='neg_mean_squared_error',\n", - " cv=3)\n", - "selector.fit(Carseats, Y)\n", - "selector.results_.keys()" - ] - }, - { - "cell_type": "code", - "execution_count": 72, - "id": "3472d47c", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "('Price', 'Advertising', 'Income', 'ShelveLoc')" - ] - }, - "execution_count": 72, - "metadata": {}, - "output_type": 
"execute_result" - } - ], - "source": [ - "selector.selected_state_" - ] - }, - { - "cell_type": "code", - "execution_count": 73, - "id": "5d2c82b9", - "metadata": {}, - "outputs": [], - "source": [ - "Hitters=load_data('Hitters')" - ] - }, - { - "cell_type": "code", - "execution_count": 74, - "id": "4b2ac2c2", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['AtBat', 'Hits', 'HmRun', 'Runs', 'RBI', 'Walks', 'Years', 'CAtBat',\n", - " 'CHits', 'CHmRun', 'CRuns', 'CRBI', 'CWalks', 'League', 'Division',\n", - " 'PutOuts', 'Assists', 'Errors', 'Salary', 'NewLeague'],\n", - " dtype='object')" - ] - }, - "execution_count": 74, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Hitters.columns" - ] - }, - { - "cell_type": "code", - "execution_count": 75, - "id": "bd2ad0dd", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "dict_keys([(), ('AtBat',), ('Hits',), ('HmRun',), ('Runs',), ('RBI',), ('Walks',), ('Years',), ('CAtBat',), ('CHits',), ('CHmRun',), ('CRuns',), ('CRBI',), ('CWalks',), ('League',), ('Division',), ('PutOuts',), ('Assists',), ('Errors',), ('NewLeague',), ('AtBat', 'CRBI'), ('CRBI', 'Hits'), ('CRBI', 'HmRun'), ('CRBI', 'Runs'), ('CRBI', 'RBI'), ('CRBI', 'Walks'), ('CRBI', 'Years'), ('CAtBat', 'CRBI'), ('CHits', 'CRBI'), ('CHmRun', 'CRBI'), ('CRBI', 'CRuns'), ('CRBI', 'CWalks'), ('CRBI', 'League'), ('CRBI', 'Division'), ('CRBI', 'PutOuts'), ('Assists', 'CRBI'), ('CRBI', 'Errors'), ('CRBI', 'NewLeague'), ('AtBat', 'CRBI', 'Hits'), ('CRBI', 'Hits', 'HmRun'), ('CRBI', 'Hits', 'Runs'), ('CRBI', 'Hits', 'RBI'), ('CRBI', 'Hits', 'Walks'), ('CRBI', 'Hits', 'Years'), ('CAtBat', 'CRBI', 'Hits'), ('CHits', 'CRBI', 'Hits'), ('CHmRun', 'CRBI', 'Hits'), ('CRBI', 'CRuns', 'Hits'), ('CRBI', 'CWalks', 'Hits'), ('CRBI', 'Hits', 'League'), ('CRBI', 'Division', 'Hits'), ('CRBI', 'Hits', 'PutOuts'), ('Assists', 'CRBI', 'Hits'), ('CRBI', 'Errors', 'Hits'), ('CRBI', 'Hits', 'NewLeague'), ('AtBat', 
'CRBI', 'Hits', 'PutOuts'), ('CRBI', 'Hits', 'HmRun', 'PutOuts'), ('CRBI', 'Hits', 'PutOuts', 'Runs'), ('CRBI', 'Hits', 'PutOuts', 'RBI'), ('CRBI', 'Hits', 'PutOuts', 'Walks'), ('CRBI', 'Hits', 'PutOuts', 'Years'), ('CAtBat', 'CRBI', 'Hits', 'PutOuts'), ('CHits', 'CRBI', 'Hits', 'PutOuts'), ('CHmRun', 'CRBI', 'Hits', 'PutOuts'), ('CRBI', 'CRuns', 'Hits', 'PutOuts'), ('CRBI', 'CWalks', 'Hits', 'PutOuts'), ('CRBI', 'Hits', 'League', 'PutOuts'), ('CRBI', 'Division', 'Hits', 'PutOuts'), ('Assists', 'CRBI', 'Hits', 'PutOuts'), ('CRBI', 'Errors', 'Hits', 'PutOuts'), ('CRBI', 'Hits', 'NewLeague', 'PutOuts'), ('AtBat', 'CRBI', 'Division', 'Hits', 'PutOuts'), ('CRBI', 'Division', 'Hits', 'HmRun', 'PutOuts'), ('CRBI', 'Division', 'Hits', 'PutOuts', 'Runs'), ('CRBI', 'Division', 'Hits', 'PutOuts', 'RBI'), ('CRBI', 'Division', 'Hits', 'PutOuts', 'Walks'), ('CRBI', 'Division', 'Hits', 'PutOuts', 'Years'), ('CAtBat', 'CRBI', 'Division', 'Hits', 'PutOuts'), ('CHits', 'CRBI', 'Division', 'Hits', 'PutOuts'), ('CHmRun', 'CRBI', 'Division', 'Hits', 'PutOuts'), ('CRBI', 'CRuns', 'Division', 'Hits', 'PutOuts'), ('CRBI', 'CWalks', 'Division', 'Hits', 'PutOuts'), ('CRBI', 'Division', 'Hits', 'League', 'PutOuts'), ('Assists', 'CRBI', 'Division', 'Hits', 'PutOuts'), ('CRBI', 'Division', 'Errors', 'Hits', 'PutOuts'), ('CRBI', 'Division', 'Hits', 'NewLeague', 'PutOuts'), ('AtBat', 'CRBI', 'Division', 'Hits', 'HmRun', 'PutOuts'), ('AtBat', 'CRBI', 'Division', 'Hits', 'PutOuts', 'Runs'), ('AtBat', 'CRBI', 'Division', 'Hits', 'PutOuts', 'RBI'), ('AtBat', 'CRBI', 'Division', 'Hits', 'PutOuts', 'Walks'), ('AtBat', 'CRBI', 'Division', 'Hits', 'PutOuts', 'Years'), ('AtBat', 'CAtBat', 'CRBI', 'Division', 'Hits', 'PutOuts'), ('AtBat', 'CHits', 'CRBI', 'Division', 'Hits', 'PutOuts'), ('AtBat', 'CHmRun', 'CRBI', 'Division', 'Hits', 'PutOuts'), ('AtBat', 'CRBI', 'CRuns', 'Division', 'Hits', 'PutOuts'), ('AtBat', 'CRBI', 'CWalks', 'Division', 'Hits', 'PutOuts'), ('AtBat', 'CRBI', 'Division', 'Hits', 
'League', 'PutOuts'), ('Assists', 'AtBat', 'CRBI', 'Division', 'Hits', 'PutOuts'), ('AtBat', 'CRBI', 'Division', 'Errors', 'Hits', 'PutOuts'), ('AtBat', 'CRBI', 'Division', 'Hits', 'NewLeague', 'PutOuts'), ('AtBat', 'CRBI', 'Division', 'Hits', 'HmRun', 'PutOuts', 'Walks'), ('AtBat', 'CRBI', 'Division', 'Hits', 'PutOuts', 'Runs', 'Walks'), ('AtBat', 'CRBI', 'Division', 'Hits', 'PutOuts', 'RBI', 'Walks'), ('AtBat', 'CRBI', 'Division', 'Hits', 'PutOuts', 'Walks', 'Years'), ('AtBat', 'CAtBat', 'CRBI', 'Division', 'Hits', 'PutOuts', 'Walks'), ('AtBat', 'CHits', 'CRBI', 'Division', 'Hits', 'PutOuts', 'Walks'), ('AtBat', 'CHmRun', 'CRBI', 'Division', 'Hits', 'PutOuts', 'Walks'), ('AtBat', 'CRBI', 'CRuns', 'Division', 'Hits', 'PutOuts', 'Walks'), ('AtBat', 'CRBI', 'CWalks', 'Division', 'Hits', 'PutOuts', 'Walks'), ('AtBat', 'CRBI', 'Division', 'Hits', 'League', 'PutOuts', 'Walks'), ('Assists', 'AtBat', 'CRBI', 'Division', 'Hits', 'PutOuts', 'Walks'), ('AtBat', 'CRBI', 'Division', 'Errors', 'Hits', 'PutOuts', 'Walks'), ('AtBat', 'CRBI', 'Division', 'Hits', 'NewLeague', 'PutOuts', 'Walks'), ('AtBat', 'CRBI', 'CWalks', 'Division', 'Hits', 'HmRun', 'PutOuts', 'Walks'), ('AtBat', 'CRBI', 'CWalks', 'Division', 'Hits', 'PutOuts', 'Runs', 'Walks'), ('AtBat', 'CRBI', 'CWalks', 'Division', 'Hits', 'PutOuts', 'RBI', 'Walks'), ('AtBat', 'CRBI', 'CWalks', 'Division', 'Hits', 'PutOuts', 'Walks', 'Years'), ('AtBat', 'CAtBat', 'CRBI', 'CWalks', 'Division', 'Hits', 'PutOuts', 'Walks'), ('AtBat', 'CHits', 'CRBI', 'CWalks', 'Division', 'Hits', 'PutOuts', 'Walks'), ('AtBat', 'CHmRun', 'CRBI', 'CWalks', 'Division', 'Hits', 'PutOuts', 'Walks'), ('AtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'PutOuts', 'Walks'), ('AtBat', 'CRBI', 'CWalks', 'Division', 'Hits', 'League', 'PutOuts', 'Walks'), ('Assists', 'AtBat', 'CRBI', 'CWalks', 'Division', 'Hits', 'PutOuts', 'Walks'), ('AtBat', 'CRBI', 'CWalks', 'Division', 'Errors', 'Hits', 'PutOuts', 'Walks'), ('AtBat', 'CRBI', 'CWalks', 'Division', 
'Hits', 'NewLeague', 'PutOuts', 'Walks'), ('AtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'HmRun', 'PutOuts', 'Walks'), ('AtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'PutOuts', 'Runs', 'Walks'), ('AtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'PutOuts', 'RBI', 'Walks'), ('AtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'PutOuts', 'Walks', 'Years'), ('AtBat', 'CAtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'PutOuts', 'Walks'), ('AtBat', 'CHits', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'PutOuts', 'Walks'), ('AtBat', 'CHmRun', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'PutOuts', 'Walks'), ('AtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'League', 'PutOuts', 'Walks'), ('Assists', 'AtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'PutOuts', 'Walks'), ('AtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Errors', 'Hits', 'PutOuts', 'Walks'), ('AtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'NewLeague', 'PutOuts', 'Walks'), ('AtBat', 'CAtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'HmRun', 'PutOuts', 'Walks'), ('AtBat', 'CAtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'PutOuts', 'Runs', 'Walks'), ('AtBat', 'CAtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'PutOuts', 'RBI', 'Walks'), ('AtBat', 'CAtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'PutOuts', 'Walks', 'Years'), ('AtBat', 'CAtBat', 'CHits', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'PutOuts', 'Walks'), ('AtBat', 'CAtBat', 'CHmRun', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'PutOuts', 'Walks'), ('AtBat', 'CAtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'League', 'PutOuts', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'PutOuts', 'Walks'), ('AtBat', 'CAtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Errors', 'Hits', 'PutOuts', 'Walks'), ('AtBat', 'CAtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'NewLeague', 'PutOuts', 'Walks'), ('Assists', 
'AtBat', 'CAtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'HmRun', 'PutOuts', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'PutOuts', 'Runs', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'PutOuts', 'RBI', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'PutOuts', 'Walks', 'Years'), ('Assists', 'AtBat', 'CAtBat', 'CHits', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'PutOuts', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CHmRun', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'PutOuts', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'League', 'PutOuts', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Errors', 'Hits', 'PutOuts', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'NewLeague', 'PutOuts', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'HmRun', 'League', 'PutOuts', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'League', 'PutOuts', 'Runs', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'League', 'PutOuts', 'RBI', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'League', 'PutOuts', 'Walks', 'Years'), ('Assists', 'AtBat', 'CAtBat', 'CHits', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'League', 'PutOuts', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CHmRun', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'League', 'PutOuts', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Errors', 'Hits', 'League', 'PutOuts', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'League', 'NewLeague', 'PutOuts', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'HmRun', 
'League', 'PutOuts', 'Runs', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'League', 'PutOuts', 'RBI', 'Runs', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'League', 'PutOuts', 'Runs', 'Walks', 'Years'), ('Assists', 'AtBat', 'CAtBat', 'CHits', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'League', 'PutOuts', 'Runs', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CHmRun', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'League', 'PutOuts', 'Runs', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Errors', 'Hits', 'League', 'PutOuts', 'Runs', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Hits', 'League', 'NewLeague', 'PutOuts', 'Runs', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Errors', 'Hits', 'HmRun', 'League', 'PutOuts', 'Runs', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Errors', 'Hits', 'League', 'PutOuts', 'RBI', 'Runs', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Errors', 'Hits', 'League', 'PutOuts', 'Runs', 'Walks', 'Years'), ('Assists', 'AtBat', 'CAtBat', 'CHits', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Errors', 'Hits', 'League', 'PutOuts', 'Runs', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CHmRun', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Errors', 'Hits', 'League', 'PutOuts', 'Runs', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Errors', 'Hits', 'League', 'NewLeague', 'PutOuts', 'Runs', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Errors', 'Hits', 'HmRun', 'League', 'PutOuts', 'RBI', 'Runs', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Errors', 'Hits', 'HmRun', 'League', 'PutOuts', 'Runs', 'Walks', 'Years'), ('Assists', 'AtBat', 'CAtBat', 'CHits', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Errors', 
'Hits', 'HmRun', 'League', 'PutOuts', 'Runs', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CHmRun', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Errors', 'Hits', 'HmRun', 'League', 'PutOuts', 'Runs', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Errors', 'Hits', 'HmRun', 'League', 'NewLeague', 'PutOuts', 'Runs', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CHits', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Errors', 'Hits', 'HmRun', 'League', 'PutOuts', 'RBI', 'Runs', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CHits', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Errors', 'Hits', 'HmRun', 'League', 'PutOuts', 'Runs', 'Walks', 'Years'), ('Assists', 'AtBat', 'CAtBat', 'CHits', 'CHmRun', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Errors', 'Hits', 'HmRun', 'League', 'PutOuts', 'Runs', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CHits', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Errors', 'Hits', 'HmRun', 'League', 'NewLeague', 'PutOuts', 'Runs', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CHits', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Errors', 'Hits', 'HmRun', 'League', 'PutOuts', 'RBI', 'Runs', 'Walks', 'Years'), ('Assists', 'AtBat', 'CAtBat', 'CHits', 'CHmRun', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Errors', 'Hits', 'HmRun', 'League', 'PutOuts', 'RBI', 'Runs', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CHits', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Errors', 'Hits', 'HmRun', 'League', 'NewLeague', 'PutOuts', 'RBI', 'Runs', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CHits', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Errors', 'Hits', 'HmRun', 'League', 'NewLeague', 'PutOuts', 'RBI', 'Runs', 'Walks', 'Years'), ('Assists', 'AtBat', 'CAtBat', 'CHits', 'CHmRun', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Errors', 'Hits', 'HmRun', 'League', 'NewLeague', 'PutOuts', 'RBI', 'Runs', 'Walks'), ('Assists', 'AtBat', 'CAtBat', 'CHits', 'CHmRun', 'CRBI', 'CRuns', 'CWalks', 'Division', 'Errors', 'Hits', 'HmRun', 'League', 'NewLeague', 'PutOuts', 'RBI', 'Runs', 'Walks', 'Years')])" - ] - 
}, - "execution_count": 75, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Hitters = Hitters.dropna()\n", - "Y=Hitters['Salary']\n", - "X=Hitters.drop('Salary', axis=1)\n", - "design = ModelSpec(X.columns).fit(X)\n", - "strategy = Stepwise.first_peak(design,\n", - " direction='forward',\n", - " min_terms=0,\n", - " max_terms=19)\n", - "selector = FeatureSelector(LinearRegression(fit_intercept=False),\n", - " strategy,\n", - " scoring='neg_mean_squared_error', cv=None)\n", - "selector.fit(X, Y)\n", - "selector.results_.keys()" - ] - }, - { - "cell_type": "code", - "execution_count": 76, - "id": "31788748", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "19" - ] - }, - "execution_count": 76, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "len(selector.selected_state_)" - ] - }, - { - "cell_type": "code", - "execution_count": 77, - "id": "e97d80c3", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "19" - ] - }, - "execution_count": 77, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "len(X.columns)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a71f0332", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Start: AIC=3215.77\n", - "Salary ~ 1\n", - "\n", - " Df Sum of Sq RSS AIC\n", - "+ CRBI 1 17139434 36179679 3115.8\n", - "+ CRuns 1 16881162 36437951 3117.6\n", - "+ CHits 1 16065140 37253973 3123.5\n", - "+ CAtBat 1 14759710 38559403 3132.5\n", - "+ CHmRun 1 14692193 38626920 3133.0\n", - "+ CWalks 1 12792622 40526491 3145.6\n", - "+ RBI 1 10771083 42548030 3158.4\n", - "+ Walks 1 10504833 42814280 3160.1\n", - "+ Hits 1 10260491 43058621 3161.6\n", - "+ Runs 1 9399158 43919955 3166.8\n", - "+ Years 1 8559105 44760007 3171.7\n", - "+ AtBat 1 8309469 45009644 3173.2\n", - "+ HmRun 1 6273967 47045145 3184.8\n", - "+ PutOuts 1 4814100 48505013 3192.9\n", - "+ 
Division 1 1976102 51343011 3207.8\n", - " 53319113 3215.8\n", - "+ Assists 1 34497 53284615 3217.6\n", - "+ League 1 10876 53308237 3217.7\n", - "+ Errors 1 1555 53317558 3217.8\n", - "+ NewLeague 1 428 53318684 3217.8\n", - "\n", - "Step: AIC=3115.78\n", - "Salary ~ CRBI\n", - "\n", - " Df Sum of Sq RSS AIC\n", - "+ Hits 1 5533119 30646560 3074.1\n", - "+ Runs 1 5176532 31003147 3077.2\n", - "+ Walks 1 4199733 31979946 3085.3\n", - "+ AtBat 1 4064585 32115095 3086.4\n", - "+ RBI 1 3308272 32871407 3092.6\n", - "+ PutOuts 1 3267035 32912644 3092.9\n", - "+ Division 1 1733887 34445793 3104.9\n", - "+ Years 1 1667339 34512340 3105.4\n", - "+ HmRun 1 1271587 34908092 3108.4\n", - "+ CRuns 1 354561 35825119 3115.2\n", - "+ Assists 1 346020 35833659 3115.2\n", - " 36179679 3115.8\n", - "+ Errors 1 194403 35985276 3116.4\n", - "+ CAtBat 1 92261 36087418 3117.1\n", - "+ CHits 1 75469 36104210 3117.2\n", - "+ CWalks 1 51974 36127705 3117.4\n", - "+ NewLeague 1 17778 36161901 3117.7\n", - "+ League 1 11825 36167855 3117.7\n", - "+ CHmRun 1 515 36179165 3117.8\n", - "\n", - "Step: AIC=3074.13\n", - "Salary ~ CRBI + Hits\n", - "\n", - " Df Sum of Sq RSS AIC\n", - "+ PutOuts 1 1397263 29249297 3063.8\n", - "+ Division 1 1279275 29367285 3064.9\n", - "+ AtBat 1 821767 29824793 3069.0\n", - "+ Walks 1 781767 29864793 3069.3\n", - "+ Years 1 254910 30391650 3073.9\n", - " 30646560 3074.1\n", - "+ League 1 208880 30437680 3074.3\n", - "+ CRuns 1 132614 30513946 3075.0\n", - "+ NewLeague 1 118474 30528086 3075.1\n", - "+ Runs 1 114198 30532362 3075.1\n", - "+ Errors 1 99776 30546784 3075.3\n", - "+ CAtBat 1 83517 30563043 3075.4\n", - "+ Assists 1 44781 30601779 3075.7\n", - "+ CWalks 1 23668 30622892 3075.9\n", - "+ CHmRun 1 4790 30641769 3076.1\n", - "+ CHits 1 4358 30642202 3076.1\n", - "+ HmRun 1 2173 30644387 3076.1\n", - "+ RBI 1 1137 30645423 3076.1\n", - "\n", - "Step: AIC=3063.85\n", - "Salary ~ CRBI + Hits + PutOuts\n", - "\n", - " Df Sum of Sq RSS AIC\n", - "+ Division 
1 1278445 27970852 3054.1\n", - "+ AtBat 1 1009933 28239364 3056.6\n", - "+ Walks 1 539490 28709807 3061.0\n", - "+ CRuns 1 273649 28975648 3063.4\n", - " 29249297 3063.8\n", - "+ Years 1 136906 29112391 3064.6\n", - "+ League 1 122841 29126456 3064.8\n", - "+ Runs 1 117930 29131367 3064.8\n", - "+ Errors 1 97244 29152053 3065.0\n", - "+ NewLeague 1 57839 29191458 3065.3\n", - "+ CHits 1 35096 29214201 3065.5\n", - "+ RBI 1 33965 29215331 3065.6\n", - "+ HmRun 1 31227 29218070 3065.6\n", - "+ CWalks 1 28572 29220725 3065.6\n", - "+ CAtBat 1 20518 29228779 3065.7\n", - "+ Assists 1 1681 29247616 3065.8\n", - "+ CHmRun 1 1419 29247878 3065.8\n", - "\n", - "Step: AIC=3054.1\n", - "Salary ~ CRBI + Hits + PutOuts + Division\n", - "\n", - " Df Sum of Sq RSS AIC\n", - "+ AtBat 1 820952 27149899 3048.3\n", - "+ Walks 1 491584 27479268 3051.4\n", - " 27970852 3054.1\n", - "+ CRuns 1 193604 27777248 3054.3\n", - "+ Years 1 166845 27804007 3054.5\n", - "+ League 1 110628 27860224 3055.1\n", - "+ Errors 1 81385 27889467 3055.3\n", - "+ Runs 1 65921 27904931 3055.5\n", - "+ RBI 1 53719 27917133 3055.6\n", - "+ NewLeague 1 52275 27918577 3055.6\n", - "+ CHits 1 33863 27936989 3055.8\n", - "+ HmRun 1 26390 27944462 3055.8\n", - "+ CAtBat 1 18751 27952101 3055.9\n", - "+ CWalks 1 5723 27965129 3056.0\n", - "+ Assists 1 1036 27969816 3056.1\n", - "+ CHmRun 1 165 27970687 3056.1\n", - "\n", - "Step: AIC=3048.26\n", - "Salary ~ CRBI + Hits + PutOuts + Division + AtBat\n", - "\n", - " Df Sum of Sq RSS AIC\n", - "+ Walks 1 954996 26194904 3040.8\n", - "+ Years 1 253362 26896537 3047.8\n", - "+ Runs 1 208743 26941157 3048.2\n", - " 27149899 3048.3\n", - "+ CRuns 1 185825 26964075 3048.5\n", - "+ League 1 95986 27053913 3049.3\n", - "+ NewLeague 1 52693 27097206 3049.8\n", - "+ CHmRun 1 43173 27106726 3049.8\n", - "+ Assists 1 28898 27121001 3050.0\n", - "+ CAtBat 1 20989 27128910 3050.1\n", - "+ CWalks 1 15599 27134301 3050.1\n", - "+ Errors 1 6265 27143634 3050.2\n", - "+ CHits 1 5305 
27144594 3050.2\n", - "+ RBI 1 1236 27148663 3050.2\n", - "+ HmRun 1 11 27149888 3050.3\n", - "\n", - "Step: AIC=3040.85\n", - "Salary ~ CRBI + Hits + PutOuts + Division + AtBat + Walks\n", - "\n", - " Df Sum of Sq RSS AIC\n", - "+ CWalks 1 240687 25954217 3040.4\n", - " 26194904 3040.8\n", - "+ Years 1 184508 26010396 3041.0\n", - "+ CRuns 1 110695 26084209 3041.7\n", - "+ League 1 77974 26116930 3042.1\n", - "+ Assists 1 75782 26119122 3042.1\n", - "+ NewLeague 1 40909 26153995 3042.4\n", - "+ CHits 1 37304 26157599 3042.5\n", - "+ RBI 1 11728 26183176 3042.7\n", - "+ HmRun 1 4747 26190157 3042.8\n", - "+ Errors 1 2727 26192177 3042.8\n", - "+ CAtBat 1 2630 26192274 3042.8\n", - "+ CHmRun 1 943 26193961 3042.8\n", - "+ Runs 1 37 26194867 3042.8\n", - "\n", - "Step: AIC=3040.42\n", - "Salary ~ CRBI + Hits + PutOuts + Division + AtBat + Walks + CWalks\n", - "\n", - " Df Sum of Sq RSS AIC\n", - "+ CRuns 1 794983 25159234 3034.2\n", - "+ CHits 1 273728 25680489 3039.6\n", - " 25954217 3040.4\n", - "+ Assists 1 138506 25815711 3041.0\n", - "+ CAtBat 1 89289 25864929 3041.5\n", - "+ RBI 1 86941 25867276 3041.5\n", - "+ League 1 77159 25877058 3041.6\n", - "+ Years 1 70126 25884091 3041.7\n", - "+ NewLeague 1 37807 25916410 3042.0\n", - "+ HmRun 1 33601 25920616 3042.1\n", - "+ CHmRun 1 9034 25945183 3042.3\n", - "+ Errors 1 6928" - ] - } - ], - "source": [ - "%%R -i Hitters\n", - "step(lm(Salary ~ 1, data=Hitters), scope=list(upper=lm(Salary ~ ., data=Hitters)), direction='forward', trace=TRUE)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6117f650", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "536a8bc3", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "bddc13c5", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/docs/source/models/spec.ipynb 
b/docs/source/models/spec.ipynb index d6ba7b0..fce6b32 100644 --- a/docs/source/models/spec.ipynb +++ b/docs/source/models/spec.ipynb @@ -31,7 +31,7 @@ "from ISLP.models import (ModelSpec,\n", " summarize,\n", " Column,\n", - " Variable,\n", + " Feature,\n", " build_columns)\n", "\n", "import statsmodels.api as sm" @@ -257,7 +257,7 @@ "metadata": {}, "source": [ "We note that a column has been added for the intercept by default. This can be changed using the\n", - "`intercept` argument. " + "`intercept` argument." ] }, { @@ -391,7 +391,7 @@ "in the column space of the design matrix.\n", "\n", "To include this intercept via `ShelveLoc` we can use 3 columns to encode this categorical variable. Following the nomenclature of\n", - "`R`, we call this a `Contrast` of the categorical variable. " + "`R`, we call this a `Contrast` of the categorical variable." ] }, { @@ -597,14 +597,6 @@ "shelve.get_columns(Carseats)" ] }, - { - "cell_type": "markdown", - "id": "5d8b048f-3c31-47ac-8946-0662f5e57b63", - "metadata": {}, - "source": [ - "shelve.get_columns?" - ] - }, { "cell_type": "markdown", "id": "269e6d18-4ae4-4a77-8498-90281ae7c803", @@ -946,7 +938,7 @@ "\n", "The first argument to `ModelSpec` is stored as the `terms` attribute. Under the hood,\n", "this sequence is inspected to produce the `terms_` attribute which specify the objects\n", - "that will ultimately create the design matrix. " + "that will ultimately create the design matrix." 
] }, { @@ -958,8 +950,8 @@ { "data": { "text/plain": [ - "[Variable(variables=('ShelveLoc',), name='ShelveLoc', encoder=None, use_transform=True, pure_columns=True, override_encoder_colnames=False),\n", - " Variable(variables=('Price',), name='Price', encoder=None, use_transform=True, pure_columns=True, override_encoder_colnames=False)]" + "[Feature(variables=('ShelveLoc',), name='ShelveLoc', encoder=None, use_transform=True, pure_columns=True, override_encoder_colnames=False),\n", + " Feature(variables=('Price',), name='Price', encoder=None, use_transform=True, pure_columns=True, override_encoder_colnames=False)]" ] }, "execution_count": 13, @@ -978,7 +970,7 @@ "id": "warming-mobile", "metadata": {}, "source": [ - "Each element of `terms_` should be a `Variable` which describes a set of columns to be extracted from\n", + "Each element of `terms_` should be a `Feature` which describes a set of columns to be extracted from\n", "a columnar data form as well as possible a possible encoder." ] }, @@ -1134,17 +1126,17 @@ "id": "former-spring", "metadata": {}, "source": [ - "### `Variable` objects\n", + "### `Feature` objects\n", "\n", - "Note that `Variable` objects have a tuple of `variables` as well as an `encoder` attribute. The\n", + "Note that `Feature` objects have a tuple of `variables` as well as an `encoder` attribute. The\n", "tuple of `variables` first creates a concatenated dataframe from all corresponding variables and then\n", - "is run through `encoder.transform`. The `encoder.fit` method of each `Variable` is run once during \n", + "is run through `encoder.transform`. The `encoder.fit` method of each `Feature` is run once during \n", "the call to `ModelSpec.fit`." 
] }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 18, "id": "floral-liabilities", "metadata": {}, "outputs": [ @@ -1263,15 +1255,13 @@ "[400 rows x 3 columns]" ] }, - "execution_count": 16, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "from ISLP.models.model_spec import Variable\n", - "\n", - "new_var = Variable(('Price', 'Income', 'OIncome'), name='mynewvar', encoder=None)\n", + "new_var = Feature(('Price', 'Income', 'OIncome'), name='mynewvar', encoder=None)\n", "build_columns(MS.column_info_,\n", " Carseats, \n", " new_var)[0]" @@ -1288,18 +1278,10 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 20, "id": "imported-measure", "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but PCA was fitted with feature names\n", - " warnings.warn(\n" - ] - }, { "data": { "text/html": [ @@ -1403,7 +1385,7 @@ "[400 rows x 2 columns]" ] }, - "execution_count": 17, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -1412,7 +1394,7 @@ "from sklearn.decomposition import PCA\n", "pca = PCA(n_components=2)\n", "pca.fit(build_columns(MS.column_info_, Carseats, new_var)[0]) # this is done within `ModelSpec.fit`\n", - "pca_var = Variable(('Price', 'Income', 'OIncome'), name='mynewvar', encoder=pca)\n", + "pca_var = Feature(('Price', 'Income', 'OIncome'), name='mynewvar', encoder=pca)\n", "build_columns(MS.column_info_,\n", " Carseats, \n", " pca_var)[0]" @@ -1424,23 +1406,15 @@ "metadata": {}, "source": [ "The elements of the `variables` attribute may be column identifiers ( `\"Price\"`), `Column` instances (`price`)\n", - "or `Variable` instances (`pca_var`)." + "or `Feature` instances (`pca_var`)." 
] }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 21, "id": "western-bloom", "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but PCA was fitted with feature names\n", - " warnings.warn(\n" - ] - }, { "data": { "text/html": [ @@ -1568,14 +1542,14 @@ "[400 rows x 4 columns]" ] }, - "execution_count": 18, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "price = MS.column_info_['Price']\n", - "fancy_var = Variable(('Income', price, pca_var), name='fancy', encoder=None)\n", + "fancy_var = Feature(('Income', price, pca_var), name='fancy', encoder=None)\n", "build_columns(MS.column_info_,\n", " Carseats, \n", " fancy_var)[0]" @@ -1583,121 +1557,95 @@ }, { "cell_type": "markdown", - "id": "absent-branch", + "id": "e289feba-e3f5-48e0-9e29-cdd88d7f9923", "metadata": {}, "source": [ - "## Predicting at new points\n", - "\n", - "As `ModelSpec` is a transformer, it can be evaluated at new feature values.\n", - "Constructing the design matrix at any values is carried out by the `transform` method." 
+ "## Predicting at new points" ] }, { "cell_type": "code", - "execution_count": 19, - "id": "naked-hollywood", + "execution_count": 22, + "id": "6efed2fa-9e5d-429c-a8d9-ac544cab2b41", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "array([ 9.73389663, 26.06456997])" + "intercept 12.661546\n", + "Price -0.052213\n", + "Income 0.012829\n", + "dtype: float64" ] }, - "execution_count": 19, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "new_data = pd.DataFrame({'Income':['Bad', 'Good'], 'Price':[40, 50]})\n", - "new_X = MS.transform(new_data)\n", - "M_ols.get_prediction(new_X).predicted_mean" + "MS = ModelSpec(['Price', 'Income']).fit(Carseats)\n", + "X = MS.transform(Carseats)\n", + "Y = Carseats['Sales']\n", + "M_ols = sm.OLS(Y, X).fit()\n", + "M_ols.params" ] }, { "cell_type": "markdown", - "id": "signal-yahoo", + "id": "e6b4609b-fcb2-4cc2-b630-509df4c87546", "metadata": {}, "source": [ - "## Using `np.ndarray`\n", - "\n", - "As the basic model is to concatenate columns extracted from a columnar data\n", - "representation, one *can* use `np.ndarray` as the column data. In this case,\n", - "columns will be selected by integer indices. \n", - "\n", - "### Caveats using `np.ndarray`\n", - "\n", - "If the `terms` only refer to a few columns of the data frame, the `transform` method only needs a dataframe with those columns.\n", - "However,\n", - "unless all features are floats, `np.ndarray` will default to a dtype of `object`, complicating issues.\n", - "\n", - "However, if we had used an `np.ndarray`, the column identifiers would be integers identifying specific columns so,\n", - "in order to work correctly, `transform` would need another `np.ndarray` where the columns have the same meaning. \n", - "\n", - "We illustrate this below, where we build a model from `Price` and `Income` for `Sales` and want to find predictions at new\n", - "values of `Price` and `Location`. 
We first find the predicitions using `pd.DataFrame` and then illustrate the difficulties\n", - "in using `np.ndarray`." + "As `ModelSpec` is a transformer, it can be evaluated at new feature values.\n", + "Constructing the design matrix at any values is carried out by the `transform` method." ] }, { "cell_type": "code", - "execution_count": 42, - "id": "964ecc79-7303-410c-b258-2d58341c7dc0", + "execution_count": 23, + "id": "8784b0e8-ce53-4a90-aee6-b935834295c7", "metadata": {}, "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " intercept Price Income\n", - "0 1.0 40 10\n", - "1 1.0 50 20\n" - ] - }, { "data": { "text/plain": [ - "intercept 12.661546\n", - "Price -0.052213\n", - "Income 0.012829\n", - "dtype: float64" + "array([10.70130676, 10.307465 ])" ] }, - "execution_count": 42, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "MS = ModelSpec(['Price', 'Income']).fit(Carseats)\n", - "M_ols = sm.OLS(Y, MS.transform(Carseats)).fit()\n", - "\n", "new_data = pd.DataFrame({'Price':[40, 50], 'Income':[10, 20]})\n", "new_X = MS.transform(new_data)\n", - "print(new_X)\n", - "M_ols.params" + "M_ols.get_prediction(new_X).predicted_mean" ] }, { - "cell_type": "code", - "execution_count": 25, - "id": "a42c239e-a5eb-4c5d-919e-16c4d58d1c8d", + "cell_type": "markdown", + "id": "signal-yahoo", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([10.70130676, 10.307465 ])" - ] - }, - "execution_count": 25, - "metadata": {}, - "output_type": "execute_result" - } - ], "source": [ - "M_ols.get_prediction(new_X).predicted_mean" + "## Using `np.ndarray`\n", + "\n", + "As the basic model is to concatenate columns extracted from a columnar data\n", + "representation, one *can* use `np.ndarray` as the column data. In this case,\n", + "columns will be selected by integer indices. 
\n", + "\n", + "### Caveats using `np.ndarray`\n", + "\n", + "If the `terms` only refer to a few columns of the data frame, the `transform` method only needs a dataframe with those columns.\n", + "However,\n", + "unless all features are floats, `np.ndarray` will default to a dtype of `object`, complicating issues.\n", + "\n", + "However, if we had used an `np.ndarray`, the column identifiers would be integers identifying specific columns so,\n", + "in order to work correctly, `transform` would need another `np.ndarray` where the columns have the same meaning. \n", + "\n", + "We illustrate this below, where we build a model from `Price` and `Income` for `Sales` and want to find predictions at new\n", + "values of `Price` and `Income`. We first find the predictions using `pd.DataFrame` and then illustrate the difficulties\n", + "in using `np.ndarray`." ] }, {
+1788,7 @@ "new_D = np.array([[40,50], [np.nan, np.nan], [10,20]]).T\n", "new_X = MS_np.transform(new_D)\n", "print(new_X)\n", - "M_ols.get_prediction(new_X).predicted_mean\n" + "M_ols.get_prediction(new_X).predicted_mean" ] }, { @@ -1855,10 +1803,10 @@ ], "metadata": { "jupytext": { - "formats": "ipynb" + "formats": "source/models///ipynb,jupyterbook/models///md:myst,jupyterbook/models///ipynb" }, "kernelspec": { - "display_name": "python3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -1872,7 +1820,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.13" + "version": "3.10.10" } }, "nbformat": 4, diff --git a/docs/source/models/submodels.ipynb b/docs/source/models/submodels.ipynb deleted file mode 100644 index 825bedd..0000000 --- a/docs/source/models/submodels.ipynb +++ /dev/null @@ -1,3127 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "ee33d364", - "metadata": {}, - "source": [ - "# Building design matrices with `ModelSpec`\n", - "\n", - "Force rebuild" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "4c70fbaa", - "metadata": {}, - "outputs": [], - "source": [ - "x=4\n", - "import numpy as np, pandas as pd\n", - "%load_ext rpy2.ipython\n", - "\n", - "from ISLP import load_data\n", - "from ISLP.models import ModelSpec\n", - "\n", - "import statsmodels.api as sm" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "8a708215", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['Sales', 'CompPrice', 'Income', 'Advertising', 'Population', 'Price',\n", - " 'ShelveLoc', 'Age', 'Education', 'Urban', 'US'],\n", - " dtype='object')" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Carseats = load_data('Carseats')\n", - "%R -i Carseats\n", - "Carseats.columns" - ] - }, - { - "cell_type": "markdown", - "id": "dad5e991", - "metadata": {}, - 
"source": [ - "## Let's break up income into groups" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "ac7086a5", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 M\n", - "1 L\n", - "2 L\n", - "3 H\n", - "4 M\n", - " ..\n", - "395 H\n", - "396 L\n", - "397 L\n", - "398 M\n", - "399 L\n", - "Name: OIncome, Length: 400, dtype: category\n", - "Categories (3, object): ['L' < 'M' < 'H']" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Carseats['OIncome'] = pd.cut(Carseats['Income'], \n", - " [0,50,90,200], \n", - " labels=['L','M','H'])\n", - "Carseats['OIncome']" - ] - }, - { - "cell_type": "markdown", - "id": "261446c8", - "metadata": {}, - "source": [ - "Let's also create an unordered version" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "674bb806", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 M\n", - "1 L\n", - "2 L\n", - "3 H\n", - "4 M\n", - " ..\n", - "395 H\n", - "396 L\n", - "397 L\n", - "398 M\n", - "399 L\n", - "Name: UIncome, Length: 400, dtype: category\n", - "Categories (3, object): ['L', 'M', 'H']" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Carseats['UIncome'] = pd.cut(Carseats['Income'], \n", - " [0,50,90,200], \n", - " labels=['L','M','H'],\n", - " ordered=False)\n", - "Carseats['UIncome']" - ] - }, - { - "cell_type": "markdown", - "id": "8f030039", - "metadata": {}, - "source": [ - "## A simple model" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "40cd6c28", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['intercept', 'Price', 'Income'], dtype='object')" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design = ModelSpec(['Price', 'Income'])\n", - "X = design.fit_transform(Carseats)\n", - "X.columns" - ] - }, - { - 
"cell_type": "code", - "execution_count": 6, - "id": "e65f5607", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "intercept 12.661546\n", - "Price -0.052213\n", - "Income 0.012829\n", - "dtype: float64" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Y = Carseats['Sales']\n", - "M = sm.OLS(Y, X).fit()\n", - "M.params" - ] - }, - { - "cell_type": "markdown", - "id": "29d9b55f", - "metadata": {}, - "source": [ - "## Basic procedure\n", - "\n", - "The design matrix is built by cobbling together a set of columns and possibly transforming them.\n", - "A `pd.DataFrame` is essentially a list of columns. One of the first tasks done in `ModelSpec.fit`\n", - "is to inspect a dataframe for column info. The column `ShelveLoc` is categorical:" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "cfbe5b92", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 Bad\n", - "1 Good\n", - "2 Medium\n", - "3 Medium\n", - "4 Bad\n", - " ... \n", - "395 Good\n", - "396 Medium\n", - "397 Medium\n", - "398 Bad\n", - "399 Good\n", - "Name: ShelveLoc, Length: 400, dtype: category\n", - "Categories (3, object): ['Bad', 'Good', 'Medium']" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Carseats['ShelveLoc']" - ] - }, - { - "cell_type": "markdown", - "id": "7092f666", - "metadata": {}, - "source": [ - "This is recognized by `ModelSpec` in the form of `Column` objects which are just named tuples with two methods\n", - "`get_columns` and `fit_encoder`." 
- ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "e2d43844", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Column(idx='ShelveLoc', name='ShelveLoc', is_categorical=True, is_ordinal=False, columns=('ShelveLoc[Good]', 'ShelveLoc[Medium]'), encoder=Contrast())" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design.column_info_['ShelveLoc']" - ] - }, - { - "cell_type": "markdown", - "id": "46a01612", - "metadata": {}, - "source": [ - "It recognized ordinal columns as well." - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "465a9326", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Column(idx='OIncome', name='OIncome', is_categorical=True, is_ordinal=True, columns=('OIncome',), encoder=OrdinalEncoder())" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design.column_info_['OIncome']" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "76f8480d", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(array([ 73, 48, 35, 100]), ('Income',))" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "income = design.column_info_['Income']\n", - "cols, names = income.get_columns(Carseats)\n", - "(cols[:4], names)" - ] - }, - { - "cell_type": "markdown", - "id": "25fcc1de", - "metadata": {}, - "source": [ - "## Encoding a column\n", - "\n", - "In building a design matrix we must extract columns from our dataframe (or `np.ndarray`). Categorical\n", - "variables usually are encoded by several columns, typically one less than the number of categories.\n", - "This task is handled by the `encoder` of the `Column`. The encoder must satisfy the `sklearn` transform\n", - "model, i.e. `fit` on some array and `transform` on future arrays. 
The `fit_encoder` method of `Column` fits\n", - "its encoder the first time data is passed to it." - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "dfe6cc35", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(array([[0., 0.],\n", - " [1., 0.],\n", - " [0., 1.],\n", - " [0., 1.]]),\n", - " ['ShelveLoc[Good]', 'ShelveLoc[Medium]'])" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "shelve = design.column_info_['ShelveLoc']\n", - "cols, names = shelve.get_columns(Carseats)\n", - "(cols[:4], names)" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "8fc9779a", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[2.],\n", - " [1.],\n", - " [1.],\n", - " [0.]])" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "oincome = design.column_info_['OIncome']\n", - "oincome.get_columns(Carseats)[0][:4]" - ] - }, - { - "cell_type": "markdown", - "id": "8e04da60", - "metadata": {}, - "source": [ - "## The terms\n", - "\n", - "The design matrix consists of several sets of columns. This is managed by the `ModelSpec` through\n", - "the `terms` argument which should be a sequence. The elements of `terms` are often\n", - "going to be strings (or tuples of strings for interactions, see below) but are converted to a\n", - "`Variable` object and stored in the `terms_` of the fitted `ModelSpec`. A `Variable` is just a named tuple." 
- ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "c579dbce", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['Price', 'Income']" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design.terms" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "4587b8bd", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[Variable(variables=('Price',), name='Price', encoder=None, use_transform=True, pure_columns=True, override_encoder_colnames=False),\n", - " Variable(variables=('Income',), name='Income', encoder=None, use_transform=True, pure_columns=True, override_encoder_colnames=False)]" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design.terms_" - ] - }, - { - "cell_type": "markdown", - "id": "2595f0fa", - "metadata": {}, - "source": [ - "While each `Column` can itself extract data, they are all promoted to `Variable` to be of a uniform type. A\n", - "`Variable` can also create columns through the `build_columns` method of `ModelSpec`" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "03bd9366", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "( Price\n", - " 0 120\n", - " 1 83\n", - " 2 80\n", - " 3 97\n", - " 4 128\n", - " .. ...\n", - " 395 128\n", - " 396 120\n", - " 397 159\n", - " 398 95\n", - " 399 120\n", - " \n", - " [400 rows x 1 columns],\n", - " ['Price'])" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "price = design.terms_[0]\n", - "design.build_columns(Carseats, price)" - ] - }, - { - "cell_type": "markdown", - "id": "de04ca48", - "metadata": {}, - "source": [ - "Note that `Variable` objects have a tuple of `variables` as well as an `encoder` attribute. 
The\n", - "tuple of `variables` first creates a concatenated dataframe from all corresponding variables and then\n", - "is run through `encoder.transform`. The `encoder.fit` method of each `Variable` is run once during \n", - "the call to `ModelSpec.fit`." - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "a42af4c5", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "( Price Income UIncome[L] UIncome[M]\n", - " 0 120.0 73.0 0.0 1.0\n", - " 1 83.0 48.0 1.0 0.0\n", - " 2 80.0 35.0 1.0 0.0\n", - " 3 97.0 100.0 0.0 0.0\n", - " 4 128.0 64.0 0.0 1.0\n", - " .. ... ... ... ...\n", - " 395 128.0 108.0 0.0 0.0\n", - " 396 120.0 23.0 1.0 0.0\n", - " 397 159.0 26.0 1.0 0.0\n", - " 398 95.0 79.0 0.0 1.0\n", - " 399 120.0 37.0 1.0 0.0\n", - " \n", - " [400 rows x 4 columns],\n", - " ['Price', 'Income', 'UIncome[L]', 'UIncome[M]'])" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from ISLP.models.model_spec import Variable\n", - "\n", - "new_var = Variable(('Price', 'Income', 'UIncome'), name='mynewvar', encoder=None)\n", - "design.build_columns(Carseats, new_var)" - ] - }, - { - "cell_type": "markdown", - "id": "b146d0c0", - "metadata": {}, - "source": [ - "Let's now transform these columns with an encoder. Within `ModelSpec` we will first build the\n", - "arrays above and then call `pca.fit` and finally `pca.transform` within `design.build_columns`." 
- ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "b6c394a6", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but PCA was fitted with feature names\n", - " warnings.warn(\n" - ] - }, - { - "data": { - "text/plain": [ - "( mynewvar[0] mynewvar[1]\n", - " 0 -3.608693 -4.853177\n", - " 1 15.081506 35.708630\n", - " 2 27.422871 40.774250\n", - " 3 -33.973209 13.470489\n", - " 4 6.567316 -11.290100\n", - " .. ... ...\n", - " 395 -36.846346 -18.415783\n", - " 396 45.741500 3.245602\n", - " 397 49.097533 -35.725355\n", - " 398 -13.577772 18.845139\n", - " 399 31.927566 0.978436\n", - " \n", - " [400 rows x 2 columns],\n", - " ['mynewvar[0]', 'mynewvar[1]'])" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from sklearn.decomposition import PCA\n", - "pca = PCA(n_components=2)\n", - "pca.fit(design.build_columns(Carseats, new_var)[0]) # this is done within `ModelSpec.fit`\n", - "pca_var = Variable(('Price', 'Income', 'UIncome'), name='mynewvar', encoder=pca)\n", - "design.build_columns(Carseats, pca_var)" - ] - }, - { - "cell_type": "markdown", - "id": "3bb30a3f", - "metadata": {}, - "source": [ - "The elements of the `variables` attribute may be column identifiers ( `\"Price\"`), `Column` instances (`price`)\n", - "or `Variable` instances (`pca_var`)." 
- ] - }, - { - "cell_type": "code", - "execution_count": 18, - "id": "ea7770ff", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but PCA was fitted with feature names\n", - " warnings.warn(\n" - ] - }, - { - "data": { - "text/plain": [ - "( Price Price mynewvar[0] mynewvar[1]\n", - " 0 120.0 120.0 -3.608693 -4.853177\n", - " 1 83.0 83.0 15.081506 35.708630\n", - " 2 80.0 80.0 27.422871 40.774250\n", - " 3 97.0 97.0 -33.973209 13.470489\n", - " 4 128.0 128.0 6.567316 -11.290100\n", - " .. ... ... ... ...\n", - " 395 128.0 128.0 -36.846346 -18.415783\n", - " 396 120.0 120.0 45.741500 3.245602\n", - " 397 159.0 159.0 49.097533 -35.725355\n", - " 398 95.0 95.0 -13.577772 18.845139\n", - " 399 120.0 120.0 31.927566 0.978436\n", - " \n", - " [400 rows x 4 columns],\n", - " ['Price', 'Price', 'mynewvar[0]', 'mynewvar[1]'])" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "fancy_var = Variable(('Price', price, pca_var), name='fancy', encoder=None)\n", - "design.build_columns(Carseats, fancy_var)" - ] - }, - { - "cell_type": "markdown", - "id": "b2b4a01a", - "metadata": {}, - "source": [ - "We can of course run PCA again on these features (if we wanted)." 
- ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "21ad8b44", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but PCA was fitted with feature names\n", - " warnings.warn(\n", - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but PCA was fitted with feature names\n", - " warnings.warn(\n", - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but PCA was fitted with feature names\n", - " warnings.warn(\n" - ] - }, - { - "data": { - "text/plain": [ - "( fancy_pca[0] fancy_pca[1]\n", - " 0 -6.951792 4.859283\n", - " 1 55.170148 -24.694875\n", - " 2 59.418556 -38.033572\n", - " 3 34.722389 28.922184\n", - " 4 -21.419184 -3.120673\n", - " .. ... 
...\n", - " 395 -18.257348 40.760122\n", - " 396 -10.546709 -45.021658\n", - " 397 -77.706359 -37.174379\n", - " 398 36.668694 7.730851\n", - " 399 -9.540535 -31.059122\n", - " \n", - " [400 rows x 2 columns],\n", - " ['fancy_pca[0]', 'fancy_pca[1]'])" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pca2 = PCA(n_components=2)\n", - "pca2.fit(design.build_columns(Carseats, fancy_var)[0]) # this is done within `ModelSpec.fit`\n", - "pca2_var = Variable(('Price', price, pca_var), name='fancy_pca', encoder=pca2)\n", - "design.build_columns(Carseats, pca2_var)" - ] - }, - { - "cell_type": "markdown", - "id": "2262377d", - "metadata": {}, - "source": [ - "## Building the design matrix\n", - "\n", - "With these notions in mind, the final design is essentially then" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "1654ca47", - "metadata": {}, - "outputs": [], - "source": [ - "X_hand = np.column_stack([design.build_columns(Carseats, v)[0] for v in design.terms_])[:4]" - ] - }, - { - "cell_type": "markdown", - "id": "1db0e0a9", - "metadata": {}, - "source": [ - "An intercept column is added if `design.intercept` is `True` and if the original argument to `transform` is\n", - "a dataframe the index is adjusted accordingly." - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": "d20e8ea8", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design.intercept" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "id": "450fe910", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
interceptPriceIncome
01.012073
11.08348
21.08035
31.097100
\n", - "
" - ], - "text/plain": [ - " intercept Price Income\n", - "0 1.0 120 73\n", - "1 1.0 83 48\n", - "2 1.0 80 35\n", - "3 1.0 97 100" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design.transform(Carseats)[:4]" - ] - }, - { - "cell_type": "markdown", - "id": "0705ba6f", - "metadata": {}, - "source": [ - "## Predicting\n", - "\n", - "Constructing the design matrix at any values is carried out by the `transform` method." - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "id": "866c2863", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([12.65257604, 12.25873428])" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "new_data = pd.DataFrame({'Price':[10,20], 'Income':[40, 50]})\n", - "new_X = design.transform(new_data)\n", - "M.get_prediction(new_X).predicted_mean" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "id": "f2021166", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " 0 1 \n", - "12.65258 12.25873 \n" - ] - } - ], - "source": [ - "%%R -i new_data,Carseats\n", - "predict(lm(Sales ~ Price + Income, data=Carseats), new_data)" - ] - }, - { - "cell_type": "markdown", - "id": "20e1a31a", - "metadata": {}, - "source": [ - "### Difference between using `pd.DataFrame` and `np.ndarray`\n", - "\n", - "If the `terms` only refer to a few columns of the data frame, the `transform` method only needs a dataframe with those columns.\n", - "\n", - "If we had used an `np.ndarray`, the column identifiers would be integers identifying specific columns so,\n", - "in order to work correctly, `transform` would need another `np.ndarray` where the columns have the same meaning." 
- ] - }, - { - "cell_type": "code", - "execution_count": 25, - "id": "a5926ec9", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[1.0, 120, 73],\n", - " [1.0, 83, 48],\n", - " [1.0, 80, 35],\n", - " [1.0, 97, 100]], dtype=object)" - ] - }, - "execution_count": 25, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Carseats_np = np.asarray(Carseats[['Price', 'ShelveLoc', 'US', 'Income']])\n", - "design_np = ModelSpec([0,3]).fit(Carseats_np)\n", - "design_np.transform(Carseats_np)[:4]" - ] - }, - { - "cell_type": "markdown", - "id": "997a63cb", - "metadata": {}, - "source": [ - "The following will fail for hopefully obvious reasons" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "id": "40410c48", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "shapes (2,2) and (3,) not aligned: 2 (dim 1) != 3 (dim 0)\n" - ] - } - ], - "source": [ - "try:\n", - " new_D = np.zeros((2,2))\n", - " new_D[:,0] = [10,20]\n", - " new_D[:,1] = [40,50]\n", - " M.get_prediction(new_D).predicted_mean\n", - "except ValueError as e:\n", - " print(e)" - ] - }, - { - "cell_type": "markdown", - "id": "920203e9", - "metadata": {}, - "source": [ - "Ultimately, `M` expects 3 columns for new predictions because it was fit\n", - "with a matrix having 3 columns (the first representing an intercept).\n", - "\n", - "We might be tempted to try as with the `pd.DataFrame` and produce\n", - "an `np.ndarray` with only the necessary variables." 
- ] - }, - { - "cell_type": "code", - "execution_count": 27, - "id": "1061da77", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "index 3 is out of bounds for axis 1 with size 2\n" - ] - } - ], - "source": [ - "try:\n", - " new_X = np.zeros((2,2))\n", - " new_X[:,0] = [10,20]\n", - " new_X[:,1] = [40,50]\n", - " new_D = design_np.transform(new_X)\n", - " M.get_prediction(new_D).predicted_mean\n", - "except IndexError as e:\n", - " print(e)" - ] - }, - { - "cell_type": "markdown", - "id": "c6bfe001", - "metadata": {}, - "source": [ - "This fails because `design_np` is looking for column `3` from its `terms`:" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "id": "5ae6d25f", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[Variable(variables=(0,), name='0', encoder=None, use_transform=True, pure_columns=True, override_encoder_colnames=False),\n", - " Variable(variables=(3,), name='3', encoder=None, use_transform=True, pure_columns=True, override_encoder_colnames=False)]" - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design_np.terms_" - ] - }, - { - "cell_type": "markdown", - "id": "edd7ebeb", - "metadata": {}, - "source": [ - "However, if we have an `np.ndarray` in which the first column indeed represents `Price` and the fourth indeed\n", - "represents `Income` then we can arrive at the correct answer by supplying such the array to `design_np.transform`:" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "id": "9455e532", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([12.65257604, 12.25873428])" - ] - }, - "execution_count": 29, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "new_X = np.zeros((2,4))\n", - "new_X[:,0] = [10,20]\n", - "new_X[:,3] = [40,50]\n", - "new_D = design_np.transform(new_X)\n", - "M.get_prediction(new_D).predicted_mean" - ] 
- }, - { - "cell_type": "markdown", - "id": "fd726791", - "metadata": {}, - "source": [ - "Given this subtlety about needing to supply arrays with identical column structure to `transform` when\n", - "using `np.ndarray` we presume that using a `pd.DataFrame` will be the more popular use case." - ] - }, - { - "cell_type": "markdown", - "id": "967d9ebc", - "metadata": {}, - "source": [ - "## A model with some categorical variables\n", - "\n", - "Categorical variables become `Column` instances with encoders." - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "id": "d0429b56", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Column(idx='UIncome', name='UIncome', is_categorical=True, is_ordinal=False, columns=('UIncome[L]', 'UIncome[M]'), encoder=Contrast())" - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design = ModelSpec(['Population', 'Price', 'UIncome', 'ShelveLoc']).fit(Carseats)\n", - "design.column_info_['UIncome']" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "id": "415e3fd0", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['intercept', 'Population', 'Price', 'UIncome[L]', 'UIncome[M]',\n", - " 'ShelveLoc[Good]', 'ShelveLoc[Medium]'],\n", - " dtype='object')" - ] - }, - "execution_count": 31, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "X = design.fit_transform(Carseats)\n", - "X.columns" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "id": "8a99c3a5", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "intercept 11.876012\n", - "Population 0.001163\n", - "Price -0.055725\n", - "UIncome[L] -1.042297\n", - "UIncome[M] -0.119123\n", - "ShelveLoc[Good] 4.999623\n", - "ShelveLoc[Medium] 1.964278\n", - "dtype: float64" - ] - }, - "execution_count": 32, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "sm.OLS(Y, X).fit().params" - 
] - }, - { - "cell_type": "code", - "execution_count": 33, - "id": "9250a28a", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " (Intercept) Population Price UIncomeM UIncomeH \n", - " 10.83371503 0.00116301 -0.05572469 0.92317388 1.04229679 \n", - " ShelveLocGood ShelveLocMedium \n", - " 4.99962319 1.96427771 \n" - ] - } - ], - "source": [ - "%%R\n", - "lm(Sales ~ Population + Price + UIncome + ShelveLoc, data=Carseats)$coef" - ] - }, - { - "cell_type": "markdown", - "id": "fe90c12c", - "metadata": {}, - "source": [ - "## Getting the encoding you want\n", - "\n", - "By default the level dropped by `ModelSpec` will be the first of the `categories_` values from \n", - "`sklearn.preprocessing.OneHotEncoder()`. We might wish to change this. It seems\n", - "as if the correct way to do this would be something like `Variable(('UIncome',), 'mynewencoding', new_encoder)`\n", - "where `new_encoder` would somehow drop the column we want dropped. \n", - "\n", - "However, when using the convenient identifier `UIncome` in the `variables` argument, this maps to the `Column` associated to `UIncome` within `design.column_info_`:" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "id": "0546ec84", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Column(idx='UIncome', name='UIncome', is_categorical=True, is_ordinal=False, columns=('UIncome[L]', 'UIncome[M]'), encoder=Contrast())" - ] - }, - "execution_count": 34, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design.column_info_['UIncome']" - ] - }, - { - "cell_type": "markdown", - "id": "6ec4fe65", - "metadata": {}, - "source": [ - "This column already has an encoder and `Column` instances are immutable as named tuples. Further, there are times when \n", - "we may want to encode `UIncome` differently within the same model. 
In the model below the main effect of `UIncome` is encoded with two columns while in the interaction `UIncome` (see below) has three columns. This is a design of interest\n", - "and we need a way to allow different encodings of the same column of `Carseats`" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "id": "61e7f56e", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Call:\n", - "lm(formula = Sales ~ UIncome:ShelveLoc + UIncome, data = Carseats)\n", - "\n", - "Coefficients:\n", - " (Intercept) UIncomeM UIncomeH \n", - " 5.1317 0.1151 1.1561 \n", - " UIncomeL:ShelveLocGood UIncomeM:ShelveLocGood UIncomeH:ShelveLocGood \n", - " 4.5121 5.5752 3.7381 \n", - "UIncomeL:ShelveLocMedium UIncomeM:ShelveLocMedium UIncomeH:ShelveLocMedium \n", - " 1.2473 2.4782 1.5141 \n", - "\n" - ] - } - ], - "source": [ - "%%R\n", - "lm(Sales ~ UIncome:ShelveLoc + UIncome, data=Carseats)" - ] - }, - { - "cell_type": "markdown", - "id": "802ed854", - "metadata": {}, - "source": [ - " We can create a new \n", - "`Column` with the encoder we want. For categorical variables, there is a convenience function to do so." - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "id": "82d7a01d", - "metadata": {}, - "outputs": [], - "source": [ - "from ISLP.models.model_spec import contrast\n", - "pref_encoding = contrast('UIncome', 'drop', 'L')" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "id": "e26849a1", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "( UIncome[M] UIncome[H]\n", - " 0 1.0 0.0\n", - " 1 0.0 0.0\n", - " 2 0.0 0.0\n", - " 3 0.0 1.0\n", - " 4 1.0 0.0\n", - " .. ... 
...\n", - " 395 0.0 1.0\n", - " 396 0.0 0.0\n", - " 397 0.0 0.0\n", - " 398 1.0 0.0\n", - " 399 0.0 0.0\n", - " \n", - " [400 rows x 2 columns],\n", - " ['UIncome[M]', 'UIncome[H]'])" - ] - }, - "execution_count": 37, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design.build_columns(Carseats, pref_encoding)" - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "id": "2fc4cd8c", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['intercept', 'Population', 'Price', 'UIncome[M]', 'UIncome[H]',\n", - " 'ShelveLoc[Good]', 'ShelveLoc[Medium]'],\n", - " dtype='object')" - ] - }, - "execution_count": 38, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design = ModelSpec(['Population', 'Price', pref_encoding, 'ShelveLoc']).fit(Carseats)\n", - "X = design.fit_transform(Carseats)\n", - "X.columns" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "id": "49e33d41", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "intercept 10.833715\n", - "Population 0.001163\n", - "Price -0.055725\n", - "UIncome[M] 0.923174\n", - "UIncome[H] 1.042297\n", - "ShelveLoc[Good] 4.999623\n", - "ShelveLoc[Medium] 1.964278\n", - "dtype: float64" - ] - }, - "execution_count": 39, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "sm.OLS(Y, X).fit().params" - ] - }, - { - "cell_type": "code", - "execution_count": 40, - "id": "ce018fdf", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " (Intercept) Population Price UIncomeM UIncomeH \n", - " 10.83371503 0.00116301 -0.05572469 0.92317388 1.04229679 \n", - " ShelveLocGood ShelveLocMedium \n", - " 4.99962319 1.96427771 \n" - ] - } - ], - "source": [ - "%%R\n", - "lm(Sales ~ Population + Price + UIncome + ShelveLoc, data=Carseats)$coef" - ] - }, - { - "cell_type": "markdown", - "id": "2d42b822", - "metadata": {}, - "source": [ - "## Interactions\n", - 
"\n", - "We've referred to interactions above. These are specified (by convenience) as tuples in the `terms` argument\n", - "to `ModelSpec`." - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "id": "fbb3e3ba", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "intercept 7.866634\n", - "UIncome[L]:ShelveLoc[Good] 4.512054\n", - "UIncome[L]:ShelveLoc[Medium] 1.247275\n", - "UIncome[M]:ShelveLoc[Good] 5.575170\n", - "UIncome[M]:ShelveLoc[Medium] 2.478163\n", - "UIncome[L] -2.734895\n", - "UIncome[M] -2.619745\n", - "dtype: float64" - ] - }, - "execution_count": 41, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design = ModelSpec([('UIncome', 'ShelveLoc'), 'UIncome'])\n", - "X = design.fit_transform(Carseats)\n", - "sm.OLS(Y, X).fit().params" - ] - }, - { - "cell_type": "markdown", - "id": "f9a7d4ad", - "metadata": {}, - "source": [ - "The tuples in `terms` are converted to `Variable` in the formalized `terms_` attribute by creating a `Variable` with\n", - "`variables` set to the tuple and the encoder an `Interaction` encoder which (unsurprisingly) creates the interaction columns from the concatenated data frames of `UIncome` and `ShelveLoc`." 
- ] - }, - { - "cell_type": "code", - "execution_count": 42, - "id": "5a6f8e69", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Variable(variables=('UIncome', 'ShelveLoc'), name='UIncome:ShelveLoc', encoder=Interaction(column_names={'ShelveLoc': ['ShelveLoc[Good]', 'ShelveLoc[Medium]'],\n", - " 'UIncome': ['UIncome[L]', 'UIncome[M]']},\n", - " columns={'ShelveLoc': range(2, 4), 'UIncome': range(0, 2)},\n", - " variables=['UIncome', 'ShelveLoc']), use_transform=True, pure_columns=False, override_encoder_colnames=False)" - ] - }, - "execution_count": 42, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design.terms_[0]" - ] - }, - { - "cell_type": "markdown", - "id": "98eef5c8", - "metadata": {}, - "source": [ - "Comparing this to the previous `R` model." - ] - }, - { - "cell_type": "code", - "execution_count": 43, - "id": "58c99601", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Call:\n", - "lm(formula = Sales ~ UIncome:ShelveLoc + UIncome, data = Carseats)\n", - "\n", - "Coefficients:\n", - " (Intercept) UIncomeM UIncomeH \n", - " 5.1317 0.1151 1.1561 \n", - " UIncomeL:ShelveLocGood UIncomeM:ShelveLocGood UIncomeH:ShelveLocGood \n", - " 4.5121 5.5752 3.7381 \n", - "UIncomeL:ShelveLocMedium UIncomeM:ShelveLocMedium UIncomeH:ShelveLocMedium \n", - " 1.2473 2.4782 1.5141 \n", - "\n" - ] - } - ], - "source": [ - "%%R\n", - "lm(Sales ~ UIncome:ShelveLoc + UIncome, data=Carseats)" - ] - }, - { - "cell_type": "markdown", - "id": "9c979d7e", - "metadata": {}, - "source": [ - "We note a few important things:\n", - "\n", - "1. `R` has reorganized the columns of the design from the formula: although we wrote `UIncome:ShelveLoc` first these\n", - "columns have been built later. **`ModelSpec` builds columns in the order determined by `terms`!**\n", - "\n", - "2. As noted above, `R` has encoded `UIncome` differently in the main effect and in the interaction. 
For `ModelSpec`, the reference to `UIncome` always refers to the column in `design.column_info_` and will always build only the columns for `L` and `M`. **`ModelSpec` does no inspection of terms to decide how to encode categorical variables.**\n", - "\n", - "A few notes:\n", - "\n", - "- **Why not try to inspect the terms?** For any nontrivial formula in `R` with several categorical variables and interactions, predicting what columns will be produced from a given formula is not simple. **`ModelSpec` errs on the side of being explicit.**\n", - "\n", - "- **Is it impossible to build the design as `R` has?** No. An advanced user who *knows* they want the columns built as `R` has can do so (fairly) easily." - ] - }, - { - "cell_type": "code", - "execution_count": 44, - "id": "0cb3b63a", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "( UIncome[H] UIncome[L] UIncome[M]\n", - " 0 0.0 0.0 1.0\n", - " 1 0.0 1.0 0.0\n", - " 2 0.0 1.0 0.0\n", - " 3 1.0 0.0 0.0\n", - " 4 0.0 0.0 1.0\n", - " .. ... ... 
...\n", - " 395 1.0 0.0 0.0\n", - " 396 0.0 1.0 0.0\n", - " 397 0.0 1.0 0.0\n", - " 398 0.0 0.0 1.0\n", - " 399 0.0 1.0 0.0\n", - " \n", - " [400 rows x 3 columns],\n", - " ['UIncome[H]', 'UIncome[L]', 'UIncome[M]'])" - ] - }, - "execution_count": 44, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "full_encoding = contrast('UIncome', None)\n", - "design.build_columns(Carseats, full_encoding)" - ] - }, - { - "cell_type": "code", - "execution_count": 45, - "id": "272098d7", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "intercept 5.131739\n", - "UIncome[M] 0.115150\n", - "UIncome[H] 1.156118\n", - "UIncome[H]:ShelveLoc[Good] 3.738052\n", - "UIncome[H]:ShelveLoc[Medium] 1.514104\n", - "UIncome[L]:ShelveLoc[Good] 4.512054\n", - "UIncome[L]:ShelveLoc[Medium] 1.247275\n", - "UIncome[M]:ShelveLoc[Good] 5.575170\n", - "UIncome[M]:ShelveLoc[Medium] 2.478163\n", - "dtype: float64" - ] - }, - "execution_count": 45, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design = ModelSpec([pref_encoding, (full_encoding, 'ShelveLoc')])\n", - "X = design.fit_transform(Carseats)\n", - "sm.OLS(Y, X).fit().params" - ] - }, - { - "cell_type": "markdown", - "id": "fe05c471", - "metadata": {}, - "source": [ - "## Special encodings\n", - "\n", - "For flexible models, we may want to consider transformations of features, i.e. polynomial\n", - "or spline transformations. Given transforms that follow the `fit/transform` paradigm\n", - "we can of course achieve this with a `Column` and an `encoder`. 
The `ISLP.transforms`\n", - "package includes a `Poly` transform" - ] - }, - { - "cell_type": "code", - "execution_count": 46, - "id": "67062299", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Variable(variables=('Income',), name='poly(Income, 3, )', encoder=Poly(degree=3), use_transform=True, pure_columns=False, override_encoder_colnames=True)" - ] - }, - "execution_count": 46, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from ISLP.models.model_spec import poly\n", - "poly('Income', 3)" - ] - }, - { - "cell_type": "code", - "execution_count": 47, - "id": "df5e5b4d", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "intercept 5.440077\n", - "poly(Income, 3, )[0] 10.036373\n", - "poly(Income, 3, )[1] -2.799156\n", - "poly(Income, 3, )[2] 2.399601\n", - "ShelveLoc[Good] 4.808133\n", - "ShelveLoc[Medium] 1.889533\n", - "dtype: float64" - ] - }, - "execution_count": 47, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design = ModelSpec([poly('Income', 3), 'ShelveLoc'])\n", - "X = design.fit_transform(Carseats)\n", - "sm.OLS(Y, X).fit().params" - ] - }, - { - "cell_type": "markdown", - "id": "01be9c13", - "metadata": {}, - "source": [ - "Compare:" - ] - }, - { - "cell_type": "code", - "execution_count": 48, - "id": "3244d6f6", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " (Intercept) poly(Income, 3)1 poly(Income, 3)2 poly(Income, 3)3 \n", - " 5.440077 10.036373 -2.799156 2.399601 \n", - " ShelveLocGood ShelveLocMedium \n", - " 4.808133 1.889533 \n" - ] - } - ], - "source": [ - "%%R\n", - "lm(Sales ~ poly(Income, 3) + ShelveLoc, data=Carseats)$coef" - ] - }, - { - "cell_type": "markdown", - "id": "8ad5bb1d", - "metadata": {}, - "source": [ - "## Splines\n", - "\n", - "Support for natural and B-splines is also included" - ] - }, - { - "cell_type": "code", - "execution_count": 49, - "id": "6a6f4358", - "metadata": 
{}, - "outputs": [ - { - "data": { - "text/plain": [ - "intercept 4.240421\n", - "ns(Income, , df=5)[0] 1.468196\n", - "ns(Income, , df=5)[1] 1.499471\n", - "ns(Income, , df=5)[2] 1.152070\n", - "ns(Income, , df=5)[3] 2.418398\n", - "ns(Income, , df=5)[4] 1.804460\n", - "ShelveLoc[Good] 4.810449\n", - "ShelveLoc[Medium] 1.881095\n", - "dtype: float64" - ] - }, - "execution_count": 49, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from ISLP.models.model_spec import ns, bs, pca\n", - "design = ModelSpec([ns('Income', df=5), 'ShelveLoc'])\n", - "X = design.fit_transform(Carseats)\n", - "sm.OLS(Y, X).fit().params" - ] - }, - { - "cell_type": "code", - "execution_count": 50, - "id": "fb740953", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " (Intercept) ns(Income, df = 5)1 ns(Income, df = 5)2 ns(Income, df = 5)3 \n", - " 4.240421 1.468196 1.499471 1.152070 \n", - "ns(Income, df = 5)4 ns(Income, df = 5)5 ShelveLocGood ShelveLocMedium \n", - " 2.418398 1.804460 4.810449 1.881095 \n" - ] - } - ], - "source": [ - "%%R\n", - "library(splines)\n", - "lm(Sales ~ ns(Income, df=5) + ShelveLoc, data=Carseats)$coef" - ] - }, - { - "cell_type": "code", - "execution_count": 51, - "id": "fe1bf7fe", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "intercept 3.495085\n", - "bs(Income, , df=7, degree=2)[0] 1.813118\n", - "bs(Income, , df=7, degree=2)[1] 0.961852\n", - "bs(Income, , df=7, degree=2)[2] 2.471545\n", - "bs(Income, , df=7, degree=2)[3] 2.158891\n", - "bs(Income, , df=7, degree=2)[4] 2.091625\n", - "bs(Income, , df=7, degree=2)[5] 2.600669\n", - "bs(Income, , df=7, degree=2)[6] 2.843108\n", - "ShelveLoc[Good] 4.804919\n", - "ShelveLoc[Medium] 1.880337\n", - "dtype: float64" - ] - }, - "execution_count": 51, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design = ModelSpec([bs('Income', df=7, degree=2), 'ShelveLoc'])\n", - "X = 
design.fit_transform(Carseats)\n", - "sm.OLS(Y, X).fit().params" - ] - }, - { - "cell_type": "code", - "execution_count": 52, - "id": "86e966e0", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " (Intercept) bs(Income, df = 7, degree = 2)1 \n", - " 3.4950851 1.8131176 \n", - "bs(Income, df = 7, degree = 2)2 bs(Income, df = 7, degree = 2)3 \n", - " 0.9618523 2.4715450 \n", - "bs(Income, df = 7, degree = 2)4 bs(Income, df = 7, degree = 2)5 \n", - " 2.1588908 2.0916252 \n", - "bs(Income, df = 7, degree = 2)6 bs(Income, df = 7, degree = 2)7 \n", - " 2.6006694 2.8431084 \n", - " ShelveLocGood ShelveLocMedium \n", - " 4.8049190 1.8803375 \n" - ] - } - ], - "source": [ - "%%R\n", - "lm(Sales ~ bs(Income, df=7, degree=2) + ShelveLoc, data=Carseats)$coef" - ] - }, - { - "cell_type": "markdown", - "id": "877d4784", - "metadata": {}, - "source": [ - "## PCA" - ] - }, - { - "cell_type": "code", - "execution_count": 53, - "id": "8ba6cb20", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but PCA was fitted with feature names\n", - " warnings.warn(\n", - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but PCA was fitted with feature names\n", - " warnings.warn(\n", - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but PCA was fitted with feature names\n", - " warnings.warn(\n" - ] - }, - { - "data": { - "text/plain": [ - "intercept 5.419405\n", - "pca(myvars, , n_components=2)[0] -0.001131\n", - "pca(myvars, , n_components=2)[1] -0.024217\n", - "ShelveLoc[Good] 4.816253\n", - "ShelveLoc[Medium] 1.924139\n", - "dtype: float64" - ] - }, - 
"execution_count": 53, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design = ModelSpec([pca(['Income', \n", - " 'Price', \n", - " 'Advertising', \n", - " 'Population'], \n", - " n_components=2, \n", - " name='myvars'), 'ShelveLoc'])\n", - "X = design.fit_transform(Carseats)\n", - "sm.OLS(Y, X).fit().params" - ] - }, - { - "cell_type": "code", - "execution_count": 54, - "id": "f0319e51", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Call:\n", - "lm(formula = Sales ~ prcomp(cbind(Income, Price, Advertising, \n", - " Population))$x[, 1:2] + ShelveLoc, data = Carseats)\n", - "\n", - "Coefficients:\n", - " (Intercept) \n", - " 5.419405 \n", - "prcomp(cbind(Income, Price, Advertising, Population))$x[, 1:2]PC1 \n", - " 0.001131 \n", - "prcomp(cbind(Income, Price, Advertising, Population))$x[, 1:2]PC2 \n", - " -0.024217 \n", - " ShelveLocGood \n", - " 4.816253 \n", - " ShelveLocMedium \n", - " 1.924139 \n", - "\n" - ] - } - ], - "source": [ - "%%R\n", - "lm(Sales ~ prcomp(cbind(Income, Price, Advertising, Population))$x[,1:2] + ShelveLoc, data=Carseats)" - ] - }, - { - "cell_type": "markdown", - "id": "1f55086a", - "metadata": {}, - "source": [ - "It is of course common to scale before running PCA." 
- ] - }, - { - "cell_type": "code", - "execution_count": 55, - "id": "bbe9e004", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but StandardScaler was fitted with feature names\n", - " warnings.warn(\n", - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but StandardScaler was fitted with feature names\n", - " warnings.warn(\n", - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but StandardScaler was fitted with feature names\n", - " warnings.warn(\n" - ] - }, - { - "data": { - "text/plain": [ - "intercept 5.352159\n", - "pca(myvars, , n_components=2)[0] 0.446383\n", - "pca(myvars, , n_components=2)[1] -1.219788\n", - "ShelveLoc[Good] 4.922780\n", - "ShelveLoc[Medium] 2.005617\n", - "dtype: float64" - ] - }, - "execution_count": 55, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design = ModelSpec([pca(['Income', \n", - " 'Price', \n", - " 'Advertising', \n", - " 'Population'], \n", - " n_components=2, \n", - " name='myvars',\n", - " scale=True), 'ShelveLoc'])\n", - "X = design.fit_transform(Carseats)\n", - "sm.OLS(Y, X).fit().params" - ] - }, - { - "cell_type": "code", - "execution_count": 56, - "id": "d78c02e4", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Call:\n", - "lm(formula = Sales ~ prcomp(cbind(Income, Price, Advertising, \n", - " Population), scale = TRUE)$x[, 1:2] + ShelveLoc, data = Carseats)\n", - "\n", - "Coefficients:\n", - " (Intercept) \n", - " 5.3522 \n", - "prcomp(cbind(Income, Price, Advertising, Population), scale = TRUE)$x[, 1:2]PC1 \n", - " 0.4469 \n", - 
"prcomp(cbind(Income, Price, Advertising, Population), scale = TRUE)$x[, 1:2]PC2 \n", - " -1.2213 \n", - " ShelveLocGood \n", - " 4.9228 \n", - " ShelveLocMedium \n", - " 2.0056 \n", - "\n" - ] - } - ], - "source": [ - "%%R\n", - "lm(Sales ~ prcomp(cbind(Income, Price, Advertising, Population), scale=TRUE)$x[,1:2] + ShelveLoc, data=Carseats)" - ] - }, - { - "cell_type": "markdown", - "id": "8a03c603", - "metadata": {}, - "source": [ - "There will be some small differences in the coefficients due to `sklearn` use of `np.std(ddof=0)` instead\n", - "of `np.std(ddof=1)`." - ] - }, - { - "cell_type": "code", - "execution_count": 57, - "id": "f8215cef", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([ 0.44694166, -1.22131519])" - ] - }, - "execution_count": 57, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "np.array(sm.OLS(Y, X).fit().params)[1:3] * np.sqrt(X.shape[0] / (X.shape[0]-1))" - ] - }, - { - "cell_type": "markdown", - "id": "a15d0ead", - "metadata": {}, - "source": [ - "## Submodels\n", - "\n", - "We can build submodels as well, even if the terms do not appear in the original `terms` argument.\n", - "Fundamentally, the terms just need to be able to have the `design.build_columns` work for us to be\n", - "able to build a design matrix. The initial inspection of the columns of `Carseats` has created\n", - "a column for `US`, hence we can build this submodel." - ] - }, - { - "cell_type": "code", - "execution_count": 58, - "id": "d58c6244", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
interceptUS[Yes]
01.01.0
11.01.0
21.01.0
31.01.0
41.00.0
.........
3951.01.0
3961.01.0
3971.01.0
3981.01.0
3991.01.0
\n", - "

400 rows × 2 columns

\n", - "
" - ], - "text/plain": [ - " intercept US[Yes]\n", - "0 1.0 1.0\n", - "1 1.0 1.0\n", - "2 1.0 1.0\n", - "3 1.0 1.0\n", - "4 1.0 0.0\n", - ".. ... ...\n", - "395 1.0 1.0\n", - "396 1.0 1.0\n", - "397 1.0 1.0\n", - "398 1.0 1.0\n", - "399 1.0 1.0\n", - "\n", - "[400 rows x 2 columns]" - ] - }, - "execution_count": 58, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design = ModelSpec(['UIncome', 'ShelveLoc', 'Price']).fit(Carseats)\n", - "design.build_submodel(Carseats, ['US'])" - ] - }, - { - "cell_type": "markdown", - "id": "9365ba27", - "metadata": {}, - "source": [ - "## ANOVA \n", - "\n", - "For a given `terms` argument, there as a natural sequence of models, namely those specified by `[terms[:i] for i in range(len(terms)+1]`." - ] - }, - { - "cell_type": "code", - "execution_count": 59, - "id": "332ab454", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Index(['intercept'], dtype='object')\n", - "Index(['intercept', 'ShelveLoc[Good]', 'ShelveLoc[Medium]'], dtype='object')\n", - "Index(['intercept', 'ShelveLoc[Good]', 'ShelveLoc[Medium]', 'Price'], dtype='object')\n", - "Index(['intercept', 'ShelveLoc[Good]', 'ShelveLoc[Medium]', 'Price',\n", - " 'UIncome[L]', 'UIncome[M]'],\n", - " dtype='object')\n", - "Index(['intercept', 'ShelveLoc[Good]', 'ShelveLoc[Medium]', 'Price',\n", - " 'UIncome[L]', 'UIncome[M]', 'US[Yes]'],\n", - " dtype='object')\n" - ] - } - ], - "source": [ - "design = ModelSpec(['ShelveLoc', 'Price', 'UIncome', 'US']).fit(Carseats)\n", - "for D in design.build_sequence(Carseats):\n", - " print(D.columns)" - ] - }, - { - "cell_type": "code", - "execution_count": 60, - "id": "f6cfd031", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
df_residssrdf_diffss_diffFPr(>F)
0399.03182.2746980.0NaNNaNNaN
1397.02172.7435552.01009.531143153.0108585.452815e-50
2396.01455.6407021.0717.102853217.3771921.583751e-39
3394.01378.9159382.076.72476411.6288851.239031e-05
4393.01296.4627001.082.45323824.9942578.678832e-07
\n", - "
" - ], - "text/plain": [ - " df_resid ssr df_diff ss_diff F Pr(>F)\n", - "0 399.0 3182.274698 0.0 NaN NaN NaN\n", - "1 397.0 2172.743555 2.0 1009.531143 153.010858 5.452815e-50\n", - "2 396.0 1455.640702 1.0 717.102853 217.377192 1.583751e-39\n", - "3 394.0 1378.915938 2.0 76.724764 11.628885 1.239031e-05\n", - "4 393.0 1296.462700 1.0 82.453238 24.994257 8.678832e-07" - ] - }, - "execution_count": 60, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "sm.stats.anova_lm(*(sm.OLS(Y, D).fit() for D in design.build_sequence(Carseats) ))" - ] - }, - { - "cell_type": "code", - "execution_count": 61, - "id": "11c4aee8", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Analysis of Variance Table\n", - "\n", - "Response: Sales\n", - " Df Sum Sq Mean Sq F value Pr(>F) \n", - "ShelveLoc 2 1009.53 504.77 153.011 < 2.2e-16 ***\n", - "Price 1 717.10 717.10 217.377 < 2.2e-16 ***\n", - "UIncome 2 76.72 38.36 11.629 1.240e-05 ***\n", - "US 1 82.45 82.45 24.994 8.679e-07 ***\n", - "Residuals 393 1296.46 3.30 \n", - "---\n", - "Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1\n" - ] - } - ], - "source": [ - "%%R\n", - "anova(lm(Sales ~ ShelveLoc + Price + UIncome + US, data=Carseats))" - ] - }, - { - "cell_type": "markdown", - "id": "9a4e6e63", - "metadata": {}, - "source": [ - "Recall that `ModelSpec` does not inspect `terms` to reorder based on degree of \n", - "interaction as `R` does:" - ] - }, - { - "cell_type": "code", - "execution_count": 62, - "id": "6e7bf361", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
df_residssrdf_diffss_diffFPr(>F)
0399.03182.2746980.0NaNNaNNaN
1393.02059.3764136.01122.89828435.9400471.175738e-34
2391.02036.0445962.023.3318172.2403101.077900e-01
\n", - "
" - ], - "text/plain": [ - " df_resid ssr df_diff ss_diff F Pr(>F)\n", - "0 399.0 3182.274698 0.0 NaN NaN NaN\n", - "1 393.0 2059.376413 6.0 1122.898284 35.940047 1.175738e-34\n", - "2 391.0 2036.044596 2.0 23.331817 2.240310 1.077900e-01" - ] - }, - "execution_count": 62, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design = ModelSpec([(full_encoding, 'ShelveLoc'), pref_encoding]).fit(Carseats)\n", - "sm.stats.anova_lm(*(sm.OLS(Y, D).fit() for D in design.build_sequence(Carseats) ))" - ] - }, - { - "cell_type": "code", - "execution_count": 63, - "id": "ed7d4bfa", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Analysis of Variance Table\n", - "\n", - "Response: Sales\n", - " Df Sum Sq Mean Sq F value Pr(>F) \n", - "UIncome 2 61.92 30.962 5.9458 0.002859 ** \n", - "UIncome:ShelveLoc 6 1084.31 180.718 34.7049 < 2.2e-16 ***\n", - "Residuals 391 2036.04 5.207 \n", - "---\n", - "Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1\n" - ] - } - ], - "source": [ - "%%R\n", - "anova(lm(Sales ~ UIncome:ShelveLoc + UIncome, data=Carseats))" - ] - }, - { - "cell_type": "markdown", - "id": "0350da34", - "metadata": {}, - "source": [ - "To agree with `R` we must order `terms` as `R` will." - ] - }, - { - "cell_type": "code", - "execution_count": 64, - "id": "5ddaf87c", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
df_residssrdf_diffss_diffFPr(>F)
0399.03182.2746980.0NaNNaNNaN
1397.03120.3513822.061.9233165.9458462.855424e-03
2391.02036.0445966.01084.30678534.7048681.346561e-33
\n", - "
" - ], - "text/plain": [ - " df_resid ssr df_diff ss_diff F Pr(>F)\n", - "0 399.0 3182.274698 0.0 NaN NaN NaN\n", - "1 397.0 3120.351382 2.0 61.923316 5.945846 2.855424e-03\n", - "2 391.0 2036.044596 6.0 1084.306785 34.704868 1.346561e-33" - ] - }, - "execution_count": 64, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design = ModelSpec([pref_encoding, (full_encoding, 'ShelveLoc')]).fit(Carseats)\n", - "sm.stats.anova_lm(*(sm.OLS(Y, D).fit() for D in design.build_sequence(Carseats)))" - ] - }, - { - "cell_type": "markdown", - "id": "1ef70ce3", - "metadata": {}, - "source": [ - "## More complicated interactions\n", - "\n", - "Can we have an interaction of a polynomial effect with a categorical? Absolutely" - ] - }, - { - "cell_type": "code", - "execution_count": 65, - "id": "a1a14742", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Analysis of Variance Table\n", - "\n", - "Response: Sales\n", - " Df Sum Sq Mean Sq F value Pr(>F) \n", - "UIncome 2 61.92 30.9617 4.0310 0.01851 *\n", - "UIncome:poly(Income, 3) 9 79.72 8.8581 1.1533 0.32408 \n", - "UIncome:US 3 83.51 27.8367 3.6242 0.01324 *\n", - "Residuals 385 2957.12 7.6808 \n", - "---\n", - "Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1\n" - ] - } - ], - "source": [ - "%%R\n", - "anova(lm(Sales ~ UIncome + poly(Income, 3):UIncome + UIncome:US, data=Carseats))" - ] - }, - { - "cell_type": "markdown", - "id": "a909be1a", - "metadata": {}, - "source": [ - "To match `R` we note that it has used its inspection rules to encode `UIncome` with 3 levels\n", - "for the two interactions." 
- ] - }, - { - "cell_type": "code", - "execution_count": 66, - "id": "ae286cf3", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "intercept 65.978856\n", - "UIncome[M] -60.159607\n", - "UIncome[H] -147.276154\n", - "poly(Income, 3, )[0]:UIncome[H] 1957.694387\n", - "poly(Income, 3, )[0]:UIncome[L] 1462.060650\n", - "poly(Income, 3, )[0]:UIncome[M] 83.035153\n", - "poly(Income, 3, )[1]:UIncome[H] -984.494570\n", - "poly(Income, 3, )[1]:UIncome[L] 881.537647\n", - "poly(Income, 3, )[1]:UIncome[M] -18.006234\n", - "poly(Income, 3, )[2]:UIncome[H] 207.614692\n", - "poly(Income, 3, )[2]:UIncome[L] 217.190749\n", - "poly(Income, 3, )[2]:UIncome[M] 34.065434\n", - "UIncome[H]:US 0.903404\n", - "UIncome[L]:US 0.895538\n", - "UIncome[M]:US 1.048728\n", - "dtype: float64" - ] - }, - "execution_count": 66, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "p3 = poly('Income', 3)\n", - "design = ModelSpec([pref_encoding, (p3, full_encoding), (full_encoding, 'US')]).fit(Carseats)\n", - "X = design.transform(Carseats)\n", - "sm.OLS(Y, X).fit().params" - ] - }, - { - "cell_type": "code", - "execution_count": 67, - "id": "236ab2d2", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
df_residssrdf_diffss_diffFPr(>F)
0399.03182.2746980.0NaNNaNNaN
1397.03120.3513822.061.9233164.0310320.018488
2388.03040.6285599.079.7228231.1532730.324049
3385.02957.1184443.083.5101153.6241810.013244
\n", - "
" - ], - "text/plain": [ - " df_resid ssr df_diff ss_diff F Pr(>F)\n", - "0 399.0 3182.274698 0.0 NaN NaN NaN\n", - "1 397.0 3120.351382 2.0 61.923316 4.031032 0.018488\n", - "2 388.0 3040.628559 9.0 79.722823 1.153273 0.324049\n", - "3 385.0 2957.118444 3.0 83.510115 3.624181 0.013244" - ] - }, - "execution_count": 67, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "sm.stats.anova_lm(*(sm.OLS(Y, D).fit() for D in design.build_sequence(Carseats)))" - ] - }, - { - "cell_type": "markdown", - "id": "0a45c720", - "metadata": {}, - "source": [ - "## Grouping columns for ANOVA\n", - "\n", - "The `Variable` construct can be used to group\n", - "variables together to get custom sequences of models for `anova_lm`." - ] - }, - { - "cell_type": "code", - "execution_count": 68, - "id": "f36c1b3b", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Index(['intercept'], dtype='object')\n", - "Index(['intercept', 'Price', 'UIncome[M]', 'UIncome[H]'], dtype='object')\n", - "Index(['intercept', 'Price', 'UIncome[M]', 'UIncome[H]', 'US[Yes]',\n", - " 'Advertising'],\n", - " dtype='object')\n" - ] - } - ], - "source": [ - "group1 = Variable(('Price', pref_encoding), 'group1', None)\n", - "group2 = Variable(('US', 'Advertising'), 'group2', None)\n", - "design = ModelSpec([group1, group2]).fit(Carseats)\n", - "for D in design.build_sequence(Carseats):\n", - " print(D.columns)" - ] - }, - { - "cell_type": "code", - "execution_count": 69, - "id": "3daf7638", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
df_residssrdf_diffss_diffFPr(>F)
0399.03182.2746980.0NaNNaNNaN
1396.02508.1877883.0674.08691039.3048412.970412e-22
2394.02252.3963432.0255.79144522.3721356.267562e-10
\n", - "
" - ], - "text/plain": [ - " df_resid ssr df_diff ss_diff F Pr(>F)\n", - "0 399.0 3182.274698 0.0 NaN NaN NaN\n", - "1 396.0 2508.187788 3.0 674.086910 39.304841 2.970412e-22\n", - "2 394.0 2252.396343 2.0 255.791445 22.372135 6.267562e-10" - ] - }, - "execution_count": 69, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "sm.stats.anova_lm(*(sm.OLS(Y, D).fit() for D in design.build_sequence(Carseats)))" - ] - }, - { - "cell_type": "markdown", - "id": "46c1ace8", - "metadata": {}, - "source": [ - "It is not clear this is simple to do in `R` as the formula object expands all parentheses." - ] - }, - { - "cell_type": "code", - "execution_count": 70, - "id": "0b87e430", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Analysis of Variance Table\n", - "\n", - "Response: Sales\n", - " Df Sum Sq Mean Sq F value Pr(>F) \n", - "Price 1 630.03 630.03 110.2079 < 2.2e-16 ***\n", - "UIncome 2 44.06 22.03 3.8533 0.02201 * \n", - "US 1 121.88 121.88 21.3196 5.270e-06 ***\n", - "Advertising 1 133.91 133.91 23.4247 1.868e-06 ***\n", - "Residuals 394 2252.40 5.72 \n", - "---\n", - "Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1\n" - ] - } - ], - "source": [ - "%%R\n", - "anova(lm(Sales ~ (Price + UIncome) + (US + Advertising), data=Carseats))" - ] - }, - { - "cell_type": "markdown", - "id": "7c137360", - "metadata": {}, - "source": [ - "It can be done by building up the models\n", - "by hand and likely is possible to be done programmatically but it seems not obvious." 
- ] - }, - { - "cell_type": "code", - "execution_count": 71, - "id": "b678d323", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Analysis of Variance Table\n", - "\n", - "Model 1: Sales ~ 1\n", - "Model 2: Sales ~ Price + UIncome\n", - "Model 3: Sales ~ Price + UIncome + US + Advertising\n", - " Res.Df RSS Df Sum of Sq F Pr(>F) \n", - "1 399 3182.3 \n", - "2 396 2508.2 3 674.09 39.305 < 2.2e-16 ***\n", - "3 394 2252.4 2 255.79 22.372 6.268e-10 ***\n", - "---\n", - "Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1\n" - ] - } - ], - "source": [ - "%%R\n", - "M1 = lm(Sales ~ 1, data=Carseats)\n", - "M2 = lm(Sales ~ Price + UIncome, data=Carseats)\n", - "M3 = lm(Sales ~ Price + UIncome + US + Advertising, data=Carseats)\n", - "anova(M1, M2, M3)" - ] - }, - { - "cell_type": "markdown", - "id": "b0388949", - "metadata": {}, - "source": [ - "## Alternative anova\n", - "\n", - "Another common ANOVA table involves dropping each term in succession from the model and comparing\n", - "to the full model." 
- ] - }, - { - "cell_type": "code", - "execution_count": 72, - "id": "ac5b916a", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'intercept'}\n", - " df_resid ssr df_diff ss_diff F Pr(>F)\n", - "0 395.0 4417.273517 0.0 NaN NaN NaN\n", - "1 394.0 2252.396343 1.0 2164.877175 378.690726 1.359177e-59\n", - "{'Price', 'UIncome[H]', 'UIncome[M]'}\n", - " df_resid ssr df_diff ss_diff F Pr(>F)\n", - "0 397.0 2950.808154 0.0 NaN NaN NaN\n", - "1 394.0 2252.396343 3.0 698.411811 40.723184 6.077848e-23\n", - "{'US[Yes]', 'Advertising'}\n", - " df_resid ssr df_diff ss_diff F Pr(>F)\n", - "0 396.0 2508.187788 0.0 NaN NaN NaN\n", - "1 394.0 2252.396343 2.0 255.791445 22.372135 6.267562e-10\n" - ] - } - ], - "source": [ - "Dfull = design.transform(Carseats)\n", - "Mfull = sm.OLS(Y, Dfull).fit()\n", - "for i, D in enumerate(design.build_sequence(Carseats, anova_type='drop')):\n", - " if i == 0:\n", - " D0 = D\n", - " print(set(D.columns) ^ set(Dfull.columns))\n", - " print(sm.stats.anova_lm(sm.OLS(Y, D).fit(), Mfull))" - ] - }, - { - "cell_type": "code", - "execution_count": 73, - "id": "a0c71948", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Analysis of Variance Table\n", - "\n", - "Model 1: Sales ~ US + Advertising\n", - "Model 2: Sales ~ Price + UIncome + US + Advertising\n", - " Res.Df RSS Df Sum of Sq F Pr(>F) \n", - "1 397 2950.8 \n", - "2 394 2252.4 3 698.41 40.723 < 2.2e-16 ***\n", - "---\n", - "Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1\n", - "Analysis of Variance Table\n", - "\n", - "Model 1: Sales ~ Price + UIncome\n", - "Model 2: Sales ~ Price + UIncome + US + Advertising\n", - " Res.Df RSS Df Sum of Sq F Pr(>F) \n", - "1 396 2508.2 \n", - "2 394 2252.4 2 255.79 22.372 6.268e-10 ***\n", - "---\n", - "Signif. 
codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1\n" - ] - } - ], - "source": [ - "%%R\n", - "M1 = lm(Sales ~ Price + UIncome + US + Advertising, data=Carseats)\n", - "M2 = lm(Sales ~ US + Advertising, data=Carseats)\n", - "print(anova(M2, M1))\n", - "M3 = lm(Sales ~ Price + UIncome, data=Carseats)\n", - "print(anova(M3, M1))" - ] - }, - { - "cell_type": "markdown", - "id": "a5e4880d", - "metadata": {}, - "source": [ - "The comparison without the intercept here is actually very hard to achieve in `R` with `anova` due to its inspection\n", - "of the formula." - ] - }, - { - "cell_type": "code", - "execution_count": 74, - "id": "4b383401", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Analysis of Variance Table\n", - "\n", - "Model 1: Sales ~ Price + UIncome + US + Advertising - 1\n", - "Model 2: Sales ~ Price + UIncome + US + Advertising\n", - " Res.Df RSS Df Sum of Sq F Pr(>F)\n", - "1 394 2252.4 \n", - "2 394 2252.4 0 9.0949e-13 \n" - ] - } - ], - "source": [ - "%%R\n", - "M1 = lm(Sales ~ Price + UIncome + US + Advertising, data=Carseats)\n", - "M4 = lm(Sales ~ Price + UIncome + US + Advertising - 1, data=Carseats)\n", - "print(anova(M4, M1))" - ] - }, - { - "cell_type": "markdown", - "id": "72d7c83b", - "metadata": {}, - "source": [ - "It can be found with `summary`." - ] - }, - { - "cell_type": "code", - "execution_count": 75, - "id": "4d5ce789", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Call:\n", - "lm(formula = Sales ~ Price + UIncome + US + Advertising, data = Carseats)\n", - "\n", - "Residuals:\n", - " Min 1Q Median 3Q Max \n", - "-7.4437 -1.6351 -0.0932 1.4920 6.8076 \n", - "\n", - "Coefficients:\n", - " Estimate Std. Error t value Pr(>|t|) \n", - "(Intercept) 12.520356 0.643390 19.460 < 2e-16 ***\n", - "Price -0.054000 0.005072 -10.647 < 2e-16 ***\n", - "UIncomeM 0.548906 0.281693 1.949 0.0521 . 
\n", - "UIncomeH 0.708219 0.322028 2.199 0.0284 * \n", - "USYes 0.024181 0.343246 0.070 0.9439 \n", - "Advertising 0.119509 0.024692 4.840 1.87e-06 ***\n", - "---\n", - "Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1\n", - "\n", - "Residual standard error: 2.391 on 394 degrees of freedom\n", - "Multiple R-squared: 0.2922,\tAdjusted R-squared: 0.2832 \n", - "F-statistic: 32.53 on 5 and 394 DF, p-value: < 2.2e-16\n", - "\n" - ] - } - ], - "source": [ - "%%R\n", - "summary(M1)" - ] - }, - { - "cell_type": "code", - "execution_count": 76, - "id": "56b82d02", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(378.690726, 378.69160000000005)" - ] - }, - "execution_count": 76, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "378.690726, 19.46**2" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "872f645c-1d6f-4d08-9eec-2b80276bc82c", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "jupytext": { - "formats": "source/models///ipynb,jupyterbook/models///md:myst,jupyterbook/models///ipynb" - }, - "kernelspec": { - "display_name": "python3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.13" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/docs/source/transforms/PCA.ipynb b/docs/source/transforms/PCA.ipynb index 224992b..ec1e0ae 100644 --- a/docs/source/transforms/PCA.ipynb +++ b/docs/source/transforms/PCA.ipynb @@ -19,9 +19,14 @@ "outputs": [], "source": [ "import numpy as np\n", + "from sklearn.decomposition import PCA\n", + "\n", "from ISLP import load_data\n", - "from ISLP.models import ModelSpec, pca, Variable, derived_variable\n", - "from sklearn.decomposition import PCA" + "from ISLP.models import 
(ModelSpec, \n", + " pca, \n", + " Feature, \n", + " derived_feature,\n", + " build_columns)" ] }, { @@ -71,7 +76,7 @@ "id": "fff603bf", "metadata": {}, "source": [ - "Suppose we want to make a `Variable` representing the first 3 principal components of the\n", + "Suppose we want to make a `Feature` representing the first 3 principal components of the\n", " features `['CompPrice', 'Income', 'Advertising', 'Population', 'Price']`." ] }, @@ -80,8 +85,8 @@ "id": "eab49ad1-3957-478f-8a76-28a8f58551e9", "metadata": {}, "source": [ - "We first make a `Variable` that represents these five features columns, then `pca`\n", - "can be used to compute a new `Variable` that returns the first three principal components." + "We first make a `Feature` that represents these five features columns, then `pca`\n", + "can be used to compute a new `Feature` that returns the first three principal components." ] }, { @@ -91,7 +96,7 @@ "metadata": {}, "outputs": [], "source": [ - "grouped = Variable(('CompPrice', 'Income', 'Advertising', 'Population', 'Price'), name='grouped', encoder=None)\n", + "grouped = Feature(('CompPrice', 'Income', 'Advertising', 'Population', 'Price'), name='grouped', encoder=None)\n", "sklearn_pca = PCA(n_components=3, whiten=True)" ] }, @@ -100,7 +105,7 @@ "id": "b45655a3-393d-4b4c-b754-cda61ed0e014", "metadata": {}, "source": [ - "We can now fit `sklearn_pca` and create our new variable." + "We can now fit `sklearn_pca` and create our new feature." 
] }, { @@ -108,175 +113,18 @@ "execution_count": 5, "id": "6cfe8861-ad07-47b9-95d1-5d5513ff6fbe", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but PCA was fitted with feature names\n", - " warnings.warn(\n" - ] - } - ], + "outputs": [], "source": [ - "sklearn_pca.fit(design.build_columns(Carseats, grouped)[0]) \n", - "pca_var = derived_variable(['CompPrice', 'Income', 'Advertising', 'Population', 'Price'],\n", + "grouped_features = build_columns(design.column_info_,\n", + " Carseats,\n", + " grouped)[0]\n", + "sklearn_pca.fit(grouped_features) \n", + "pca_var = derived_feature(['CompPrice', 'Income', 'Advertising', 'Population', 'Price'],\n", " name='pca(grouped)', encoder=sklearn_pca)\n", - "derived_features, _ = design.build_columns(Carseats, pca_var)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "aeb47184-9e15-4a6e-b60a-916f5ff89063", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
CompPriceIncomeAdvertisingPopulationPrice
01387311276120
1111481626083
2113351026980
3117100446697
4141643340128
..................
39513810817203128
39613923337120
3971622612368159
39810079728495
39913437027120
\n", - "

400 rows × 5 columns

\n", - "
" - ], - "text/plain": [ - " CompPrice Income Advertising Population Price\n", - "0 138 73 11 276 120\n", - "1 111 48 16 260 83\n", - "2 113 35 10 269 80\n", - "3 117 100 4 466 97\n", - "4 141 64 3 340 128\n", - ".. ... ... ... ... ...\n", - "395 138 108 17 203 128\n", - "396 139 23 3 37 120\n", - "397 162 26 12 368 159\n", - "398 100 79 7 284 95\n", - "399 134 37 0 27 120\n", - "\n", - "[400 rows x 5 columns]" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "design.build_columns(Carseats, grouped)[0]" + "derived_features, _ = build_columns(design.column_info_,\n", + " Carseats, \n", + " pca_var,\n", + " encoders=design.encoders_)" ] }, { @@ -291,7 +139,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "id": "9f4b0955", "metadata": {}, "outputs": [], @@ -304,22 +152,10 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "id": "6b382699-eb86-457f-8e91-09a63eb21d49", "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but PCA was fitted with feature names\n", - " warnings.warn(\n", - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but PCA was fitted with feature names\n", - " warnings.warn(\n", - "/Users/jonathantaylor/miniconda3/envs/islp_test/lib/python3.9/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but PCA was fitted with feature names\n", - " warnings.warn(\n" - ] - }, { "data": { "text/plain": [ @@ -329,7 +165,7 @@ " dtype='object')" ] }, - "execution_count": 8, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -350,7 +186,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 
8, "id": "4a8d9b28", "metadata": {}, "outputs": [], @@ -361,7 +197,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 13, "id": "6efa6c67-86e1-4f51-86c2-25c838a90bf4", "metadata": {}, "outputs": [ @@ -371,7 +207,7 @@ "(4.073428490498941e-14, 0.0)" ] }, - "execution_count": 10, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } diff --git a/docs/source/transforms/poly.ipynb b/docs/source/transforms/poly.ipynb index c2b740b..45c862e 100644 --- a/docs/source/transforms/poly.ipynb +++ b/docs/source/transforms/poly.ipynb @@ -168,7 +168,7 @@ "source": [ "## Underlying model\n", "\n", - "If we look at `quartic`, we see it is a `Variable`, i.e. it can be used to produce a set of columns\n", + "If we look at `quartic`, we see it is a `Feature`, i.e. it can be used to produce a set of columns\n", "in a design matrix when it is a term used in creating the `ModelSpec`.\n", "\n", "Its encoder is `Poly(degree=4)`. This is a special `sklearn` transform that expects a single column\n", diff --git a/environment.yml b/environment.yml deleted file mode 100644 index 3b4fd24..0000000 --- a/environment.yml +++ /dev/null @@ -1,240 +0,0 @@ -name: islp_test -channels: - - defaults -dependencies: - - ca-certificates=2022.07.19=hca03da5_0 - - certifi=2022.9.14=py39hca03da5_0 - - libcxx=14.0.6=h848a8c0_0 - - libffi=3.4.2=hc377ac9_4 - - ncurses=6.3=h1a28f6b_3 - - openssl=1.1.1q=h1a28f6b_0 - - python=3.9.13=hbdb9e5c_1 - - readline=8.1.2=h1a28f6b_1 - - sqlite=3.39.2=h1058600_0 - - tk=8.6.12=hb8d0fd4_0 - - wheel=0.37.1=pyhd3eb1b0_0 - - xz=5.2.5=h1a28f6b_1 - - zlib=1.2.12=h5a0b063_3 - - pip: - - absl-py==1.2.0 - - aiohttp==3.8.1 - - aiosignal==1.2.0 - - alabaster==0.7.12 - - ansiwrap==0.8.4 - - anyio==3.6.1 - - appnope==0.1.3 - - argon2-cffi==21.3.0 - - argon2-cffi-bindings==21.2.0 - - astor==0.8.1 - - asttokens==2.0.8 - - astunparse==1.6.3 - - async-timeout==4.0.2 - - attrs==22.1.0 - - autograd==1.4 - - autograd-gamma==0.5.0 - - babel==2.10.3 - - 
backcall==0.2.0 - - beautifulsoup4==4.11.1 - - bleach==5.0.1 - - build==0.8.0 - - cachetools==4.2.4 - - cffi==1.15.1 - - charset-normalizer==2.1.1 - - click==8.1.3 - - commonmark==0.9.1 - - contourpy==1.0.5 - - cycler==0.11.0 - - debugpy==1.6.3 - - decorator==5.1.1 - - defusedxml==0.7.1 - - docutils==0.17.1 - - entrypoints==0.4 - - exceptiongroup==1.1.0 - - executing==1.0.0 - - fastjsonschema==2.16.2 - - flatbuffers==2.0.7 - - fonttools==4.37.2 - - formulaic==0.5.2 - - frozenlist==1.3.1 - - fsspec==2022.8.2 - - future==0.18.2 - - gast==0.4.0 - - google-auth==1.35.0 - - google-auth-oauthlib==0.4.6 - - google-pasta==0.2.0 - - grpcio==1.48.1 - - h5py==3.7.0 - - html2text==2020.1.16 - - idna==3.4 - - imagesize==1.4.1 - - importlib-metadata==4.12.0 - - iniconfig==2.0.0 - - interface-meta==1.3.0 - - ipykernel==6.15.3 - - ipython==8.5.0 - - ipython-genutils==0.2.0 - - ipywidgets==8.0.2 - - jaraco-classes==3.2.2 - - jedi==0.18.1 - - jinja2==3.1.2 - - joblib==1.2.0 - - json5==0.9.10 - - jsonschema==4.16.0 - - jupyter==1.0.0 - - jupyter-cache==0.5.0 - - jupyter-client==7.3.5 - - jupyter-console==6.4.4 - - jupyter-core==4.11.1 - - jupyter-server==1.18.1 - - jupyterlab==3.4.7 - - jupyterlab-pygments==0.2.2 - - jupyterlab-server==2.15.1 - - jupyterlab-widgets==3.0.3 - - jupytext==1.14.5 - - keras==2.10.0 - - keras-preprocessing==1.1.2 - - keyring==23.9.3 - - kiwisolver==1.4.4 - - l0bnb==1.0.0 - - libclang==14.0.6 - - lifelines==0.27.2 - - llvmlite==0.39.1 - - lxml==4.9.1 - - markdown==3.4.1 - - markdown-it-py==2.1.0 - - markupsafe==2.1.1 - - matplotlib==3.6.0 - - matplotlib-inline==0.1.6 - - mdit-py-plugins==0.3.0 - - mdurl==0.1.2 - - mistune==2.0.4 - - more-itertools==8.14.0 - - multidict==6.0.2 - - myst==1.0.4 - - myst-nb==0.16.0 - - myst-parser==0.18.0 - - nbclassic==0.4.3 - - nbclient==0.5.13 - - nbconvert==7.0.0 - - nbformat==5.5.0 - - nbsphinx==0.8.11 - - nest-asyncio==1.5.5 - - notebook==6.4.12 - - notebook-shim==0.1.0 - - numba==0.56.2 - - numpy==1.23.3 - - 
numpydoc==1.4.0 - - oauthlib==3.2.1 - - opt-einsum==3.3.0 - - packaging==21.3 - - pandas==1.5.0 - - pandocfilters==1.5.0 - - papermill==2.4.0 - - parso==0.8.3 - - patsy==0.5.2 - - pep517==0.13.0 - - pexpect==4.8.0 - - pickleshare==0.7.5 - - pillow==9.2.0 - - pip==22.2.2 - - pkginfo==1.8.3 - - pluggy==1.0.0 - - portalocker==2.5.1 - - progressbar2==4.0.0 - - prometheus-client==0.14.1 - - prompt-toolkit==3.0.31 - - protobuf==3.19.5 - - psutil==5.9.2 - - ptyprocess==0.7.0 - - pure-eval==0.2.2 - - pyasn1==0.4.8 - - pyasn1-modules==0.2.8 - - pycparser==2.21 - - pydash==5.1.0 - - pydeprecate==0.3.2 - - pygam==0.8.0 - - pygments==2.13.0 - - pyparsing==3.0.9 - - pyrsistent==0.18.1 - - pytest==7.2.0 - - python-dateutil==2.8.2 - - python-utils==3.3.3 - - pytorch-lightning==1.7.6 - - pytz==2022.2.1 - - pytz-deprecation-shim==0.1.0.post0 - - pyyaml==6.0 - - pyzmq==24.0.0 - - qtconsole==5.3.2 - - qtpy==2.2.0 - - readme-renderer==37.1 - - requests==2.28.1 - - requests-oauthlib==1.3.1 - - requests-toolbelt==0.9.1 - - rfc3986==2.0.0 - - rich==12.5.1 - - rpy2==3.5.7 - - rsa==4.9 - - scikit-learn==1.1.2 - - scipy==1.9.1 - - send2trash==1.8.0 - - setuptools==59.8.0 - - six==1.16.0 - - sniffio==1.3.0 - - snowballstemmer==2.2.0 - - soupsieve==2.3.2.post1 - - sphinx==5.1.1 - - sphinx-markdown-builder==0.5.4 - - sphinx-rst-builder==0.0.3 - - sphinx-rtd-theme==1.1.1 - - sphinx-togglebutton==0.3.2 - - sphinxcontrib-applehelp==1.0.2 - - sphinxcontrib-devhelp==1.0.2 - - sphinxcontrib-htmlhelp==2.0.0 - - sphinxcontrib-jsmath==1.0.1 - - sphinxcontrib-qthelp==1.0.3 - - sphinxcontrib-serializinghtml==1.1.5 - - sqlalchemy==1.4.41 - - stack-data==0.5.0 - - statsmodels==0.13.2 - - tabulate==0.8.10 - - tenacity==6.3.1 - - tensorboard==2.10.0 - - tensorboard-data-server==0.6.1 - - tensorboard-plugin-wit==1.8.1 - - tensorflow-estimator==2.10.0 - - tensorflow-macos==2.10.0 - - tensorflow-metal==0.6.0 - - termcolor==2.0.1 - - terminado==0.15.0 - - texext==0.6.7 - - textwrap3==0.9.2 - - 
threadpoolctl==3.1.0 - - tinycss2==1.1.1 - - toml==0.10.2 - - tomli==2.0.1 - - torch==1.12.1 - - torchdata==0.4.1 - - torchinfo==1.7.0 - - torchmetrics==0.9.3 - - torchvision==0.13.1 - - tornado==6.2 - - tqdm==4.64.1 - - traitlets==5.4.0 - - twine==4.0.1 - - typing-extensions==4.3.0 - - tzdata==2022.7 - - tzlocal==4.2 - - unidecode==1.3.4 - - unify==0.5 - - untokenize==0.1.1 - - urllib3==1.26.12 - - wcwidth==0.2.5 - - webencodings==0.5.1 - - websocket-client==1.4.1 - - werkzeug==2.2.2 - - widgetsnbextension==4.0.3 - - wrapt==1.14.1 - - yapf==0.32.0 - - yarl==1.8.1 - - zipp==3.8.1 -prefix: /Users/jonathantaylor/miniconda3/envs/islp_test diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..5fe63fd --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,88 @@ +[project] +name = "ISLP" +dependencies = ["numpy>=1.7.1", + "scipy>=0.9", + "pandas>=1.5", + "lxml", # pandas needs this for html + "scikit-learn>=1.2", + "joblib", + "statsmodels>=0.13", + "lifelines", + "pygam", # for GAM in Ch7 + "torch", + "pytorch_lightning", + "torchmetrics", + ] +description = "Library for ISLP labs" +readme = "README.md" +requires-python = ">=3.10" +license = {file = "LICENSE"} +keywords = [] +authors = [ + {name = "Trevor Hastie", email="hastie@stanford.edu" }, + {name = "Gareth James", email="gareth@emory.edu"}, + {name = "Jonathan Taylor", email="jonathan.taylor@stanford.edu" }, + {name = "Rob Tibshirani", email="tibs@stanford.edu" }, + {name = "Daniela Witten", email="dwitten@uw.edu" }, + ] +maintainers = [ + {name = "Jonathan Taylor", email="jonathan.taylor@stanford.edu" }, + ] +classifiers = ["Development Status :: 3 - Alpha", + "Environment :: Console", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: BSD License", + "Operating System :: OS Independent", + "Programming Language :: Python", + "Topic :: Scientific/Engineering" + ] +dynamic = ["version"] + +[tool.setuptools] +packages = [ + "ISLP", + "ISLP.models", + "ISLP.bart", + 
"ISLP.torch", + "ISLP.data" +] +include-package-data = true + +[tool.setuptools.package-data] +ISLP = ["data/*.csv", "data/*.npy", "data/*.data"] + +[tool.setuptools.dynamic] +version = {attr = "ISLP.__version__"} # Assuming ISLP.__version__ holds your version + + +[project.urls] # Optional +"Homepage" = "https://github.com/intro-stat-learning/ISLP" +"Bug Reports" = "https://github.com/intro-stat-learning/ISLP/issues" +"Funding" = "https://donate.pypi.org" +"Say Thanks!" = "http://saythanks.io/to/example" +"Source" = "https://github.com/pypa/sampleproject/" + +[project.optional-dependencies] +doc = ['Sphinx>=3.0'] + +[build-system] +requires = ["setuptools>=42", + "wheel", + "Sphinx>=1.0", + "numpy", + "pandas", + "scipy", + "scikit-learn", + "joblib", + "statsmodels", + "versioneer[toml]" + ] +build-backend = "setuptools.build_meta" + +[tool.versioneer] +VCS = "git" +style = "pep440" +versionfile_source = "ISLP/_version.py" +versionfile_build = "ISLP/_version.py" +tag_prefix = "v" +parentdir_prefix = "ISLP-" diff --git a/requirements.txt b/requirements.txt index bf393e1..10bff6a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,11 +1,13 @@ numpy>=1.7.1 scipy>=0.9 -jupyter pandas>=0.20 +pandas<=1.9 lxml # pandas needs this for html -scikit-learn>=1.0 +scikit-learn>=1.2 joblib statsmodels>=0.13 lifelines -#l0bnb # for bestsubsets -#pygam # for GAM in Ch7 +pygam # for GAM in Ch7 +torch +pytorch_lightning +torchmetrics diff --git a/setup.cfg b/setup.cfg index 14d7ccd..c59c035 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,3 @@ -[versioneer] -VCS = git -style = pep440 -versionfile_source = ISLP/_version.py -tag_prefix = -parentdir_prefix = ISLP- + +[metadata] +license_files = LICENSE.txt \ No newline at end of file diff --git a/setup.py b/setup.py deleted file mode 100755 index 95fca7d..0000000 --- a/setup.py +++ /dev/null @@ -1,254 +0,0 @@ -#!/usr/bin/env python -''' Installation script for ISLP package ''' - -import os -import sys -from os.path import 
join as pjoin, dirname, exists -from distutils.version import LooseVersion -# BEFORE importing distutils, remove MANIFEST. distutils doesn't properly -# update it when the contents of directories change. -if exists('MANIFEST'): os.remove('MANIFEST') - -# Unconditionally require setuptools -import setuptools - -# Package for getting versions from git tags -import versioneer - -# Import distutils _after_ setuptools import, and after removing -# MANIFEST -from distutils.core import setup -from distutils.extension import Extension - -# Get various parameters for this version, stored in ISLP/info.py - -class Bunch(object): - def __init__(self, vars): - for key, name in vars.items(): - if key.startswith('__'): - continue - self.__dict__[key] = name - -def read_vars_from(ver_file): - """ Read variables from Python text file - - Parameters - ---------- - ver_file : str - Filename of file to read - - Returns - ------- - info_vars : Bunch instance - Bunch object where variables read from `ver_file` appear as - attributes - """ - # Use exec for compabibility with Python 3 - ns = {} - with open(ver_file, 'rt') as fobj: - exec(fobj.read(), ns) - return Bunch(ns) - -info = read_vars_from(pjoin('ISLP', 'info.py')) - -class SetupDependency(object): - """ SetupDependency class - - Parameters - ---------- - import_name : str - Name with which required package should be ``import``ed. - min_ver : str - Distutils version string giving minimum version for package. - req_type : {'install_requires', 'setup_requires'}, optional - Setuptools dependency type. - heavy : {False, True}, optional - If True, and package is already installed (importable), then do not add - to the setuptools dependency lists. This prevents setuptools - reinstalling big packages when the package was installed without using - setuptools, or this is an upgrade, and we want to avoid the pip default - behavior of upgrading all dependencies. 
- install_name : str, optional - Name identifying package to install from pypi etc, if different from - `import_name`. - """ - - def __init__(self, import_name, - min_ver, - req_type='install_requires', - heavy=False, - install_name=None): - self.import_name = import_name - self.min_ver = min_ver - self.req_type = req_type - self.heavy = heavy - self.install_name = (import_name if install_name is None - else install_name) - - def check_fill(self, setuptools_kwargs): - """ Process this dependency, maybe filling `setuptools_kwargs` - - Run checks on this dependency. If not using setuptools, then raise - error for unmet dependencies. If using setuptools, add missing or - not-heavy dependencies to `setuptools_kwargs`. - - A heavy dependency is one that is inconvenient to install - automatically, such as numpy or (particularly) scipy, matplotlib. - - Parameters - ---------- - setuptools_kwargs : dict - Dictionary of setuptools keyword arguments that may be modified - in-place while checking dependencies. - """ - found_ver = get_pkg_version(self.import_name) - ver_err_msg = version_error_msg(self.import_name, - found_ver, - self.min_ver) - if not 'setuptools' in sys.modules: - # Not using setuptools; raise error for any unmet dependencies - if ver_err_msg is not None: - raise RuntimeError(ver_err_msg) - return - # Using setuptools; add packages to given section of - # setup/install_requires, unless it's a heavy dependency for which we - # already have an acceptable importable version. - if self.heavy and ver_err_msg is None: - return - new_req = '{0}>={1}'.format(self.import_name, self.min_ver) - old_reqs = setuptools_kwargs.get(self.req_type, []) - setuptools_kwargs[self.req_type] = old_reqs + [new_req] - -def get_pkg_version(pkg_name): - """ Return package version for `pkg_name` if installed - - Returns - ------- - pkg_version : str or None - Return None if package not importable. Return 'unknown' if standard - ``__version__`` string not present. 
Otherwise return version string. - """ - try: - pkg = __import__(pkg_name) - except ImportError: - return None - try: - return pkg.__version__ - except AttributeError: - return 'unknown' - -def version_error_msg(pkg_name, found_ver, min_ver): - """ Return informative error message for version or None - """ - if found_ver is None: - return 'We need package {0}, but not importable'.format(pkg_name) - if found_ver == 'unknown': - return 'We need {0} version {1}, but cannot get version'.format( - pkg_name, min_ver) - if LooseVersion(found_ver) >= LooseVersion(min_ver): - return None - return 'We need {0} version {1}, but found version {2}'.format( - pkg_name, found_ver, min_ver) - - - -# Try to preempt setuptools monkeypatching of Extension handling when Pyrex -# is missing. Otherwise the monkeypatched Extension will change .pyx -# filenames to .c filenames, and we probably don't have the .c files. -sys.path.insert(0, pjoin(dirname(__file__), 'fake_pyrex')) -# Set setuptools extra arguments -extra_setuptools_args = dict( - tests_require=['nose'], - test_suite='nose.collector', - zip_safe=False, - extras_require = dict( - doc=['Sphinx>=1.0'], - test=['nose>=0.10.1'])) - -# Define extensions -EXTS = [] - -SetupDependency('numpy', info.NUMPY_MIN_VERSION, - req_type='install_requires', - heavy=True).check_fill(extra_setuptools_args) -SetupDependency('scipy', info.SCIPY_MIN_VERSION, - req_type='install_requires', - heavy=True).check_fill(extra_setuptools_args) -SetupDependency('matplotlib', info.MATPLOTLIB_MIN_VERSION, - req_type='install_requires', - heavy=True).check_fill(extra_setuptools_args) -SetupDependency('pandas', info.PANDAS_MIN_VERSION, - req_type='install_requires', - heavy=True).check_fill(extra_setuptools_args) -SetupDependency('statsmodels', info.STATSMODELS_MIN_VERSION, - req_type='install_requires', - heavy=True).check_fill(extra_setuptools_args) -SetupDependency('scikit-learn', info.SKLEARN_MIN_VERSION, - req_type='install_requires', - 
heavy=True).check_fill(extra_setuptools_args) - -#requirements = open('requirements.txt').read().strip().split('\n') - -requirements = '''numpy -scipy -jupyter -pandas -lxml # pandas needs this for html -scikit-learn -joblib -pygam # for GAM in Ch7 -lifelines'''.split('\n') -#l0bnb # for bestsubsets - - - -for req in requirements: - req = req.split('#')[0] - import sys; sys.stderr.write(req+'\n') - SetupDependency(req, "0.0", - req_type='install_requires', - heavy=True).check_fill(extra_setuptools_args) - -cmdclass=versioneer.get_cmdclass() - -# get long_description - -if sys.version_info[0] > 2: - long_description = open('README.md', 'rt', encoding='utf-8').read() -else: - long_description = unicode(file('README.md').read(), 'utf-8') - -def main(**extra_args): - setup(name=info.NAME, - maintainer=info.MAINTAINER, - maintainer_email=info.MAINTAINER_EMAIL, - description=info.DESCRIPTION, - url=info.URL, - download_url=info.DOWNLOAD_URL, - license=info.LICENSE, - classifiers=info.CLASSIFIERS, - author=info.AUTHOR, - author_email=info.AUTHOR_EMAIL, - platforms=info.PLATFORMS, - version=versioneer.get_version(), - requires=info.REQUIRES, - provides=info.PROVIDES, - packages = ['ISLP', - 'ISLP.models', - 'ISLP.bart', - 'ISLP.torch' - ], - ext_modules = EXTS, - package_data = {"ISLP":["data/*csv", "data/*npy", "data/*data"]}, - include_package_data=True, - data_files=[], - scripts=[], - long_description=long_description, - cmdclass = cmdclass, - **extra_args - ) - -#simple way to test what setup will do -#python setup.py install --prefix=/tmp -if __name__ == "__main__": - main(**extra_setuptools_args) diff --git a/ISLP/bart/tests/test_bart.py b/tests/bart/test_bart.py similarity index 96% rename from ISLP/bart/tests/test_bart.py rename to tests/bart/test_bart.py index d12a0a2..903bb83 100644 --- a/ISLP/bart/tests/test_bart.py +++ b/tests/bart/test_bart.py @@ -19,8 +19,6 @@ def test_bart(): clone(B) - return B - if __name__ == "__main__": test_bart() diff --git 
a/ISLP/models/tests/__init__.py b/tests/deeplearning/__init__.py similarity index 100% rename from ISLP/models/tests/__init__.py rename to tests/deeplearning/__init__.py diff --git a/tests/deeplearning/test_hitters.py b/tests/deeplearning/test_hitters.py new file mode 100644 index 0000000..bf609b9 --- /dev/null +++ b/tests/deeplearning/test_hitters.py @@ -0,0 +1,481 @@ +import numpy as np +import pandas as pd +from matplotlib.pyplot import subplots +from sklearn.linear_model import \ + (LinearRegression, + Lasso) +from sklearn.preprocessing import StandardScaler +from sklearn.model_selection import KFold +from sklearn.pipeline import Pipeline +from ISLP import load_data +from ISLP.models import ModelSpec as MS +from sklearn.model_selection import \ + (train_test_split, + GridSearchCV) + +# torch + +import torch +from torch import nn +from torch.utils.data import TensorDataset + +# torch helpers + +from torchmetrics import MeanAbsoluteError +from torchinfo import summary + +# pytorch lightning + +from pytorch_lightning import Trainer +from pytorch_lightning.loggers import CSVLogger + +# setting seed + +from pytorch_lightning import seed_everything +seed_everything(0, workers=True) +torch.use_deterministic_algorithms(True, warn_only=True) + +# ISLP.torch + +from ISLP.torch import (SimpleDataModule, + SimpleModule, + ErrorTracker, + rec_num_workers) + + +def test_hitters(max_epochs=2, + num_lam=5): + + Hitters = load_data('Hitters').dropna() + n = Hitters.shape[0] + + # We will fit two linear models (least squares and lasso) and compare their performance + # to that of a neural network. For this comparison we will use mean absolute error on a validation dataset. + # \begin{equation*} + # \begin{split} + # \mbox{MAE}(y,\hat{y}) = \frac{1}{n} \sum_{i=1}^n |y_i-\hat{y}_i|. + # \end{split} + # \end{equation*} + # We set up the model matrix and the response. 
+ + # In[11]: + + + model = MS(Hitters.columns.drop('Salary'), intercept=False) + X = model.fit_transform(Hitters).to_numpy() + Y = Hitters['Salary'].to_numpy() + + + # The `to_numpy()` method above converts `pandas` + # data frames or series to `numpy` arrays. + # We do this because we will need to use `sklearn` to fit the lasso model, + # and it requires this conversion. + # We also use a linear regression method from `sklearn`, rather than the method + # in Chapter~3 from `statsmodels`, to facilitate the comparisons. + + # We now split the data into test and training, fixing the random + # state used by `sklearn` to do the split. + + # In[12]: + + + (X_train, + X_test, + Y_train, + Y_test) = train_test_split(X, + Y, + test_size=1/3, + random_state=1) + + + # ### Linear Models + # We fit the linear model and evaluate the test error directly. + + # In[13]: + + + hit_lm = LinearRegression().fit(X_train, Y_train) + Yhat_test = hit_lm.predict(X_test) + np.abs(Yhat_test - Y_test).mean() + + + # Next we fit the lasso using `sklearn`. We are using + # mean absolute error to select and evaluate a model, rather than mean squared error. + # The specialized solver we used in Section 6.5.2 uses only mean squared error. So here, with a bit more work, we create a cross-validation grid and perform the cross-validation directly. + # + # We encode a pipeline with two steps: we first normalize the features using a `StandardScaler()` transform, + # and then fit the lasso without further normalization. + + # In[14]: + + + scaler = StandardScaler(with_mean=True, with_std=True) + lasso = Lasso(warm_start=True, max_iter=30000) + standard_lasso = Pipeline(steps=[('scaler', scaler), + ('lasso', lasso)]) + + + # We need to create a grid of values for $\lambda$. As is common practice, + # we choose a grid of 100 values of $\lambda$, uniform on the log scale from `lam_max` down to `0.01*lam_max`. Here `lam_max` is the smallest value of + # $\lambda$ with an all-zero solution. 
This value equals the largest absolute inner-product between any predictor and the (centered) response. {The derivation of this result is beyond the scope of this book.} + + # In[15]: + + + X_s = scaler.fit_transform(X_train) + n = X_s.shape[0] + lam_max = np.fabs(X_s.T.dot(Y_train - Y_train.mean())).max() / n + param_grid = {'alpha': np.exp(np.linspace(0, np.log(0.01), num_lam)) + * lam_max} + + + # Note that we had to transform the data first, since the scale of the variables impacts the choice of $\lambda$. + # We now perform cross-validation using this sequence of $\lambda$ values. + + # In[16]: + + + cv = KFold(10, + shuffle=True, + random_state=1) + grid = GridSearchCV(lasso, + param_grid, + cv=cv, + scoring='neg_mean_absolute_error') + grid.fit(X_train, Y_train); + + + # We extract the lasso model with best cross-validated mean absolute error, and evaluate its + # performance on `X_test` and `Y_test`, which were not used in + # cross-validation. + + # In[17]: + + + trained_lasso = grid.best_estimator_ + Yhat_test = trained_lasso.predict(X_test) + np.fabs(Yhat_test - Y_test).mean() + + + # This is similar to the results we got for the linear model fit by least squares. However, these results can vary a lot for different train/test splits; we encourage the reader to try a different seed in code block 12 and rerun the subsequent code up to this point. + # + # ### Specifying a Network: Classes and Inheritance + # To fit the neural network, we first set up a model structure + # that describes the network. + # Doing so requires us to define new classes specific to the model we wish to fit. + # Typically this is done in `pytorch` by sub-classing a generic + # representation of a network, which is the approach we take here. + # Although this example is simple, we will go through the steps in some detail, since it will serve us well + # for the more complex examples to follow. 
+ + # In[18]: + + + class HittersModel(nn.Module): + + def __init__(self, input_size): + super(HittersModel, self).__init__() + self.flatten = nn.Flatten() + self.sequential = nn.Sequential( + nn.Linear(input_size, 50), + nn.ReLU(), + nn.Dropout(0.4), + nn.Linear(50, 1)) + + def forward(self, x): + x = self.flatten(x) + return torch.flatten(self.sequential(x)) + + + # The `class` statement identifies the code chunk as a + # declaration for a class `HittersModel` + # that inherits from the base class `nn.Module`. This base + # class is ubiquitous in `torch` and represents the + # mappings in the neural networks. + # + # Indented beneath the `class` statement are the methods of this class: + # in this case `__init__` and `forward`. The `__init__` method is + # called when an instance of the class is created as in the cell + # below. In the methods, `self` always refers to an instance of the + # class. In the `__init__` method, we have attached two objects to + # `self` as attributes: `flatten` and `sequential`. These are used in + # the `forward` method to describe the map that this module implements. + # + # There is one additional line in the `__init__` method, which + # is a call to + # `super()`. This function allows subclasses (i.e. `HittersModel`) + # to access methods of the class they inherit from. For example, + # the class `nn.Module` has its own `__init__` method, which is different from + # the `HittersModel.__init__()` method we’ve written above. + # Using `super()` allows us to call the method of the base class. For + # `torch` models, we will always be making this `super()` call as it is necessary + # for the model to be properly interpreted by `torch`. + # + # The object `nn.Module` has more methods than simply `__init__` and `forward`. These + # methods are directly accessible to `HittersModel` instances because of this inheritance. 
+ # One such method we will see shortly is the `eval()` method, used + # to disable dropout for when we want to evaluate the model on test data. + + # In[19]: + + + hit_model = HittersModel(X.shape[1]) + + + # The object `self.sequential` is a composition of four maps. The + # first maps the 19 features of `Hitters` to 50 dimensions, introducing $50\times 19+50$ parameters + # for the weights and *intercept* of the map (often called the *bias*). This layer + # is then mapped to a ReLU layer followed by a 40% dropout layer, and finally a + # linear map down to 1 dimension, again with a bias. The total number of + # trainable parameters is therefore $50\times 19+50+50+1=1051$. + + # The package `torchinfo` provides a `summary()` function that neatly summarizes + # this information. We specify the size of the input and see the size + # of each tensor as it passes through layers of the network. + + # In[20]: + + + summary(hit_model, + input_size=X_train.shape, + col_names=['input_size', + 'output_size', + 'num_params']) + + + # We have truncated the end of the output slightly, here and in subsequent uses. + # + # We now need to transform our training data into a form accessible to `torch`. + # The basic + # datatype in `torch` is a `tensor`, which is very similar + # to an `ndarray` from early chapters. + # We also note here that `torch` typically + # works with 32-bit (*single precision*) + # rather than 64-bit (*double precision*) floating point numbers. + # We therefore convert our data to `np.float32` before + # forming the tensor. + # The $X$ and $Y$ tensors are then arranged into a `Dataset` + # recognized by `torch` + # using `TensorDataset()`. + + # In[21]: + + + X_train_t = torch.tensor(X_train.astype(np.float32)) + Y_train_t = torch.tensor(Y_train.astype(np.float32)) + hit_train = TensorDataset(X_train_t, Y_train_t) + + + # We do the same for the test data. 
+ + # In[22]: + + + X_test_t = torch.tensor(X_test.astype(np.float32)) + Y_test_t = torch.tensor(Y_test.astype(np.float32)) + hit_test = TensorDataset(X_test_t, Y_test_t) + + + # Finally, this dataset is passed to a `DataLoader()` which ultimately + # passes data into our network. While this may seem + # like a lot of overhead, this structure is helpful for more + # complex tasks where data may live on different machines, + # or where data must be passed to a GPU. + # We provide a helper function `SimpleDataModule()` in `ISLP` to make this task easier for + # standard usage. + # One of its arguments is `num_workers`, which indicates + # how many processes we will use + # for loading the data. For small + # data like `Hitters` this will have little effect, but + # it does provide an advantage for the `MNIST` and `CIFAR100` examples below. + # The `torch` package will inspect the process running and determine a + # maximum number of workers. {This depends on the computing hardware and the number of cores available.} We’ve included a function + # `rec_num_workers()` to compute this so we know how many + # workers might be reasonable (here the max was 16). + + # In[23]: + + + max_num_workers = rec_num_workers() + + + # The general training setup in `pytorch_lightning` involves + # training, validation and test data. These are each + # represented by different data loaders. During each epoch, + # we run a training step to learn the model and a validation + # step to track the error. The test data is typically + # used at the end of training to evaluate the model. + # + # In this case, as we had split only into test and training, + # we’ll use the test data as validation data with the + # argument `validation=hit_test`. The + # `validation` argument can be a float between 0 and 1, an + # integer, or a + # `Dataset`. If a float (respectively, integer), it is interpreted + # as a percentage (respectively number) of the *training* observations to be used for validation. 
+ # If it is a `Dataset`, it is passed directly to a data loader. + + # In[24]: + + + hit_dm = SimpleDataModule(hit_train, + hit_test, + batch_size=32, + num_workers=min(4, max_num_workers), + validation=hit_test) + + + # Next we must provide a `pytorch_lightning` module that controls + # the steps performed during the training process. We provide methods for our + # `SimpleModule()` that simply record the value + # of the loss function and any additional + # metrics at the end of each epoch. These operations + # are controlled by the methods `SimpleModule.[training/test/validation]_step()`, though + # we will not be modifying these in our examples. + + # In[25]: + + + hit_module = SimpleModule.regression(hit_model, + metrics={'mae':MeanAbsoluteError()}) + + + # By using the `SimpleModule.regression()` method, we indicate that we will use squared-error loss as in + # (10.23). + # We have also asked for mean absolute error to be tracked as well + # in the metrics that are logged. + # + # We log our results via `CSVLogger()`, which in this case stores the results in a CSV file within a directory `logs/hitters`. After the fitting is complete, this allows us to load the + # results as a `pd.DataFrame()` and visualize them below. There are + # several ways to log the results within `pytorch_lightning`, though + # we will not cover those here in detail. + + # In[26]: + + + hit_logger = CSVLogger('logs', name='hitters') + + + # Finally we are ready to train our model and log the results. We + # use the `Trainer()` object from `pytorch_lightning` + # to do this work. The argument `datamodule=hit_dm` tells the trainer + # how training/validation/test logs are produced, + # while the first argument `hit_module` + # specifies the network architecture + # as well as the training/validation/test steps. + # The `callbacks` argument allows for + # several tasks to be carried out at various + # points while training a model. 
Here + # our `ErrorTracker()` callback will enable + # us to compute validation error while training + # and, finally, the test error. + # We now fit the model for 50 epochs. + + # In[27]: + + + hit_trainer = Trainer(deterministic=True, + max_epochs=max_epochs, + log_every_n_steps=5, + logger=hit_logger, + callbacks=[ErrorTracker()]) + hit_trainer.fit(hit_module, datamodule=hit_dm) + + + # At each step of SGD, the algorithm randomly selects 32 training observations for + # the computation of the gradient. Recall from Section 10.7 + # that an epoch amounts to the number of SGD steps required to process $n$ + # observations. Since the training set has + # $n=175$, and we specified a `batch_size` of 32 in the construction of `hit_dm`, an epoch is $175/32=5.5$ SGD steps. + # + # After having fit the model, we can evaluate performance on our test + # data using the `test()` method of our trainer. + + # In[28]: + + + hit_trainer.test(hit_module, datamodule=hit_dm) + + + # The results of the fit have been logged into a CSV file. We can find the + # results specific to this run in the `experiment.metrics_file_path` + # attribute of our logger. Note that each time the model is fit, the logger will output + # results into a new subdirectory of our directory `logs/hitters`. + # + # We now create a plot of the MAE (mean absolute error) as a function of + # the number of epochs. + # First we retrieve the logged summaries. + + # In[29]: + + + hit_results = pd.read_csv(hit_logger.experiment.metrics_file_path) + + + # Since we will produce similar plots in later examples, we write a + # simple generic function to produce this plot. 
+ + # In[30]: + + + def summary_plot(results, + ax, + col='loss', + valid_legend='Validation', + training_legend='Training', + ylabel='Loss', + fontsize=20): + for (column, + color, + label) in zip([f'train_{col}_epoch', + f'valid_{col}'], + ['black', + 'red'], + [training_legend, + valid_legend]): + results.plot(x='epoch', + y=column, + label=label, + marker='o', + color=color, + ax=ax) + ax.set_xlabel('Epoch') + ax.set_ylabel(ylabel) + return ax + + + # We now set up our axes, and use our function to produce the MAE plot. + + # In[31]: + + + fig, ax = subplots(1, 1, figsize=(6, 6)) + ax = summary_plot(hit_results, + ax, + col='mae', + ylabel='MAE', + valid_legend='Validation (=Test)') + ax.set_ylim([0, 400]) + ax.set_xticks(np.linspace(0, 50, 11).astype(int)); + + + # We can predict directly from the final model, and + # evaluate its performance on the test data. + # Before fitting, we call the `eval()` method + # of `hit_model`. + # This tells + # `torch` to effectively consider this model to be fitted, so that + # we can use it to predict on new data. For our model here, + # the biggest change is that the dropout layers will + # be turned off, i.e. no weights will be randomly + # dropped in predicting on new data. 
+ + # In[32]: + + + hit_model.eval() + preds = hit_module(X_test_t) + torch.abs(Y_test_t - preds).mean() + + + diff --git a/tests/deeplearning/test_mnist.py b/tests/deeplearning/test_mnist.py new file mode 100644 index 0000000..c6d39d9 --- /dev/null +++ b/tests/deeplearning/test_mnist.py @@ -0,0 +1,258 @@ + +# torch + +import torch +from torch import nn + +# torch helpers + +from torchinfo import summary + +# pytorch lightning + +from pytorch_lightning import Trainer +from pytorch_lightning.loggers import CSVLogger + +# setting seed + +from pytorch_lightning import seed_everything +seed_everything(0, workers=True) +torch.use_deterministic_algorithms(True, warn_only=True) + +# ISLP.torch + +from ISLP.torch import (SimpleDataModule, + SimpleModule, + ErrorTracker) + +from torchvision.datasets import MNIST +from torchvision.transforms import ToTensor + +def test_mnist(max_epochs=2): + + + # ## Multilayer Network on the MNIST Digit Data + # The `torchvision` package comes with a number of example datasets, + # including the `MNIST` digit data. Our first step is to retrieve + # the training and test data sets; the `MNIST()` function within + # `torchvision.datasets` is provided for this purpose. The + # data will be downloaded the first time this function is executed, and stored in the directory `data/MNIST`. + + # In[34]: + + + (mnist_train, + mnist_test) = [MNIST(root='data', + train=train, + download=True, + transform=ToTensor()) + for train in [True, False]] + mnist_train + + + # There are 60,000 images in the training data and 10,000 in the test + # data. The images are $28\times 28$, and stored as a matrix of pixels. We + # need to transform each one into a vector. + # + # Neural networks are somewhat sensitive to the scale of the inputs, much as ridge and + # lasso regularization are affected by scaling. Here the inputs are eight-bit + # grayscale values between 0 and 255, so we rescale to the unit + # interval. {Note: eight bits means $2^8$, which equals 256. 
Since the convention + # is to start at $0$, the possible values range from $0$ to $255$.} + # This transformation, along with some reordering + # of the axes, is performed by the `ToTensor()` transform + # from the `torchvision.transforms` package. + # + # As in our `Hitters` example, we form a data module + # from the training and test datasets, setting aside 20% + # of the training images for validation. + + # In[35]: + + + mnist_dm = SimpleDataModule(mnist_train, + mnist_test, + validation=0.2, + num_workers=2, + batch_size=256) + + + # Let’s take a look at the data that will get fed into our network. We loop through the first few + # chunks of the test dataset, breaking after 2 batches: + + # In[36]: + + + for idx, (X_ ,Y_) in enumerate(mnist_dm.train_dataloader()): + print('X: ', X_.shape) + print('Y: ', Y_.shape) + if idx >= 1: + break + + + # We see that the $X$ for each batch consists of 256 images of size `1x28x28`. + # Here the `1` indicates a single channel (greyscale). For RGB images such as `CIFAR100` below, + # we will see that the `1` in the size will be replaced by `3` for the three RGB channels. + # + # Now we are ready to specify our neural network. + + # In[37]: + + + class MNISTModel(nn.Module): + def __init__(self): + super(MNISTModel, self).__init__() + self.layer1 = nn.Sequential( + nn.Flatten(), + nn.Linear(28*28, 256), + nn.ReLU(), + nn.Dropout(0.4)) + self.layer2 = nn.Sequential( + nn.Linear(256, 128), + nn.ReLU(), + nn.Dropout(0.3)) + self._forward = nn.Sequential( + self.layer1, + self.layer2, + nn.Linear(128, 10)) + def forward(self, x): + return self._forward(x) + + + # We see that in the first layer, each `1x28x28` image is flattened, then mapped to + # 256 dimensions where we apply a ReLU activation with 40% dropout. + # A second layer maps the first layer’s output down to + # 128 dimensions, applying a ReLU activation with 30% dropout. 
Finally, + # the 128 dimensions are mapped down to 10, the number of classes in the + # `MNIST` data. + + # In[38]: + + + mnist_model = MNISTModel() + + + # We can check that the model produces output of expected size based + # on our existing batch `X_` above. + + # In[39]: + + + mnist_model(X_).size() + + + # Let’s take a look at the summary of the model. Instead of an `input_size` we can pass + # a tensor of correct shape. In this case, we pass through the final + # batched `X_` from above. + + # In[40]: + + + summary(mnist_model, + input_data=X_, + col_names=['input_size', + 'output_size', + 'num_params']) + + + # Having set up both the model and the data module, fitting this model is + # now almost identical to the `Hitters` example. In contrast to our regression model, here we will use the + # `SimpleModule.classification()` method which + # uses the cross-entropy loss function instead of mean squared error. It must be supplied with the number of classes in the problem. + + # In[41]: + + + mnist_module = SimpleModule.classification(mnist_model, + num_classes=10) + mnist_logger = CSVLogger('logs', name='MNIST') + + + # Now we are ready to go. The final step is to supply training data, and fit the model. + + # In[42]: + + + mnist_trainer = Trainer(deterministic=True, + max_epochs=max_epochs, + logger=mnist_logger, + callbacks=[ErrorTracker()]) + mnist_trainer.fit(mnist_module, + datamodule=mnist_dm) + + + # We have suppressed the output here, which is a progress report on the + # fitting of the model, grouped by epoch. This is very useful, since on + # large datasets fitting can take time. Fitting this model took 245 + # seconds on a MacBook Pro with an Apple M1 Pro chip with 10 cores and 16 GB of RAM. + # Here we specified a + # validation split of 20%, so training is actually performed on + # 80% of the 60,000 observations in the training set. This is an + # alternative to actually supplying validation data, like we did for the `Hitters` data. 
+ # SGD uses batches + # of 256 observations in computing the gradient, and doing the + # arithmetic, we see that an epoch corresponds to 188 gradient steps. + + # `SimpleModule.classification()` includes + # an accuracy metric by default. Other + # classification metrics can be added from `torchmetrics`. + # We will use our `summary_plot()` function to display + # accuracy across epochs. + + + mnist_trainer.test(mnist_module, + datamodule=mnist_dm) + + + # Table 10.1 also reports the error rates resulting from LDA (Chapter 4) and multiclass logistic + # regression. For LDA we refer the reader to Section 4.7.3. + # Although we could use the `sklearn` function `LogisticRegression()` to fit + # multiclass logistic regression, we are set up here to fit such a model + # with `torch`. + # We just have an input layer and an output layer, and omit the hidden layers! + + # In[45]: + + + class MNIST_MLR(nn.Module): + def __init__(self): + super(MNIST_MLR, self).__init__() + self.linear = nn.Sequential(nn.Flatten(), + nn.Linear(784, 10)) + def forward(self, x): + return self.linear(x) + + mlr_model = MNIST_MLR() + mlr_module = SimpleModule.classification(mlr_model, + num_classes=10) + mlr_logger = CSVLogger('logs', name='MNIST_MLR') + + + # In[46]: + + + mlr_trainer = Trainer(deterministic=True, + max_epochs=30, + callbacks=[ErrorTracker()]) + mlr_trainer.fit(mlr_module, datamodule=mnist_dm) + + + # We fit the model just as before and compute the test results. + + # In[47]: + + + mlr_trainer.test(mlr_module, + datamodule=mnist_dm) + + + # The accuracy is above 90% even for this pretty simple model. + # + # As in the `Hitters` example, we delete some of + # the objects we created above. 
+ + # In[48]: + + + + diff --git a/tests/models/test_boolean_columns.py b/tests/models/test_boolean_columns.py new file mode 100644 index 0000000..7b5a429 --- /dev/null +++ b/tests/models/test_boolean_columns.py @@ -0,0 +1,23 @@ +import pandas as pd +import statsmodels.api as sm +import numpy as np +from itertools import combinations + +from ISLP.models import ModelSpec as MS + +rng = np.random.default_rng(0) + +df = pd.DataFrame({'A':rng.standard_normal(10), + 'B':np.array([1,2,3,2,1,1,1,3,2,1], int), + 'C':np.array([True,False,False,True,True]*2, bool), + 'D':rng.standard_normal(10)}) +Y = rng.standard_normal(10) + +def test_all(): + + for i in range(1, 5): + for comb in combinations(['A','B','C','D'], i): + + X = MS(comb).fit_transform(df) + sm.OLS(Y, X).fit() + diff --git a/ISLP/models/tests/test_columns.py b/tests/models/test_columns.py similarity index 79% rename from ISLP/models/tests/test_columns.py rename to tests/models/test_columns.py index a86941b..77ba784 100644 --- a/ISLP/models/tests/test_columns.py +++ b/tests/models/test_columns.py @@ -3,6 +3,7 @@ from pandas.api.types import CategoricalDtype from ISLP.models.columns import _get_column_info +from ISLP.models.model_spec import Contrast def test_column_info(): @@ -15,5 +16,7 @@ def test_column_info(): print(_get_column_info(df, df.columns, [False]*4+[True], - [False]*5)) + [False]*5, + categorical_encoders={'categorical':Contrast(method='drop')})) + diff --git a/ISLP/models/tests/test_model_matrix.py b/tests/models/test_model_matrix.py similarity index 86% rename from ISLP/models/tests/test_model_matrix.py rename to tests/models/test_model_matrix.py index 51e079c..70b9cab 100644 --- a/ISLP/models/tests/test_model_matrix.py +++ b/tests/models/test_model_matrix.py @@ -2,7 +2,7 @@ from sklearn.base import clone from ISLP.transforms import Poly, NaturalSpline, BSpline, Interaction -from ISLP.models.model_spec import ModelSpec, Variable, ns, bs, poly, pca, contrast, Contrast +from ISLP.models.model_spec 
import ModelSpec, Feature, ns, bs, poly, pca, contrast, Contrast, build_model from sklearn.preprocessing import (OneHotEncoder, OrdinalEncoder) @@ -37,7 +37,7 @@ def test_ndarray(): X = rng.standard_normal((50,5)) M = ModelSpec(terms=[1, (3,2)], - default_encoders=default_encoders) + categorical_encoders=default_encoders) M.fit(X) MX = M.transform(X) @@ -51,7 +51,7 @@ def test_dataframe1(): D = pd.DataFrame(X, columns=['A','B','C','D','E']) M = ModelSpec(terms=['A','D',('D','E')], - default_encoders=default_encoders) + categorical_encoders=default_encoders) clone(M) MX = np.asarray(M.fit_transform(D)) @@ -66,7 +66,7 @@ def test_dataframe2(): D = pd.DataFrame(X, columns=['V','B','A','D','E']) M = ModelSpec(terms=['A', 'D', 'B', ('D','E'), 'V'], - default_encoders=default_encoders) + categorical_encoders=default_encoders) clone(M) MX = M.fit_transform(D) @@ -83,7 +83,7 @@ def test_dataframe3(): D['E'] = pd.Categorical(rng.choice(range(4,8), 50, replace=True)) M = ModelSpec(terms=['A', 'E', ('D','E')], - default_encoders=default_encoders) + categorical_encoders=default_encoders) MX = np.asarray(M.fit_transform(D)) M2 = clone(M) @@ -105,7 +105,7 @@ def test_dataframe4(): D['E'] = pd.Categorical(rng.choice(range(4,8), 50, replace=True)) M = ModelSpec(terms=['A', 'E', ('D','E'), 'D'], - default_encoders=default_encoders) + categorical_encoders=default_encoders) MX = np.asarray(M.fit_transform(D)) DE = pd.get_dummies(D['E']) @@ -119,7 +119,6 @@ def test_dataframe4(): np.testing.assert_allclose(MX, MX2) print(MX2.columns) - return M, D def test_dataframe5(): @@ -130,7 +129,7 @@ def test_dataframe5(): D['E'] = pd.Categorical(rng.choice(range(4,8), 50, replace=True)) M = ModelSpec(terms=['A', 'E', ('D','E')], - default_encoders=default_encoders) + categorical_encoders=default_encoders) MX = np.asarray(M.fit_transform(D)) # check they agree on copy of dataframe @@ -144,12 +143,12 @@ def test_dataframe6(): rng = np.random.default_rng(11) X = rng.standard_normal((50,5)) D = 
pd.DataFrame(X, columns=['A','B','C','D','E']) - W = Variable(('A','E'), 'AE', None) + W = Feature(('A','E'), 'AE', None) D['D'] = pd.Categorical(rng.choice(['a','b','c'], 50, replace=True)) D['E'] = pd.Categorical(rng.choice(range(4,8), 50, replace=True)) M = ModelSpec(terms=['A',W,(W,'D',)], - default_encoders=default_encoders) + categorical_encoders=default_encoders) MX = M.fit_transform(D) MX = np.asarray(MX) @@ -163,7 +162,7 @@ def test_dataframe7(): D['Eee'] = pd.Categorical(rng.choice(range(4,8), 50, replace=True)) M = ModelSpec(terms=D.columns.drop(['Y','C']), - default_encoders=default_encoders) + categorical_encoders=default_encoders) MX = M.fit_transform(D) print(MX.columns) MX = np.asarray(MX) @@ -178,9 +177,9 @@ def test_dataframe8(): poly = Poly(degree=3) # raises a ValueError because poly will have been already fit -- need new instance of Poly - W = Variable(('A',), 'poly(A)', poly) + W = Feature(('A',), 'poly(A)', poly) M = ModelSpec(terms=list(D.columns.drop(['Y','C'])) + [(W,'E')], - default_encoders=default_encoders) + categorical_encoders=default_encoders) MX = M.fit_transform(D) print(MX.columns) @@ -196,10 +195,10 @@ def test_dataframe9(): poly = Poly(degree=3) # raises a ValueError because poly will have been already fit -- need new instance of Poly - W = Variable(('A',), 'poly(A)', poly) - U = Variable(('B',), 'poly(B)', clone(poly)) + W = Feature(('A',), 'poly(A)', poly) + U = Feature(('B',), 'poly(B)', clone(poly)) M = ModelSpec(terms=list(D.columns.drop(['Y','C'])) + [W,U], - default_encoders=default_encoders) + categorical_encoders=default_encoders) MX = M.fit_transform(D) print(MX.columns) @@ -210,13 +209,13 @@ def test_dataframe10(): rng = np.random.default_rng(15) X = rng.standard_normal((50,5)) D = pd.DataFrame(X, columns=['A','B','C','D','E']) - W = Variable(('A','E'), 'AE', None) - U = Variable((W, 'C'), 'WC', None) + W = Feature(('A','E'), 'AE', None) + U = Feature((W, 'C'), 'WC', None) D['D'] = 
pd.Categorical(rng.choice(['a','b','c'], 50, replace=True)) D['E'] = pd.Categorical(rng.choice(range(4,8), 50, replace=True)) M = ModelSpec(terms=['A', 'E', 'C', W, (W, 'D',), U], - default_encoders=default_encoders) + categorical_encoders=default_encoders) MX = M.fit_transform(D) print(MX.columns) MX = np.asarray(MX) @@ -258,7 +257,11 @@ def test_submodel(): M.fit(D) MX = M.transform(D) - MXsub = M.build_submodel(D, M.terms[:2]) + MXsub = build_model(M.column_info_, + D, + M.terms[:2], + intercept=M.intercept, + encoders=M.encoders_) print(MX.columns) print(MXsub.columns) @@ -275,7 +278,11 @@ def test_contrast(): M.fit(D) MX = M.transform(D) - MXsub = M.build_submodel(D, M.terms[:2]) + MXsub = build_model(M.column_info_, + D, + M.terms[:2], + intercept=M.intercept, + encoders=M.encoders_) print(method, MX.columns) print(MXsub.columns) @@ -309,7 +316,7 @@ def test_pca(): X = rng.standard_normal((50,8)) D = pd.DataFrame(X, columns=['A','B','C','D','E', 'F', 'G', 'H']) - pca_ = Variable(('A','B','C','D'), 'pca(ABCD)', PCA(n_components=2)) + pca_ = Feature(('A','B','C','D'), 'pca(ABCD)', PCA(n_components=2)) M = ModelSpec(terms=[poly('F', intercept=True, degree=3), pca_]) diff --git a/ISLP/models/tests/test_selection.py b/tests/models/test_selection.py similarity index 100% rename from ISLP/models/tests/test_selection.py rename to tests/models/test_selection.py diff --git a/tests/models/test_sklearn_wrap.py b/tests/models/test_sklearn_wrap.py new file mode 100644 index 0000000..c3616bd --- /dev/null +++ b/tests/models/test_sklearn_wrap.py @@ -0,0 +1,46 @@ + +import numpy as np +import pandas as pd +import statsmodels.api as sm +from sklearn.base import is_classifier, is_regressor +import pytest + +from ISLP.models.sklearn_wrap import sklearn_sm, sklearn_selected +from ISLP.models.model_spec import ModelSpec +from ISLP.models.strategy import min_max + +@pytest.fixture +def model_setup(): + X = pd.DataFrame({'X1': np.random.rand(10), 'X2': np.random.rand(10), 'X3': 
np.random.rand(10)}) + y = pd.Series(np.random.randint(0, 2, 10)) # For classifier + model_spec_dummy = ModelSpec(['X1', 'X2', 'X3']).fit(X) + min_max_strategy_dummy = min_max(model_spec_dummy, min_terms=1, max_terms=2) + return X, y, model_spec_dummy, min_max_strategy_dummy + +def test_OLS_is_regressor(): + model = sklearn_sm(sm.OLS) + assert model.__sklearn_tags__().estimator_type == 'regressor' + assert is_regressor(model) + +def test_GLM_binomial_is_classifier(): + model = sklearn_sm(sm.GLM, model_args={'family': sm.families.Binomial()}) + assert model.__sklearn_tags__().estimator_type == 'classifier' + assert is_classifier(model) + +def test_GLM_binomial_probit_is_classifier(): + model = sklearn_sm(sm.GLM, model_args={'family': sm.families.Binomial(link=sm.families.links.Probit())}) + assert model.__sklearn_tags__().estimator_type == 'classifier' + assert is_classifier(model) + + +def test_selected_OLS_is_regressor(model_setup): + X, y, model_spec_dummy, min_max_strategy_dummy = model_setup + model = sklearn_selected(sm.OLS, strategy=min_max_strategy_dummy) + assert model.__sklearn_tags__().estimator_type == 'regressor' + assert is_regressor(model) + +def test_selected_GLM_binomial_is_classifier(model_setup): + X, y, model_spec_dummy, min_max_strategy_dummy = model_setup + model = sklearn_selected(sm.GLM, strategy=min_max_strategy_dummy, model_args={'family': sm.families.Binomial()}) + assert model.__sklearn_tags__().estimator_type == 'classifier' + assert is_classifier(model) diff --git a/tests/test_datasets.py b/tests/test_datasets.py new file mode 100644 index 0000000..39f1447 --- /dev/null +++ b/tests/test_datasets.py @@ -0,0 +1,30 @@ +# test that all datasets import + +from ISLP import load_data +import numpy as np +import pytest + +datasets = ['Auto', + 'Bikeshare', + 'Boston', + 'BrainCancer', + 'Caravan', + 'Carseats', + 'College', + 'Credit', + 'Default', + 'Fund', + 'Hitters', + 'NYSE', + 'OJ', + 'Portfolio', + 'Publication', + 'Smarket', + 'Wage', + 
'Weekly'] + +@pytest.mark.parametrize('dataset', datasets) +def test_load(dataset): + df = load_data(dataset) + for col in df.columns: + assert df[col].dtype != np.dtype(object) diff --git a/torch_requirements.txt b/torch_requirements.txt deleted file mode 100644 index f3b355a..0000000 --- a/torch_requirements.txt +++ /dev/null @@ -1,6 +0,0 @@ -torch -torchvision -torchmetrics -torchdata -pytorch_lightning -torchinfo diff --git a/versioneer.py b/versioneer.py deleted file mode 100644 index b4cd1d6..0000000 --- a/versioneer.py +++ /dev/null @@ -1,2109 +0,0 @@ - -# Version: 0.21 - -"""The Versioneer - like a rocketeer, but for versions. - -The Versioneer -============== - -* like a rocketeer, but for versions! -* https://github.com/python-versioneer/python-versioneer -* Brian Warner -* License: Public Domain -* Compatible with: Python 3.6, 3.7, 3.8, 3.9 and pypy3 -* [![Latest Version][pypi-image]][pypi-url] -* [![Build Status][travis-image]][travis-url] - -This is a tool for managing a recorded version number in distutils-based -python projects. The goal is to remove the tedious and error-prone "update -the embedded version string" step from your release process. Making a new -release should be as easy as recording a new tag in your version-control -system, and maybe making new tarballs. 
- - -## Quick Install - -* `pip install versioneer` to somewhere in your $PATH -* add a `[versioneer]` section to your setup.cfg (see [Install](INSTALL.md)) -* run `versioneer install` in your source tree, commit the results -* Verify version information with `python setup.py version` - -## Version Identifiers - -Source trees come from a variety of places: - -* a version-control system checkout (mostly used by developers) -* a nightly tarball, produced by build automation -* a snapshot tarball, produced by a web-based VCS browser, like github's - "tarball from tag" feature -* a release tarball, produced by "setup.py sdist", distributed through PyPI - -Within each source tree, the version identifier (either a string or a number, -this tool is format-agnostic) can come from a variety of places: - -* ask the VCS tool itself, e.g. "git describe" (for checkouts), which knows - about recent "tags" and an absolute revision-id -* the name of the directory into which the tarball was unpacked -* an expanded VCS keyword ($Id$, etc) -* a `_version.py` created by some earlier build step - -For released software, the version identifier is closely related to a VCS -tag. Some projects use tag names that include more than just the version -string (e.g. "myproject-1.2" instead of just "1.2"), in which case the tool -needs to strip the tag prefix to extract the version identifier. For -unreleased software (between tags), the version identifier should provide -enough information to help developers recreate the same tree, while also -giving them an idea of roughly how old the tree is (after version 1.2, before -version 1.3). Many VCS systems can report a description that captures this, -for example `git describe --tags --dirty --always` reports things like -"0.7-1-g574ab98-dirty" to indicate that the checkout is one revision past the -0.7 tag, has a unique revision id of "574ab98", and is "dirty" (it has -uncommitted changes). 
- -The version identifier is used for multiple purposes: - -* to allow the module to self-identify its version: `myproject.__version__` -* to choose a name and prefix for a 'setup.py sdist' tarball - -## Theory of Operation - -Versioneer works by adding a special `_version.py` file into your source -tree, where your `__init__.py` can import it. This `_version.py` knows how to -dynamically ask the VCS tool for version information at import time. - -`_version.py` also contains `$Revision$` markers, and the installation -process marks `_version.py` to have this marker rewritten with a tag name -during the `git archive` command. As a result, generated tarballs will -contain enough information to get the proper version. - -To allow `setup.py` to compute a version too, a `versioneer.py` is added to -the top level of your source tree, next to `setup.py` and the `setup.cfg` -that configures it. This overrides several distutils/setuptools commands to -compute the version when invoked, and changes `setup.py build` and `setup.py -sdist` to replace `_version.py` with a small static file that contains just -the generated version data. - -## Installation - -See [INSTALL.md](./INSTALL.md) for detailed installation instructions. - -## Version-String Flavors - -Code which uses Versioneer can learn about its version string at runtime by -importing `_version` from your main `__init__.py` file and running the -`get_versions()` function. From the "outside" (e.g. in `setup.py`), you can -import the top-level `versioneer.py` and run `get_versions()`. - -Both functions return a dictionary with different flavors of version -information: - -* `['version']`: A condensed version string, rendered using the selected - style. This is the most commonly used value for the project's version - string. The default "pep440" style yields strings like `0.11`, - `0.11+2.g1076c97`, or `0.11+2.g1076c97.dirty`. See the "Styles" section - below for alternative styles. 
- -* `['full-revisionid']`: detailed revision identifier. For Git, this is the - full SHA1 commit id, e.g. "1076c978a8d3cfc70f408fe5974aa6c092c949ac". - -* `['date']`: Date and time of the latest `HEAD` commit. For Git, it is the - commit date in ISO 8601 format. This will be None if the date is not - available. - -* `['dirty']`: a boolean, True if the tree has uncommitted changes. Note that - this is only accurate if run in a VCS checkout, otherwise it is likely to - be False or None - -* `['error']`: if the version string could not be computed, this will be set - to a string describing the problem, otherwise it will be None. It may be - useful to throw an exception in setup.py if this is set, to avoid e.g. - creating tarballs with a version string of "unknown". - -Some variants are more useful than others. Including `full-revisionid` in a -bug report should allow developers to reconstruct the exact code being tested -(or indicate the presence of local changes that should be shared with the -developers). `version` is suitable for display in an "about" box or a CLI -`--version` output: it can be easily compared against release notes and lists -of bugs fixed in various releases. - -The installer adds the following text to your `__init__.py` to place a basic -version in `YOURPROJECT.__version__`: - - from ._version import get_versions - __version__ = get_versions()['version'] - del get_versions - -## Styles - -The setup.cfg `style=` configuration controls how the VCS information is -rendered into a version string. - -The default style, "pep440", produces a PEP440-compliant string, equal to the -un-prefixed tag name for actual releases, and containing an additional "local -version" section with more detail for in-between builds. For Git, this is -TAG[+DISTANCE.gHEX[.dirty]] , using information from `git describe --tags ---dirty --always`. 
For example "0.11+2.g1076c97.dirty" indicates that the -tree is like the "1076c97" commit but has uncommitted changes (".dirty"), and -that this commit is two revisions ("+2") beyond the "0.11" tag. For released -software (exactly equal to a known tag), the identifier will only contain the -stripped tag, e.g. "0.11". - -Other styles are available. See [details.md](details.md) in the Versioneer -source tree for descriptions. - -## Debugging - -Versioneer tries to avoid fatal errors: if something goes wrong, it will tend -to return a version of "0+unknown". To investigate the problem, run `setup.py -version`, which will run the version-lookup code in a verbose mode, and will -display the full contents of `get_versions()` (including the `error` string, -which may help identify what went wrong). - -## Known Limitations - -Some situations are known to cause problems for Versioneer. This details the -most significant ones. More can be found on Github -[issues page](https://github.com/python-versioneer/python-versioneer/issues). - -### Subprojects - -Versioneer has limited support for source trees in which `setup.py` is not in -the root directory (e.g. `setup.py` and `.git/` are *not* siblings). The are -two common reasons why `setup.py` might not be in the root: - -* Source trees which contain multiple subprojects, such as - [Buildbot](https://github.com/buildbot/buildbot), which contains both - "master" and "slave" subprojects, each with their own `setup.py`, - `setup.cfg`, and `tox.ini`. Projects like these produce multiple PyPI - distributions (and upload multiple independently-installable tarballs). -* Source trees whose main purpose is to contain a C library, but which also - provide bindings to Python (and perhaps other languages) in subdirectories. - -Versioneer will look for `.git` in parent directories, and most operations -should get the right version string. 
However `pip` and `setuptools` have bugs -and implementation details which frequently cause `pip install .` from a -subproject directory to fail to find a correct version string (so it usually -defaults to `0+unknown`). - -`pip install --editable .` should work correctly. `setup.py install` might -work too. - -Pip-8.1.1 is known to have this problem, but hopefully it will get fixed in -some later version. - -[Bug #38](https://github.com/python-versioneer/python-versioneer/issues/38) is tracking -this issue. The discussion in -[PR #61](https://github.com/python-versioneer/python-versioneer/pull/61) describes the -issue from the Versioneer side in more detail. -[pip PR#3176](https://github.com/pypa/pip/pull/3176) and -[pip PR#3615](https://github.com/pypa/pip/pull/3615) contain work to improve -pip to let Versioneer work correctly. - -Versioneer-0.16 and earlier only looked for a `.git` directory next to the -`setup.cfg`, so subprojects were completely unsupported with those releases. - -### Editable installs with setuptools <= 18.5 - -`setup.py develop` and `pip install --editable .` allow you to install a -project into a virtualenv once, then continue editing the source code (and -test) without re-installing after every change. - -"Entry-point scripts" (`setup(entry_points={"console_scripts": ..})`) are a -convenient way to specify executable scripts that should be installed along -with the python package. - -These both work as expected when using modern setuptools. When using -setuptools-18.5 or earlier, however, certain operations will cause -`pkg_resources.DistributionNotFound` errors when running the entrypoint -script, which must be resolved by re-installing the package. This happens -when the install happens with one version, then the egg_info data is -regenerated while a different version is checked out. Many setup.py commands -cause egg_info to be rebuilt (including `sdist`, `wheel`, and installing into -a different virtualenv), so this can be surprising. 
- -[Bug #83](https://github.com/python-versioneer/python-versioneer/issues/83) describes -this one, but upgrading to a newer version of setuptools should probably -resolve it. - - -## Updating Versioneer - -To upgrade your project to a new release of Versioneer, do the following: - -* install the new Versioneer (`pip install -U versioneer` or equivalent) -* edit `setup.cfg`, if necessary, to include any new configuration settings - indicated by the release notes. See [UPGRADING](./UPGRADING.md) for details. -* re-run `versioneer install` in your source tree, to replace - `SRC/_version.py` -* commit any changed files - -## Future Directions - -This tool is designed to make it easily extended to other version-control -systems: all VCS-specific components are in separate directories like -src/git/ . The top-level `versioneer.py` script is assembled from these -components by running make-versioneer.py . In the future, make-versioneer.py -will take a VCS name as an argument, and will construct a version of -`versioneer.py` that is specific to the given VCS. It might also take the -configuration arguments that are currently provided manually during -installation by editing setup.py . Alternatively, it might go the other -direction and include code from all supported VCS systems, reducing the -number of intermediate scripts. - -## Similar projects - -* [setuptools_scm](https://github.com/pypa/setuptools_scm/) - a non-vendored build-time - dependency -* [minver](https://github.com/jbweston/miniver) - a lightweight reimplementation of - versioneer -* [versioningit](https://github.com/jwodder/versioningit) - a PEP 518-based setuptools - plugin - -## License - -To make Versioneer easier to embed, all its code is dedicated to the public -domain. The `_version.py` that it creates is also in the public domain. 
-Specifically, both are released under the Creative Commons "Public Domain -Dedication" license (CC0-1.0), as described in -https://creativecommons.org/publicdomain/zero/1.0/ . - -[pypi-image]: https://img.shields.io/pypi/v/versioneer.svg -[pypi-url]: https://pypi.python.org/pypi/versioneer/ -[travis-image]: -https://img.shields.io/travis/com/python-versioneer/python-versioneer.svg -[travis-url]: https://travis-ci.com/github/python-versioneer/python-versioneer - -""" -# pylint:disable=invalid-name,import-outside-toplevel,missing-function-docstring -# pylint:disable=missing-class-docstring,too-many-branches,too-many-statements -# pylint:disable=raise-missing-from,too-many-lines,too-many-locals,import-error -# pylint:disable=too-few-public-methods,redefined-outer-name,consider-using-with -# pylint:disable=attribute-defined-outside-init,too-many-arguments - -import configparser -import errno -import json -import os -import re -import subprocess -import sys -from typing import Callable, Dict - - -class VersioneerConfig: - """Container for Versioneer configuration parameters.""" - - -def get_root(): - """Get the project root directory. - - We require that all commands are run from the project root, i.e. the - directory that contains setup.py, setup.cfg, and versioneer.py . - """ - root = os.path.realpath(os.path.abspath(os.getcwd())) - setup_py = os.path.join(root, "setup.py") - versioneer_py = os.path.join(root, "versioneer.py") - if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): - # allow 'python path/to/setup.py COMMAND' - root = os.path.dirname(os.path.realpath(os.path.abspath(sys.argv[0]))) - setup_py = os.path.join(root, "setup.py") - versioneer_py = os.path.join(root, "versioneer.py") - if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): - err = ("Versioneer was unable to run the project root directory. 
" - "Versioneer requires setup.py to be executed from " - "its immediate directory (like 'python setup.py COMMAND'), " - "or in a way that lets it use sys.argv[0] to find the root " - "(like 'python path/to/setup.py COMMAND').") - raise VersioneerBadRootError(err) - try: - # Certain runtime workflows (setup.py install/develop in a setuptools - # tree) execute all dependencies in a single python process, so - # "versioneer" may be imported multiple times, and python's shared - # module-import table will cache the first one. So we can't use - # os.path.dirname(__file__), as that will find whichever - # versioneer.py was first imported, even in later projects. - my_path = os.path.realpath(os.path.abspath(__file__)) - me_dir = os.path.normcase(os.path.splitext(my_path)[0]) - vsr_dir = os.path.normcase(os.path.splitext(versioneer_py)[0]) - if me_dir != vsr_dir: - print("Warning: build in %s is using versioneer.py from %s" - % (os.path.dirname(my_path), versioneer_py)) - except NameError: - pass - return root - - -def get_config_from_root(root): - """Read the project setup.cfg file to determine Versioneer config.""" - # This might raise OSError (if setup.cfg is missing), or - # configparser.NoSectionError (if it lacks a [versioneer] section), or - # configparser.NoOptionError (if it lacks "VCS="). See the docstring at - # the top of versioneer.py for instructions on writing your setup.cfg . 
- setup_cfg = os.path.join(root, "setup.cfg") - parser = configparser.ConfigParser() - with open(setup_cfg, "r") as cfg_file: - parser.read_file(cfg_file) - VCS = parser.get("versioneer", "VCS") # mandatory - - # Dict-like interface for non-mandatory entries - section = parser["versioneer"] - - cfg = VersioneerConfig() - cfg.VCS = VCS - cfg.style = section.get("style", "") - cfg.versionfile_source = section.get("versionfile_source") - cfg.versionfile_build = section.get("versionfile_build") - cfg.tag_prefix = section.get("tag_prefix") - if cfg.tag_prefix in ("''", '""'): - cfg.tag_prefix = "" - cfg.parentdir_prefix = section.get("parentdir_prefix") - cfg.verbose = section.get("verbose") - return cfg - - -class NotThisMethod(Exception): - """Exception raised if a method is not valid for the current scenario.""" - - -# these dictionaries contain VCS-specific tools -LONG_VERSION_PY: Dict[str, str] = {} -HANDLERS: Dict[str, Dict[str, Callable]] = {} - - -def register_vcs_handler(vcs, method): # decorator - """Create decorator to mark a method as the handler of a VCS.""" - def decorate(f): - """Store f in HANDLERS[vcs][method].""" - HANDLERS.setdefault(vcs, {})[method] = f - return f - return decorate - - -def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, - env=None): - """Call the given command(s).""" - assert isinstance(commands, list) - process = None - for command in commands: - try: - dispcmd = str([command] + args) - # remember shell=False, so use git.cmd on windows, not just git - process = subprocess.Popen([command] + args, cwd=cwd, env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr - else None)) - break - except OSError: - e = sys.exc_info()[1] - if e.errno == errno.ENOENT: - continue - if verbose: - print("unable to run %s" % dispcmd) - print(e) - return None, None - else: - if verbose: - print("unable to find command, tried %s" % (commands,)) - return None, None - stdout = 
process.communicate()[0].strip().decode() - if process.returncode != 0: - if verbose: - print("unable to run %s (error)" % dispcmd) - print("stdout was %s" % stdout) - return None, process.returncode - return stdout, process.returncode - - -LONG_VERSION_PY['git'] = r''' -# This file helps to compute a version number in source trees obtained from -# git-archive tarball (such as those provided by githubs download-from-tag -# feature). Distribution tarballs (built by setup.py sdist) and build -# directories (produced by setup.py build) will contain a much shorter file -# that just contains the computed version number. - -# This file is released into the public domain. Generated by -# versioneer-0.21 (https://github.com/python-versioneer/python-versioneer) - -"""Git implementation of _version.py.""" - -import errno -import os -import re -import subprocess -import sys -from typing import Callable, Dict - - -def get_keywords(): - """Get the keywords needed to look up the version information.""" - # these strings will be replaced by git during git-archive. - # setup.py/versioneer.py will grep for the variable names, so they must - # each be defined on a line of their own. _version.py will just call - # get_keywords(). 
- git_refnames = "%(DOLLAR)sFormat:%%d%(DOLLAR)s" - git_full = "%(DOLLAR)sFormat:%%H%(DOLLAR)s" - git_date = "%(DOLLAR)sFormat:%%ci%(DOLLAR)s" - keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} - return keywords - - -class VersioneerConfig: - """Container for Versioneer configuration parameters.""" - - -def get_config(): - """Create, populate and return the VersioneerConfig() object.""" - # these strings are filled in when 'setup.py versioneer' creates - # _version.py - cfg = VersioneerConfig() - cfg.VCS = "git" - cfg.style = "%(STYLE)s" - cfg.tag_prefix = "%(TAG_PREFIX)s" - cfg.parentdir_prefix = "%(PARENTDIR_PREFIX)s" - cfg.versionfile_source = "%(VERSIONFILE_SOURCE)s" - cfg.verbose = False - return cfg - - -class NotThisMethod(Exception): - """Exception raised if a method is not valid for the current scenario.""" - - -LONG_VERSION_PY: Dict[str, str] = {} -HANDLERS: Dict[str, Dict[str, Callable]] = {} - - -def register_vcs_handler(vcs, method): # decorator - """Create decorator to mark a method as the handler of a VCS.""" - def decorate(f): - """Store f in HANDLERS[vcs][method].""" - if vcs not in HANDLERS: - HANDLERS[vcs] = {} - HANDLERS[vcs][method] = f - return f - return decorate - - -def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, - env=None): - """Call the given command(s).""" - assert isinstance(commands, list) - process = None - for command in commands: - try: - dispcmd = str([command] + args) - # remember shell=False, so use git.cmd on windows, not just git - process = subprocess.Popen([command] + args, cwd=cwd, env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr - else None)) - break - except OSError: - e = sys.exc_info()[1] - if e.errno == errno.ENOENT: - continue - if verbose: - print("unable to run %%s" %% dispcmd) - print(e) - return None, None - else: - if verbose: - print("unable to find command, tried %%s" %% (commands,)) - return None, None - stdout = 
process.communicate()[0].strip().decode() - if process.returncode != 0: - if verbose: - print("unable to run %%s (error)" %% dispcmd) - print("stdout was %%s" %% stdout) - return None, process.returncode - return stdout, process.returncode - - -def versions_from_parentdir(parentdir_prefix, root, verbose): - """Try to determine the version from the parent directory name. - - Source tarballs conventionally unpack into a directory that includes both - the project name and a version string. We will also support searching up - two directory levels for an appropriately named parent directory - """ - rootdirs = [] - - for _ in range(3): - dirname = os.path.basename(root) - if dirname.startswith(parentdir_prefix): - return {"version": dirname[len(parentdir_prefix):], - "full-revisionid": None, - "dirty": False, "error": None, "date": None} - rootdirs.append(root) - root = os.path.dirname(root) # up a level - - if verbose: - print("Tried directories %%s but none started with prefix %%s" %% - (str(rootdirs), parentdir_prefix)) - raise NotThisMethod("rootdir doesn't start with parentdir_prefix") - - -@register_vcs_handler("git", "get_keywords") -def git_get_keywords(versionfile_abs): - """Extract version information from the given file.""" - # the code embedded in _version.py can just fetch the value of these - # keywords. When used from setup.py, we don't want to import _version.py, - # so we do it with a regexp instead. This function is not used from - # _version.py. 
- keywords = {} - try: - with open(versionfile_abs, "r") as fobj: - for line in fobj: - if line.strip().startswith("git_refnames ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["refnames"] = mo.group(1) - if line.strip().startswith("git_full ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["full"] = mo.group(1) - if line.strip().startswith("git_date ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["date"] = mo.group(1) - except OSError: - pass - return keywords - - -@register_vcs_handler("git", "keywords") -def git_versions_from_keywords(keywords, tag_prefix, verbose): - """Get version information from git keywords.""" - if "refnames" not in keywords: - raise NotThisMethod("Short version file found") - date = keywords.get("date") - if date is not None: - # Use only the last line. Previous lines may contain GPG signature - # information. - date = date.splitlines()[-1] - - # git-2.2.0 added "%%cI", which expands to an ISO-8601 -compliant - # datestamp. However we prefer "%%ci" (which expands to an "ISO-8601 - # -like" string, which we must then edit to make compliant), because - # it's been around since git-1.5.3, and it's too difficult to - # discover which version we're using, or to work around using an - # older one. - date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - refnames = keywords["refnames"].strip() - if refnames.startswith("$Format"): - if verbose: - print("keywords are unexpanded, not using") - raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = {r.strip() for r in refnames.strip("()").split(",")} - # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of - # just "foo-1.0". If we see a "tag: " prefix, prefer those. - TAG = "tag: " - tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} - if not tags: - # Either we're using git < 1.8.3, or there really are no tags. We use - # a heuristic: assume all version tags have a digit. 
The old git %%d - # expansion behaves like git log --decorate=short and strips out the - # refs/heads/ and refs/tags/ prefixes that would let us distinguish - # between branches and tags. By ignoring refnames without digits, we - # filter out many common branch names like "release" and - # "stabilization", as well as "HEAD" and "master". - tags = {r for r in refs if re.search(r'\d', r)} - if verbose: - print("discarding '%%s', no digits" %% ",".join(refs - tags)) - if verbose: - print("likely tags: %%s" %% ",".join(sorted(tags))) - for ref in sorted(tags): - # sorting will prefer e.g. "2.0" over "2.0rc1" - if ref.startswith(tag_prefix): - r = ref[len(tag_prefix):] - # Filter out refs that exactly match prefix or that don't start - # with a number once the prefix is stripped (mostly a concern - # when prefix is '') - if not re.match(r'\d', r): - continue - if verbose: - print("picking %%s" %% r) - return {"version": r, - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": None, - "date": date} - # no suitable tags, so version is "0+unknown", but full hex is still there - if verbose: - print("no suitable tags, using unknown + full revision id") - return {"version": "0+unknown", - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": "no suitable tags", "date": None} - - -@register_vcs_handler("git", "pieces_from_vcs") -def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): - """Get version from 'git describe' in the root of the source tree. - - This only gets called if the git-archive 'subst' keywords were *not* - expanded, and _version.py hasn't already been rewritten with a short - version string, meaning we're inside a checked out source tree. 
- """ - GITS = ["git"] - TAG_PREFIX_REGEX = "*" - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - TAG_PREFIX_REGEX = r"\*" - - _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, - hide_stderr=True) - if rc != 0: - if verbose: - print("Directory %%s not under git control" %% root) - raise NotThisMethod("'git rev-parse --git-dir' returned error") - - # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] - # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = runner(GITS, ["describe", "--tags", "--dirty", - "--always", "--long", - "--match", - "%%s%%s" %% (tag_prefix, TAG_PREFIX_REGEX)], - cwd=root) - # --long was added in git-1.5.5 - if describe_out is None: - raise NotThisMethod("'git describe' failed") - describe_out = describe_out.strip() - full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) - if full_out is None: - raise NotThisMethod("'git rev-parse' failed") - full_out = full_out.strip() - - pieces = {} - pieces["long"] = full_out - pieces["short"] = full_out[:7] # maybe improved later - pieces["error"] = None - - branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], - cwd=root) - # --abbrev-ref was added in git-1.6.3 - if rc != 0 or branch_name is None: - raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") - branch_name = branch_name.strip() - - if branch_name == "HEAD": - # If we aren't exactly on a branch, pick a branch which represents - # the current commit. If all else fails, we are on a branchless - # commit. - branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) - # --contains was added in git-1.5.4 - if rc != 0 or branches is None: - raise NotThisMethod("'git branch --contains' returned error") - branches = branches.split("\n") - - # Remove the first line if we're running detached - if "(" in branches[0]: - branches.pop(0) - - # Strip off the leading "* " from the list of branches. 
- branches = [branch[2:] for branch in branches] - if "master" in branches: - branch_name = "master" - elif not branches: - branch_name = None - else: - # Pick the first branch that is returned. Good or bad. - branch_name = branches[0] - - pieces["branch"] = branch_name - - # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] - # TAG might have hyphens. - git_describe = describe_out - - # look for -dirty suffix - dirty = git_describe.endswith("-dirty") - pieces["dirty"] = dirty - if dirty: - git_describe = git_describe[:git_describe.rindex("-dirty")] - - # now we have TAG-NUM-gHEX or HEX - - if "-" in git_describe: - # TAG-NUM-gHEX - mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) - if not mo: - # unparsable. Maybe git-describe is misbehaving? - pieces["error"] = ("unable to parse git-describe output: '%%s'" - %% describe_out) - return pieces - - # tag - full_tag = mo.group(1) - if not full_tag.startswith(tag_prefix): - if verbose: - fmt = "tag '%%s' doesn't start with prefix '%%s'" - print(fmt %% (full_tag, tag_prefix)) - pieces["error"] = ("tag '%%s' doesn't start with prefix '%%s'" - %% (full_tag, tag_prefix)) - return pieces - pieces["closest-tag"] = full_tag[len(tag_prefix):] - - # distance: number of commits since tag - pieces["distance"] = int(mo.group(2)) - - # commit: short hex revision ID - pieces["short"] = mo.group(3) - - else: - # HEX: no tags - pieces["closest-tag"] = None - count_out, rc = runner(GITS, ["rev-list", "HEAD", "--count"], cwd=root) - pieces["distance"] = int(count_out) # total number of commits - - # commit date: see ISO-8601 comment in git_versions_from_keywords() - date = runner(GITS, ["show", "-s", "--format=%%ci", "HEAD"], cwd=root)[0].strip() - # Use only the last line. Previous lines may contain GPG signature - # information. 
- date = date.splitlines()[-1] - pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - - return pieces - - -def plus_or_dot(pieces): - """Return a + if we don't already have one, else return a .""" - if "+" in pieces.get("closest-tag", ""): - return "." - return "+" - - -def render_pep440(pieces): - """Build up version string, with post-release "local version identifier". - - Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you - get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty - - Exceptions: - 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += plus_or_dot(pieces) - rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0+untagged.%%d.g%%s" %% (pieces["distance"], - pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def render_pep440_branch(pieces): - """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . - - The ".dev0" means not master branch. Note that .dev0 sorts backwards - (a feature branch will appear "older" than the master branch). - - Exceptions: - 1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0" - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += "+untagged.%%d.g%%s" %% (pieces["distance"], - pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def pep440_split_post(ver): - """Split pep440 version string at the post-release segment. 
- - Returns the release segments before the post-release and the - post-release version number (or -1 if no post-release segment is present). - """ - vc = str.split(ver, ".post") - return vc[0], int(vc[1] or 0) if len(vc) == 2 else None - - -def render_pep440_pre(pieces): - """TAG[.postN.devDISTANCE] -- No -dirty. - - Exceptions: - 1: no tags. 0.post0.devDISTANCE - """ - if pieces["closest-tag"]: - if pieces["distance"]: - # update the post release segment - tag_version, post_version = pep440_split_post(pieces["closest-tag"]) - rendered = tag_version - if post_version is not None: - rendered += ".post%%d.dev%%d" %% (post_version+1, pieces["distance"]) - else: - rendered += ".post0.dev%%d" %% (pieces["distance"]) - else: - # no commits, use the tag as the version - rendered = pieces["closest-tag"] - else: - # exception #1 - rendered = "0.post0.dev%%d" %% pieces["distance"] - return rendered - - -def render_pep440_post(pieces): - """TAG[.postDISTANCE[.dev0]+gHEX] . - - The ".dev0" means dirty. Note that .dev0 sorts backwards - (a dirty tree will appear "older" than the corresponding clean one), - but you shouldn't be releasing software with -dirty anyways. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%%d" %% pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "g%%s" %% pieces["short"] - else: - # exception #1 - rendered = "0.post%%d" %% pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += "+g%%s" %% pieces["short"] - return rendered - - -def render_pep440_post_branch(pieces): - """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . - - The ".dev0" means not master branch. - - Exceptions: - 1: no tags. 
0.postDISTANCE[.dev0]+gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%%d" %% pieces["distance"] - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "g%%s" %% pieces["short"] - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0.post%%d" %% pieces["distance"] - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += "+g%%s" %% pieces["short"] - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def render_pep440_old(pieces): - """TAG[.postDISTANCE[.dev0]] . - - The ".dev0" means dirty. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%%d" %% pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - else: - # exception #1 - rendered = "0.post%%d" %% pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - return rendered - - -def render_git_describe(pieces): - """TAG[-DISTANCE-gHEX][-dirty]. - - Like 'git describe --tags --dirty --always'. - - Exceptions: - 1: no tags. HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render_git_describe_long(pieces): - """TAG-DISTANCE-gHEX[-dirty]. - - Like 'git describe --tags --dirty --always -long'. - The distance/hash is unconditional. - - Exceptions: - 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render(pieces, style): - """Render the given version pieces into the requested style.""" - if pieces["error"]: - return {"version": "unknown", - "full-revisionid": pieces.get("long"), - "dirty": None, - "error": pieces["error"], - "date": None} - - if not style or style == "default": - style = "pep440" # the default - - if style == "pep440": - rendered = render_pep440(pieces) - elif style == "pep440-branch": - rendered = render_pep440_branch(pieces) - elif style == "pep440-pre": - rendered = render_pep440_pre(pieces) - elif style == "pep440-post": - rendered = render_pep440_post(pieces) - elif style == "pep440-post-branch": - rendered = render_pep440_post_branch(pieces) - elif style == "pep440-old": - rendered = render_pep440_old(pieces) - elif style == "git-describe": - rendered = render_git_describe(pieces) - elif style == "git-describe-long": - rendered = render_git_describe_long(pieces) - else: - raise ValueError("unknown style '%%s'" %% style) - - return {"version": rendered, "full-revisionid": pieces["long"], - "dirty": pieces["dirty"], "error": None, - "date": pieces.get("date")} - - -def get_versions(): - """Get version information or return default if unable to do so.""" - # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have - # __file__, we can work backwards from there to the root. Some - # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which - # case we can only use expanded keywords. 
- - cfg = get_config() - verbose = cfg.verbose - - try: - return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, - verbose) - except NotThisMethod: - pass - - try: - root = os.path.realpath(__file__) - # versionfile_source is the relative path from the top of the source - # tree (where the .git directory might live) to this file. Invert - # this to find the root from __file__. - for _ in cfg.versionfile_source.split('/'): - root = os.path.dirname(root) - except NameError: - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, - "error": "unable to find root of source tree", - "date": None} - - try: - pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) - return render(pieces, cfg.style) - except NotThisMethod: - pass - - try: - if cfg.parentdir_prefix: - return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) - except NotThisMethod: - pass - - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, - "error": "unable to compute version", "date": None} -''' - - -@register_vcs_handler("git", "get_keywords") -def git_get_keywords(versionfile_abs): - """Extract version information from the given file.""" - # the code embedded in _version.py can just fetch the value of these - # keywords. When used from setup.py, we don't want to import _version.py, - # so we do it with a regexp instead. This function is not used from - # _version.py. 
- keywords = {} - try: - with open(versionfile_abs, "r") as fobj: - for line in fobj: - if line.strip().startswith("git_refnames ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["refnames"] = mo.group(1) - if line.strip().startswith("git_full ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["full"] = mo.group(1) - if line.strip().startswith("git_date ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["date"] = mo.group(1) - except OSError: - pass - return keywords - - -@register_vcs_handler("git", "keywords") -def git_versions_from_keywords(keywords, tag_prefix, verbose): - """Get version information from git keywords.""" - if "refnames" not in keywords: - raise NotThisMethod("Short version file found") - date = keywords.get("date") - if date is not None: - # Use only the last line. Previous lines may contain GPG signature - # information. - date = date.splitlines()[-1] - - # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant - # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 - # -like" string, which we must then edit to make compliant), because - # it's been around since git-1.5.3, and it's too difficult to - # discover which version we're using, or to work around using an - # older one. - date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - refnames = keywords["refnames"].strip() - if refnames.startswith("$Format"): - if verbose: - print("keywords are unexpanded, not using") - raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = {r.strip() for r in refnames.strip("()").split(",")} - # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of - # just "foo-1.0". If we see a "tag: " prefix, prefer those. - TAG = "tag: " - tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} - if not tags: - # Either we're using git < 1.8.3, or there really are no tags. We use - # a heuristic: assume all version tags have a digit. 
The old git %d - # expansion behaves like git log --decorate=short and strips out the - # refs/heads/ and refs/tags/ prefixes that would let us distinguish - # between branches and tags. By ignoring refnames without digits, we - # filter out many common branch names like "release" and - # "stabilization", as well as "HEAD" and "master". - tags = {r for r in refs if re.search(r'\d', r)} - if verbose: - print("discarding '%s', no digits" % ",".join(refs - tags)) - if verbose: - print("likely tags: %s" % ",".join(sorted(tags))) - for ref in sorted(tags): - # sorting will prefer e.g. "2.0" over "2.0rc1" - if ref.startswith(tag_prefix): - r = ref[len(tag_prefix):] - # Filter out refs that exactly match prefix or that don't start - # with a number once the prefix is stripped (mostly a concern - # when prefix is '') - if not re.match(r'\d', r): - continue - if verbose: - print("picking %s" % r) - return {"version": r, - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": None, - "date": date} - # no suitable tags, so version is "0+unknown", but full hex is still there - if verbose: - print("no suitable tags, using unknown + full revision id") - return {"version": "0+unknown", - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": "no suitable tags", "date": None} - - -@register_vcs_handler("git", "pieces_from_vcs") -def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): - """Get version from 'git describe' in the root of the source tree. - - This only gets called if the git-archive 'subst' keywords were *not* - expanded, and _version.py hasn't already been rewritten with a short - version string, meaning we're inside a checked out source tree. 
- """ - GITS = ["git"] - TAG_PREFIX_REGEX = "*" - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - TAG_PREFIX_REGEX = r"\*" - - _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, - hide_stderr=True) - if rc != 0: - if verbose: - print("Directory %s not under git control" % root) - raise NotThisMethod("'git rev-parse --git-dir' returned error") - - # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] - # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = runner(GITS, ["describe", "--tags", "--dirty", - "--always", "--long", - "--match", - "%s%s" % (tag_prefix, TAG_PREFIX_REGEX)], - cwd=root) - # --long was added in git-1.5.5 - if describe_out is None: - raise NotThisMethod("'git describe' failed") - describe_out = describe_out.strip() - full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) - if full_out is None: - raise NotThisMethod("'git rev-parse' failed") - full_out = full_out.strip() - - pieces = {} - pieces["long"] = full_out - pieces["short"] = full_out[:7] # maybe improved later - pieces["error"] = None - - branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], - cwd=root) - # --abbrev-ref was added in git-1.6.3 - if rc != 0 or branch_name is None: - raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") - branch_name = branch_name.strip() - - if branch_name == "HEAD": - # If we aren't exactly on a branch, pick a branch which represents - # the current commit. If all else fails, we are on a branchless - # commit. - branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) - # --contains was added in git-1.5.4 - if rc != 0 or branches is None: - raise NotThisMethod("'git branch --contains' returned error") - branches = branches.split("\n") - - # Remove the first line if we're running detached - if "(" in branches[0]: - branches.pop(0) - - # Strip off the leading "* " from the list of branches. 
- branches = [branch[2:] for branch in branches] - if "master" in branches: - branch_name = "master" - elif not branches: - branch_name = None - else: - # Pick the first branch that is returned. Good or bad. - branch_name = branches[0] - - pieces["branch"] = branch_name - - # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] - # TAG might have hyphens. - git_describe = describe_out - - # look for -dirty suffix - dirty = git_describe.endswith("-dirty") - pieces["dirty"] = dirty - if dirty: - git_describe = git_describe[:git_describe.rindex("-dirty")] - - # now we have TAG-NUM-gHEX or HEX - - if "-" in git_describe: - # TAG-NUM-gHEX - mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) - if not mo: - # unparsable. Maybe git-describe is misbehaving? - pieces["error"] = ("unable to parse git-describe output: '%s'" - % describe_out) - return pieces - - # tag - full_tag = mo.group(1) - if not full_tag.startswith(tag_prefix): - if verbose: - fmt = "tag '%s' doesn't start with prefix '%s'" - print(fmt % (full_tag, tag_prefix)) - pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" - % (full_tag, tag_prefix)) - return pieces - pieces["closest-tag"] = full_tag[len(tag_prefix):] - - # distance: number of commits since tag - pieces["distance"] = int(mo.group(2)) - - # commit: short hex revision ID - pieces["short"] = mo.group(3) - - else: - # HEX: no tags - pieces["closest-tag"] = None - count_out, rc = runner(GITS, ["rev-list", "HEAD", "--count"], cwd=root) - pieces["distance"] = int(count_out) # total number of commits - - # commit date: see ISO-8601 comment in git_versions_from_keywords() - date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() - # Use only the last line. Previous lines may contain GPG signature - # information. 
- date = date.splitlines()[-1] - pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - - return pieces - - -def do_vcs_install(manifest_in, versionfile_source, ipy): - """Git-specific installation logic for Versioneer. - - For Git, this means creating/changing .gitattributes to mark _version.py - for export-subst keyword substitution. - """ - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - files = [manifest_in, versionfile_source] - if ipy: - files.append(ipy) - try: - my_path = __file__ - if my_path.endswith(".pyc") or my_path.endswith(".pyo"): - my_path = os.path.splitext(my_path)[0] + ".py" - versioneer_file = os.path.relpath(my_path) - except NameError: - versioneer_file = "versioneer.py" - files.append(versioneer_file) - present = False - try: - with open(".gitattributes", "r") as fobj: - for line in fobj: - if line.strip().startswith(versionfile_source): - if "export-subst" in line.strip().split()[1:]: - present = True - break - except OSError: - pass - if not present: - with open(".gitattributes", "a+") as fobj: - fobj.write(f"{versionfile_source} export-subst\n") - files.append(".gitattributes") - run_command(GITS, ["add", "--"] + files) - - -def versions_from_parentdir(parentdir_prefix, root, verbose): - """Try to determine the version from the parent directory name. - - Source tarballs conventionally unpack into a directory that includes both - the project name and a version string. 
We will also support searching up - two directory levels for an appropriately named parent directory - """ - rootdirs = [] - - for _ in range(3): - dirname = os.path.basename(root) - if dirname.startswith(parentdir_prefix): - return {"version": dirname[len(parentdir_prefix):], - "full-revisionid": None, - "dirty": False, "error": None, "date": None} - rootdirs.append(root) - root = os.path.dirname(root) # up a level - - if verbose: - print("Tried directories %s but none started with prefix %s" % - (str(rootdirs), parentdir_prefix)) - raise NotThisMethod("rootdir doesn't start with parentdir_prefix") - - -SHORT_VERSION_PY = """ -# This file was generated by 'versioneer.py' (0.21) from -# revision-control system data, or from the parent directory name of an -# unpacked source archive. Distribution tarballs contain a pre-generated copy -# of this file. - -import json - -version_json = ''' -%s -''' # END VERSION_JSON - - -def get_versions(): - return json.loads(version_json) -""" - - -def versions_from_file(filename): - """Try to determine the version from _version.py if present.""" - try: - with open(filename) as f: - contents = f.read() - except OSError: - raise NotThisMethod("unable to read _version.py") - mo = re.search(r"version_json = '''\n(.*)''' # END VERSION_JSON", - contents, re.M | re.S) - if not mo: - mo = re.search(r"version_json = '''\r\n(.*)''' # END VERSION_JSON", - contents, re.M | re.S) - if not mo: - raise NotThisMethod("no version_json in _version.py") - return json.loads(mo.group(1)) - - -def write_to_version_file(filename, versions): - """Write the given version number to the given _version.py file.""" - os.unlink(filename) - contents = json.dumps(versions, sort_keys=True, - indent=1, separators=(",", ": ")) - with open(filename, "w") as f: - f.write(SHORT_VERSION_PY % contents) - - print("set %s to '%s'" % (filename, versions["version"])) - - -def plus_or_dot(pieces): - """Return a + if we don't already have one, else return a .""" - if "+" in 
pieces.get("closest-tag", ""): - return "." - return "+" - - -def render_pep440(pieces): - """Build up version string, with post-release "local version identifier". - - Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you - get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty - - Exceptions: - 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += plus_or_dot(pieces) - rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0+untagged.%d.g%s" % (pieces["distance"], - pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def render_pep440_branch(pieces): - """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . - - The ".dev0" means not master branch. Note that .dev0 sorts backwards - (a feature branch will appear "older" than the master branch). - - Exceptions: - 1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0" - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += "+untagged.%d.g%s" % (pieces["distance"], - pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def pep440_split_post(ver): - """Split pep440 version string at the post-release segment. - - Returns the release segments before the post-release and the - post-release version number (or -1 if no post-release segment is present). 
- """ - vc = str.split(ver, ".post") - return vc[0], int(vc[1] or 0) if len(vc) == 2 else None - - -def render_pep440_pre(pieces): - """TAG[.postN.devDISTANCE] -- No -dirty. - - Exceptions: - 1: no tags. 0.post0.devDISTANCE - """ - if pieces["closest-tag"]: - if pieces["distance"]: - # update the post release segment - tag_version, post_version = pep440_split_post(pieces["closest-tag"]) - rendered = tag_version - if post_version is not None: - rendered += ".post%d.dev%d" % (post_version+1, pieces["distance"]) - else: - rendered += ".post0.dev%d" % (pieces["distance"]) - else: - # no commits, use the tag as the version - rendered = pieces["closest-tag"] - else: - # exception #1 - rendered = "0.post0.dev%d" % pieces["distance"] - return rendered - - -def render_pep440_post(pieces): - """TAG[.postDISTANCE[.dev0]+gHEX] . - - The ".dev0" means dirty. Note that .dev0 sorts backwards - (a dirty tree will appear "older" than the corresponding clean one), - but you shouldn't be releasing software with -dirty anyways. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "g%s" % pieces["short"] - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += "+g%s" % pieces["short"] - return rendered - - -def render_pep440_post_branch(pieces): - """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . - - The ".dev0" means not master branch. - - Exceptions: - 1: no tags. 
0.postDISTANCE[.dev0]+gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "g%s" % pieces["short"] - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += "+g%s" % pieces["short"] - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def render_pep440_old(pieces): - """TAG[.postDISTANCE[.dev0]] . - - The ".dev0" means dirty. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - return rendered - - -def render_git_describe(pieces): - """TAG[-DISTANCE-gHEX][-dirty]. - - Like 'git describe --tags --dirty --always'. - - Exceptions: - 1: no tags. HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render_git_describe_long(pieces): - """TAG-DISTANCE-gHEX[-dirty]. - - Like 'git describe --tags --dirty --always -long'. - The distance/hash is unconditional. - - Exceptions: - 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render(pieces, style): - """Render the given version pieces into the requested style.""" - if pieces["error"]: - return {"version": "unknown", - "full-revisionid": pieces.get("long"), - "dirty": None, - "error": pieces["error"], - "date": None} - - if not style or style == "default": - style = "pep440" # the default - - if style == "pep440": - rendered = render_pep440(pieces) - elif style == "pep440-branch": - rendered = render_pep440_branch(pieces) - elif style == "pep440-pre": - rendered = render_pep440_pre(pieces) - elif style == "pep440-post": - rendered = render_pep440_post(pieces) - elif style == "pep440-post-branch": - rendered = render_pep440_post_branch(pieces) - elif style == "pep440-old": - rendered = render_pep440_old(pieces) - elif style == "git-describe": - rendered = render_git_describe(pieces) - elif style == "git-describe-long": - rendered = render_git_describe_long(pieces) - else: - raise ValueError("unknown style '%s'" % style) - - return {"version": rendered, "full-revisionid": pieces["long"], - "dirty": pieces["dirty"], "error": None, - "date": pieces.get("date")} - - -class VersioneerBadRootError(Exception): - """The project root directory is unknown or missing key files.""" - - -def get_versions(verbose=False): - """Get the project version from whatever source is available. - - Returns dict with two keys: 'version' and 'full'. 
- """ - if "versioneer" in sys.modules: - # see the discussion in cmdclass.py:get_cmdclass() - del sys.modules["versioneer"] - - root = get_root() - cfg = get_config_from_root(root) - - assert cfg.VCS is not None, "please set [versioneer]VCS= in setup.cfg" - handlers = HANDLERS.get(cfg.VCS) - assert handlers, "unrecognized VCS '%s'" % cfg.VCS - verbose = verbose or cfg.verbose - assert cfg.versionfile_source is not None, \ - "please set versioneer.versionfile_source" - assert cfg.tag_prefix is not None, "please set versioneer.tag_prefix" - - versionfile_abs = os.path.join(root, cfg.versionfile_source) - - # extract version from first of: _version.py, VCS command (e.g. 'git - # describe'), parentdir. This is meant to work for developers using a - # source checkout, for users of a tarball created by 'setup.py sdist', - # and for users of a tarball/zipball created by 'git archive' or github's - # download-from-tag feature or the equivalent in other VCSes. - - get_keywords_f = handlers.get("get_keywords") - from_keywords_f = handlers.get("keywords") - if get_keywords_f and from_keywords_f: - try: - keywords = get_keywords_f(versionfile_abs) - ver = from_keywords_f(keywords, cfg.tag_prefix, verbose) - if verbose: - print("got version from expanded keyword %s" % ver) - return ver - except NotThisMethod: - pass - - try: - ver = versions_from_file(versionfile_abs) - if verbose: - print("got version from file %s %s" % (versionfile_abs, ver)) - return ver - except NotThisMethod: - pass - - from_vcs_f = handlers.get("pieces_from_vcs") - if from_vcs_f: - try: - pieces = from_vcs_f(cfg.tag_prefix, root, verbose) - ver = render(pieces, cfg.style) - if verbose: - print("got version from VCS %s" % ver) - return ver - except NotThisMethod: - pass - - try: - if cfg.parentdir_prefix: - ver = versions_from_parentdir(cfg.parentdir_prefix, root, verbose) - if verbose: - print("got version from parentdir %s" % ver) - return ver - except NotThisMethod: - pass - - if verbose: - 
print("unable to compute version") - - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, "error": "unable to compute version", - "date": None} - - -def get_version(): - """Get the short version string for this project.""" - return get_versions()["version"] - - -def get_cmdclass(cmdclass=None): - """Get the custom setuptools/distutils subclasses used by Versioneer. - - If the package uses a different cmdclass (e.g. one from numpy), it - should be provide as an argument. - """ - if "versioneer" in sys.modules: - del sys.modules["versioneer"] - # this fixes the "python setup.py develop" case (also 'install' and - # 'easy_install .'), in which subdependencies of the main project are - # built (using setup.py bdist_egg) in the same python process. Assume - # a main project A and a dependency B, which use different versions - # of Versioneer. A's setup.py imports A's Versioneer, leaving it in - # sys.modules by the time B's setup.py is executed, causing B to run - # with the wrong versioneer. Setuptools wraps the sub-dep builds in a - # sandbox that restores sys.modules to it's pre-build state, so the - # parent is protected against the child's "import versioneer". By - # removing ourselves from sys.modules here, before the child build - # happens, we protect the child from the parent's versioneer too. 
- # Also see https://github.com/python-versioneer/python-versioneer/issues/52 - - cmds = {} if cmdclass is None else cmdclass.copy() - - # we add "version" to both distutils and setuptools - from distutils.core import Command - - class cmd_version(Command): - description = "report generated version string" - user_options = [] - boolean_options = [] - - def initialize_options(self): - pass - - def finalize_options(self): - pass - - def run(self): - vers = get_versions(verbose=True) - print("Version: %s" % vers["version"]) - print(" full-revisionid: %s" % vers.get("full-revisionid")) - print(" dirty: %s" % vers.get("dirty")) - print(" date: %s" % vers.get("date")) - if vers["error"]: - print(" error: %s" % vers["error"]) - cmds["version"] = cmd_version - - # we override "build_py" in both distutils and setuptools - # - # most invocation pathways end up running build_py: - # distutils/build -> build_py - # distutils/install -> distutils/build ->.. - # setuptools/bdist_wheel -> distutils/install ->.. - # setuptools/bdist_egg -> distutils/install_lib -> build_py - # setuptools/install -> bdist_egg ->.. - # setuptools/develop -> ? - # pip install: - # copies source tree to a tempdir before running egg_info/etc - # if .git isn't copied too, 'git describe' will fail - # then does setup.py bdist_wheel, or sometimes setup.py install - # setup.py egg_info -> ? 
- - # we override different "build_py" commands for both environments - if 'build_py' in cmds: - _build_py = cmds['build_py'] - elif "setuptools" in sys.modules: - from setuptools.command.build_py import build_py as _build_py - else: - from distutils.command.build_py import build_py as _build_py - - class cmd_build_py(_build_py): - def run(self): - root = get_root() - cfg = get_config_from_root(root) - versions = get_versions() - _build_py.run(self) - # now locate _version.py in the new build/ directory and replace - # it with an updated value - if cfg.versionfile_build: - target_versionfile = os.path.join(self.build_lib, - cfg.versionfile_build) - print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, versions) - cmds["build_py"] = cmd_build_py - - if 'build_ext' in cmds: - _build_ext = cmds['build_ext'] - elif "setuptools" in sys.modules: - from setuptools.command.build_ext import build_ext as _build_ext - else: - from distutils.command.build_ext import build_ext as _build_ext - - class cmd_build_ext(_build_ext): - def run(self): - root = get_root() - cfg = get_config_from_root(root) - versions = get_versions() - _build_ext.run(self) - if self.inplace: - # build_ext --inplace will only build extensions in - # build/lib<..> dir with no _version.py to write to. - # As in place builds will already have a _version.py - # in the module dir, we do not need to write one. - return - # now locate _version.py in the new build/ directory and replace - # it with an updated value - target_versionfile = os.path.join(self.build_lib, - cfg.versionfile_build) - print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, versions) - cmds["build_ext"] = cmd_build_ext - - if "cx_Freeze" in sys.modules: # cx_freeze enabled? - from cx_Freeze.dist import build_exe as _build_exe - # nczeczulin reports that py2exe won't like the pep440-style string - # as FILEVERSION, but it can be used for PRODUCTVERSION, e.g. 
- # setup(console=[{ - # "version": versioneer.get_version().split("+", 1)[0], # FILEVERSION - # "product_version": versioneer.get_version(), - # ... - - class cmd_build_exe(_build_exe): - def run(self): - root = get_root() - cfg = get_config_from_root(root) - versions = get_versions() - target_versionfile = cfg.versionfile_source - print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, versions) - - _build_exe.run(self) - os.unlink(target_versionfile) - with open(cfg.versionfile_source, "w") as f: - LONG = LONG_VERSION_PY[cfg.VCS] - f.write(LONG % - {"DOLLAR": "$", - "STYLE": cfg.style, - "TAG_PREFIX": cfg.tag_prefix, - "PARENTDIR_PREFIX": cfg.parentdir_prefix, - "VERSIONFILE_SOURCE": cfg.versionfile_source, - }) - cmds["build_exe"] = cmd_build_exe - del cmds["build_py"] - - if 'py2exe' in sys.modules: # py2exe enabled? - from py2exe.distutils_buildexe import py2exe as _py2exe - - class cmd_py2exe(_py2exe): - def run(self): - root = get_root() - cfg = get_config_from_root(root) - versions = get_versions() - target_versionfile = cfg.versionfile_source - print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, versions) - - _py2exe.run(self) - os.unlink(target_versionfile) - with open(cfg.versionfile_source, "w") as f: - LONG = LONG_VERSION_PY[cfg.VCS] - f.write(LONG % - {"DOLLAR": "$", - "STYLE": cfg.style, - "TAG_PREFIX": cfg.tag_prefix, - "PARENTDIR_PREFIX": cfg.parentdir_prefix, - "VERSIONFILE_SOURCE": cfg.versionfile_source, - }) - cmds["py2exe"] = cmd_py2exe - - # we override different "sdist" commands for both environments - if 'sdist' in cmds: - _sdist = cmds['sdist'] - elif "setuptools" in sys.modules: - from setuptools.command.sdist import sdist as _sdist - else: - from distutils.command.sdist import sdist as _sdist - - class cmd_sdist(_sdist): - def run(self): - versions = get_versions() - self._versioneer_generated_versions = versions - # unless we update this, the command will keep using the 
old - # version - self.distribution.metadata.version = versions["version"] - return _sdist.run(self) - - def make_release_tree(self, base_dir, files): - root = get_root() - cfg = get_config_from_root(root) - _sdist.make_release_tree(self, base_dir, files) - # now locate _version.py in the new base_dir directory - # (remembering that it may be a hardlink) and replace it with an - # updated value - target_versionfile = os.path.join(base_dir, cfg.versionfile_source) - print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, - self._versioneer_generated_versions) - cmds["sdist"] = cmd_sdist - - return cmds - - -CONFIG_ERROR = """ -setup.cfg is missing the necessary Versioneer configuration. You need -a section like: - - [versioneer] - VCS = git - style = pep440 - versionfile_source = src/myproject/_version.py - versionfile_build = myproject/_version.py - tag_prefix = - parentdir_prefix = myproject- - -You will also need to edit your setup.py to use the results: - - import versioneer - setup(version=versioneer.get_version(), - cmdclass=versioneer.get_cmdclass(), ...) - -Please read the docstring in ./versioneer.py for configuration instructions, -edit setup.cfg, and re-run the installer or 'python versioneer.py setup'. -""" - -SAMPLE_CONFIG = """ -# See the docstring in versioneer.py for instructions. Note that you must -# re-run 'versioneer.py setup' after changing this section, and commit the -# resulting files. - -[versioneer] -#VCS = git -#style = pep440 -#versionfile_source = -#versionfile_build = -#tag_prefix = -#parentdir_prefix = - -""" - -OLD_SNIPPET = """ -from ._version import get_versions -__version__ = get_versions()['version'] -del get_versions -""" - -INIT_PY_SNIPPET = """ -from . 
import {0} -__version__ = {0}.get_versions()['version'] -""" - - -def do_setup(): - """Do main VCS-independent setup function for installing Versioneer.""" - root = get_root() - try: - cfg = get_config_from_root(root) - except (OSError, configparser.NoSectionError, - configparser.NoOptionError) as e: - if isinstance(e, (OSError, configparser.NoSectionError)): - print("Adding sample versioneer config to setup.cfg", - file=sys.stderr) - with open(os.path.join(root, "setup.cfg"), "a") as f: - f.write(SAMPLE_CONFIG) - print(CONFIG_ERROR, file=sys.stderr) - return 1 - - print(" creating %s" % cfg.versionfile_source) - with open(cfg.versionfile_source, "w") as f: - LONG = LONG_VERSION_PY[cfg.VCS] - f.write(LONG % {"DOLLAR": "$", - "STYLE": cfg.style, - "TAG_PREFIX": cfg.tag_prefix, - "PARENTDIR_PREFIX": cfg.parentdir_prefix, - "VERSIONFILE_SOURCE": cfg.versionfile_source, - }) - - ipy = os.path.join(os.path.dirname(cfg.versionfile_source), - "__init__.py") - if os.path.exists(ipy): - try: - with open(ipy, "r") as f: - old = f.read() - except OSError: - old = "" - module = os.path.splitext(os.path.basename(cfg.versionfile_source))[0] - snippet = INIT_PY_SNIPPET.format(module) - if OLD_SNIPPET in old: - print(" replacing boilerplate in %s" % ipy) - with open(ipy, "w") as f: - f.write(old.replace(OLD_SNIPPET, snippet)) - elif snippet not in old: - print(" appending to %s" % ipy) - with open(ipy, "a") as f: - f.write(snippet) - else: - print(" %s unmodified" % ipy) - else: - print(" %s doesn't exist, ok" % ipy) - ipy = None - - # Make sure both the top-level "versioneer.py" and versionfile_source - # (PKG/_version.py, used by runtime code) are in MANIFEST.in, so - # they'll be copied into source distributions. Pip won't be able to - # install the package without this. 
- manifest_in = os.path.join(root, "MANIFEST.in") - simple_includes = set() - try: - with open(manifest_in, "r") as f: - for line in f: - if line.startswith("include "): - for include in line.split()[1:]: - simple_includes.add(include) - except OSError: - pass - # That doesn't cover everything MANIFEST.in can do - # (http://docs.python.org/2/distutils/sourcedist.html#commands), so - # it might give some false negatives. Appending redundant 'include' - # lines is safe, though. - if "versioneer.py" not in simple_includes: - print(" appending 'versioneer.py' to MANIFEST.in") - with open(manifest_in, "a") as f: - f.write("include versioneer.py\n") - else: - print(" 'versioneer.py' already in MANIFEST.in") - if cfg.versionfile_source not in simple_includes: - print(" appending versionfile_source ('%s') to MANIFEST.in" % - cfg.versionfile_source) - with open(manifest_in, "a") as f: - f.write("include %s\n" % cfg.versionfile_source) - else: - print(" versionfile_source already in MANIFEST.in") - - # Make VCS-specific changes. For git, this means creating/changing - # .gitattributes to mark _version.py for export-subst keyword - # substitution. - do_vcs_install(manifest_in, cfg.versionfile_source, ipy) - return 0 - - -def scan_setup_py(): - """Validate the contents of setup.py against Versioneer's expectations.""" - found = set() - setters = False - errors = 0 - with open("setup.py", "r") as f: - for line in f.readlines(): - if "import versioneer" in line: - found.add("import") - if "versioneer.get_cmdclass()" in line: - found.add("cmdclass") - if "versioneer.get_version()" in line: - found.add("get_version") - if "versioneer.VCS" in line: - setters = True - if "versioneer.versionfile_source" in line: - setters = True - if len(found) != 3: - print("") - print("Your setup.py appears to be missing some important items") - print("(but I might be wrong). 
Please make sure it has something") - print("roughly like the following:") - print("") - print(" import versioneer") - print(" setup( version=versioneer.get_version(),") - print(" cmdclass=versioneer.get_cmdclass(), ...)") - print("") - errors += 1 - if setters: - print("You should remove lines like 'versioneer.VCS = ' and") - print("'versioneer.versionfile_source = ' . This configuration") - print("now lives in setup.cfg, and should be removed from setup.py") - print("") - errors += 1 - return errors - - -if __name__ == "__main__": - cmd = sys.argv[1] - if cmd == "setup": - errors = do_setup() - errors += scan_setup_py() - if errors: - sys.exit(1)