From de11ef24dbb9c2beadea0c239f9947329e4f74ac Mon Sep 17 00:00:00 2001
From: EduardoTerres <EduardoTerres@users.noreply.github.com>
Date: Tue, 28 Oct 2025 17:18:34 +0100
Subject: [PATCH 01/16] Include apex domain for prediction

---
 subwiz/main.py  | 22 ++++++++++++++--------
 subwiz/model.py | 10 +++++++++-
 2 files changed, 23 insertions(+), 9 deletions(-)

diff --git a/subwiz/main.py b/subwiz/main.py
index 4e0a1a3..65c8e79 100644
--- a/subwiz/main.py
+++ b/subwiz/main.py
@@ -32,8 +32,8 @@
 
 
 MODEL_REPO = "HadrianSecurity/subwiz"
-MODEL_FILE = "model.pt"
-TOKENIZER_FILE = "tokenizer.json"
+MODEL_FILE = "model_v2.pt"
+TOKENIZER_FILE = "tokenizer_v2.json"
 CONFIG_FILE = "config.json"
 
 
@@ -102,13 +102,19 @@ def run_inference(
         Set of predicted domain objects
     """
 
-    apex = next(iter(input_domains)).apex_domain
     subs = [dom.subdomain for dom in input_domains]
-    tokenizer_input = ",".join(sorted(subs)) + "[DELIM]"
-    # TODO: pick a different subset, if some were out of context last iteration
+    apex_domain = input_domains[0].apex_domain
+    subdomains_tokenizer_input = ",".join(sorted(subs)) + "[DELIM]"
+    apex_tokenizer_input = "[BOS]" + apex_domain + "[DELIM]"
 
-    x = tokenizer.encode(tokenizer_input)
-    x = [1] * (gpt_model.config.block_size - len(x)) + x
+    subs_x = tokenizer.encode(subdomains_tokenizer_input)
+    apex_x = tokenizer.encode(apex_tokenizer_input)
+
+    # Trim subs to account for the apex part, grab last part
+    subs_x = subs_x[:gpt_model.config.block_size - len(apex_x)]
+
+    x = apex_x + subs_x
+    x = [gpt_model.pad_token] * (gpt_model.config.block_size - len(x)) + x
     x = torch.tensor(x)
 
     blocked_outputs = {dom.subdomain for dom in blocked_domains}
@@ -128,7 +134,7 @@ def run_inference(
         for pred in predictions
     }
 
-    predictions: set[str] = {sub + "." + apex for sub in predictions}
+    predictions: set[str] = {sub + "." + apex_domain for sub in predictions}
 
     predicted_domains: set[Domain] = set()
     for pred in predictions:
diff --git a/subwiz/model.py b/subwiz/model.py
index f230d45..1691695 100644
--- a/subwiz/model.py
+++ b/subwiz/model.py
@@ -428,6 +428,10 @@ def generate(
         idx = idx.to(self.device)
         sequences = idx.unsqueeze(0)
 
+        # Locate the first delimiter token and get its position
+        delim_token_id = self.delim_token
+        trimming_position = (sequences == delim_token_id).nonzero(as_tuple=True)[1][0]
+
         probabilities = torch.tensor([1.0], device=self.device)
 
         finished_sequences = torch.tensor([], device=self.device)
@@ -441,7 +445,11 @@ def generate(
                 on_iteration()
 
             # trim the sequences down to block size
-            sequences = sequences[:, -self.config.block_size :]
+            if sequences.shape[1] > self.config.block_size:
+                sequences = torch.cat(
+                    (sequences[:, :trimming_position], sequences[:, trimming_position + 1:]),
+                    dim=1,
+                )
 
             # remove any invalid subdomain starts
             outputs = self.tokenizer.batch_decode(sequences[:, -i:])

From b641f51bd86da9d71af604c08f9bba3814b1b3aa Mon Sep 17 00:00:00 2001
From: EduardoTerres <EduardoTerres@users.noreply.github.com>
Date: Wed, 29 Oct 2025 19:33:31 +0100
Subject: [PATCH 02/16] Working version

---
 subwiz/main.py  | 14 +++++++-------
 subwiz/model.py | 38 +++++++++++++++++++++++++++++---------
 2 files changed, 36 insertions(+), 16 deletions(-)

diff --git a/subwiz/main.py b/subwiz/main.py
index 65c8e79..b665e0d 100644
--- a/subwiz/main.py
+++ b/subwiz/main.py
@@ -17,10 +17,10 @@
 import torch
 from transformers import PreTrainedTokenizerFast
 
-from subwiz.cli_printer import print_hello, print_log, print_progress_dot
-from subwiz.model import GPT
-from subwiz.resolve import get_registered_domains
-from subwiz.type import (
+from subwiz_v2.cli_printer import print_hello, print_log, print_progress_dot
+from subwiz_v2.model import GPT
+from subwiz_v2.resolve import get_registered_domains
+from subwiz_v2.type import (
     Domain,
     input_domains_type,
     device_type,
@@ -103,15 +103,15 @@ def run_inference(
     """
 
     subs = [dom.subdomain for dom in input_domains]
-    apex_domain = input_domains[0].apex_domain
+    apex_domain = next(iter(input_domains)).apex_domain
     subdomains_tokenizer_input = ",".join(sorted(subs)) + "[DELIM]"
-    apex_tokenizer_input = "[BOS]" + apex_domain + "[DELIM]"
+    apex_tokenizer_input = apex_domain + "[DELIM]"
 
     subs_x = tokenizer.encode(subdomains_tokenizer_input)
     apex_x = tokenizer.encode(apex_tokenizer_input)
 
     # Trim subs to account for the apex part, grab last part
-    subs_x = subs_x[:gpt_model.config.block_size - len(apex_x)]
+    subs_x = subs_x[-(gpt_model.config.block_size - len(apex_x)):]
 
     x = apex_x + subs_x
     x = [gpt_model.pad_token] * (gpt_model.config.block_size - len(x)) + x
diff --git a/subwiz/model.py b/subwiz/model.py
index 1691695..84be7de 100644
--- a/subwiz/model.py
+++ b/subwiz/model.py
@@ -248,6 +248,7 @@ def __init__(self, config: GPTConfig):
         self.end_token = self.tokenizer("[END]")["input_ids"][0]
         self.comma_token = self.tokenizer(",")["input_ids"][0]
         self.delim_token = self.tokenizer("[DELIM]")["input_ids"][0]
+        self.pad_token = self.tokenizer("[PAD]")["input_ids"][0]
 
         self.transformer = nn.ModuleDict(
             dict(
@@ -382,6 +383,28 @@ def device(self) -> str:
         # assign model inputs to the right device
         return next(self.lm_head.parameters()).device.type
 
+    def _trim_subdomains(
+        self,
+        sequences: torch.Tensor,
+        num_initial_pad_tokens: int,
+        num_tokens_generated: int,
+        ) -> torch.Tensor:
+        if num_tokens_generated == 0:
+            return sequences
+
+        if num_tokens_generated > num_initial_pad_tokens:
+            # Trim after the fist delim token, which is the same for every element of batch
+            trimming_position = 1 + (sequences == self.delim_token).nonzero(as_tuple=True)[1][0]
+        else:
+            # Remove the first pad token
+            trimming_position = 0
+
+        sequences = torch.cat(
+            (sequences[:, :trimming_position], sequences[:, trimming_position + 1:]),
+            dim=1,
+        )
+        return sequences
+
     @torch.no_grad()
     def generate(
         self,
@@ -426,12 +449,9 @@ def generate(
         run_uuid = uuid.uuid4()
 
         idx = idx.to(self.device)
+        num_initial_pad_tokens = (idx == self.pad_token).sum().item()
         sequences = idx.unsqueeze(0)
 
-        # Locate the first delimiter token and get its position
-        delim_token_id = self.delim_token
-        trimming_position = (sequences == delim_token_id).nonzero(as_tuple=True)[1][0]
-
         probabilities = torch.tensor([1.0], device=self.device)
 
         finished_sequences = torch.tensor([], device=self.device)
@@ -445,11 +465,11 @@ def generate(
                 on_iteration()
 
             # trim the sequences down to block size
-            if sequences.shape[1] > self.config.block_size:
-                sequences = torch.cat(
-                    (sequences[:, :trimming_position], sequences[:, trimming_position + 1:]),
-                    dim=1,
-                )
+            sequences = self._trim_subdomains(
+                sequences,
+                num_initial_pad_tokens=num_initial_pad_tokens,
+                num_tokens_generated=i,
+            )
 
             # remove any invalid subdomain starts
             outputs = self.tokenizer.batch_decode(sequences[:, -i:])

From 89a29337b643d4227d0758e81e6ba9c6cee92b06 Mon Sep 17 00:00:00 2001
From: EduardoTerres <EduardoTerres@users.noreply.github.com>
Date: Wed, 29 Oct 2025 19:34:49 +0100
Subject: [PATCH 03/16] Fix imports

---
 subwiz/main.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/subwiz/main.py b/subwiz/main.py
index b665e0d..16b4761 100644
--- a/subwiz/main.py
+++ b/subwiz/main.py
@@ -17,10 +17,10 @@
 import torch
 from transformers import PreTrainedTokenizerFast
 
-from subwiz_v2.cli_printer import print_hello, print_log, print_progress_dot
-from subwiz_v2.model import GPT
-from subwiz_v2.resolve import get_registered_domains
-from subwiz_v2.type import (
+from subwiz.cli_printer import print_hello, print_log, print_progress_dot
+from subwiz.model import GPT
+from subwiz.resolve import get_registered_domains
+from subwiz.type import (
     Domain,
     input_domains_type,
     device_type,

From 74e875f91762199b99c8328f01c9eca12afa42fc Mon Sep 17 00:00:00 2001
From: klaasmeinke <48212135+klaasmeinke@users.noreply.github.com>
Date: Wed, 5 Nov 2025 15:54:46 +0100
Subject: [PATCH 04/16] Update subwiz/model.py

---
 subwiz/model.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/subwiz/model.py b/subwiz/model.py
index 84be7de..a8a14fd 100644
--- a/subwiz/model.py
+++ b/subwiz/model.py
@@ -388,7 +388,7 @@ def _trim_subdomains(
         sequences: torch.Tensor,
         num_initial_pad_tokens: int,
         num_tokens_generated: int,
-        ) -> torch.Tensor:
+    ) -> torch.Tensor:
         if num_tokens_generated == 0:
             return sequences
 

From 7907b60170120aad871691d7b824911df0f632d8 Mon Sep 17 00:00:00 2001
From: EduardoTerres <EduardoTerres@users.noreply.github.com>
Date: Wed, 5 Nov 2025 16:06:07 +0100
Subject: [PATCH 05/16] More efficient way of trimming when there is no pad

---
 subwiz/model.py | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/subwiz/model.py b/subwiz/model.py
index a8a14fd..a469a9d 100644
--- a/subwiz/model.py
+++ b/subwiz/model.py
@@ -386,18 +386,17 @@ def device(self) -> str:
     def _trim_subdomains(
         self,
         sequences: torch.Tensor,
-        num_initial_pad_tokens: int,
+        apex_unpadded_position: int,
         num_tokens_generated: int,
     ) -> torch.Tensor:
         if num_tokens_generated == 0:
             return sequences
 
-        if num_tokens_generated > num_initial_pad_tokens:
-            # Trim after the fist delim token, which is the same for every element of batch
-            trimming_position = 1 + (sequences == self.delim_token).nonzero(as_tuple=True)[1][0]
-        else:
-            # Remove the first pad token
-            trimming_position = 0
+        trimming_position = (
+            apex_unpadded_position
+            if num_tokens_generated > apex_unpadded_position
+            else num_tokens_generated
+        )
 
         sequences = torch.cat(
             (sequences[:, :trimming_position], sequences[:, trimming_position + 1:]),
@@ -452,6 +451,10 @@ def generate(
         num_initial_pad_tokens = (idx == self.pad_token).sum().item()
         sequences = idx.unsqueeze(0)
 
+        apex_padded_position = (sequences == self.delim_token).nonzero(as_tuple=True)[1][0]
+
+        apex_unpadded_position = 1 + apex_padded_position - num_initial_pad_tokens
+
         probabilities = torch.tensor([1.0], device=self.device)
 
         finished_sequences = torch.tensor([], device=self.device)
@@ -467,7 +470,7 @@ def generate(
             # trim the sequences down to block size
             sequences = self._trim_subdomains(
                 sequences,
-                num_initial_pad_tokens=num_initial_pad_tokens,
+                apex_unpadded_position=apex_unpadded_position,
                 num_tokens_generated=i,
             )
 

From 192dd23be4bacc3fe024db30ad9848ec46f855a1 Mon Sep 17 00:00:00 2001
From: EduardoTerres <EduardoTerres@users.noreply.github.com>
Date: Wed, 5 Nov 2025 16:07:51 +0100
Subject: [PATCH 06/16] linting

---
 subwiz/main.py  | 2 +-
 subwiz/model.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/subwiz/main.py b/subwiz/main.py
index 16b4761..5959a1b 100644
--- a/subwiz/main.py
+++ b/subwiz/main.py
@@ -111,7 +111,7 @@ def run_inference(
     apex_x = tokenizer.encode(apex_tokenizer_input)
 
     # Trim subs to account for the apex part, grab last part
-    subs_x = subs_x[-(gpt_model.config.block_size - len(apex_x)):]
+    subs_x = subs_x[-(gpt_model.config.block_size - len(apex_x)) :]
 
     x = apex_x + subs_x
     x = [gpt_model.pad_token] * (gpt_model.config.block_size - len(x)) + x
diff --git a/subwiz/model.py b/subwiz/model.py
index a469a9d..6c9360d 100644
--- a/subwiz/model.py
+++ b/subwiz/model.py
@@ -399,7 +399,7 @@ def _trim_subdomains(
         )
 
         sequences = torch.cat(
-            (sequences[:, :trimming_position], sequences[:, trimming_position + 1:]),
+            (sequences[:, :trimming_position], sequences[:, trimming_position + 1 :]),
             dim=1,
         )
         return sequences

From 732c052c76b504bdd488ac73f1225be4e4eb1d9d Mon Sep 17 00:00:00 2001
From: EduardoTerres <EduardoTerres@users.noreply.github.com>
Date: Wed, 5 Nov 2025 16:37:02 +0100
Subject: [PATCH 07/16] add revision system

---
 subwiz/main.py | 30 +++++++++++++++++++++++++-----
 1 file changed, 25 insertions(+), 5 deletions(-)

diff --git a/subwiz/main.py b/subwiz/main.py
index 5959a1b..e826262 100644
--- a/subwiz/main.py
+++ b/subwiz/main.py
@@ -12,6 +12,8 @@
 from collections import defaultdict
 from typing import Callable
 
+from importlib.metadata import version
+
 from huggingface_hub import hf_hub_download
 from huggingface_hub.utils import disable_progress_bars, enable_progress_bars
 import torch
@@ -35,12 +37,13 @@
 MODEL_FILE = "model_v2.pt"
 TOKENIZER_FILE = "tokenizer_v2.json"
 CONFIG_FILE = "config.json"
-
+REVISION = "9a2c505d0312ad6938b27d9b4338020fe37883e8"
 
 def get_model_and_tokenizer(
     force_download: bool,
     device: str,
     quiet: bool,
+    version: str = "weights_v2",
 ) -> tuple[GPT, PreTrainedTokenizerFast]:
     """Download files from HuggingFace to run subwiz. Caches in local file system.
 
@@ -54,14 +57,24 @@ def get_model_and_tokenizer(
     if quiet:
         disable_progress_bars()
 
+    
     model_path = hf_hub_download(
-        repo_id=MODEL_REPO, filename=MODEL_FILE, force_download=force_download
+        repo_id=MODEL_REPO,
+        filename=MODEL_FILE,
+        force_download=force_download,
+        revision=REVISION,
     )
     tokenizer_path = hf_hub_download(
-        repo_id=MODEL_REPO, filename=TOKENIZER_FILE, force_download=force_download
+        repo_id=MODEL_REPO,
+        filename=TOKENIZER_FILE,
+        force_download=force_download,
+        revision=REVISION,
     )
     hf_hub_download(
-        repo_id=MODEL_REPO, filename=CONFIG_FILE, force_download=force_download
+        repo_id=MODEL_REPO,
+        filename=CONFIG_FILE,
+        force_download=force_download,
+        revision=REVISION,
     )
     if quiet:
         enable_progress_bars()
@@ -304,8 +317,15 @@ def run(
             "Use the --multi-apex flag to process them all."
         )
 
+    # Auto-detect version from package
+    try:
+        pkg_version = version("subwiz")
+        model_version = "weights_v2" if pkg_version >= "0.5.0" else "weights_v1"
+    except Exception:
+        model_version = "weights_v1"  # default to old module version
+    
     gpt_model, tokenizer = get_model_and_tokenizer(
-        force_download, device=device, quiet=quiet
+        force_download, device=device, quiet=quiet, version=model_version
     )
     found_domains = set()
 

From ca6b50802a9dcdeb297e8b117fcd4f755ee4d287 Mon Sep 17 00:00:00 2001
From: EduardoTerres <EduardoTerres@users.noreply.github.com>
Date: Wed, 5 Nov 2025 16:37:55 +0100
Subject: [PATCH 08/16] lint

---
 subwiz/model.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/subwiz/model.py b/subwiz/model.py
index 6c9360d..730ae9d 100644
--- a/subwiz/model.py
+++ b/subwiz/model.py
@@ -451,7 +451,9 @@ def generate(
         num_initial_pad_tokens = (idx == self.pad_token).sum().item()
         sequences = idx.unsqueeze(0)
 
-        apex_padded_position = (sequences == self.delim_token).nonzero(as_tuple=True)[1][0]
+        apex_padded_position = (
+            sequences == self.delim_token
+        ).nonzero(as_tuple=True)[1][0]
 
         apex_unpadded_position = 1 + apex_padded_position - num_initial_pad_tokens
 

From 9b0ec959d94e18012459ef82c261d2d1eefb0257 Mon Sep 17 00:00:00 2001
From: EduardoTerres <EduardoTerres@users.noreply.github.com>
Date: Wed, 5 Nov 2025 16:38:59 +0100
Subject: [PATCH 09/16] wicked lint

---
 subwiz/model.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/subwiz/model.py b/subwiz/model.py
index 730ae9d..ddcade2 100644
--- a/subwiz/model.py
+++ b/subwiz/model.py
@@ -451,9 +451,9 @@ def generate(
         num_initial_pad_tokens = (idx == self.pad_token).sum().item()
         sequences = idx.unsqueeze(0)
 
-        apex_padded_position = (
-            sequences == self.delim_token
-        ).nonzero(as_tuple=True)[1][0]
+        apex_padded_position = (sequences == self.delim_token).nonzero(as_tuple=True)[
+            1
+        ][0]
 
         apex_unpadded_position = 1 + apex_padded_position - num_initial_pad_tokens
 

From 7b490babe41f546b51fc56d27b61feae74f428cd Mon Sep 17 00:00:00 2001
From: EduardoTerres <EduardoTerres@users.noreply.github.com>
Date: Wed, 5 Nov 2025 16:43:22 +0100
Subject: [PATCH 10/16] linting

---
 subwiz/main.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/subwiz/main.py b/subwiz/main.py
index e826262..2bbe5f2 100644
--- a/subwiz/main.py
+++ b/subwiz/main.py
@@ -39,6 +39,7 @@
 CONFIG_FILE = "config.json"
 REVISION = "9a2c505d0312ad6938b27d9b4338020fe37883e8"
 
+
 def get_model_and_tokenizer(
     force_download: bool,
     device: str,
@@ -57,7 +58,6 @@ def get_model_and_tokenizer(
     if quiet:
         disable_progress_bars()
 
-    
     model_path = hf_hub_download(
         repo_id=MODEL_REPO,
         filename=MODEL_FILE,
@@ -323,7 +323,7 @@ def run(
         model_version = "weights_v2" if pkg_version >= "0.5.0" else "weights_v1"
     except Exception:
         model_version = "weights_v1"  # default to old module version
-    
+
     gpt_model, tokenizer = get_model_and_tokenizer(
         force_download, device=device, quiet=quiet, version=model_version
     )

From 50e388727132678e1e5b5a017c9a6fc1e6f45fe5 Mon Sep 17 00:00:00 2001
From: EduardoTerres <EduardoTerres@users.noreply.github.com>
Date: Wed, 5 Nov 2025 16:46:51 +0100
Subject: [PATCH 11/16] New language tests

---
 tests/test_results.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/tests/test_results.py b/tests/test_results.py
index 901699a..cd86e8c 100644
--- a/tests/test_results.py
+++ b/tests/test_results.py
@@ -19,8 +19,13 @@ def test_languages():
         no_resolve=True,
     )
     print(results)
-    assert "english.hadrian.io" in results
-
+    assert {
+        "english.hadrian.io",
+        "french.hadrian.io",
+        "spanish.hadrian.io",
+        "portuguese.hadrian.io",
+        "dutch.hadrian.io",
+    } | results
 
 def test_numbers():
     """Test that numeric subdomain patterns are generated correctly.

From 3bf7778e3dec5e0526e09e06f7163ce0d91f8e13 Mon Sep 17 00:00:00 2001
From: EduardoTerres <EduardoTerres@users.noreply.github.com>
Date: Wed, 5 Nov 2025 16:48:11 +0100
Subject: [PATCH 12/16] Intersect the set

---
 tests/test_results.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_results.py b/tests/test_results.py
index cd86e8c..b1edca6 100644
--- a/tests/test_results.py
+++ b/tests/test_results.py
@@ -25,7 +25,7 @@ def test_languages():
         "spanish.hadrian.io",
         "portuguese.hadrian.io",
         "dutch.hadrian.io",
-    } | results
+    } & set(results)
 
 def test_numbers():
     """Test that numeric subdomain patterns are generated correctly.

From bab673ff2dad58f914281475f59cafbe8139390b Mon Sep 17 00:00:00 2001
From: EduardoTerres <EduardoTerres@users.noreply.github.com>
Date: Wed, 5 Nov 2025 16:49:09 +0100
Subject: [PATCH 13/16] add couple comments

---
 subwiz/main.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/subwiz/main.py b/subwiz/main.py
index 2bbe5f2..b3da07f 100644
--- a/subwiz/main.py
+++ b/subwiz/main.py
@@ -32,11 +32,12 @@
     concurrency_type,
 )
 
-
+# v2 stands for the module version not the model weights version
 MODEL_REPO = "HadrianSecurity/subwiz"
 MODEL_FILE = "model_v2.pt"
 TOKENIZER_FILE = "tokenizer_v2.json"
 CONFIG_FILE = "config.json"
+# The revision changes with every new model weights version
 REVISION = "9a2c505d0312ad6938b27d9b4338020fe37883e8"
 
 

From c359cb2bcef752a931804fd6b5f0e75974b7fc2e Mon Sep 17 00:00:00 2001
From: EduardoTerres <EduardoTerres@users.noreply.github.com>
Date: Wed, 5 Nov 2025 16:50:42 +0100
Subject: [PATCH 14/16] linting problems never end

---
 tests/test_results.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/test_results.py b/tests/test_results.py
index b1edca6..9cda550 100644
--- a/tests/test_results.py
+++ b/tests/test_results.py
@@ -27,6 +27,7 @@ def test_languages():
         "dutch.hadrian.io",
     } & set(results)
 
+
 def test_numbers():
     """Test that numeric subdomain patterns are generated correctly.
 

From 761dbbda1a00b22125ae04e9b54a8f86a63221a5 Mon Sep 17 00:00:00 2001
From: EduardoTerres <EduardoTerres@users.noreply.github.com>
Date: Wed, 5 Nov 2025 17:07:31 +0100
Subject: [PATCH 15/16] no need to fetch model version

---
 subwiz/main.py | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/subwiz/main.py b/subwiz/main.py
index b3da07f..38ccfec 100644
--- a/subwiz/main.py
+++ b/subwiz/main.py
@@ -45,7 +45,6 @@ def get_model_and_tokenizer(
     force_download: bool,
     device: str,
     quiet: bool,
-    version: str = "weights_v2",
 ) -> tuple[GPT, PreTrainedTokenizerFast]:
     """Download files from HuggingFace to run subwiz. Caches in local file system.
 
@@ -318,15 +317,8 @@ def run(
             "Use the --multi-apex flag to process them all."
         )
 
-    # Auto-detect version from package
-    try:
-        pkg_version = version("subwiz")
-        model_version = "weights_v2" if pkg_version >= "0.5.0" else "weights_v1"
-    except Exception:
-        model_version = "weights_v1"  # default to old module version
-
     gpt_model, tokenizer = get_model_and_tokenizer(
-        force_download, device=device, quiet=quiet, version=model_version
+        force_download, device=device, quiet=quiet
     )
     found_domains = set()
 

From 295c5a88fba12b39f9814c3eff39d2567646b65e Mon Sep 17 00:00:00 2001
From: EduardoTerres <EduardoTerres@users.noreply.github.com>
Date: Wed, 5 Nov 2025 17:13:21 +0100
Subject: [PATCH 16/16] nit

---
 subwiz/main.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/subwiz/main.py b/subwiz/main.py
index 38ccfec..e200d59 100644
--- a/subwiz/main.py
+++ b/subwiz/main.py
@@ -32,12 +32,12 @@
     concurrency_type,
 )
 
-# v2 stands for the module version not the model weights version
+
 MODEL_REPO = "HadrianSecurity/subwiz"
 MODEL_FILE = "model_v2.pt"
 TOKENIZER_FILE = "tokenizer_v2.json"
 CONFIG_FILE = "config.json"
-# The revision changes with every new model weights version
+# Change revision when releasing new weights
 REVISION = "9a2c505d0312ad6938b27d9b4338020fe37883e8"