Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 42 additions & 40 deletions convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ def find_sf_files(model_dir):
return [single]
index = os.path.join(model_dir, "model.safetensors.index.json")
if os.path.exists(index):
idx = json.load(open(index))
with open(index, "r", encoding="utf-8") as f:
idx = json.load(f)
shards = sorted(set(idx["weight_map"].values()))
return [os.path.join(model_dir, s) for s in shards]
diffusers = os.path.join(model_dir, "diffusion_pytorch_model.safetensors")
Expand Down Expand Up @@ -108,45 +109,44 @@ def add_metadata(w, cfg, model_type):
w.add_string("acestep.config_json", json.dumps(cfg, separators=(",", ":")))

# Tensor packing from safetensors
def add_tensors_from_sf(w, sf_path, tag, model_type):
    """Stream every tensor in a safetensors file into the GGUF writer.

    Tensors are emitted in sorted name order. BF16 payloads are passed
    through as raw uint16 words (numpy has no bfloat16 dtype); F16/F32
    are handed over as native numpy arrays. Tensors with any other dtype
    are logged and skipped without being counted.

    Args:
        w: GGUF writer exposing add_tensor(name, arr, raw_dtype=...).
        sf_path: path to the .safetensors shard to read.
        tag: label prefix for log() messages.
        model_type: when "lm", tensor names missing the "model." prefix
            are normalized to carry it (some upstream checkpoints omit it).

    Returns:
        (count, total) — number of tensors written and their payload
        bytes. NOTE(review): hdr_size from read_sf_header is assumed to
        be the absolute file offset where tensor data begins — confirm.
    """
    meta, hdr_size = read_sf_header(sf_path)

    # Safetensors dtype tag -> numpy view used to wrap the raw buffer.
    np_view = {"BF16": np.uint16, "F16": np.float16, "F32": np.float32}

    count, total = 0, 0
    with open(sf_path, "rb") as f:
        for name in sorted(meta.keys()):
            info = meta[name]

            # normalize: some upstream checkpoints omit the "model." prefix
            if model_type == "lm" and not name.startswith("model."):
                name = "model." + name

            off0, off1 = info["data_offsets"]
            nbytes = off1 - off0
            f.seek(hdr_size + off0)
            raw = f.read(nbytes)

            dtype_str = info["dtype"]
            view = np_view.get(dtype_str)
            if view is None:
                log(tag, " skip %s: dtype %s" % (name, dtype_str))
                continue

            arr = np.frombuffer(raw, dtype=view).reshape(info["shape"])
            if dtype_str == "BF16":
                # No native bf16 in numpy: ship the uint16 words verbatim.
                w.add_tensor(name, arr, raw_dtype=BF16)
            else:
                w.add_tensor(name, arr)

            count += 1
            total += nbytes

    return count, total

# silence_latent.pt reader (replaces pt2bin C++ tool)
Expand All @@ -171,13 +171,14 @@ def add_bpe_tokenizer(w, model_dir, tag):
if not os.path.exists(vocab_path) or not os.path.exists(merges_path):
return False

vocab = json.load(open(vocab_path))
with open(vocab_path, "r", encoding="utf-8") as f:
vocab = json.load(f)
tokens = [""] * len(vocab)
for tok_str, tok_id in vocab.items():
if 0 <= tok_id < len(tokens):
tokens[tok_id] = tok_str

with open(merges_path) as f:
with open(merges_path, "r", encoding="utf-8") as f:
merges = []
for line in f:
line = line.rstrip("\n\r")
Expand All @@ -202,7 +203,8 @@ def convert_model(name, model_dir, output_path, model_type):
log(tag, "skip %s: no config.json" % name)
return False

cfg = json.load(open(cfg_path))
with open(cfg_path, "r", encoding="utf-8") as f:
cfg = json.load(f)
sf_files = find_sf_files(model_dir)
if not sf_files:
log(tag, "skip %s: no safetensors" % name)
Expand All @@ -225,7 +227,7 @@ def convert_model(name, model_dir, output_path, model_type):
n_tensors = 0
n_bytes = 0
for sf in sf_files:
c, b = add_tensors_from_sf(w, sf, tag)
c, b = add_tensors_from_sf(w, sf, tag, model_type)
n_tensors += c
n_bytes += b
if len(sf_files) > 1:
Expand Down
Loading