From 44917d355e29e605efa8b6adb4e404bda2676bb1 Mon Sep 17 00:00:00 2001 From: unknown <16974509+cynodesmus@users.noreply.github.com> Date: Wed, 18 Mar 2026 10:27:06 +0400 Subject: [PATCH 1/4] fix: apply "model." prefix only for lm models and fix UTF-8 decoding of vocab.json/merges.txt --- convert.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/convert.py b/convert.py index ba8816b..36c76f5 100755 --- a/convert.py +++ b/convert.py @@ -108,7 +108,7 @@ def add_metadata(w, cfg, model_type): w.add_string("acestep.config_json", json.dumps(cfg, separators=(",", ":"))) # Tensor packing from safetensors -def add_tensors_from_sf(w, sf_path, tag): +def add_tensors_from_sf(w, sf_path, tag, model_type): meta, hdr_size = read_sf_header(sf_path) names = sorted(meta.keys()) f = open(sf_path, "rb") @@ -119,7 +119,7 @@ def add_tensors_from_sf(w, sf_path, tag): info = meta[name] # normalize: some upstream checkpoints omit the "model." prefix - if not name.startswith("model."): + if model_type == "lm" and not name.startswith("model."): name = "model." + name dtype_str = info["dtype"] @@ -171,13 +171,14 @@ def add_bpe_tokenizer(w, model_dir, tag): if not os.path.exists(vocab_path) or not os.path.exists(merges_path): return False - vocab = json.load(open(vocab_path)) + with open(vocab_path, "r", encoding="utf-8") as f: + vocab = json.load(f) tokens = [""] * len(vocab) for tok_str, tok_id in vocab.items(): if 0 <= tok_id < len(tokens): tokens[tok_id] = tok_str - with open(merges_path) as f: + with open(merges_path, "r", encoding="utf-8") as f: merges = [] for line in f: line = line.rstrip("\n\r") @@ -225,7 +226,7 @@ def convert_model(name, model_dir, output_path, model_type): n_tensors = 0 n_bytes = 0 for sf in sf_files: - c, b = add_tensors_from_sf(w, sf, tag) + c, b = add_tensors_from_sf(w, sf, tag, model_type) n_tensors += c n_bytes += b if len(sf_files) > 1: From e6eb7a6978696f5fefe89251093c844a77b76897 Mon Sep 17 00:00:00 2001 From: unknown <16974509+cynodesmus@users.noreply.github.com> Date: Wed, 18 Mar 2026 10:44:46 +0400 Subject: [PATCH 2/4] refactor: use context manager for safetensors file handling --- convert.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/convert.py b/convert.py index 36c76f5..1479dc5 100755 --- a/convert.py +++ b/convert.py @@ -111,12 +111,12 @@ def add_metadata(w, cfg, model_type): def add_tensors_from_sf(w, sf_path, tag, model_type): meta, hdr_size = read_sf_header(sf_path) names = sorted(meta.keys()) - f = open(sf_path, "rb") - count = 0 - total = 0 + with open(sf_path, "rb") as f: + count = 0 + total = 0 - for name in names: - info = meta[name] + for name in names: + info = meta[name] # normalize: some upstream checkpoints omit the "model." prefix if model_type == "lm" and not name.startswith("model."): @@ -146,8 +146,7 @@ def add_tensors_from_sf(w, sf_path, tag, model_type): count += 1 total += nbytes - f.close() - return count, total + return count, total # silence_latent.pt reader (replaces pt2bin C++ tool) # PyTorch .pt is a ZIP with entry "*/data/0" containing f32 [64, 15000] From 42918c6d7bb6224195dd2f32e7a18e3c35b93397 Mon Sep 17 00:00:00 2001 From: unknown <16974509+cynodesmus@users.noreply.github.com> Date: Wed, 18 Mar 2026 11:01:22 +0400 Subject: [PATCH 3/4] refactor: use context manager for safetensors file handling and fix model prefix normalization in add_tensors_from_sf --- convert.py | 54 +++++++++++++++++++++++++++--------------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/convert.py b/convert.py index 1479dc5..4ec06df 100755 --- a/convert.py +++ b/convert.py @@ -118,35 +118,35 @@ def add_tensors_from_sf(w, sf_path, tag, model_type): for name in names: info = meta[name] - # normalize: some upstream checkpoints omit the "model." prefix - if model_type == "lm" and not name.startswith("model."): - name = "model." + name - - dtype_str = info["dtype"] - shape = info["shape"] - off0, off1 = info["data_offsets"] - nbytes = off1 - off0 - - f.seek(hdr_size + off0) - raw = f.read(nbytes) - - if dtype_str == "BF16": - arr = np.frombuffer(raw, dtype=np.uint16).reshape(shape) - w.add_tensor(name, arr, raw_dtype=BF16) - elif dtype_str == "F16": - arr = np.frombuffer(raw, dtype=np.float16).reshape(shape) - w.add_tensor(name, arr) - elif dtype_str == "F32": - arr = np.frombuffer(raw, dtype=np.float32).reshape(shape) - w.add_tensor(name, arr) - else: - log(tag, " skip %s: dtype %s" % (name, dtype_str)) - continue + # normalize: some upstream checkpoints omit the "model." prefix + if model_type == "lm" and not name.startswith("model."): + name = "model." + name + + dtype_str = info["dtype"] + shape = info["shape"] + off0, off1 = info["data_offsets"] + nbytes = off1 - off0 + + f.seek(hdr_size + off0) + raw = f.read(nbytes) + + if dtype_str == "BF16": + arr = np.frombuffer(raw, dtype=np.uint16).reshape(shape) + w.add_tensor(name, arr, raw_dtype=BF16) + elif dtype_str == "F16": + arr = np.frombuffer(raw, dtype=np.float16).reshape(shape) + w.add_tensor(name, arr) + elif dtype_str == "F32": + arr = np.frombuffer(raw, dtype=np.float32).reshape(shape) + w.add_tensor(name, arr) + else: + log(tag, " skip %s: dtype %s" % (name, dtype_str)) + continue - count += 1 - total += nbytes + count += 1 + total += nbytes - return count, total + return count, total # silence_latent.pt reader (replaces pt2bin C++ tool) # PyTorch .pt is a ZIP with entry "*/data/0" containing f32 [64, 15000] From 393241925d6fa39d72f6612696f63c0dd421628d Mon Sep 17 00:00:00 2001 From: unknown <16974509+cynodesmus@users.noreply.github.com> Date: Wed, 18 Mar 2026 11:09:31 +0400 Subject: [PATCH 4/4] refactor: use context managers for consistency --- convert.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/convert.py b/convert.py index 4ec06df..f5e9b2f 100755 --- a/convert.py +++ b/convert.py @@ -35,7 +35,8 @@ def find_sf_files(model_dir): return [single] index = os.path.join(model_dir, "model.safetensors.index.json") if os.path.exists(index): - idx = json.load(open(index)) + with open(index, "r", encoding="utf-8") as f: + idx = json.load(f) shards = sorted(set(idx["weight_map"].values())) return [os.path.join(model_dir, s) for s in shards] diffusers = os.path.join(model_dir, "diffusion_pytorch_model.safetensors") @@ -202,7 +203,8 @@ def convert_model(name, model_dir, output_path, model_type): log(tag, "skip %s: no config.json" % name) return False - cfg = json.load(open(cfg_path)) + with open(cfg_path, "r", encoding="utf-8") as f: + cfg = json.load(f) sf_files = find_sf_files(model_dir) if not sf_files: log(tag, "skip %s: no safetensors" % name)