Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 42 additions & 40 deletions convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ def find_sf_files(model_dir):
return [single]
index = os.path.join(model_dir, "model.safetensors.index.json")
if os.path.exists(index):
idx = json.load(open(index))
with open(index, "r", encoding="utf-8") as f:
idx = json.load(f)
shards = sorted(set(idx["weight_map"].values()))
return [os.path.join(model_dir, s) for s in shards]
diffusers = os.path.join(model_dir, "diffusion_pytorch_model.safetensors")
Expand Down Expand Up @@ -108,45 +109,44 @@ def add_metadata(w, cfg, model_type):
w.add_string("acestep.config_json", json.dumps(cfg, separators=(",", ":")))

# Tensor packing from safetensors
def add_tensors_from_sf(w, sf_path, tag, model_type):
    """Stream every tensor in a safetensors file into the GGUF writer.

    Tensors are emitted in sorted name order. BF16 payloads are passed
    through as raw uint16 words (numpy has no bfloat16 dtype); F16/F32
    are handed over as native numpy arrays. Tensors with any other dtype
    are logged and skipped without being counted.

    Args:
        w: GGUF writer exposing add_tensor(name, arr, raw_dtype=...).
        sf_path: path to the .safetensors shard to read.
        tag: label prefix for log() messages.
        model_type: when "lm", tensor names missing the "model." prefix
            are normalized to carry it (some upstream checkpoints omit it).

    Returns:
        (count, total) — number of tensors written and their payload
        bytes. NOTE(review): hdr_size from read_sf_header is assumed to
        be the absolute file offset where tensor data begins — confirm.
    """
    meta, hdr_size = read_sf_header(sf_path)

    # Safetensors dtype tag -> numpy view used to wrap the raw buffer.
    np_view = {"BF16": np.uint16, "F16": np.float16, "F32": np.float32}

    count, total = 0, 0
    with open(sf_path, "rb") as f:
        for name in sorted(meta.keys()):
            info = meta[name]

            # normalize: some upstream checkpoints omit the "model." prefix
            if model_type == "lm" and not name.startswith("model."):
                name = "model." + name

            off0, off1 = info["data_offsets"]
            nbytes = off1 - off0
            f.seek(hdr_size + off0)
            raw = f.read(nbytes)

            dtype_str = info["dtype"]
            view = np_view.get(dtype_str)
            if view is None:
                log(tag, " skip %s: dtype %s" % (name, dtype_str))
                continue

            arr = np.frombuffer(raw, dtype=view).reshape(info["shape"])
            if dtype_str == "BF16":
                # No native bf16 in numpy: ship the uint16 words verbatim.
                w.add_tensor(name, arr, raw_dtype=BF16)
            else:
                w.add_tensor(name, arr)

            count += 1
            total += nbytes

    return count, total

# silence_latent.pt reader (replaces pt2bin C++ tool)
Expand All @@ -171,13 +171,14 @@ def add_bpe_tokenizer(w, model_dir, tag):
if not os.path.exists(vocab_path) or not os.path.exists(merges_path):
return False

vocab = json.load(open(vocab_path))
with open(vocab_path, "r", encoding="utf-8") as f:
vocab = json.load(f)
tokens = [""] * len(vocab)
for tok_str, tok_id in vocab.items():
if 0 <= tok_id < len(tokens):
tokens[tok_id] = tok_str

with open(merges_path) as f:
with open(merges_path, "r", encoding="utf-8") as f:
merges = []
for line in f:
line = line.rstrip("\n\r")
Expand All @@ -202,7 +203,8 @@ def convert_model(name, model_dir, output_path, model_type):
log(tag, "skip %s: no config.json" % name)
return False

cfg = json.load(open(cfg_path))
with open(cfg_path, "r", encoding="utf-8") as f:
cfg = json.load(f)
sf_files = find_sf_files(model_dir)
if not sf_files:
log(tag, "skip %s: no safetensors" % name)
Expand All @@ -225,7 +227,7 @@ def convert_model(name, model_dir, output_path, model_type):
n_tensors = 0
n_bytes = 0
for sf in sf_files:
c, b = add_tensors_from_sf(w, sf, tag)
c, b = add_tensors_from_sf(w, sf, tag, model_type)
n_tensors += c
n_bytes += b
if len(sf_files) > 1:
Expand Down
Loading