Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions crates/prek-identify/gen.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,14 @@
("TAG_SET_YAML", ["yaml"]),
]

# Local overrides mapping a file extension to its tag set. They are applied to
# EXTENSIONS with `update()`, so an entry here replaces any existing entry for
# the same extension.
#
# Carrying this is a deliberate but undesirable compromise: it may diverge from
# upstream identify, and upstream is unlikely to accept this mapping. We
# reluctantly keep it here because there is no practical upstream path for now.
#
# NOTE(review): `.mts` is also used by binary MPEG transport stream / AVCHD
# media. Mapping it unconditionally to "text" may misclassify those files;
# confirm how this interacts with binary detection.
EXTENSION_OVERRIDES = {
"cts": {"text", "ts"},
"mts": {"text", "ts"},
}

# Directory that contains this script.
SELF_DIR = Path(__file__).parent
# Path to src/tags.rs relative to this script's directory; presumably the
# generated output file -- confirm against the code that opens it.
TAGS_FILE = SELF_DIR / "src/tags.rs"

Expand Down Expand Up @@ -77,6 +85,7 @@ def tagset_expr(tag_set):
f.write("};\n\n")

EXTENSIONS.update(EXTENSIONS_NEED_BINARY_CHECK)
EXTENSIONS.update(EXTENSION_OVERRIDES)
f.write("pub const EXTENSIONS: phf::Map<&str, TagSet> = phf::phf_map! {\n")
for ext in sorted(EXTENSIONS):
tag_names = sorted(EXTENSIONS[ext])
Expand Down
6 changes: 6 additions & 0 deletions crates/prek-identify/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -573,6 +573,12 @@ mod tests {
let tags = super::tags_from_filename(Path::new("data.json"));
assert_tagset(&tags, &["json", "text"]);

let tags = super::tags_from_filename(Path::new("index.cts"));
assert_tagset(&tags, &["text", "ts"]);

let tags = super::tags_from_filename(Path::new("index.mts"));
assert_tagset(&tags, &["text", "ts"]);

let tags = super::tags_from_filename(Path::new("build.props"));
assert_tagset(&tags, &["msbuild", "text", "xml"]);

Expand Down
4 changes: 4 additions & 0 deletions crates/prek-identify/src/tags.rs
Original file line number Diff line number Diff line change
Expand Up @@ -488,6 +488,8 @@ pub const EXTENSIONS: phf::Map<&str, TagSet> = phf::phf_map! {
"csv" => TagSet::new(&[48, 255]),
// ["c#", "c#script", "text"]
"csx" => TagSet::new(&[29, 30, 255]),
// ["text", "ts"]
"cts" => TagSet::new(&[255, 261]),
// ["cuda", "text"]
"cu" => TagSet::new(&[49, 255]),
// ["cue", "text"]
Expand Down Expand Up @@ -756,6 +758,8 @@ pub const EXTENSIONS: phf::Map<&str, TagSet> = phf::phf_map! {
"mscx" => TagSet::new(&[169, 255, 297]),
// ["binary", "musescore", "zip"]
"mscz" => TagSet::new(&[21, 169, 308]),
// ["text", "ts"]
"mts" => TagSet::new(&[255, 261]),

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Avoid marking binary .mts media as TypeScript text

Repositories may contain binary .mts media files, since MPEG transport stream/AVCHD video uses this extension. This unconditional mapping tags them as both text and ts. Because tags_from_path only runs the binary-sniffing path when no text/binary tag is already present, those binary assets will no longer be tagged binary and can be passed to hooks using types: [text] or types: [ts]. Consider making the .mts TypeScript tag conditional on the file actually being text, so existing binary .mts assets are not misclassified.

Useful? React with 👍 / 👎.

// ["mustache", "text"]
"mustache" => TagSet::new(&[170, 255]),
// ["myst", "text"]
Expand Down
Loading