From 0922374e8ee3de9a6efefd976bf587511dec0ae4 Mon Sep 17 00:00:00 2001 From: Jeremy Mill Date: Wed, 25 Jun 2025 10:42:59 -0400 Subject: [PATCH 1/3] add parquet --- matchers/archive.go | 121 +++++++++++++++++++++++--------------------- 1 file changed, 62 insertions(+), 59 deletions(-) diff --git a/matchers/archive.go b/matchers/archive.go index dd892ce..50c3dc3 100644 --- a/matchers/archive.go +++ b/matchers/archive.go @@ -8,65 +8,67 @@ const ( ) var ( - TypeEpub = newType("epub", "application/epub+zip") - TypeZip = newType("zip", "application/zip") - TypeTar = newType("tar", "application/x-tar") - TypeRar = newType("rar", "application/vnd.rar") - TypeGz = newType("gz", "application/gzip") - TypeBz2 = newType("bz2", "application/x-bzip2") - Type7z = newType("7z", "application/x-7z-compressed") - TypeXz = newType("xz", "application/x-xz") - TypeZstd = newType("zst", "application/zstd") - TypePdf = newType("pdf", "application/pdf") - TypeExe = newType("exe", "application/vnd.microsoft.portable-executable") - TypeSwf = newType("swf", "application/x-shockwave-flash") - TypeRtf = newType("rtf", "application/rtf") - TypeEot = newType("eot", "application/octet-stream") - TypePs = newType("ps", "application/postscript") - TypeSqlite = newType("sqlite", "application/vnd.sqlite3") - TypeNes = newType("nes", "application/x-nintendo-nes-rom") - TypeCrx = newType("crx", "application/x-google-chrome-extension") - TypeCab = newType("cab", "application/vnd.ms-cab-compressed") - TypeDeb = newType("deb", "application/vnd.debian.binary-package") - TypeAr = newType("ar", "application/x-unix-archive") - TypeZ = newType("Z", "application/x-compress") - TypeLz = newType("lz", "application/x-lzip") - TypeRpm = newType("rpm", "application/x-rpm") - TypeElf = newType("elf", "application/x-executable") - TypeDcm = newType("dcm", "application/dicom") - TypeIso = newType("iso", "application/x-iso9660-image") - TypeMachO = newType("macho", "application/x-mach-binary") // Mach-O binaries have no common extension. + TypeEpub = newType("epub", "application/epub+zip") + TypeZip = newType("zip", "application/zip") + TypeTar = newType("tar", "application/x-tar") + TypeRar = newType("rar", "application/vnd.rar") + TypeGz = newType("gz", "application/gzip") + TypeBz2 = newType("bz2", "application/x-bzip2") + Type7z = newType("7z", "application/x-7z-compressed") + TypeXz = newType("xz", "application/x-xz") + TypeZstd = newType("zst", "application/zstd") + TypePdf = newType("pdf", "application/pdf") + TypeExe = newType("exe", "application/vnd.microsoft.portable-executable") + TypeSwf = newType("swf", "application/x-shockwave-flash") + TypeRtf = newType("rtf", "application/rtf") + TypeEot = newType("eot", "application/octet-stream") + TypePs = newType("ps", "application/postscript") + TypeSqlite = newType("sqlite", "application/vnd.sqlite3") + TypeNes = newType("nes", "application/x-nintendo-nes-rom") + TypeCrx = newType("crx", "application/x-google-chrome-extension") + TypeCab = newType("cab", "application/vnd.ms-cab-compressed") + TypeDeb = newType("deb", "application/vnd.debian.binary-package") + TypeAr = newType("ar", "application/x-unix-archive") + TypeZ = newType("Z", "application/x-compress") + TypeLz = newType("lz", "application/x-lzip") + TypeRpm = newType("rpm", "application/x-rpm") + TypeElf = newType("elf", "application/x-executable") + TypeDcm = newType("dcm", "application/dicom") + TypeIso = newType("iso", "application/x-iso9660-image") + TypeMachO = newType("macho", "application/x-mach-binary") // Mach-O binaries have no common extension. + TypeParquet = newType("parquet", "application/vnd.apache.parquet") ) var Archive = Map{ - TypeEpub: bytePrefixMatcher(epubMagic), - TypeZip: Zip, - TypeTar: Tar, - TypeRar: Rar, - TypeGz: bytePrefixMatcher(gzMagic), - TypeBz2: bytePrefixMatcher(bz2Magic), - Type7z: bytePrefixMatcher(sevenzMagic), - TypeXz: bytePrefixMatcher(xzMagic), - TypeZstd: Zst, - TypePdf: bytePrefixMatcher(pdfMagic), - TypeExe: bytePrefixMatcher(exeMagic), - TypeSwf: Swf, - TypeRtf: bytePrefixMatcher(rtfMagic), - TypeEot: Eot, - TypePs: bytePrefixMatcher(psMagic), - TypeSqlite: bytePrefixMatcher(sqliteMagic), - TypeNes: bytePrefixMatcher(nesMagic), - TypeCrx: bytePrefixMatcher(crxMagic), - TypeCab: Cab, - TypeDeb: bytePrefixMatcher(debMagic), - TypeAr: bytePrefixMatcher(arMagic), - TypeZ: Z, - TypeLz: bytePrefixMatcher(lzMagic), - TypeRpm: Rpm, - TypeElf: Elf, - TypeDcm: Dcm, - TypeIso: Iso, - TypeMachO: MachO, + TypeEpub: bytePrefixMatcher(epubMagic), + TypeZip: Zip, + TypeTar: Tar, + TypeRar: Rar, + TypeGz: bytePrefixMatcher(gzMagic), + TypeBz2: bytePrefixMatcher(bz2Magic), + Type7z: bytePrefixMatcher(sevenzMagic), + TypeXz: bytePrefixMatcher(xzMagic), + TypeZstd: Zst, + TypePdf: bytePrefixMatcher(pdfMagic), + TypeExe: bytePrefixMatcher(exeMagic), + TypeSwf: Swf, + TypeRtf: bytePrefixMatcher(rtfMagic), + TypeEot: Eot, + TypePs: bytePrefixMatcher(psMagic), + TypeSqlite: bytePrefixMatcher(sqliteMagic), + TypeNes: bytePrefixMatcher(nesMagic), + TypeCrx: bytePrefixMatcher(crxMagic), + TypeCab: Cab, + TypeDeb: bytePrefixMatcher(debMagic), + TypeAr: bytePrefixMatcher(arMagic), + TypeZ: Z, + TypeLz: bytePrefixMatcher(lzMagic), + TypeRpm: Rpm, + TypeElf: Elf, + TypeDcm: Dcm, + TypeIso: Iso, + TypeMachO: MachO, + TypeParquet: bytePrefixMatcher(parquetMagic), } var ( @@ -92,9 +94,10 @@ var ( 0x64, 0x65, 0x62, 0x69, 0x61, 0x6E, 0x2D, 0x62, 0x69, 0x6E, 0x61, 0x72, 0x79, } - arMagic = []byte{0x21, 0x3C, 0x61, 0x72, 0x63, 0x68, 0x3E} - zstdMagic = []byte{0x28, 0xB5, 0x2F, 0xFD} - lzMagic = []byte{0x4C, 0x5A, 0x49, 0x50} + arMagic = []byte{0x21, 0x3C, 0x61, 0x72, 0x63, 0x68, 0x3E} + zstdMagic = []byte{0x28, 0xB5, 0x2F, 0xFD} + lzMagic = []byte{0x4C, 0x5A, 0x49, 0x50} + parquetMagic = []byte{0x50, 0x41, 0x52, 0x31} ) func bytePrefixMatcher(magicPattern []byte) Matcher { From 8c2bb8a1361a8e6ffaf34cb6aea9005d2a73fa70 Mon Sep 17 00:00:00 2001 From: Jeremy Mill Date: Wed, 25 Jun 2025 11:02:11 -0400 Subject: [PATCH 2/3] add test file --- fixtures/sample.parquet | Bin 0 -> 2446 bytes match_test.go | 1 + 2 files changed, 1 insertion(+) create mode 100644 fixtures/sample.parquet diff --git a/fixtures/sample.parquet b/fixtures/sample.parquet new file mode 100644 index 0000000000000000000000000000000000000000..028c64cf9f7d9ea46bfeeaf940482aaee90a1b54 GIT binary patch literal 2446 zcmZ`*L1-Ii7JgDj^8efVlXj z+h~)tReRFWhaP+iY7a$+Qgkq^*-PEa;-d=<2zpU(g6+Yl#l^)qB!vBb$#tBx9gt_{ z{qO(gz4v|Z{Vu=u=TMXW->5#yzNoj-c+To9(buwYj)=C>skKZhHy(ZdSt^wqi|&7y zN?D^(=3;JbG@?w3J<{(uM64qj3mr{oQZ#l|z@TipsH&9LfvnNEcG%`J+fs0g##Ncd zbUX7exWy$nkLrLEd4Po?Zml3GL2Z)^amx`evS2yK8Lb3r>3ybuE+Fm-Q=hc3= zNd<)4#fLqv(M9da4o6rocpN%8X>h^1rgkZHk(+Rj{l-b2p`%XBV1fC_E;2_-tzKG(RB zxnWH|)LsdNuF!!t4fhz?K0wudcULwRJSzEwmaEg@e2aLSeYTC_Yuo}$M_1AGM>@{zU?#KLo&SQ`ZEz>>IwEsSoA>Df=};m6B7Ba zYPh`*Hwha|&mO5BWFjo18v1$OCRwHj$n`+`nLnXJ50r-ichGNo)XJZ@VbL)d_zA^& zgo(2W(^`t(p^AoQy!rplOZJpnJOxz=gF0Hp^ypT|-Ly+QQoK(kQ1y)Z&I9Jjbf985Lr!D=ae)}z)e`vso zO&o&sm({zs?ko@E>94D|@2p?Hb@SG3Y2Mav-&lWt{rWOyZ9#kegVP=yoag^|!e@Gg zR#Gczopx`&49To~aQ*Kq?_Ildy}J63Wj#!Kcp~lO?yT09Z@zSM_5B-ns{_->!1U4j z$}g5&ZT0T6P6N}4Qw~T&A1>csU%mVB1W|q~?H1g>P2%Vob~O&ur@o@oSkK=Z%YQL6 zDPDEN;MBMrIbynhag-}9|3F6rxPXRw2n*3_$6p?|5J?1)tc-D%QCx}lac?$W} z(B!|*pF&9H>%5SA6e=JPiD-;}gstG4MkWHTQ+21e&5i48iNkM-0b*SR?d74NQmGPh0{{e!n B Date: Wed, 25 Jun 2025 11:05:17 -0400 Subject: [PATCH 3/3] update README with pq --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 5cb6810..946f4b5 100644 --- a/README.md +++ b/README.md @@ -275,6 +275,7 @@ func main() { - **wasm** - `application/wasm` - **dex** - `application/vnd.android.dex` - **dey** - `application/vnd.android.dey` +- **parquet** - `application/vnd.apache.parquet` ## Benchmarks