From 7c972deeede7cb659f8eedf5a93e759ff709be5e Mon Sep 17 00:00:00 2001 From: wangtiandong Date: Tue, 27 Jan 2026 18:20:51 +0800 Subject: [PATCH 1/3] Change dropbox links to hugging face Dropbox links are broken; switch to Hugging Face --- src/layoutparser/models/base_catalog.py | 13 +++++- src/layoutparser/models/detectron2/catalog.py | 44 +++++++++---------- src/layoutparser/models/effdet/catalog.py | 8 ++-- 3 files changed, 38 insertions(+), 27 deletions(-) diff --git a/src/layoutparser/models/base_catalog.py b/src/layoutparser/models/base_catalog.py index 104d014..58d6904 100644 --- a/src/layoutparser/models/base_catalog.py +++ b/src/layoutparser/models/base_catalog.py @@ -29,6 +29,17 @@ def _get_supported_prefixes(self): def _isfile(self, path): return path in self.cache_map +class HuggingfaceHandler(HTTPURLHandler): + """ + Supports download and file check for Huggingface links + """ + + def _get_supported_prefixes(self): + return ["https://www.huggingface.co"] + + def _isfile(self, path): + return path in self.cache_map PathManager = PathManagerBase() -PathManager.register_handler(DropboxHandler()) \ No newline at end of file +PathManager.register_handler(DropboxHandler()) +PathManager.register_handler(HuggingfaceHandler()) diff --git a/src/layoutparser/models/detectron2/catalog.py b/src/layoutparser/models/detectron2/catalog.py index 12566b6..31200b1 100644 --- a/src/layoutparser/models/detectron2/catalog.py +++ b/src/layoutparser/models/detectron2/catalog.py @@ -18,53 +18,53 @@ MODEL_CATALOG = { "HJDataset": { - "faster_rcnn_R_50_FPN_3x": "https://www.dropbox.com/s/6icw6at8m28a2ho/model_final.pth?dl=1", - "mask_rcnn_R_50_FPN_3x": "https://www.dropbox.com/s/893paxpy5suvlx9/model_final.pth?dl=1", - "retinanet_R_50_FPN_3x": "https://www.dropbox.com/s/yxsloxu3djt456i/model_final.pth?dl=1", + "faster_rcnn_R_50_FPN_3x": "https://huggingface.co/layoutparser/detectron2/resolve/main/HJDataset/faster_rcnn_R_50_FPN_3x/model_final.pth", + "mask_rcnn_R_50_FPN_3x": 
"https://huggingface.co/layoutparser/detectron2/resolve/main/HJDataset/mask_rcnn_R_50_FPN_3x/model_final.pth", + "retinanet_R_50_FPN_3x": "https://huggingface.co/layoutparser/detectron2/resolve/main/HJDataset/retinanet_R_50_FPN_3x/model_final.pth", }, "PubLayNet": { - "faster_rcnn_R_50_FPN_3x": "https://www.dropbox.com/s/dgy9c10wykk4lq4/model_final.pth?dl=1", - "mask_rcnn_R_50_FPN_3x": "https://www.dropbox.com/s/d9fc9tahfzyl6df/model_final.pth?dl=1", - "mask_rcnn_X_101_32x8d_FPN_3x": "https://www.dropbox.com/s/57zjbwv6gh3srry/model_final.pth?dl=1", + "faster_rcnn_R_50_FPN_3x": "https://huggingface.co/layoutparser/detectron2/resolve/main/PubLayNet/faster_rcnn_R_50_FPN_3x/model_final.pth", + "mask_rcnn_R_50_FPN_3x": "https://huggingface.co/layoutparser/detectron2/resolve/main/PubLayNet/mask_rcnn_R_50_FPN_3x/model_final.pth", + "mask_rcnn_X_101_32x8d_FPN_3x": "https://huggingface.co/layoutparser/detectron2/resolve/main/PubLayNet/mask_rcnn_X_101_32x8d_FPN_3x/model_final.pth", }, "PrimaLayout": { - "mask_rcnn_R_50_FPN_3x": "https://www.dropbox.com/s/h7th27jfv19rxiy/model_final.pth?dl=1" + "mask_rcnn_R_50_FPN_3x": "https://huggingface.co/layoutparser/detectron2/resolve/main/PrimaLayout/mask_rcnn_R_50_FPN_3x/model_final.pth" }, "NewspaperNavigator": { - "faster_rcnn_R_50_FPN_3x": "https://www.dropbox.com/s/6ewh6g8rqt2ev3a/model_final.pth?dl=1", + "faster_rcnn_R_50_FPN_3x": "https://huggingface.co/layoutparser/detectron2/resolve/main/NewspaperNavigator/faster_rcnn_R_50_FPN_3x/model_final.pth", }, "TableBank": { - "faster_rcnn_R_50_FPN_3x": "https://www.dropbox.com/s/8v4uqmz1at9v72a/model_final.pth?dl=1", - "faster_rcnn_R_101_FPN_3x": "https://www.dropbox.com/s/6vzfk8lk9xvyitg/model_final.pth?dl=1", + "faster_rcnn_R_50_FPN_3x": "https://huggingface.co/layoutparser/detectron2/resolve/main/TableBank/faster_rcnn_R_50_FPN_3x/model_final.pth", + "faster_rcnn_R_101_FPN_3x": 
"https://huggingface.co/layoutparser/detectron2/resolve/main/TableBank/faster_rcnn_R_101_FPN_3x/model_final.pth", }, "MFD": { - "faster_rcnn_R_50_FPN_3x": "https://www.dropbox.com/s/7xel0i3iqpm2p8y/model_final.pth?dl=1", + "faster_rcnn_R_50_FPN_3x": "https://huggingface.co/layoutparser/detectron2/resolve/main/MFD/faster_rcnn_R_50_FPN_3x/model_final.pth", }, } CONFIG_CATALOG = { "HJDataset": { - "faster_rcnn_R_50_FPN_3x": "https://www.dropbox.com/s/j4yseny2u0hn22r/config.yml?dl=1", - "mask_rcnn_R_50_FPN_3x": "https://www.dropbox.com/s/4jmr3xanmxmjcf8/config.yml?dl=1", - "retinanet_R_50_FPN_3x": "https://www.dropbox.com/s/z8a8ywozuyc5c2x/config.yml?dl=1", + "faster_rcnn_R_50_FPN_3x": "https://huggingface.co/layoutparser/detectron2/resolve/main/HJDataset/faster_rcnn_R_50_FPN_3x/config.yml", + "mask_rcnn_R_50_FPN_3x": "https://huggingface.co/layoutparser/detectron2/resolve/main/HJDataset/mask_rcnn_R_50_FPN_3x/config.yml", + "retinanet_R_50_FPN_3x": "https://huggingface.co/layoutparser/detectron2/resolve/main/HJDataset/retinanet_R_50_FPN_3x/config.yml", }, "PubLayNet": { - "faster_rcnn_R_50_FPN_3x": "https://www.dropbox.com/s/f3b12qc4hc0yh4m/config.yml?dl=1", - "mask_rcnn_R_50_FPN_3x": "https://www.dropbox.com/s/u9wbsfwz4y0ziki/config.yml?dl=1", - "mask_rcnn_X_101_32x8d_FPN_3x": "https://www.dropbox.com/s/nau5ut6zgthunil/config.yaml?dl=1", + "faster_rcnn_R_50_FPN_3x": "https://huggingface.co/layoutparser/detectron2/resolve/main/PubLayNet/faster_rcnn_R_50_FPN_3x/config.yml", + "mask_rcnn_R_50_FPN_3x": "https://huggingface.co/layoutparser/detectron2/resolve/main/PubLayNet/mask_rcnn_R_50_FPN_3x/config.yml", + "mask_rcnn_X_101_32x8d_FPN_3x": "https://huggingface.co/layoutparser/detectron2/resolve/main/PubLayNet/mask_rcnn_X_101_32x8d_FPN_3x/config.yml", }, "PrimaLayout": { - "mask_rcnn_R_50_FPN_3x": "https://www.dropbox.com/s/yc92x97k50abynt/config.yaml?dl=1" + "mask_rcnn_R_50_FPN_3x": 
"https://huggingface.co/layoutparser/detectron2/resolve/main/PrimaLayout/mask_rcnn_R_50_FPN_3x/config.yml" }, "NewspaperNavigator": { - "faster_rcnn_R_50_FPN_3x": "https://www.dropbox.com/s/wnido8pk4oubyzr/config.yml?dl=1", + "faster_rcnn_R_50_FPN_3x": "https://huggingface.co/layoutparser/detectron2/resolve/main/NewspaperNavigator/faster_rcnn_R_50_FPN_3x/config.yml", }, "TableBank": { - "faster_rcnn_R_50_FPN_3x": "https://www.dropbox.com/s/7cqle02do7ah7k4/config.yaml?dl=1", - "faster_rcnn_R_101_FPN_3x": "https://www.dropbox.com/s/h63n6nv51kfl923/config.yaml?dl=1", + "faster_rcnn_R_50_FPN_3x": "https://huggingface.co/layoutparser/detectron2/resolve/main/TableBank/faster_rcnn_R_50_FPN_3x/config.yml", + "faster_rcnn_R_101_FPN_3x": "https://huggingface.co/layoutparser/detectron2/resolve/main/TableBank/faster_rcnn_R_101_FPN_3x/config.yml", }, "MFD": { - "faster_rcnn_R_50_FPN_3x": "https://www.dropbox.com/s/ld9izb95f19369w/config.yaml?dl=1", + "faster_rcnn_R_50_FPN_3x": "https://huggingface.co/layoutparser/detectron2/resolve/main/MFD/faster_rcnn_R_50_FPN_3x/config.yml", }, } diff --git a/src/layoutparser/models/effdet/catalog.py b/src/layoutparser/models/effdet/catalog.py index d530cb3..47ed2f4 100644 --- a/src/layoutparser/models/effdet/catalog.py +++ b/src/layoutparser/models/effdet/catalog.py @@ -18,12 +18,12 @@ MODEL_CATALOG = { "PubLayNet": { - "tf_efficientdet_d0": "https://www.dropbox.com/s/ukbw5s673633hsw/publaynet-tf_efficientdet_d0.pth.tar?dl=1", - "tf_efficientdet_d1": "https://www.dropbox.com/s/gxy11xkkiwnpgog/publaynet-tf_efficientdet_d1.pth.tar?dl=1" + "tf_efficientdet_d0": "https://huggingface.co/layoutparser/efficientdet/resolve/main/PubLayNet/tf_efficientdet_d0/publaynet-tf_efficientdet_d0.pth.tar", + "tf_efficientdet_d1": "https://huggingface.co/layoutparser/efficientdet/resolve/main/PubLayNet/tf_efficientdet_d1/publaynet-tf_efficientdet_d1.pth.tar" }, "MFD": { - "tf_efficientdet_d0": 
"https://www.dropbox.com/s/dkr22iux7thlhel/mfd-tf_efficientdet_d0.pth.tar?dl=1", - "tf_efficientdet_d1": "https://www.dropbox.com/s/icmbiaqr5s9bz1x/mfd-tf_efficientdet_d1.pth.tar?dl=1" + "tf_efficientdet_d0": "https://huggingface.co/layoutparser/efficientdet/resolve/main/MFD/tf_efficientdet_d0/mfd-tf_efficientdet_d0.pth.tar", + "tf_efficientdet_d1": "https://huggingface.co/layoutparser/efficientdet/resolve/main/MFD/tf_efficientdet_d1/mfd-tf_efficientdet_d1.pth.tar" } } From f97748c64a0ccdcad6a979baf50186a07b153508 Mon Sep 17 00:00:00 2001 From: wangtiandong Date: Tue, 27 Jan 2026 18:25:29 +0800 Subject: [PATCH 2/3] bump version --- src/layoutparser/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/layoutparser/__init__.py b/src/layoutparser/__init__.py index 512f123..5e8b01f 100644 --- a/src/layoutparser/__init__.py +++ b/src/layoutparser/__init__.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "0.3.4" +__version__ = "0.3.5" import sys From 1da3aa8f51cd37c1a85ed5c1c713893350bccdbc Mon Sep 17 00:00:00 2001 From: wangtiandong Date: Tue, 27 Jan 2026 18:32:21 +0800 Subject: [PATCH 3/3] Fix hugging face prefix --- src/layoutparser/models/base_catalog.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/layoutparser/models/base_catalog.py b/src/layoutparser/models/base_catalog.py index 58d6904..447cc2d 100644 --- a/src/layoutparser/models/base_catalog.py +++ b/src/layoutparser/models/base_catalog.py @@ -35,7 +35,7 @@ class HuggingfaceHandler(HTTPURLHandler): """ def _get_supported_prefixes(self): - return ["https://www.huggingface.co"] + return ["https://huggingface.co"] def _isfile(self, path): return path in self.cache_map