From 689c367bf09541703c2252e6eb2277de33dc6317 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Heinz-Alexander=20F=C3=BCtterer?= <35225576+afuetterer@users.noreply.github.com> Date: Wed, 4 Mar 2026 09:40:04 +0100 Subject: [PATCH] test: remove obsolete test utils.py --- tika/tests/memory_benchmark.py | 7 ++--- tika/tests/test_benchmark.py | 22 +++++++------- tika/tests/test_tika.py | 4 +-- tika/tests/utils.py | 55 ---------------------------------- 4 files changed, 15 insertions(+), 73 deletions(-) delete mode 100644 tika/tests/utils.py diff --git a/tika/tests/memory_benchmark.py b/tika/tests/memory_benchmark.py index e4968934..db83a994 100644 --- a/tika/tests/memory_benchmark.py +++ b/tika/tests/memory_benchmark.py @@ -19,14 +19,11 @@ # python tika/tests/memory_benchmark.py import os import zlib - +import gzip import tika.parser -import tika.tika from memory_profiler import profile -from tika.tests.utils import gzip_compress - @profile def test_parser_binary(): @@ -62,7 +59,7 @@ def test_parser_gzip(): file = os.path.join(os.path.dirname(__file__), 'files', 'rwservlet.pdf') with open(file, 'rb') as file_obj: - response = tika.parser.from_buffer(gzip_compress(file_obj.read()), headers={'Accept-Encoding': 'gzip, deflate'}) + response = tika.parser.from_buffer(gzip.compress(file_obj.read()), headers={'Accept-Encoding': 'gzip, deflate'}) if __name__ == '__main__': test_parser_buffer() diff --git a/tika/tests/test_benchmark.py b/tika/tests/test_benchmark.py index 9e824731..88ddc5fd 100644 --- a/tika/tests/test_benchmark.py +++ b/tika/tests/test_benchmark.py @@ -20,10 +20,10 @@ import os import unittest import zlib +import gzip +from http import HTTPStatus import tika.parser -import tika.tika -from tika.tests.utils import HTTPStatusOk, gzip_compress def test_local_binary(benchmark): @@ -31,7 +31,7 @@ def test_local_binary(benchmark): file = os.path.join(os.path.dirname(__file__), 'files', 'rwservlet.pdf') response = benchmark(tika_from_binary, file) - assert response['status'] == HTTPStatusOk + assert response['status'] == HTTPStatus.OK def test_parser_buffer(benchmark): @@ -39,7 +39,7 @@ def test_parser_buffer(benchmark): file = os.path.join(os.path.dirname(__file__), 'files', 'rwservlet.pdf') response = benchmark(tika_from_buffer, file) - assert response['status'] == HTTPStatusOk + assert response['status'] == HTTPStatus.OK def test_parser_buffer_zlib_input(benchmark): @@ -48,7 +48,7 @@ def test_parser_buffer_zlib_input(benchmark): response = benchmark(tika_from_buffer_zlib, file) - assert response['status'] == HTTPStatusOk + assert response['status'] == HTTPStatus.OK def test_parser_buffer_gzip_input(benchmark): @@ -56,7 +56,7 @@ def test_parser_buffer_gzip_input(benchmark): file = os.path.join(os.path.dirname(__file__), 'files', 'rwservlet.pdf') response = benchmark(tika_from_buffer_gzip, file) - assert response['status'] == HTTPStatusOk + assert response['status'] == HTTPStatus.OK def test_local_binary_with_gzip_output(benchmark): @@ -64,7 +64,7 @@ def test_local_binary_with_gzip_output(benchmark): file = os.path.join(os.path.dirname(__file__), 'files', 'rwservlet.pdf') response = benchmark(tika_from_binary, file, headers={'Accept-Encoding': 'gzip, deflate'}) - assert response['status'] == HTTPStatusOk + assert response['status'] == HTTPStatus.OK def test_parser_buffer_with_gzip_output(benchmark): @@ -72,7 +72,7 @@ def test_parser_buffer_with_gzip_output(benchmark): file = os.path.join(os.path.dirname(__file__), 'files', 'rwservlet.pdf') response = benchmark(tika_from_buffer, file, headers={'Accept-Encoding': 'gzip, deflate'}) - assert response['status'] == HTTPStatusOk + assert response['status'] == HTTPStatus.OK def test_parser_buffer_zlib_input_and_gzip_output(benchmark): @@ -81,7 +81,7 @@ def test_parser_buffer_zlib_input_and_gzip_output(benchmark): response = benchmark(tika_from_buffer_zlib, file, headers={'Accept-Encoding': 'gzip, deflate'}) - assert response['status'] == HTTPStatusOk + assert response['status'] == HTTPStatus.OK def test_parser_buffer_gzip_input_and_gzip_output(benchmark): @@ -89,7 +89,7 @@ def test_parser_buffer_gzip_input_and_gzip_output(benchmark): file = os.path.join(os.path.dirname(__file__), 'files', 'rwservlet.pdf') response = benchmark(tika_from_buffer_gzip, file, headers={'Accept-Encoding': 'gzip, deflate'}) - assert response['status'] == HTTPStatusOk + assert response['status'] == HTTPStatus.OK def tika_from_buffer_zlib(file, headers=None): @@ -99,7 +99,7 @@ def tika_from_buffer_zlib(file, headers=None): def tika_from_buffer_gzip(file, headers=None): with open(file, 'rb') as file_obj: - return tika.parser.from_buffer(gzip_compress(file_obj.read()), headers=headers) + return tika.parser.from_buffer(gzip.compress(file_obj.read()), headers=headers) def tika_from_buffer(file, headers=None): diff --git a/tika/tests/test_tika.py b/tika/tests/test_tika.py index 6166c23e..c61cb812 100644 --- a/tika/tests/test_tika.py +++ b/tika/tests/test_tika.py @@ -17,10 +17,10 @@ import os import unittest +from http import HTTPStatus import tika.parser import tika.tika -from tika.tests.utils import HTTPStatusOk class CreateTest(unittest.TestCase): @@ -53,7 +53,7 @@ def test_local_binary(self): def test_local_buffer(self): response = tika.parser.from_buffer('Good evening, Dave') - self.assertEqual(response['status'], HTTPStatusOk) + self.assertEqual(response['status'], HTTPStatus.OK) def test_local_path(self): """parse file path""" diff --git a/tika/tests/utils.py b/tika/tests/utils.py deleted file mode 100644 index b3a4318e..00000000 --- a/tika/tests/utils.py +++ /dev/null @@ -1,55 +0,0 @@ -#!/usr/bin/env python -# encoding: utf-8 -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import gzip - - -def HTTPStatusOk(): - try: - # python 2.7 - import httplib - - return httplib.OK - except ImportError: - try: - # python > 3.4 - from http import HTTPStatus - - return HTTPStatus.OK - except ImportError: - # python 3.4 - import http.client - - return http.client.OK - - -HTTPStatusOk = HTTPStatusOk() - - -def gzip_compress(file_obj): - try: - # python > 3.4 - return gzip.compress(file_obj) - except AttributeError: - # python 2.7 - import StringIO - out = StringIO.StringIO() - gzip_s = gzip.GzipFile(fileobj=out, mode="wb") - gzip_s.write(file_obj.encode('utf-8')) - gzip_s.close() - - # Get the bytes written to the underlying file object - return out.getvalue()