Skip to content

Commit dcdd226

Browse files
authored
Merge pull request #211 from seddonym/multiprocessing-env-disable
Multiprocessing env disable
2 parents d6da591 + f54b9a1 commit dcdd226

File tree

6 files changed

+121
-5
lines changed

6 files changed

+121
-5
lines changed

CHANGELOG.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,12 @@
22
Changelog
33
=========
44

5+
latest
6+
------
7+
8+
* Provide more control of multiprocessing via the ``GRIMP_MIN_MULTIPROCESSING_MODULES``
9+
environment variable.
10+
511
3.8.1 (2025-04-23)
612
------------------
713

docs/usage.rst

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,12 @@ Building the graph
8282
:param str, optional cache_dir: The directory to use for caching the graph. Defaults to ``.grimp_cache``. To disable caching,
8383
pass ``None``. See :doc:`caching`.
8484
:return: An import graph that you can use to analyse the package.
85-
:rtype: ImportGraph
85+
:rtype: ``ImportGraph``
86+
87+
This function uses multiple operating system processes to build the graph if the number of modules to scan (not
88+
including modules in the cache) is 50 or more. This threshold can be adjusted by setting the ``GRIMP_MIN_MULTIPROCESSING_MODULES``
89+
environment variable to a different number. To disable multiprocessing altogether, set it to a large number (more than
90+
the number of modules in the codebase being analyzed).
8691

8792
.. _typing module documentation: https://docs.python.org/3/library/typing.html#typing.TYPE_CHECKING
8893

src/grimp/application/usecases.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,14 +15,19 @@
1515
from ..application.ports.packagefinder import AbstractPackageFinder
1616
from ..domain.valueobjects import DirectImport, Module
1717
from .config import settings
18+
import os
1819

1920

2021
class NotSupplied:
2122
pass
2223

2324

25+
# Calling code can set this environment variable if it wants to tune when to switch to
26+
# multiprocessing, or set it to a large number to disable it altogether.
27+
MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING_ENV_NAME = "GRIMP_MIN_MULTIPROCESSING_MODULES"
2428
# This is an arbitrary number, but setting it too low slows down our functional tests considerably.
25-
MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING = 50
29+
# If you change this, update docs/usage.rst too!
30+
DEFAULT_MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING = 50
2631

2732

2833
def build_graph(
@@ -238,7 +243,13 @@ def _create_chunks(module_files: Collection[ModuleFile]) -> tuple[tuple[ModuleFi
238243

239244

240245
def _decide_number_of_processes(number_of_module_files: int) -> int:
241-
if number_of_module_files < MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING:
246+
min_number_of_modules = int(
247+
os.environ.get(
248+
MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING_ENV_NAME,
249+
DEFAULT_MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING,
250+
)
251+
)
252+
if number_of_module_files < min_number_of_modules:
242253
# Don't incur the overhead of multiple processes.
243254
return 1
244255
return min(joblib.cpu_count(), number_of_module_files)

tests/adaptors/modulefinder.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
from grimp.application.ports.modulefinder import AbstractModuleFinder, FoundPackage, ModuleFile
2+
from grimp.application.ports.filesystem import AbstractFileSystem
3+
from typing import FrozenSet, Dict
4+
5+
6+
class BaseFakeModuleFinder(AbstractModuleFinder):
7+
module_files_by_package_name: Dict[str, FrozenSet[ModuleFile]] = {}
8+
9+
def find_package(
10+
self, package_name: str, package_directory: str, file_system: AbstractFileSystem
11+
) -> FoundPackage:
12+
return FoundPackage(
13+
name=package_name,
14+
directory=package_directory,
15+
module_files=self.module_files_by_package_name[package_name],
16+
)

tests/functional/test_build_and_use_graph.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from unittest.mock import patch
55
from grimp.application import usecases
66

7+
78
"""
89
For ease of reference, these are the imports of all the files:
910
@@ -55,7 +56,7 @@ def test_modules():
5556
}
5657

5758

58-
@patch.object(usecases, "MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING", 0)
59+
@patch.object(usecases, "DEFAULT_MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING", 0)
5960
def test_modules_multiprocessing():
6061
"""
6162
This test runs relatively slowly, but it's important we cover the multiprocessing code.

tests/unit/application/test_usecases.py

Lines changed: 78 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
1+
import os
12
from typing import Dict, Optional, Set
2-
from unittest.mock import sentinel
3+
from unittest.mock import sentinel, patch
34

5+
import joblib # type: ignore
46
import pytest # type: ignore
57

68
from grimp.application import usecases
@@ -9,8 +11,11 @@
911
from grimp.domain.valueobjects import DirectImport, Module
1012
from tests.adaptors.filesystem import FakeFileSystem
1113
from tests.adaptors.packagefinder import BaseFakePackageFinder
14+
from tests.adaptors.modulefinder import BaseFakeModuleFinder
1215
from tests.config import override_settings
1316

17+
SOME_CPU_COUNT = 8
18+
1419

1520
class TestBuildGraph:
1621
@pytest.mark.parametrize("include_external_packages", (True, False))
@@ -131,3 +136,75 @@ def write(
131136
if supplied_cache_dir is not sentinel.not_supplied:
132137
kwargs["cache_dir"] = supplied_cache_dir
133138
usecases.build_graph("mypackage", **kwargs)
139+
140+
@patch.object(usecases, "_scan_chunks", return_value={})
141+
@patch.object(joblib, "cpu_count", return_value=SOME_CPU_COUNT)
142+
@pytest.mark.parametrize(
143+
"number_of_modules, fake_environ, expected_number_of_chunks",
144+
[
145+
(
146+
usecases.DEFAULT_MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING - 1,
147+
{},
148+
1,
149+
),
150+
(
151+
usecases.DEFAULT_MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING,
152+
{},
153+
SOME_CPU_COUNT,
154+
),
155+
(
156+
usecases.DEFAULT_MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING + 1,
157+
{},
158+
SOME_CPU_COUNT,
159+
),
160+
(
161+
149,
162+
{usecases.MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING_ENV_NAME: 150},
163+
1,
164+
),
165+
(
166+
150,
167+
{usecases.MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING_ENV_NAME: 150},
168+
SOME_CPU_COUNT,
169+
),
170+
(
171+
151,
172+
{usecases.MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING_ENV_NAME: 150},
173+
SOME_CPU_COUNT,
174+
),
175+
],
176+
)
177+
def test_scanning_multiprocessing_respects_min_number_of_modules(
178+
self,
179+
mock_cpu_count,
180+
mock_scan_chunks,
181+
number_of_modules,
182+
fake_environ,
183+
expected_number_of_chunks,
184+
):
185+
class FakePackageFinder(BaseFakePackageFinder):
186+
directory_map = {"mypackage": "/path/to/mypackage"}
187+
188+
class FakeModuleFinder(BaseFakeModuleFinder):
189+
module_files_by_package_name = {
190+
"mypackage": frozenset(
191+
{
192+
ModuleFile(
193+
module=Module(f"mypackage.mod_{i}"),
194+
mtime=999,
195+
)
196+
for i in range(number_of_modules)
197+
}
198+
)
199+
}
200+
201+
with override_settings(
202+
FILE_SYSTEM=FakeFileSystem(),
203+
PACKAGE_FINDER=FakePackageFinder(),
204+
MODULE_FINDER=FakeModuleFinder(),
205+
), patch.object(os, "environ", fake_environ):
206+
usecases.build_graph("mypackage", cache_dir=None)
207+
208+
[call] = mock_scan_chunks.call_args_list
209+
chunks = call.args[0]
210+
assert len(chunks) == expected_number_of_chunks

0 commit comments

Comments
 (0)