diff --git a/src/poetry/repositories/link_sources/base.py b/src/poetry/repositories/link_sources/base.py index dfe344c51b0..af133a68cac 100644 --- a/src/poetry/repositories/link_sources/base.py +++ b/src/poetry/repositories/link_sources/base.py @@ -4,6 +4,7 @@ import re import urllib.parse +from collections import defaultdict from functools import cached_property from typing import TYPE_CHECKING from typing import ClassVar @@ -19,18 +20,22 @@ if TYPE_CHECKING: - from collections import defaultdict from collections.abc import Callable from collections.abc import Iterator from packaging.utils import NormalizedName from poetry.core.packages.utils.link import Link - # The cache stores factories that build a Link on demand, so that Links are + # This cache stores factories that build a Link on demand, so that Links are # only constructed for the (few) versions actually retrieved rather than for # every file listed by the repository. LinkFactory = Callable[[], Link] - LinkCache = defaultdict[NormalizedName, defaultdict[Version, list[LinkFactory]]] + LinkFactoryCache = defaultdict[ + NormalizedName, defaultdict[Version, list[LinkFactory]] + ] + # This cache stores link objects so that Links are only constructed once + # for versions that are retrieved multiple times. + LinkCache = defaultdict[NormalizedName, dict[Version, list[Link]]] logger = logging.getLogger(__name__) @@ -63,13 +68,14 @@ class LinkSource: def __init__(self, url: str) -> None: self._url = url + self.__link_cache: LinkCache = defaultdict(dict) @property def url(self) -> str: return self._url def versions(self, name: NormalizedName) -> Iterator[Version]: - yield from self._link_cache[name] + yield from self._link_factory_cache[name] @property def packages(self) -> Iterator[Package]: @@ -81,10 +87,9 @@ def packages(self) -> Iterator[Package]: @property def links(self) -> Iterator[Link]: - for links_per_version in self._link_cache.values(): - for link_factories in links_per_version.values(): - for make_link in link_factories: - yield make_link() + for name, versions in self._link_factory_cache.items(): + for version in versions: + yield from self.links_for_version(name, version) @classmethod def _link_package_name_and_version( @@ -138,8 +143,13 @@ def link_package_data(cls, link: Link) -> Package | None: def links_for_version( self, name: NormalizedName, version: Version ) -> Iterator[Link]: - for make_link in self._link_cache[name][version]: - yield make_link() + if links := self.__link_cache[name].get(version): + yield from links + else: + self.__link_cache[name][version] = [ + make_link() for make_link in self._link_factory_cache[name][version] + ] + yield from self.__link_cache[name][version] def clean_link(self, url: str) -> str: """Makes sure a link is fully encoded. That is, if a ' ' shows up in @@ -162,8 +172,13 @@ def yanked(self, name: NormalizedName, version: Version) -> str | bool: return True @cached_property - def _link_cache(self) -> LinkCache: - raise NotImplementedError() + def _link_factory_cache(self) -> LinkFactoryCache: + """ATTENTION: + The factories should only be called in links_for_version + so that the __link_cache, which avoids calling the same factory twice, + is populated. + """ + raise NotImplementedError class SimpleRepositoryRootPage: diff --git a/src/poetry/repositories/link_sources/html.py b/src/poetry/repositories/link_sources/html.py index 1a111bbe284..155f93446c1 100644 --- a/src/poetry/repositories/link_sources/html.py +++ b/src/poetry/repositories/link_sources/html.py @@ -14,8 +14,8 @@ if TYPE_CHECKING: - from poetry.repositories.link_sources.base import LinkCache from poetry.repositories.link_sources.base import LinkFactory + from poetry.repositories.link_sources.base import LinkFactoryCache def _const_factory(link: Link) -> LinkFactory: @@ -33,8 +33,8 @@ def __init__(self, url: str, content: str) -> None: self._base_url: str | None = parser.base_url @cached_property - def _link_cache(self) -> LinkCache: - links: LinkCache = defaultdict(lambda: defaultdict(list)) + def _link_factory_cache(self) -> LinkFactoryCache: + links: LinkFactoryCache = defaultdict(lambda: defaultdict(list)) base_url = self._base_url or self._url for anchor in self._parsed: if href := anchor.get("href"): diff --git a/src/poetry/repositories/link_sources/json.py b/src/poetry/repositories/link_sources/json.py index d5bfd7937ed..2264b167cfa 100644 --- a/src/poetry/repositories/link_sources/json.py +++ b/src/poetry/repositories/link_sources/json.py @@ -15,7 +15,7 @@ if TYPE_CHECKING: - from poetry.repositories.link_sources.base import LinkCache + from poetry.repositories.link_sources.base import LinkFactoryCache class SimpleJsonPage(LinkSource): @@ -26,13 +26,13 @@ def __init__(self, url: str, content: dict[str, Any]) -> None: self.content = content @cached_property - def _link_cache(self) -> LinkCache: + def _link_factory_cache(self) -> LinkFactoryCache: # Only the filename is needed to enumerate the available versions, so we # defer building the Link (and cleaning its URL) to _make_link, which is # only called when the version's links are actually retrieved. For large # projects this avoids constructing tens of thousands of Link objects # that are never used during resolution. - links: LinkCache = defaultdict(lambda: defaultdict(list)) + links: LinkFactoryCache = defaultdict(lambda: defaultdict(list)) for file in self.content["files"]: filename = file["filename"] if splitext(filename, is_filename=True)[1] not in self.SUPPORTED_FORMATS: diff --git a/tests/repositories/link_sources/test_base.py b/tests/repositories/link_sources/test_base.py index 125226ba84a..9461cdaef8f 100644 --- a/tests/repositories/link_sources/test_base.py +++ b/tests/repositories/link_sources/test_base.py @@ -40,7 +40,7 @@ def link_source(mocker: MockerFixture) -> LinkSource: url = "https://example.org" link_source = LinkSource(url) mocker.patch( - f"{LinkSource.__module__}.{LinkSource.__qualname__}._link_cache", + f"{LinkSource.__module__}.{LinkSource.__qualname__}._link_factory_cache", new_callable=PropertyMock, return_value=defaultdict( lambda: defaultdict(list),