lehinevych · lehinevych · Aug 11, 2025 · Aug 11, 2025
diff --git a/mediawikiapi/mediawikiapi.py b/mediawikiapi/mediawikiapi.py
@@ -17,7 +17,11 @@ def __init__(self, config: Optional[Config] = None) -> None:
 
     @memorized
     def search(
-        self, query: str, results: int = 10, suggestion: bool = False
+        self,
+        query: str,
+        results: int = 10,
+        suggestion: bool = False,
+        follow_continue: bool = False,
     ) -> Union[List[str], Tuple[List[Any], Optional[List[str]]]]:
         """
         Do a Wikipedia search for `query`.
@@ -26,6 +30,7 @@ def search(
 
         * results - the maxmimum number of results returned
         * suggestion - if True, return results and suggestion (if any) in a tuple
+        * follow_continue - if True, automatically follow continuation tokens to get all results
         """
         search_params = {
             "list": "search",
@@ -37,7 +42,9 @@ def search(
         if suggestion:
             search_params["srinfo"] = "suggestion"
 
-        raw_results = self.session.request(search_params, self.config)
+        raw_results = self.session.request(
+            search_params, self.config, follow_continue=follow_continue
+        )
 
         if "error" in raw_results:
             if raw_results["error"]["info"] in (
@@ -69,6 +76,7 @@ def geosearch(
         title: Optional[str] = None,
         results: int = 10,
         radius: int = 1000,
+        follow_continue: bool = False,
     ) -> List[str]:
         """
         Do a wikipedia geo search for `latitude` and `longitude`
@@ -84,6 +92,7 @@ def geosearch(
         * title - The title of an article to search for
         * results - the maximum number of results returned
         * radius - Search radius in meters. The value must be between 10 and 10000
+        * follow_continue - if True, automatically follow continuation tokens to get all results
         """
         search_params = {
             "list": "geosearch",
@@ -94,7 +103,9 @@ def geosearch(
         if title:
             search_params["titles"] = title
 
-        raw_results = self.session.request(search_params, self.config)
+        raw_results = self.session.request(
+            search_params, self.config, follow_continue=follow_continue
+        )
 
         if "error" in raw_results:
             if raw_results["error"]["info"] in (
@@ -274,6 +285,7 @@ def category_members(
         pageid: Optional[int] = None,
         cmlimit: int = 10,
         cmtype: str = "page",
+        follow_continue: bool = False,
     ) -> List[str]:
         """
         Get list of page titles belonging to a category.
@@ -283,6 +295,7 @@ def category_members(
         * pageid - page id of category page. Cannot be used together with "title"
         * cmlimit - the maximum number of titles to return
         * cmtype - which type of page to include. ("page", "subcat", or "file")
+        * follow_continue - if True, automatically follow continuation tokens to get all results
         """
         if title is not None and pageid is not None:
             raise ValueError(
@@ -305,7 +318,9 @@ def category_members(
         else:
             raise ValueError("Either a category or a pageid must be specified")
 
-        response = self.session.request(query_params, self.config)
+        response = self.session.request(
+            query_params, self.config, follow_continue=follow_continue
+        )
         if "error" in response:
             raise ValueError(response["error"].get("info"))
         return [member["title"] for member in response["query"]["categorymembers"]]
@@ -317,3 +332,37 @@ def donate(self) -> None:
         import webbrowser
 
         webbrowser.open(Config().donate_url(), new=2)
+
+    def custom_query(
+        self, query_params: Dict[str, Any], follow_continue: bool = True
+    ) -> Dict[str, Any]:
+        """
+        Make a custom query to the Wikipedia API with the given parameters.
+
+        Useful for queries that the standard methods do not cover, e.g. paginated ones.
+
+        Arguments:
+        * query_params - A dictionary of query parameters to pass to the API (not modified)
+        * follow_continue - If True, automatically follow continuation tokens to get all results
+
+        Returns:
+        * The raw API response as a dictionary
+
+        Example:
+        ```python
+        # Query that uses geosearch with pageviews property
+        params = {
+            "action": "query",
+            "generator": "geosearch",
+            "ggsradius": 10000,
+            "ggscoord": "40.7128|-74.0060",  # New York coordinates
+            "ggslimit": 50,
+            "prop": "pageviews",
+        }
+        result = mediawikiapi.custom_query(params)
+        ```
+        """
+        # Pass a shallow copy: request() writes keys such as "format" into its params.
+        return self.session.request(
+            dict(query_params), self.config, follow_continue=follow_continue
+        )
diff --git a/mediawikiapi/requestsession.py b/mediawikiapi/requestsession.py
@@ -30,6 +30,7 @@ def request(
         params: Dict[str, Any],
         config: Config,
         language: Optional[Union[str, Language]] = None,
+        follow_continue: bool = False,
     ) -> Dict[str, Any]:
         """
         Make a request to the Wikipedia API using the given search parameters,
@@ -43,6 +44,7 @@ def request(
         Keyword arguments:
 
         * language - the wiki language
+        * follow_continue - if True, automatically follow 'continue' tokens to get all results
 
         """
         params["format"] = "json"
@@ -72,4 +74,90 @@ def request(
         )
 
         data: Dict[str, Any] = r.json()
-        return data
+
+        # If follow_continue is False or there's no continue token, return the data as is
+        if not follow_continue or "continue" not in data:
+            return data
+
+        # If follow_continue is True, handle continuation
+        result = data  # Start with the initial result
+
+        # Continue requesting while there's a continue token
+        while "continue" in result:
+            # Copy the original parameters and update with continue tokens
+            continue_params = params.copy()
+            continue_params.update(result["continue"])
+
+            # Respect rate limits
+            if (
+                self.__rate_limit_last_call
+                and config.rate_limit
+                and (self.__rate_limit_last_call + config.rate_limit) > datetime.now()
+            ):
+                wait_time = (
+                    self.__rate_limit_last_call + config.rate_limit
+                ) - datetime.now()
+                if wait_time.total_seconds() > 0:
+                    time.sleep(int(wait_time.total_seconds()))
+
+            # Make the continuation request
+            r = self.session.get(
+                config.get_api_url(language),
+                params=continue_params,
+                headers=headers,
+                timeout=config.timeout,
+            )
+            self.__rate_limit_last_call = datetime.now()
+
+            # Get the continued data
+            continued_data = r.json()
+
+            # Merge the data from the continued request with the initial result
+            if "query" in continued_data:
+                # Handle pages
+                if "pages" in continued_data.get("query", {}) and "pages" in result.get(
+                    "query", {}
+                ):
+                    for pageid, page_data in continued_data["query"]["pages"].items():
+                        if pageid in result["query"]["pages"]:
+                            # Page exists in the result, merge properties
+                            for prop, value in page_data.items():
+                                if prop in result["query"]["pages"][pageid]:
+                                    # If the property is a list, extend it
+                                    if isinstance(value, list) and isinstance(
+                                        result["query"]["pages"][pageid][prop], list
+                                    ):
+                                        result["query"]["pages"][pageid][prop].extend(
+                                            value
+                                        )
+                                    else:
+                                        # Otherwise, replace it
+                                        result["query"]["pages"][pageid][prop] = value
+                                else:
+                                    # Property doesn't exist in the result, add it
+                                    result["query"]["pages"][pageid][prop] = value
+                        else:
+                            # Page doesn't exist in the result, add it
+                            result["query"]["pages"][pageid] = page_data
+
+                # Handle lists in the query (like search results, backlinks, etc.)
+                for prop, value in continued_data["query"].items():
+                    if prop != "pages":
+                        if prop not in result["query"]:
+                            result["query"][prop] = value
+                        elif isinstance(value, list) and isinstance(
+                            result["query"][prop], list
+                        ):
+                            # If the property is a list, extend it
+                            result["query"][prop].extend(value)
+
+            # Update the continue token
+            if "continue" in continued_data:
+                result["continue"] = continued_data["continue"]
+            else:
+                # No more continue tokens, we're done
+                if "continue" in result:
+                    del result["continue"]
+                break
+
+        return result