From bd96be89e296fcd722c2bc8bb4e086a4fbb04e1b Mon Sep 17 00:00:00 2001 From: Eu Pin Tien Date: Mon, 31 Mar 2025 12:19:19 +0100 Subject: [PATCH 1/8] Fixed base URL for index pages; optimised URL parsing for MSYS2 index pages; switched to using StreamingResponse for package downloads; added debug logs --- src/murfey/server/api/bootstrap.py | 265 +++++++++++------------------ 1 file changed, 103 insertions(+), 162 deletions(-) diff --git a/src/murfey/server/api/bootstrap.py b/src/murfey/server/api/bootstrap.py index ec1b935a9..72bfc4045 100644 --- a/src/murfey/server/api/bootstrap.py +++ b/src/murfey/server/api/bootstrap.py @@ -111,7 +111,7 @@ def get_bootstrap_instructions(request: Request): ) -@bootstrap.get("/pip.whl", response_class=Response) +@bootstrap.get("/pip.whl", response_class=StreamingResponse) def get_pip_wheel(): """ Return a static version of pip. This does not need to be the newest or best, @@ -125,7 +125,7 @@ def get_pip_wheel(): ) -@bootstrap.get("/murfey.whl", response_class=Response) +@bootstrap.get("/murfey.whl", response_class=StreamingResponse) def get_murfey_wheel(): """ Return a wheel file containing the latest release version of Murfey. We should @@ -133,7 +133,7 @@ def get_murfey_wheel(): murfey.bootstrap is compatible with all relevant versions of Python. This also ignores yanked releases, which again should be fine. """ - full_path_response = requests.get("https://pypi.org/simple/murfey") + full_path_response = http_session.get("https://pypi.org/simple/murfey") wheels = {} for wheel_file in re.findall( @@ -161,7 +161,7 @@ def get_murfey_wheel(): """ -@cygwin.get("/setup-x86_64.exe", response_class=Response) +@cygwin.get("/setup-x86_64.exe", response_class=StreamingResponse) def get_cygwin_setup(): """ Obtain and pass through a Cygwin installer from an official source. @@ -169,11 +169,12 @@ def get_cygwin_setup(): Cygwin distribution that then remains on the client machines. 
""" filename = "setup-x86_64.exe" - installer = requests.get(f"https://www.cygwin.com/{filename}") - return Response( - content=installer.content, - media_type=installer.headers.get("Content-Type"), - status_code=installer.status_code, + response = http_session.get(f"https://www.cygwin.com/{filename}") + return StreamingResponse( + content=response.iter_content(chunk_size=8192), + media_type=response.headers.get("Content-Type"), + headers={"Content-Disposition": f"attachment; filename=cygwin-{filename}"}, + status_code=response.status_code, ) @@ -185,7 +186,7 @@ def find_cygwin_mirror() -> str: lifetime of the server. """ url = "https://www.cygwin.com/mirrors.lst" - mirrors = requests.get(url) + mirrors = http_session.get(url) logger.info( f"Reading mirrors from {url} returned status code {mirrors.status_code} {mirrors.reason}" ) @@ -226,12 +227,17 @@ def find_cygwin_mirror() -> str: return picked_mirror -@cygwin.get("/{request_path:path}", response_class=Response) -def parse_cygwin_request(request_path: str): +@cygwin.get("/{request_path:path}", response_class=StreamingResponse) +def parse_cygwin_request( + request: Request, + request_path: str, +): """ Forward a Cygwin setup request to an official mirror. 
""" + logger.debug(f"Received request to access {str(request.url)!r}") + # Validate request path if bool(re.fullmatch(r"^[\w\s\.\-\+/]+$", request_path)) is False: raise ValueError(f"{request_path!r} is not a valid request path") @@ -244,11 +250,11 @@ def parse_cygwin_request(request_path: str): ) logger.info(f"Forwarding Cygwin download request to {_sanitise_str(url)}") - cygwin_data = requests.get(url) - return Response( - content=cygwin_data.content, - media_type=cygwin_data.headers.get("Content-Type"), - status_code=cygwin_data.status_code, + response = http_session.get(url) + return StreamingResponse( + content=response.iter_content(chunk_size=8192), + media_type=response.headers.get("Content-Type"), + status_code=response.status_code, ) @@ -294,14 +300,19 @@ def parse_cygwin_request(request_path: str): ) -@msys2.get("/distrib/{setup_file}", response_class=Response) -def get_msys2_setup(setup_file: str): +@msys2.get("/distrib/{setup_file}", response_class=StreamingResponse) +def get_msys2_setup( + request: Request, + setup_file: str, +): """ Obtain and pass through an MSYS2 installer from an official source. This is used during client bootstrapping, and can download and install the MSYS2 distribution that then remains on the client machines. 
""" + logger.debug(f"Received request to access {str(request.url)!r}") + # Validate characters in sent path if not bool(re.fullmatch(r"^[\w\.\-]+$", setup_file)): raise ValueError("Unallowed characters present in requested setup file") @@ -312,15 +323,15 @@ def get_msys2_setup(setup_file: str): ): raise ValueError(f"{setup_file!r} is not a valid executable") - installer = requests.get(f"{msys2_url}/distrib/{setup_file}") - return Response( - content=installer.content, - media_type=installer.headers.get("Content-Type"), - status_code=installer.status_code, + response = http_session.get(f"{msys2_url}/distrib/{setup_file}") + return StreamingResponse( + content=response.iter_content(chunk_size=8192), + media_type=response.headers.get("Content-Type"), + status_code=response.status_code, ) -@msys2.get("", response_class=Response) +@msys2.get("/", response_class=Response) def get_msys2_main_index( request: Request, ) -> Response: @@ -329,25 +340,11 @@ def get_msys2_main_index( from the main MSYS2 repository. """ - def _rewrite_url(match): - """ - Use regular expression matching to rewrite the package URLs and point them - explicitly to this current server. 
- """ - url = ( - f"{base_path}/{match.group(1)}" - if not str(match.group(1)).startswith("http") - else str(match.group(1)) - ) - return f'' + match.group(2) + "" - - # Get base path to current FastAPI endpoint - base_url = str(request.base_url).strip("/") - path = request.url.path.strip("/") - base_path = f"{base_url}/{path}" + logger.debug(f"Received request to access {str(request.url)!r}") + # Construct URL and get response env_url = f"{msys2_url}" - response = requests.get(env_url) + response = http_session.get(env_url) # Parse and rewrite package index content content: bytes = response.content # Get content in bytes @@ -357,16 +354,10 @@ def _rewrite_url(match): if line.startswith("]*)">([^<]*)', # Regex search criteria - _rewrite_url, # Function to apply search criteria to - line, - ) - content_text_list.append(line_new) - + content_text_list.append(line) # Other URLs don't need to be mirrored else: - pass + continue else: content_text_list.append(line) @@ -380,32 +371,17 @@ def _rewrite_url(match): ) -@msys2.get("/{system}", response_class=Response) +@msys2.get("/{system}/", response_class=Response) def get_msys2_environment_index( - system: str, request: Request, + system: str, ) -> Response: """ Returns a list of all MSYS2 environments for a given system from the main MSYS2 repository. """ - def _rewrite_url(match): - """ - Use regular expression matching to rewrite the package URLs and point them - explicitly to this current server. 
- """ - url = ( - f"{base_path}/{match.group(1)}" - if not str(match.group(1)).startswith("http") - else str(match.group(1)) - ) - return f'' + match.group(2) + "" - - # Get base path to current FastAPI endpoint - base_url = str(request.base_url).strip("/") - path = request.url.path.strip("/") - base_path = f"{base_url}/{path}" + logger.debug(f"Received request to access {str(request.url)!r}") # Validate provided system; use this endpoint to display 'distrib' folder too if not (any(system in env[0] for env in valid_envs) or system == "distrib"): @@ -413,7 +389,7 @@ def _rewrite_url(match): # Construct URL to main MSYS repo and get response arch_url = f'{msys2_url}/{quote(system, safe="/")}' - response = requests.get(arch_url) + response = http_session.get(arch_url) # Parse and rewrite package index content content: bytes = response.content # Get content in bytes @@ -425,14 +401,7 @@ def _rewrite_url(match): if system == "distrib": if not any(ext in line for ext in msys2_file_ext): continue - - # Rewrite URL to point explicitly to current server - line_new = re.sub( - '^]*)">([^<]*)', # Regex search criteria - _rewrite_url, # Function to apply search criteria to - line, - ) - content_text_list.append(line_new) + content_text_list.append(line) else: content_text_list.append(line) @@ -446,33 +415,18 @@ def _rewrite_url(match): ) -@msys2.get("/{system}/{environment}", response_class=Response) +@msys2.get("/{system}/{environment}/", response_class=Response) def get_msys2_package_index( + request: Request, system: str, environment: str, - request: Request, ) -> Response: """ Obtain a list of all available MSYS2 packages for a given environment from the main MSYS2 repo. """ - def _rewrite_url(match): - """ - Use regular expression matching to rewrite the package URLs and point them - explicitly to this current server. 
- """ - url = ( - f"{base_path}/{match.group(1)}" - if not str(match.group(1)).startswith("http") - else str(match.group(1)) - ) - return f'' + match.group(2) + "" - - # Get base path to current FastAPI endpoint - base_url = str(request.base_url).strip("/") - path = request.url.path.strip("/") - base_path = f"{base_url}/{path}" + logger.debug(f"Received request to access {str(request.url)!r}") # Validate environment if any(system in env[0] and environment in env[1] for env in valid_envs) is False: @@ -482,35 +436,17 @@ def _rewrite_url(match): package_list_url = ( f'{msys2_url}/{quote(system, safe="/")}/{quote(environment, safe="/")}' ) - response = requests.get(package_list_url) - - # Parse and rewrite package index content - content: bytes = response.content # Get content in bytes - content_text: str = content.decode("latin1") # Convert to strings - content_text_list = [] - for line in content_text.splitlines(): - if line.startswith("]*)">([^<]*)', # Regex search criteria - _rewrite_url, # Function to apply search criteria to - line, - ) - content_text_list.append(line_new) - else: - content_text_list.append(line) - - # Reconstruct conent - content_text_new = str("\n".join(content_text_list)) # Regenerate HTML structure - content_new = content_text_new.encode("latin1") # Convert back to bytes + response = http_session.get(package_list_url) return Response( - content=content_new, + content=response.content, status_code=response.status_code, media_type=response.headers.get("Content-Type"), ) -@msys2.get("/{system}/{environment}/{package}", response_class=Response) +@msys2.get("/{system}/{environment}/{package}", response_class=StreamingResponse) def get_msys2_package_file( + request: Request, system: str, environment: str, package: str, @@ -519,6 +455,8 @@ def get_msys2_package_file( Obtain and pass through a specific download for an MSYS2 package. 
""" + logger.debug(f"Received request to access {str(request.url)!r}") + # Validate environment if any(system in env[0] and environment in env[1] for env in valid_envs) is False: raise ValueError(f"'{system}/{environment}' is not a valid msys2 environment") @@ -535,16 +473,14 @@ def get_msys2_package_file( # Construct URL to main MSYS repo and get response package_url = f'{msys2_url}/{quote(system, safe="/")}/{quote(environment, safe="/")}/{quote(package, safe="/")}' - package_file = requests.get(package_url) - - if package_file.status_code == 200: - return Response( - content=package_file.content, - media_type=package_file.headers.get("Content-Type"), - status_code=package_file.status_code, - ) - else: - raise HTTPException(status_code=package_file.status_code) + response = http_session.get(package_url) + if response.status_code != 200: + raise HTTPException(status_code=response.status_code) + return StreamingResponse( + content=response.iter_content(chunk_size=8192), + media_type=response.headers.get("Content-Type"), + status_code=response.status_code, + ) """ @@ -605,7 +541,7 @@ def get_cargo_config(request: Request): """ -@rust.get("/index") +@rust.get("/index/", response_class=Response) def get_index_page(): """ Returns a mirror of the https://index.crates.io landing page. @@ -669,7 +605,7 @@ def get_index_package_metadata( c1 = 3, and c2 is the first character of the package. """ - logger.debug(f"Received request to access {str(request.url)}") + logger.debug(f"Received request to access {str(request.url)!r}") # Validate path to the package metadata if any(not re.fullmatch(r"[\w\-]{1,2}", char) for char in (c1, c2)): @@ -706,7 +642,7 @@ def get_index_package_metadata_for_short_package_names( /1/{package} or /2/{package}. 
""" - logger.debug(f"Received request to access {str(request.url)}") + logger.debug(f"Received request to access {str(request.url)!r}") # Validate path to crate if n not in ("1", "2"): @@ -736,7 +672,7 @@ def get_rust_package_download( sparse index registry. """ - logger.debug(f"Received request to access {str(request.url)}") + logger.debug(f"Received request to access {str(request.url)!r}") # Validate package and version if not re.fullmatch(r"[\w\-]+", package): @@ -780,7 +716,7 @@ def get_rust_api_package_index( in a JSON object based on the search query given. """ - logger.debug(f"Received request to access {str(request.url)}") + logger.debug(f"Received request to access {str(request.url)!r}") # Validate package name if package and not re.fullmatch(r"[\w\-]+", package): @@ -816,7 +752,7 @@ def get_rust_api_package_info( to other types of metadata. """ - logger.debug(f"Received request to access {str(request.url)}") + logger.debug(f"Received request to access {str(request.url)!r}") # Validate package name if not re.fullmatch(r"[\w\-]+", package): @@ -837,10 +773,10 @@ def get_rust_api_package_versions( ): """ Displays all available versions for a particular Rust package, along with download - links for said versions. + links for said versions, as a JSON object. """ - logger.debug(f"Received request to access {str(request.url)}") + logger.debug(f"Received request to access {str(request.url)!r}") # Validate crate name if not re.fullmatch(r"[\w\-]+", package): @@ -866,7 +802,7 @@ def get_rust_api_package_download( Obtain and pass through a crate download request for a specific Rust package. """ - logger.debug(f"Received request to access {str(request.url)}") + logger.debug(f"Received request to access {str(request.url)!r}") # Validate package name if not re.fullmatch(r"[\w\-]+", package): @@ -915,7 +851,7 @@ def get_rust_package_crate( (e.g. 
https://static.crates.io/crates/anyhow will fail) """ - logger.debug(f"Received request to access {str(request.url)}") + logger.debug(f"Received request to access {str(request.url)!r}") # Validate crate and package names if not re.fullmatch(r"[\w\-]+", package): @@ -959,20 +895,18 @@ def _get_full_pypi_path_response(package: str) -> requests.Response: # Check that a package name follows PEP 503 naming conventions, containing only # alphanumerics (including underscores; \w), dashes (\-), and periods (\.) - if re.match(r"^[\w\-\.]+$", package) is not None: - # Sanitise and normalise package name according to PEP 503 - package_clean = quote(re.sub(r"[-_.]+", "-", package.lower()), safe="/") + if not re.fullmatch(r"[\w\-\.]+", package): + raise ValueError(f"{package!r} is not a valid package name") - # Get HTTP response - url = f"https://pypi.org/simple/{package_clean}" - response = requests.get(url) + # Sanitise and normalise package name according to PEP 503 + package_clean = quote(re.sub(r"[-_.]+", "-", package.lower()), safe="/") - if response.status_code == 200: - return response - else: - raise HTTPException(status_code=response.status_code) - else: - raise ValueError(f"{package!r} is not a valid package name") + # Get HTTP response + url = f"https://pypi.org/simple/{package_clean}" + response = http_session.get(url) + if response.status_code != 200: + raise HTTPException(status_code=response.status_code) + return response @pypi.get("/", response_class=Response) @@ -981,17 +915,16 @@ def get_pypi_index(): Obtain list of all PyPI packages via the simple API (PEP 503). 
""" - index = requests.get("https://pypi.org/simple/") - + response = http_session.get("https://pypi.org/simple/") return Response( - content=index.content, - status_code=index.status_code, - media_type=index.headers.get("Content-Type"), + content=response.content, + status_code=response.status_code, + media_type=response.headers.get("Content-Type"), ) @pypi.get("/{package}/", response_class=Response) -def get_pypi_package_downloads_list(package: str) -> Response: +def get_pypi_package_downloads_list(request: Request, package: str) -> Response: """ Obtain list of all package downloads from PyPI via the simple API (PEP 503), and rewrite all download URLs to point to this server, under the current directory. @@ -1006,6 +939,8 @@ def _rewrite_pypi_url(match): url = match.group(3) return '" + match.group(3) + "" + logger.debug(f"Received request to access {str(request.url)!r}") + # Validate package and URL full_path_response = _get_full_pypi_path_response(package) @@ -1042,8 +977,12 @@ def _rewrite_pypi_url(match): ) -@pypi.get("/{package}/{filename}", response_class=Response) -def get_pypi_file(package: str, filename: str): +@pypi.get("/{package}/{filename}", response_class=StreamingResponse) +def get_pypi_file( + request: Request, + package: str, + filename: str, +): """ Obtain and pass through a specific download for a PyPI package. 
""" @@ -1081,6 +1020,8 @@ def _expose_wheel_metadata(response_bytes: bytes) -> bytes: return response_bytes_new + logger.debug(f"Received request to access {str(request.url)!r}") + # Validate package and URL full_path_response = _get_full_pypi_path_response(package) @@ -1098,12 +1039,12 @@ def _expose_wheel_metadata(response_bytes: bytes) -> bytes: if not selected_package_link: raise HTTPException(status_code=404, detail="File not found for package") original_url = selected_package_link.group(1) - original_file = requests.get(original_url) + response = http_session.get(original_url) - return Response( - content=original_file.content, - media_type=original_file.headers.get("Content-Type"), - status_code=original_file.status_code, + return StreamingResponse( + content=response.iter_content(chunk_size=8192), + media_type=response.headers.get("Content-Type"), + status_code=response.status_code, ) From 258d8257ec565dcb46f35f7e607d8851f0692f18 Mon Sep 17 00:00:00 2001 From: Eu Pin Tien Date: Mon, 31 Mar 2025 16:09:36 +0100 Subject: [PATCH 2/8] Added logic to reconstruct netloc of a forwarded request on server-side when dynamically generating URLs for downloadable config files --- src/murfey/server/api/bootstrap.py | 39 ++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/src/murfey/server/api/bootstrap.py b/src/murfey/server/api/bootstrap.py index 72bfc4045..54acdc96f 100644 --- a/src/murfey/server/api/bootstrap.py +++ b/src/murfey/server/api/bootstrap.py @@ -505,11 +505,24 @@ def get_cargo_config(request: Request): and its default path on Windows is %USERPROFILE%\\.cargo\\config.toml. 
""" - # Construct URL to our mirror of the Rust sparse index - index_mirror = ( - f"{request.url.scheme}://{request.url.netloc}/{rust.prefix.strip('/')}/index/" + # Check if this is a forwarded request from somewhere else and construct netloc + netloc = ( + f"{request.headers['X-Forwarded-Host']}:{request.headers['X-Forwarded-Port']}" + if request.headers.get("X-Forwarded-Host") + and request.headers.get("X-Forwarded-Port") + else request.url.netloc ) + # Find path to Rust router using current URL Path + path_to_router = request.url.path.removesuffix("/cargo/config.toml") + + # Construct base URL for subsequent use + base_url = f"{request.url.scheme}://{netloc}{path_to_router}" + logger.debug(f"Base URL to Rust sub-router determined to be {base_url}") + + # Construct URL to our mirror of the Rust sparse index + index_url = f"{base_url}/index/" + # Construct and return the config.toml file config_data = "\n".join( [ @@ -517,10 +530,10 @@ def get_cargo_config(request: Request): 'replace-with = "murfey-crates"', # Redirect to our mirror "", "[source.murfey-crates]", - f'registry = "sparse+{index_mirror}"', # sparse+ to use sparse protocol + f'registry = "sparse+{index_url}"', # sparse+ to use sparse protocol "", "[registries.murfey-crates]", - f'index = "sparse+{index_mirror}"', # sparse+ to use sparse protocol + f'index = "sparse+{index_url}"', # sparse+ to use sparse protocol "", "[registry]", 'default = "murfey-crates"', # Redirect to our mirror @@ -568,8 +581,20 @@ def get_index_config(request: Request): used by Cargo when searching for and downloading packages. 
""" - # Construct URL for Rust router - base_url = f"{request.url.scheme}://{request.url.netloc}" + rust.prefix + # Check if this is a forwarded request from somewhere else and construct netloc + netloc = ( + f"{request.headers['X-Forwarded-Host']}:{request.headers['X-Forwarded-Port']}" + if request.headers.get("X-Forwarded-Host") + and request.headers.get("X-Forwarded-Port") + else request.url.netloc + ) + + # Find path to Rust router using current URL Path + path_to_router = request.url.path.removesuffix("/index/config.json") + + # Construct base URL for subsequent use + base_url = f"{request.url.scheme}://{netloc}{path_to_router}" + logger.debug(f"Base URL to Rust sub-router determined to be {base_url}") # Construct config file with the necessary endpoints config = { From f39a6c0055a52053040c6d636d78b784a0d14b7f Mon Sep 17 00:00:00 2001 From: Eu Pin Tien Date: Tue, 1 Apr 2025 15:46:58 +0100 Subject: [PATCH 3/8] Added parameters and logic to resolve original proxy path for use in documentation if HTTP request was passed to server via frontend --- src/murfey/server/__init__.py | 3 ++ src/murfey/server/api/bootstrap.py | 15 ++++++++ src/murfey/templates/base.html | 16 +++++---- src/murfey/templates/bootstrap.html | 53 ++++++++++++++++++----------- 4 files changed, 61 insertions(+), 26 deletions(-) diff --git a/src/murfey/server/__init__.py b/src/murfey/server/__init__.py index c7b2c1418..f59247959 100644 --- a/src/murfey/server/__init__.py +++ b/src/murfey/server/__init__.py @@ -216,6 +216,9 @@ def respond_with_template( "hostname": get_hostname(), "microscope": get_microscope(), "version": murfey.__version__, + # Extra parameters to reconstruct URLs for forwarded requests + "netloc": request.url.netloc, + "proxy_path": "", } if parameters: template_parameters.update(parameters) diff --git a/src/murfey/server/api/bootstrap.py b/src/murfey/server/api/bootstrap.py index ec1b935a9..52ab25da4 100644 --- a/src/murfey/server/api/bootstrap.py +++ 
b/src/murfey/server/api/bootstrap.py @@ -105,8 +105,23 @@ def get_bootstrap_instructions(request: Request): machine with no internet access. """ + # Constructs the netloc (hostname + port) and proxy path depending on if the + # request was forwarded via proxy + netloc = ( + f"{request.headers['X-Forwarded-Host']}:{request.headers['X-Forwarded-Port']}" + if request.headers.get("X-Forwarded-Host") + and request.headers.get("X-Forwarded-Port") + else request.url.netloc + ) + # Additional bit in URL path after the netloc caused by the proxy reroute + proxy_path = request.url.path.removesuffix(f"{bootstrap.prefix}/") + return respond_with_template( request=request, + parameters={ + "netloc": netloc, + "proxy_path": proxy_path, + }, filename="bootstrap.html", ) diff --git a/src/murfey/templates/base.html b/src/murfey/templates/base.html index 942518e3a..ce6fa129e 100644 --- a/src/murfey/templates/base.html +++ b/src/murfey/templates/base.html @@ -1,22 +1,24 @@ Murfey - {% block title %}{% endblock %} - +
- Home - Active Visits - Installation instructions - FastAPI PyPI + Home + Installation Instructions + FastAPI (PyPI)

diff --git a/src/murfey/templates/bootstrap.html b/src/murfey/templates/bootstrap.html index deb83f6ba..998b98c63 100644 --- a/src/murfey/templates/bootstrap.html +++ b/src/murfey/templates/bootstrap.html @@ -1,36 +1,39 @@ {% extends "base.html" %} {% block title %}Bootstrapping instructions{% endblock %} {% block content %}

Bootstrapping instructions

-

Installing a Linux Terminal

+

Installing a Linux Environment

Installing Cygwin

- If you already have a Cygwin install, rename it so that it doesn't get - overwritten (something like "[install name]-old"). + If you already have a Cygwin install that you would like to preserve, rename + it so that it doesn't get overwritten (something like "cygwin64-old").

Download the Cygwin setup executable using this - mirror, and then run the following from - a terminal (both Command Prompt and Windows Powershell work) + mirror, and then run + the following from a terminal (both Command Prompt and Windows Powershell + work)

-    $ setup-x86_64.exe -O -R C:\cygwin64 -s {{ request.url.scheme }}://{{ request.url.netloc }}/cygwin -P curl,python3,rsync -q
+    $ setup-x86_64.exe -O -R C:\cygwin64 -s {{ request.url.scheme }}://{{ netloc }}{{ proxy_path }}/cygwin -P curl,python3,rsync -q
 

- The Cygwin install sometimes hangs even when it is finished, hit Enter to - return to a command prompt. + This will install Cygwin with the minimum packages needed to run Murfey. The + Cygwin install command will occasionally hang after completion. In such an + event, just hit Enter to return to a normal command prompt window.

Installing MSYS2

MSYS2 is a lightweight Linux environment which provides compiler support for - the more modern programming languages used in the backend of Murfey's package - dependencies. + the more modern programming languages used by Murfey's package dependencies.

The Murfey server supports the forwarding of download requests to client PCs that cannot access the wider internet. Download the MSYS2 setup executable - using this mirror, and run the - executable using the default settings. + using this + mirror, + and run the executable using the default settings. This will install it to + C:\msys64.

By default, MSYS2 comes with preset lists of mirrors and servers that it @@ -48,7 +51,7 @@

Installing MSYS2

This is an example of what the URL to the Murfey server should look like:

     Server = https://repo.msys2.org/mingw/x86_64/  # Original URL
-    Server = {{ request.url.scheme }}://{{ request.url.netloc }}/msys2/mingw/x86_64  # Murfey URL
+    Server = {{ request.url.scheme }}://{{ netloc }}{{ proxy_path }}/msys2/mingw/x86_64/  # Murfey URL
 

MSYS2 comes with multiple environments, but UCRT64 is the most modern one. In @@ -57,11 +60,20 @@

Installing MSYS2

environment. This can be achieved using the following commands:

-    $ pacman -Syu --disable-download-timeout  # Downloads the package database and searches for updates
-    $ pacman -S rsync --disable-download-timeout
-    $ pacman -S mingw-w64-python-pip --disable-download-timeout
-    $ pacman -S mingw-w64-x86_64-rust --disable-download-timeout
+    $ pacman -Syu
+    $ pacman -S rsync
+    $ pacman -S mingw-w64-ucrt-x86_64-python-pip
+    $ pacman -S mingw-w64-ucrt-x86_64-rust
 
+

+ Other utility packages such as + vim can also be installed by + running + pacman -S <package-name>. + You can browse the other packages supported by + pacman by searching the repo using + pacman -Ss <package-name> +

Setting Up Python

@@ -73,7 +85,7 @@

Setting Up Python

Setting Up a Virtual Environment

To set up a virtual environment, run the following commands:

-    $ pip install virtualenv --index-url {{ request.url.scheme }}://{{ request.url.netloc }}/pypi --trusted-host {{ request.url.hostname }}
+    $ pip install virtualenv --index-url {{ request.url.scheme }}://{{ netloc }}{{ proxy_path }}/pypi --trusted-host {{ netloc }}
     $ virtualenv your-env-name  # Create the virtual environment
     $ source your-env-name/bin/activate  # Activate the virtual environment
 
@@ -84,6 +96,9 @@

Installing Murfey

commands:

-    $ pip install murfey[client] --index-url {{ request.url.scheme }}://{{ request.url.netloc }}/pypi --trusted-host {{ request.url.hostname }}
+    $ pip install murfey[client] --index-url {{ request.url.scheme }}://{{ netloc }}{{ proxy_path }}/pypi --trusted-host {{ netloc }}
+
+
+  If you wish to install the client-side dependencies needed to run Murfey via the web UI, replace murfey[client] with murfey[client,instrument-server].
 
{% endblock %} From 99baad30be415dd602fe4b4a4536095355293f2a Mon Sep 17 00:00:00 2001 From: Eu Pin Tien Date: Thu, 3 Apr 2025 11:48:43 +0100 Subject: [PATCH 4/8] Added endpoint to dynamically generate mirror URLS for MSYS2's 'pacman' to use; indented all MSYS2 endpoints one level down to prevent endpoint resolution conflicts --- src/murfey/server/api/bootstrap.py | 84 ++++++++++++++++++++++++++++-- 1 file changed, 79 insertions(+), 5 deletions(-) diff --git a/src/murfey/server/api/bootstrap.py b/src/murfey/server/api/bootstrap.py index 41043f155..cc4fb1542 100644 --- a/src/murfey/server/api/bootstrap.py +++ b/src/murfey/server/api/bootstrap.py @@ -19,6 +19,7 @@ import logging import random import re +import zipfile from io import BytesIO from urllib.parse import quote @@ -329,7 +330,80 @@ def parse_cygwin_request( ) -@msys2.get("/distrib/{setup_file}", response_class=StreamingResponse) +@msys2.get("/config/pacman.d.zip", response_class=StreamingResponse) +def get_pacman_mirrors(request: Request): + """ + Dynamically generates a zip file containing mirrorlist files that have been set + up to mirror the MSYS2 package database for each environment. + + The files in this folder should be pasted into, and overwrite, the 'mirrorlist' + files present in the %MSYS64%\\etc\\pacman.d folder. The default path to this + folder is C:\\msys64\\etc\\pacman.d. 
+ """ + + # Check if this is a forwarded request from somewhere else and construct netloc + netloc = ( + f"{request.headers['X-Forwarded-Host']}:{request.headers['X-Forwarded-Port']}" + if request.headers.get("X-Forwarded-Host") + and request.headers.get("X-Forwarded-Port") + else request.url.netloc + ) + + # Find path to Rust router using current URL Path + path_to_router = request.url.path.removesuffix("/config/pacman.d.zip") + + # Construct base URL for subsequent use + base_url = f"{request.url.scheme}://{netloc}{path_to_router}" + logger.debug(f"Base URL to MSYS2 sub-router determined to be {base_url}") + + # Construct package database mirrors + # Files are called mirrorlist.{environment} + # URL format: {scheme}://{netloc}{proxy_path}/{router_prefix}/path/to/repo + url_paths = { + "clang64": "mingw/clang64", + "mingw": "mingw/$repo", + "mingw32": "mingw/i686", + "mingw64": "mingw/x86_64", + "msys": "msys/$arch", + "ucrt64": "mingw/ucrt64", + } + # Construct file names and contents + mirror_lists = { + f"mirrorlist.{env}": "\n".join( + [ + "# See https://www.msys2.org/dev/mirrors", + "", + "## Primary", + f"Server = {base_url}/repo/{repo_path}", + "", + ] + ) + for env, repo_path in url_paths.items() + } + + # Create in-memory buffer for the ZIP file + zip_buffer = BytesIO() + + # Create a zip file in the buffer + with zipfile.ZipFile(zip_buffer, "w", zipfile.ZIP_DEFLATED) as zip_file: + for file_name, content in mirror_lists.items(): + zip_file.writestr(file_name, content) + zip_buffer.seek(0) # Move object pointer back to start + + # Construct and return streaming response + headers = { + "Content-Disposition": "attachment; filename=pacman.d.zip", + "Content-Length": str(zip_buffer.getbuffer().nbytes), + } + return StreamingResponse( + zip_buffer, + status_code=200, + headers=headers, + media_type="application/zip", + ) + + +@msys2.get("/repo/distrib/{setup_file}", response_class=StreamingResponse) def get_msys2_setup( request: Request, setup_file: str, @@ 
-366,7 +440,7 @@ def get_msys2_setup( ) -@msys2.get("/", response_class=Response) +@msys2.get("/repo/", response_class=Response) def get_msys2_main_index( request: Request, ) -> Response: @@ -406,7 +480,7 @@ def get_msys2_main_index( ) -@msys2.get("/{system}/", response_class=Response) +@msys2.get("/repo/{system}/", response_class=Response) def get_msys2_environment_index( request: Request, system: str, @@ -450,7 +524,7 @@ def get_msys2_environment_index( ) -@msys2.get("/{system}/{environment}/", response_class=Response) +@msys2.get("/repo/{system}/{environment}/", response_class=Response) def get_msys2_package_index( request: Request, system: str, @@ -479,7 +553,7 @@ def get_msys2_package_index( ) -@msys2.get("/{system}/{environment}/{package}", response_class=StreamingResponse) +@msys2.get("/repo/{system}/{environment}/{package}", response_class=StreamingResponse) def get_msys2_package_file( request: Request, system: str, From 3856f3f3d8b7cbc75a398a049529d489d05c4009 Mon Sep 17 00:00:00 2001 From: Eu Pin Tien Date: Thu, 3 Apr 2025 13:57:40 +0100 Subject: [PATCH 5/8] Added instructions on how to configure Rust on MSYS2; updated earlier instructions up to that point --- src/murfey/templates/bootstrap.html | 93 +++++++++++++++++++++-------- 1 file changed, 67 insertions(+), 26 deletions(-) diff --git a/src/murfey/templates/bootstrap.html b/src/murfey/templates/bootstrap.html index 998b98c63..414723bba 100644 --- a/src/murfey/templates/bootstrap.html +++ b/src/murfey/templates/bootstrap.html @@ -1,17 +1,21 @@ {% extends "base.html" %} {% block title %}Bootstrapping instructions{% endblock %} {% block content %}

Bootstrapping instructions

-

Installing a Linux Environment

-

Installing Cygwin

+

1. Setting Up a POSIX Environment

+

Option 1: Installing Cygwin

- If you already have a Cygwin install that you would like to preserve, rename - it so that it doesn't get overwritten (something like "cygwin64-old"). + Cygwin is a lightweight POSIX environment that provides the minimum + requirements needed for a Windows client PC to efficiently transfer files to a + Unix storage server. However, it currently does not support building and + installing packages which have been written in Rust, which many modern Python + packages, including Murfey's dependencies, now make use of. Nevertheless, + older versions of Murfey will still work with it.

- Download the Cygwin setup executable using this + To install, download the Cygwin setup executable using this mirror, and then run the following from a terminal (both Command Prompt and Windows Powershell - work) + work):

     $ setup-x86_64.exe -O -R C:\cygwin64 -s {{ request.url.scheme }}://{{ netloc }}{{ proxy_path }}/cygwin -P curl,python3,rsync -q
@@ -21,38 +25,43 @@ 

Installing Cygwin

Cygwin install command will occasionally hang after completion. In such an event, just hit Enter to return to a normal command prompt window.

+

+ If you already have a Cygwin install that you would like to preserve, rename + it so that it doesn't get overwritten (something like "cygwin64-old") before + running the command line above. +

-

Installing MSYS2

+

Option 2: Installing MSYS2

- MSYS2 is a lightweight Linux environment which provides compiler support for + MSYS2 is a lightweight POSIX environment which provides compiler support for the more modern programming languages used by Murfey's package dependencies.

- The Murfey server supports the forwarding of download requests to client PCs - that cannot access the wider internet. Download the MSYS2 setup executable + The Murfey server supports the forwarding of download requests to + network-restricted client PCs. To install MSYS2, download the setup executable using this - mirror, - and run the executable using the default settings. This will install it to + mirror, then run it using the default settings. This will install MSYS2 to C:\msys64.

+

A. Setting Up the Package Manager (If Network-Restricted)

By default, MSYS2 comes with preset lists of mirrors and servers that it - installs its packages from. These will need to be disabled, and replaced with - URLs of the same format that point to the Murfey server the client PC is - connected to. + installs its packages from. On a network-restricted PC, these will need to be + replaced with files that point to the Murfey server instead. They can be + downloaded via this + link.

- These lists can be found in the following folder, if the default installation - options were chosen: + Once downloaded, extract the files to + %MSYS64%\etc\pacman.d. If MSYS2 + was installed at the default location, this will be:

-    C:\msys64\etc\pacman.d\mirrorlist.{environment}
-
-

This is an example of what the URL to the Murfey server should look like:

-
-    Server = https://repo.msys2.org/mingw/x86_64/  # Original URL
-    Server = {{ request.url.scheme }}://{{ netloc }}{{ proxy_path }}/msys2/mingw/x86_64/  # Murfey URL
+    C:\msys64\etc\pacman.d
 
+

B. Installing Dependencies

MSYS2 comes with multiple environments, but UCRT64 is the most modern one. In order for the Murfey client to be able to install and run its dependencies @@ -70,12 +79,44 @@

Installing MSYS2

vim can also be installed by running pacman -S <package-name>. - You can browse the other packages supported by - pacman by searching the repo using + You can browse the other packages available on MSYS2 by searching the repo + using pacman -Ss <package-name>

+

C. Configuring the Rust Package Manager (If Network-Restricted)

+

+ Many newer Python packages now have dependencies written in Rust that allow + them to operate more efficiently. MSYS2 supports the compilation and + installation of such packages, and is thus our recommended POSIX environment + to use Murfey with. +

+

+ Rust packages and their associated metadata are, by default, stored in + https://crates.io. Package + download and installation is in turn conducted by the package manager + cargo. For network-restricted + client PCs, Murfey also supports mirroring + https://crates.io to facilitate + the installation of Rust packages. +

+

+ To configure cargo, simply + download the pre-configured + config.toml file via this + link. This file should + then be pasted in a .cargo folder, + which, by default, should be located in your User Profile homespace: +

+
%USERPROFILE%\.cargo
+

For a user named Murfey, for example, this would take the form:

+
C:\Users\Murfey\.cargo
+

+ With this file configured, + cargo will know to look for + package metadata and files via the Murfey mirror instead. +

-

Setting Up Python

+

2. Setting Up Python

Once Python and pip are installed in the terminal, you have the option to install Murfey in either the base environment or a virtual environment. The From a681b4b7ccabac93a11df06303e08d7c1ddddd5f Mon Sep 17 00:00:00 2001 From: Eu Pin Tien Date: Thu, 3 Apr 2025 14:29:28 +0100 Subject: [PATCH 6/8] Added instructions for runnning MSYS2 through Command Prompt --- src/murfey/templates/bootstrap.html | 43 ++++++++++++++++++++++++----- 1 file changed, 36 insertions(+), 7 deletions(-) diff --git a/src/murfey/templates/bootstrap.html b/src/murfey/templates/bootstrap.html index 414723bba..70b6c009c 100644 --- a/src/murfey/templates/bootstrap.html +++ b/src/murfey/templates/bootstrap.html @@ -107,15 +107,41 @@

C. Configuring the Rust Package Manager (If Network-Restricted)

then be pasted in a .cargo folder, which, by default, should be located in your User Profile homespace:

-
%USERPROFILE%\.cargo
+
+    %USERPROFILE%\.cargo
+

For a user named Murfey, for example, this would take the form:

-
C:\Users\Murfey\.cargo
+
+    C:\Users\Murfey\.cargo
+

With this file configured, cargo will know to look for package metadata and files via the Murfey mirror instead.

+

D. Running MSYS2 Through Command Prompt

+

+ In order to run Murfey via the terminal, MSYS2 will have to be run through + Windows' Command Prompt terminal, as there is an ongoing bug with MSYS2's + pre-packaged terminal that prevents mouse interaction with interactive apps in + the terminal. +

+

+ To do so, simply right-click on your desktop and navigate to + New > Shortcut. When prompted for the location of the item, enter + the following into the text box: +

+
+    cmd.exe /k "C:\msys64\msys2_shell.cmd -defterm -no-start -ucrt64 -shell bash"
+
+

+ After naming the shortcut, click Finish to create the shortcut. This will run + a UCRT64 instance of MSYS2 through the Command Prompt terminal that starts you + off in MSYS2's default home directory. You can proceed to customise the + shortcut icon to taste. +

+

2. Setting Up Python

Once Python and pip are installed in the terminal, you have the option to @@ -123,14 +149,14 @@

2. Setting Up Python

base environment is simpler, but uninstallation of the Python packages in the future could potentially interfere with the base environment's functionality.

-

Setting Up a Virtual Environment

+

A. (Optional) Setting Up a Virtual Environment

To set up a virtual environment, run the following commands:

     $ pip install virtualenv --index-url {{ request.url.scheme }}://{{ netloc }}{{ proxy_path }}/pypi --trusted-host {{ netloc }}
     $ virtualenv your-env-name  # Create the virtual environment
     $ source your-env-name/bin/activate  # Activate the virtual environment
 
-

Installing Murfey

+

B. Installing Murfey

You can install Murfey in the Python environment (the base one or a virtual environment) in either the Cygwin or UCRT64 terminal using the following @@ -139,7 +165,10 @@

Installing Murfey

     $ pip install murfey[client] --index-url {{ request.url.scheme }}://{{ netloc }}{{ proxy_path }}/pypi --trusted-host {{ netloc }}
 
-
-  If you wish to install the client-side dependencies needed to run Murfey via the web UI, replace murfey[client] with murfey[client,instrument-server].
-
+

+ If you wish to install the client-side dependencies needed to run Murfey via + the web UI, replace + murfey[client] with + murfey[client,instrument-server]. +

{% endblock %} From 344aa92ad59528060f867d2f7e726d5da85d965e Mon Sep 17 00:00:00 2001 From: Eu Pin Tien Date: Thu, 3 Apr 2025 16:23:46 +0100 Subject: [PATCH 7/8] Added 'msys2-runtime-3.6' to list of packages to install for MSYS2 --- src/murfey/templates/bootstrap.html | 1 + 1 file changed, 1 insertion(+) diff --git a/src/murfey/templates/bootstrap.html b/src/murfey/templates/bootstrap.html index 70b6c009c..1a10c9397 100644 --- a/src/murfey/templates/bootstrap.html +++ b/src/murfey/templates/bootstrap.html @@ -70,6 +70,7 @@

B. Installing Dependencies

     $ pacman -Syu
+    $ pacman -S msys2-runtime-3.6
     $ pacman -S rsync
     $ pacman -S mingw-w64-ucrt-x86_64-python-pip
     $ pacman -S mingw-w64-ucrt-x86_64-rust

From 4b2c9eba244146e8093f7cf2e96d9faa96b67a68 Mon Sep 17 00:00:00 2001
From: Eu Pin Tien 
Date: Thu, 3 Apr 2025 16:34:20 +0100
Subject: [PATCH 8/8] Provide different font options for code

---
 src/murfey/templates/bootstrap.html | 125 ++++++++++++++++++++--------
 1 file changed, 90 insertions(+), 35 deletions(-)

diff --git a/src/murfey/templates/bootstrap.html b/src/murfey/templates/bootstrap.html
index 1a10c9397..3880e4ff1 100644
--- a/src/murfey/templates/bootstrap.html
+++ b/src/murfey/templates/bootstrap.html
@@ -17,7 +17,9 @@ 

Option 1: Installing Cygwin

the following from a terminal (both Command Prompt and Windows Powershell work):

-
+
     $ setup-x86_64.exe -O -R C:\cygwin64 -s {{ request.url.scheme }}://{{ netloc }}{{ proxy_path }}/cygwin -P curl,python3,rsync -q
 

@@ -55,10 +57,14 @@

A. Setting Up the Package Manager (If Network-Restricted)

Once downloaded, extract the files to - %MSYS64%\etc\pacman.d. If MSYS2 - was installed at the default location, this will be: -

-
+  %MSYS64%\etc\pacman.d. If MSYS2 was installed at the default location, this will be:
+

+
     C:\msys64\etc\pacman.d
 

B. Installing Dependencies

@@ -68,7 +74,9 @@

B. Installing Dependencies

properly, the following packages will need to be installed in the UCRT64 environment. This can be achieved using the following commands:

-
+
     $ pacman -Syu
     $ pacman -S msys2-runtime-3.6
     $ pacman -S rsync
@@ -77,12 +85,20 @@ 

B. Installing Dependencies

Other utility packages such as - vim can also be installed by - running - pacman -S <package-name>. - You can browse the other packages available on MSYS2 by searching the repo + vim + can also be installed by running + pacman -S <package-name>. You can browse the other packages available on MSYS2 by searching the repo using - pacman -Ss <package-name> + pacman -Ss <package-name>

C. Configuring the Rust Package Manager (If Network-Restricted)

@@ -93,32 +109,58 @@

C. Configuring the Rust Package Manager (If Network-Restricted)

Rust packages and their associated metadata are, by default, stored in - https://crates.io. Package - download and installation is in turn conducted by the package manager - cargo. For network-restricted - client PCs, Murfey also supports mirroring - https://crates.io to facilitate - the installation of Rust packages. -

-

- To configure cargo, simply - download the pre-configured - config.toml file via this - link. This file should - then be pasted in a .cargo folder, - which, by default, should be located in your User Profile homespace: -

-
+  https://crates.io. Package download and installation is in turn conducted by the package
+  manager
+  cargo. For network-restricted client PCs, Murfey also supports mirroring
+  https://crates.io
+  to facilitate the installation of Rust packages.
+

+

+ To configure + cargo, simply download the pre-configured + config.toml + file via this link. This + file should then be pasted in a + .cargo + folder, which, by default, should be located in your User Profile homespace: +

+
     %USERPROFILE%\.cargo
 

For a user named Murfey, for example, this would take the form:

-
+
     C:\Users\Murfey\.cargo
 

With this file configured, - cargo will know to look for - package metadata and files via the Murfey mirror instead. + cargo + will know to look for package metadata and files via the Murfey mirror + instead.

D. Running MSYS2 Through Command Prompt

@@ -133,7 +175,9 @@

D. Running MSYS2 Through Command Prompt

New > Shortcut. When prompted for the location of the item, enter the following into the text box:

-
+
     cmd.exe /k "C:\msys64\msys2_shell.cmd -defterm -no-start -ucrt64 -shell bash"
 

@@ -152,7 +196,9 @@

2. Setting Up Python

A. (Optional) Setting Up a Virtual Environment

To set up a virtual environment, run the following commands:

-
+
     $ pip install virtualenv --index-url {{ request.url.scheme }}://{{ netloc }}{{ proxy_path }}/pypi --trusted-host {{ netloc }}
     $ virtualenv your-env-name  # Create the virtual environment
     $ source your-env-name/bin/activate  # Activate the virtual environment
@@ -163,13 +209,22 @@ 

B. Installing Murfey

environment) in either the Cygwin or UCRT64 terminal using the following commands:

-
+
     $ pip install murfey[client] --index-url {{ request.url.scheme }}://{{ netloc }}{{ proxy_path }}/pypi --trusted-host {{ netloc }}
 

If you wish to install the client-side dependencies needed to run Murfey via the web UI, replace - murfey[client] with - murfey[client,instrument-server]. + murfey[client] + with + murfey[client,instrument-server].

{% endblock %}